agno 2.2.13__py3-none-any.whl → 2.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (383)
  1. agno/agent/__init__.py +6 -0
  2. agno/agent/agent.py +5252 -3145
  3. agno/agent/remote.py +525 -0
  4. agno/api/api.py +2 -0
  5. agno/client/__init__.py +3 -0
  6. agno/client/a2a/__init__.py +10 -0
  7. agno/client/a2a/client.py +554 -0
  8. agno/client/a2a/schemas.py +112 -0
  9. agno/client/a2a/utils.py +369 -0
  10. agno/client/os.py +2669 -0
  11. agno/compression/__init__.py +3 -0
  12. agno/compression/manager.py +247 -0
  13. agno/culture/manager.py +2 -2
  14. agno/db/base.py +927 -6
  15. agno/db/dynamo/dynamo.py +788 -2
  16. agno/db/dynamo/schemas.py +128 -0
  17. agno/db/dynamo/utils.py +26 -3
  18. agno/db/firestore/firestore.py +674 -50
  19. agno/db/firestore/schemas.py +41 -0
  20. agno/db/firestore/utils.py +25 -10
  21. agno/db/gcs_json/gcs_json_db.py +506 -3
  22. agno/db/gcs_json/utils.py +14 -2
  23. agno/db/in_memory/in_memory_db.py +203 -4
  24. agno/db/in_memory/utils.py +14 -2
  25. agno/db/json/json_db.py +498 -2
  26. agno/db/json/utils.py +14 -2
  27. agno/db/migrations/manager.py +199 -0
  28. agno/db/migrations/utils.py +19 -0
  29. agno/db/migrations/v1_to_v2.py +54 -16
  30. agno/db/migrations/versions/__init__.py +0 -0
  31. agno/db/migrations/versions/v2_3_0.py +977 -0
  32. agno/db/mongo/async_mongo.py +1013 -39
  33. agno/db/mongo/mongo.py +684 -4
  34. agno/db/mongo/schemas.py +48 -0
  35. agno/db/mongo/utils.py +17 -0
  36. agno/db/mysql/__init__.py +2 -1
  37. agno/db/mysql/async_mysql.py +2958 -0
  38. agno/db/mysql/mysql.py +722 -53
  39. agno/db/mysql/schemas.py +77 -11
  40. agno/db/mysql/utils.py +151 -8
  41. agno/db/postgres/async_postgres.py +1254 -137
  42. agno/db/postgres/postgres.py +2316 -93
  43. agno/db/postgres/schemas.py +153 -21
  44. agno/db/postgres/utils.py +22 -7
  45. agno/db/redis/redis.py +531 -3
  46. agno/db/redis/schemas.py +36 -0
  47. agno/db/redis/utils.py +31 -15
  48. agno/db/schemas/evals.py +1 -0
  49. agno/db/schemas/memory.py +20 -9
  50. agno/db/singlestore/schemas.py +70 -1
  51. agno/db/singlestore/singlestore.py +737 -74
  52. agno/db/singlestore/utils.py +13 -3
  53. agno/db/sqlite/async_sqlite.py +1069 -89
  54. agno/db/sqlite/schemas.py +133 -1
  55. agno/db/sqlite/sqlite.py +2203 -165
  56. agno/db/sqlite/utils.py +21 -11
  57. agno/db/surrealdb/models.py +25 -0
  58. agno/db/surrealdb/surrealdb.py +603 -1
  59. agno/db/utils.py +60 -0
  60. agno/eval/__init__.py +26 -3
  61. agno/eval/accuracy.py +25 -12
  62. agno/eval/agent_as_judge.py +871 -0
  63. agno/eval/base.py +29 -0
  64. agno/eval/performance.py +10 -4
  65. agno/eval/reliability.py +22 -13
  66. agno/eval/utils.py +2 -1
  67. agno/exceptions.py +42 -0
  68. agno/hooks/__init__.py +3 -0
  69. agno/hooks/decorator.py +164 -0
  70. agno/integrations/discord/client.py +13 -2
  71. agno/knowledge/__init__.py +4 -0
  72. agno/knowledge/chunking/code.py +90 -0
  73. agno/knowledge/chunking/document.py +65 -4
  74. agno/knowledge/chunking/fixed.py +4 -1
  75. agno/knowledge/chunking/markdown.py +102 -11
  76. agno/knowledge/chunking/recursive.py +2 -2
  77. agno/knowledge/chunking/semantic.py +130 -48
  78. agno/knowledge/chunking/strategy.py +18 -0
  79. agno/knowledge/embedder/azure_openai.py +0 -1
  80. agno/knowledge/embedder/google.py +1 -1
  81. agno/knowledge/embedder/mistral.py +1 -1
  82. agno/knowledge/embedder/nebius.py +1 -1
  83. agno/knowledge/embedder/openai.py +16 -12
  84. agno/knowledge/filesystem.py +412 -0
  85. agno/knowledge/knowledge.py +4261 -1199
  86. agno/knowledge/protocol.py +134 -0
  87. agno/knowledge/reader/arxiv_reader.py +3 -2
  88. agno/knowledge/reader/base.py +9 -7
  89. agno/knowledge/reader/csv_reader.py +91 -42
  90. agno/knowledge/reader/docx_reader.py +9 -10
  91. agno/knowledge/reader/excel_reader.py +225 -0
  92. agno/knowledge/reader/field_labeled_csv_reader.py +38 -48
  93. agno/knowledge/reader/firecrawl_reader.py +3 -2
  94. agno/knowledge/reader/json_reader.py +16 -22
  95. agno/knowledge/reader/markdown_reader.py +15 -14
  96. agno/knowledge/reader/pdf_reader.py +33 -28
  97. agno/knowledge/reader/pptx_reader.py +9 -10
  98. agno/knowledge/reader/reader_factory.py +135 -1
  99. agno/knowledge/reader/s3_reader.py +8 -16
  100. agno/knowledge/reader/tavily_reader.py +3 -3
  101. agno/knowledge/reader/text_reader.py +15 -14
  102. agno/knowledge/reader/utils/__init__.py +17 -0
  103. agno/knowledge/reader/utils/spreadsheet.py +114 -0
  104. agno/knowledge/reader/web_search_reader.py +8 -65
  105. agno/knowledge/reader/website_reader.py +16 -13
  106. agno/knowledge/reader/wikipedia_reader.py +36 -3
  107. agno/knowledge/reader/youtube_reader.py +3 -2
  108. agno/knowledge/remote_content/__init__.py +33 -0
  109. agno/knowledge/remote_content/config.py +266 -0
  110. agno/knowledge/remote_content/remote_content.py +105 -17
  111. agno/knowledge/utils.py +76 -22
  112. agno/learn/__init__.py +71 -0
  113. agno/learn/config.py +463 -0
  114. agno/learn/curate.py +185 -0
  115. agno/learn/machine.py +725 -0
  116. agno/learn/schemas.py +1114 -0
  117. agno/learn/stores/__init__.py +38 -0
  118. agno/learn/stores/decision_log.py +1156 -0
  119. agno/learn/stores/entity_memory.py +3275 -0
  120. agno/learn/stores/learned_knowledge.py +1583 -0
  121. agno/learn/stores/protocol.py +117 -0
  122. agno/learn/stores/session_context.py +1217 -0
  123. agno/learn/stores/user_memory.py +1495 -0
  124. agno/learn/stores/user_profile.py +1220 -0
  125. agno/learn/utils.py +209 -0
  126. agno/media.py +22 -6
  127. agno/memory/__init__.py +14 -1
  128. agno/memory/manager.py +223 -8
  129. agno/memory/strategies/__init__.py +15 -0
  130. agno/memory/strategies/base.py +66 -0
  131. agno/memory/strategies/summarize.py +196 -0
  132. agno/memory/strategies/types.py +37 -0
  133. agno/models/aimlapi/aimlapi.py +17 -0
  134. agno/models/anthropic/claude.py +434 -59
  135. agno/models/aws/bedrock.py +121 -20
  136. agno/models/aws/claude.py +131 -274
  137. agno/models/azure/ai_foundry.py +10 -6
  138. agno/models/azure/openai_chat.py +33 -10
  139. agno/models/base.py +1162 -561
  140. agno/models/cerebras/cerebras.py +120 -24
  141. agno/models/cerebras/cerebras_openai.py +21 -2
  142. agno/models/cohere/chat.py +65 -6
  143. agno/models/cometapi/cometapi.py +18 -1
  144. agno/models/dashscope/dashscope.py +2 -3
  145. agno/models/deepinfra/deepinfra.py +18 -1
  146. agno/models/deepseek/deepseek.py +69 -3
  147. agno/models/fireworks/fireworks.py +18 -1
  148. agno/models/google/gemini.py +959 -89
  149. agno/models/google/utils.py +22 -0
  150. agno/models/groq/groq.py +48 -18
  151. agno/models/huggingface/huggingface.py +17 -6
  152. agno/models/ibm/watsonx.py +16 -6
  153. agno/models/internlm/internlm.py +18 -1
  154. agno/models/langdb/langdb.py +13 -1
  155. agno/models/litellm/chat.py +88 -9
  156. agno/models/litellm/litellm_openai.py +18 -1
  157. agno/models/message.py +24 -5
  158. agno/models/meta/llama.py +40 -13
  159. agno/models/meta/llama_openai.py +22 -21
  160. agno/models/metrics.py +12 -0
  161. agno/models/mistral/mistral.py +8 -4
  162. agno/models/n1n/__init__.py +3 -0
  163. agno/models/n1n/n1n.py +57 -0
  164. agno/models/nebius/nebius.py +6 -7
  165. agno/models/nvidia/nvidia.py +20 -3
  166. agno/models/ollama/__init__.py +2 -0
  167. agno/models/ollama/chat.py +17 -6
  168. agno/models/ollama/responses.py +100 -0
  169. agno/models/openai/__init__.py +2 -0
  170. agno/models/openai/chat.py +117 -26
  171. agno/models/openai/open_responses.py +46 -0
  172. agno/models/openai/responses.py +110 -32
  173. agno/models/openrouter/__init__.py +2 -0
  174. agno/models/openrouter/openrouter.py +67 -2
  175. agno/models/openrouter/responses.py +146 -0
  176. agno/models/perplexity/perplexity.py +19 -1
  177. agno/models/portkey/portkey.py +7 -6
  178. agno/models/requesty/requesty.py +19 -2
  179. agno/models/response.py +20 -2
  180. agno/models/sambanova/sambanova.py +20 -3
  181. agno/models/siliconflow/siliconflow.py +19 -2
  182. agno/models/together/together.py +20 -3
  183. agno/models/vercel/v0.py +20 -3
  184. agno/models/vertexai/claude.py +124 -4
  185. agno/models/vllm/vllm.py +19 -14
  186. agno/models/xai/xai.py +19 -2
  187. agno/os/app.py +467 -137
  188. agno/os/auth.py +253 -5
  189. agno/os/config.py +22 -0
  190. agno/os/interfaces/a2a/a2a.py +7 -6
  191. agno/os/interfaces/a2a/router.py +635 -26
  192. agno/os/interfaces/a2a/utils.py +32 -33
  193. agno/os/interfaces/agui/agui.py +5 -3
  194. agno/os/interfaces/agui/router.py +26 -16
  195. agno/os/interfaces/agui/utils.py +97 -57
  196. agno/os/interfaces/base.py +7 -7
  197. agno/os/interfaces/slack/router.py +16 -7
  198. agno/os/interfaces/slack/slack.py +7 -7
  199. agno/os/interfaces/whatsapp/router.py +35 -7
  200. agno/os/interfaces/whatsapp/security.py +3 -1
  201. agno/os/interfaces/whatsapp/whatsapp.py +11 -8
  202. agno/os/managers.py +326 -0
  203. agno/os/mcp.py +652 -79
  204. agno/os/middleware/__init__.py +4 -0
  205. agno/os/middleware/jwt.py +718 -115
  206. agno/os/middleware/trailing_slash.py +27 -0
  207. agno/os/router.py +105 -1558
  208. agno/os/routers/agents/__init__.py +3 -0
  209. agno/os/routers/agents/router.py +655 -0
  210. agno/os/routers/agents/schema.py +288 -0
  211. agno/os/routers/components/__init__.py +3 -0
  212. agno/os/routers/components/components.py +475 -0
  213. agno/os/routers/database.py +155 -0
  214. agno/os/routers/evals/evals.py +111 -18
  215. agno/os/routers/evals/schemas.py +38 -5
  216. agno/os/routers/evals/utils.py +80 -11
  217. agno/os/routers/health.py +3 -3
  218. agno/os/routers/knowledge/knowledge.py +284 -35
  219. agno/os/routers/knowledge/schemas.py +14 -2
  220. agno/os/routers/memory/memory.py +274 -11
  221. agno/os/routers/memory/schemas.py +44 -3
  222. agno/os/routers/metrics/metrics.py +30 -15
  223. agno/os/routers/metrics/schemas.py +10 -6
  224. agno/os/routers/registry/__init__.py +3 -0
  225. agno/os/routers/registry/registry.py +337 -0
  226. agno/os/routers/session/session.py +143 -14
  227. agno/os/routers/teams/__init__.py +3 -0
  228. agno/os/routers/teams/router.py +550 -0
  229. agno/os/routers/teams/schema.py +280 -0
  230. agno/os/routers/traces/__init__.py +3 -0
  231. agno/os/routers/traces/schemas.py +414 -0
  232. agno/os/routers/traces/traces.py +549 -0
  233. agno/os/routers/workflows/__init__.py +3 -0
  234. agno/os/routers/workflows/router.py +757 -0
  235. agno/os/routers/workflows/schema.py +139 -0
  236. agno/os/schema.py +157 -584
  237. agno/os/scopes.py +469 -0
  238. agno/os/settings.py +3 -0
  239. agno/os/utils.py +574 -185
  240. agno/reasoning/anthropic.py +85 -1
  241. agno/reasoning/azure_ai_foundry.py +93 -1
  242. agno/reasoning/deepseek.py +102 -2
  243. agno/reasoning/default.py +6 -7
  244. agno/reasoning/gemini.py +87 -3
  245. agno/reasoning/groq.py +109 -2
  246. agno/reasoning/helpers.py +6 -7
  247. agno/reasoning/manager.py +1238 -0
  248. agno/reasoning/ollama.py +93 -1
  249. agno/reasoning/openai.py +115 -1
  250. agno/reasoning/vertexai.py +85 -1
  251. agno/registry/__init__.py +3 -0
  252. agno/registry/registry.py +68 -0
  253. agno/remote/__init__.py +3 -0
  254. agno/remote/base.py +581 -0
  255. agno/run/__init__.py +2 -4
  256. agno/run/agent.py +134 -19
  257. agno/run/base.py +49 -1
  258. agno/run/cancel.py +65 -52
  259. agno/run/cancellation_management/__init__.py +9 -0
  260. agno/run/cancellation_management/base.py +78 -0
  261. agno/run/cancellation_management/in_memory_cancellation_manager.py +100 -0
  262. agno/run/cancellation_management/redis_cancellation_manager.py +236 -0
  263. agno/run/requirement.py +181 -0
  264. agno/run/team.py +111 -19
  265. agno/run/workflow.py +2 -1
  266. agno/session/agent.py +57 -92
  267. agno/session/summary.py +1 -1
  268. agno/session/team.py +62 -115
  269. agno/session/workflow.py +353 -57
  270. agno/skills/__init__.py +17 -0
  271. agno/skills/agent_skills.py +377 -0
  272. agno/skills/errors.py +32 -0
  273. agno/skills/loaders/__init__.py +4 -0
  274. agno/skills/loaders/base.py +27 -0
  275. agno/skills/loaders/local.py +216 -0
  276. agno/skills/skill.py +65 -0
  277. agno/skills/utils.py +107 -0
  278. agno/skills/validator.py +277 -0
  279. agno/table.py +10 -0
  280. agno/team/__init__.py +5 -1
  281. agno/team/remote.py +447 -0
  282. agno/team/team.py +3769 -2202
  283. agno/tools/brandfetch.py +27 -18
  284. agno/tools/browserbase.py +225 -16
  285. agno/tools/crawl4ai.py +3 -0
  286. agno/tools/duckduckgo.py +25 -71
  287. agno/tools/exa.py +0 -21
  288. agno/tools/file.py +14 -13
  289. agno/tools/file_generation.py +12 -6
  290. agno/tools/firecrawl.py +15 -7
  291. agno/tools/function.py +94 -113
  292. agno/tools/google_bigquery.py +11 -2
  293. agno/tools/google_drive.py +4 -3
  294. agno/tools/knowledge.py +9 -4
  295. agno/tools/mcp/mcp.py +301 -18
  296. agno/tools/mcp/multi_mcp.py +269 -14
  297. agno/tools/mem0.py +11 -10
  298. agno/tools/memory.py +47 -46
  299. agno/tools/mlx_transcribe.py +10 -7
  300. agno/tools/models/nebius.py +5 -5
  301. agno/tools/models_labs.py +20 -10
  302. agno/tools/nano_banana.py +151 -0
  303. agno/tools/parallel.py +0 -7
  304. agno/tools/postgres.py +76 -36
  305. agno/tools/python.py +14 -6
  306. agno/tools/reasoning.py +30 -23
  307. agno/tools/redshift.py +406 -0
  308. agno/tools/shopify.py +1519 -0
  309. agno/tools/spotify.py +919 -0
  310. agno/tools/tavily.py +4 -1
  311. agno/tools/toolkit.py +253 -18
  312. agno/tools/websearch.py +93 -0
  313. agno/tools/website.py +1 -1
  314. agno/tools/wikipedia.py +1 -1
  315. agno/tools/workflow.py +56 -48
  316. agno/tools/yfinance.py +12 -11
  317. agno/tracing/__init__.py +12 -0
  318. agno/tracing/exporter.py +161 -0
  319. agno/tracing/schemas.py +276 -0
  320. agno/tracing/setup.py +112 -0
  321. agno/utils/agent.py +251 -10
  322. agno/utils/cryptography.py +22 -0
  323. agno/utils/dttm.py +33 -0
  324. agno/utils/events.py +264 -7
  325. agno/utils/hooks.py +111 -3
  326. agno/utils/http.py +161 -2
  327. agno/utils/mcp.py +49 -8
  328. agno/utils/media.py +22 -1
  329. agno/utils/models/ai_foundry.py +9 -2
  330. agno/utils/models/claude.py +20 -5
  331. agno/utils/models/cohere.py +9 -2
  332. agno/utils/models/llama.py +9 -2
  333. agno/utils/models/mistral.py +4 -2
  334. agno/utils/os.py +0 -0
  335. agno/utils/print_response/agent.py +99 -16
  336. agno/utils/print_response/team.py +223 -24
  337. agno/utils/print_response/workflow.py +0 -2
  338. agno/utils/prompts.py +8 -6
  339. agno/utils/remote.py +23 -0
  340. agno/utils/response.py +1 -13
  341. agno/utils/string.py +91 -2
  342. agno/utils/team.py +62 -12
  343. agno/utils/tokens.py +657 -0
  344. agno/vectordb/base.py +15 -2
  345. agno/vectordb/cassandra/cassandra.py +1 -1
  346. agno/vectordb/chroma/__init__.py +2 -1
  347. agno/vectordb/chroma/chromadb.py +468 -23
  348. agno/vectordb/clickhouse/clickhousedb.py +1 -1
  349. agno/vectordb/couchbase/couchbase.py +6 -2
  350. agno/vectordb/lancedb/lance_db.py +7 -38
  351. agno/vectordb/lightrag/lightrag.py +7 -6
  352. agno/vectordb/milvus/milvus.py +118 -84
  353. agno/vectordb/mongodb/__init__.py +2 -1
  354. agno/vectordb/mongodb/mongodb.py +14 -31
  355. agno/vectordb/pgvector/pgvector.py +120 -66
  356. agno/vectordb/pineconedb/pineconedb.py +2 -19
  357. agno/vectordb/qdrant/__init__.py +2 -1
  358. agno/vectordb/qdrant/qdrant.py +33 -56
  359. agno/vectordb/redis/__init__.py +2 -1
  360. agno/vectordb/redis/redisdb.py +19 -31
  361. agno/vectordb/singlestore/singlestore.py +17 -9
  362. agno/vectordb/surrealdb/surrealdb.py +2 -38
  363. agno/vectordb/weaviate/__init__.py +2 -1
  364. agno/vectordb/weaviate/weaviate.py +7 -3
  365. agno/workflow/__init__.py +5 -1
  366. agno/workflow/agent.py +2 -2
  367. agno/workflow/condition.py +12 -10
  368. agno/workflow/loop.py +28 -9
  369. agno/workflow/parallel.py +21 -13
  370. agno/workflow/remote.py +362 -0
  371. agno/workflow/router.py +12 -9
  372. agno/workflow/step.py +261 -36
  373. agno/workflow/steps.py +12 -8
  374. agno/workflow/types.py +40 -77
  375. agno/workflow/workflow.py +939 -213
  376. {agno-2.2.13.dist-info → agno-2.4.3.dist-info}/METADATA +134 -181
  377. agno-2.4.3.dist-info/RECORD +677 -0
  378. {agno-2.2.13.dist-info → agno-2.4.3.dist-info}/WHEEL +1 -1
  379. agno/tools/googlesearch.py +0 -98
  380. agno/tools/memori.py +0 -339
  381. agno-2.2.13.dist-info/RECORD +0 -575
  382. {agno-2.2.13.dist-info → agno-2.4.3.dist-info}/licenses/LICENSE +0 -0
  383. {agno-2.2.13.dist-info → agno-2.4.3.dist-info}/top_level.txt +0 -0
agno/models/base.py CHANGED
@@ -5,9 +5,10 @@ from abc import ABC, abstractmethod
 from dataclasses import dataclass, field
 from hashlib import md5
 from pathlib import Path
-from time import time
+from time import sleep, time
 from types import AsyncGeneratorType, GeneratorType
 from typing import (
+    TYPE_CHECKING,
     Any,
     AsyncIterator,
     Dict,
@@ -15,21 +16,26 @@ from typing import (
     List,
     Literal,
     Optional,
+    Sequence,
     Tuple,
     Type,
     Union,
     get_args,
 )
+
+if TYPE_CHECKING:
+    from agno.compression.manager import CompressionManager
 from uuid import uuid4
 
 from pydantic import BaseModel
 
-from agno.exceptions import AgentRunException
+from agno.exceptions import AgentRunException, ModelProviderError, RetryableModelProviderError
 from agno.media import Audio, File, Image, Video
 from agno.models.message import Citations, Message
 from agno.models.metrics import Metrics
 from agno.models.response import ModelResponse, ModelResponseEvent, ToolExecution
 from agno.run.agent import CustomEvent, RunContentEvent, RunOutput, RunOutputEvent
+from agno.run.requirement import RunRequirement
 from agno.run.team import RunContentEvent as TeamRunContentEvent
 from agno.run.team import TeamRunOutput, TeamRunOutputEvent
 from agno.run.workflow import WorkflowRunOutputEvent
@@ -145,15 +151,284 @@ class Model(ABC):
     cache_ttl: Optional[int] = None
     cache_dir: Optional[str] = None
 
+    # Retry configuration for model provider errors
+    # Number of retries to attempt when a ModelProviderError occurs
+    retries: int = 0
+    # Delay between retries (in seconds)
+    delay_between_retries: int = 1
+    # Exponential backoff: if True, the delay between retries is doubled each time
+    exponential_backoff: bool = False
+    # Enable retrying a model invocation once with a guidance message.
+    # This is useful for known errors avoidable with extra instructions.
+    retry_with_guidance: bool = True
+    # Set the number of times to retry the model invocation with guidance.
+    retry_with_guidance_limit: int = 1
+
     def __post_init__(self):
         if self.provider is None and self.name is not None:
             self.provider = f"{self.name} ({self.id})"
 
+    def _get_retry_delay(self, attempt: int) -> float:
+        """Calculate the delay before the next retry attempt."""
+        if self.exponential_backoff:
+            return self.delay_between_retries * (2**attempt)
+        return self.delay_between_retries
+
+    def _is_retryable_error(self, error: ModelProviderError) -> bool:
+        """Determine if an error is worth retrying.
+
+        Non-retryable errors include:
+        - Client errors (400, 401, 403, 413, 422) that won't change on retry
+        - Context window/token limit exceeded errors
+        - Payload too large errors
+
+        Retryable errors include:
+        - Rate limit errors (429)
+        - Server errors (500, 502, 503, 504)
+
+        Args:
+            error: The ModelProviderError to evaluate.
+
+        Returns:
+            True if the error is transient and worth retrying, False otherwise.
+        """
+        # Non-retryable status codes (client errors that won't change)
+        non_retryable_codes = {400, 401, 403, 404, 413, 422}
+        if error.status_code in non_retryable_codes:
+            return False
+
+        # Non-retryable error message patterns (context/token limits)
+        non_retryable_patterns = [
+            "context_length_exceeded",
+            "context window",
+            "maximum context length",
+            "token limit",
+            "max_tokens",
+            "too many tokens",
+            "payload too large",
+            "content_too_large",
+            "request too large",
+            "input too long",
+            "exceeds the model",
+        ]
+        error_msg = str(error.message).lower()
+        if any(pattern in error_msg for pattern in non_retryable_patterns):
+            return False
+
+        return True
+
+    def _invoke_with_retry(self, **kwargs) -> ModelResponse:
+        """
+        Invoke the model with retry logic for ModelProviderError.
+
+        This method wraps the invoke() call and retries on ModelProviderError
+        with optional exponential backoff.
+        """
+        last_exception: Optional[ModelProviderError] = None
+
+        for attempt in range(self.retries + 1):
+            try:
+                retries_with_guidance_count = kwargs.pop("retries_with_guidance_count", 0)
+                return self.invoke(**kwargs)
+            except ModelProviderError as e:
+                last_exception = e
+                # Check if error is non-retryable
+                if not self._is_retryable_error(e):
+                    log_error(f"Non-retryable model provider error: {e}")
+                    raise
+                if attempt < self.retries:
+                    delay = self._get_retry_delay(attempt)
+                    log_warning(
+                        f"Model provider error (attempt {attempt + 1}/{self.retries + 1}): {e}. Retrying in {delay}s..."
+                    )
+                    sleep(delay)
+                else:
+                    if self.retries > 0:
+                        log_error(f"Model provider error after {self.retries + 1} attempts: {e}")
+            except RetryableModelProviderError as e:
+                current_count = retries_with_guidance_count
+                if current_count >= self.retry_with_guidance_limit:
+                    raise ModelProviderError(
+                        message=f"Max retries with guidance reached. Error: {e.original_error}",
+                        model_name=self.name,
+                        model_id=self.id,
+                    )
+                kwargs.pop("retry_with_guidance", None)
+                kwargs["retries_with_guidance_count"] = current_count + 1
+
+                # Append the guidance message to help the model avoid the error in the next invoke.
+                kwargs["messages"].append(Message(role="user", content=e.retry_guidance_message, temporary=True))
+
+                return self._invoke_with_retry(**kwargs, retry_with_guidance=True)
+
+        # If we've exhausted all retries, raise the last exception
+        raise last_exception  # type: ignore
+
+    async def _ainvoke_with_retry(self, **kwargs) -> ModelResponse:
+        """
+        Asynchronously invoke the model with retry logic for ModelProviderError.
+
+        This method wraps the ainvoke() call and retries on ModelProviderError
+        with optional exponential backoff.
+        """
+        last_exception: Optional[ModelProviderError] = None
+
+        for attempt in range(self.retries + 1):
+            try:
+                retries_with_guidance_count = kwargs.pop("retries_with_guidance_count", 0)
+                return await self.ainvoke(**kwargs)
+            except ModelProviderError as e:
+                last_exception = e
+                # Check if error is non-retryable
+                if not self._is_retryable_error(e):
+                    log_error(f"Non-retryable model provider error: {e}")
+                    raise
+                if attempt < self.retries:
+                    delay = self._get_retry_delay(attempt)
+                    log_warning(
+                        f"Model provider error (attempt {attempt + 1}/{self.retries + 1}): {e}. Retrying in {delay}s..."
+                    )
+                    await asyncio.sleep(delay)
+                else:
+                    if self.retries > 0:
+                        log_error(f"Model provider error after {self.retries + 1} attempts: {e}")
+            except RetryableModelProviderError as e:
+                current_count = retries_with_guidance_count
+                if current_count >= self.retry_with_guidance_limit:
+                    raise ModelProviderError(
+                        message=f"Max retries with guidance reached. Error: {e.original_error}",
+                        model_name=self.name,
+                        model_id=self.id,
+                    )
+
+                kwargs.pop("retry_with_guidance", None)
+                kwargs["retries_with_guidance_count"] = current_count + 1
+
+                # Append the guidance message to help the model avoid the error in the next invoke.
+                kwargs["messages"].append(Message(role="user", content=e.retry_guidance_message, temporary=True))
+
+                return await self._ainvoke_with_retry(**kwargs, retry_with_guidance=True)
+
+        # If we've exhausted all retries, raise the last exception
+        raise last_exception  # type: ignore
+
+    def _invoke_stream_with_retry(self, **kwargs) -> Iterator[ModelResponse]:
+        """
+        Invoke the model stream with retry logic for ModelProviderError.
+
+        This method wraps the invoke_stream() call and retries on ModelProviderError
+        with optional exponential backoff. Note that retries restart the entire stream.
+        """
+        last_exception: Optional[ModelProviderError] = None
+
+        for attempt in range(self.retries + 1):
+            try:
+                retries_with_guidance_count = kwargs.pop("retries_with_guidance_count", 0)
+                yield from self.invoke_stream(**kwargs)
+                return  # Success, exit the retry loop
+            except ModelProviderError as e:
+                last_exception = e
+                # Check if error is non-retryable (e.g., context window exceeded, auth errors)
+                if not self._is_retryable_error(e):
+                    log_error(f"Non-retryable model provider error: {e}")
+                    raise
+                if attempt < self.retries:
+                    delay = self._get_retry_delay(attempt)
+                    log_warning(
+                        f"Model provider error during stream (attempt {attempt + 1}/{self.retries + 1}): {e}. "
+                        f"Retrying in {delay}s..."
+                    )
+                    sleep(delay)
+                else:
+                    if self.retries > 0:
+                        log_error(f"Model provider error after {self.retries + 1} attempts: {e}")
+            except RetryableModelProviderError as e:
+                current_count = retries_with_guidance_count
+                if current_count >= self.retry_with_guidance_limit:
+                    raise ModelProviderError(
+                        message=f"Max retries with guidance reached. Error: {e.original_error}",
+                        model_name=self.name,
+                        model_id=self.id,
+                    )
+
+                kwargs.pop("retry_with_guidance", None)
+                kwargs["retries_with_guidance_count"] = current_count + 1
+
+                # Append the guidance message to help the model avoid the error in the next invoke.
+                kwargs["messages"].append(Message(role="user", content=e.retry_guidance_message, temporary=True))
+
+                yield from self._invoke_stream_with_retry(**kwargs, retry_with_guidance=True)
+                return  # Success, exit after regeneration
+
+        # If we've exhausted all retries, raise the last exception
+        raise last_exception  # type: ignore
+
+    async def _ainvoke_stream_with_retry(self, **kwargs) -> AsyncIterator[ModelResponse]:
+        """
+        Asynchronously invoke the model stream with retry logic for ModelProviderError.
+
+        This method wraps the ainvoke_stream() call and retries on ModelProviderError
+        with optional exponential backoff. Note that retries restart the entire stream.
+        """
+        last_exception: Optional[ModelProviderError] = None
+
+        for attempt in range(self.retries + 1):
+            try:
+                retries_with_guidance_count = kwargs.pop("retries_with_guidance_count", 0)
+                async for response in self.ainvoke_stream(**kwargs):
+                    yield response
+                return  # Success, exit the retry loop
+            except ModelProviderError as e:
+                last_exception = e
+                # Check if error is non-retryable
+                if not self._is_retryable_error(e):
+                    log_error(f"Non-retryable model provider error: {e}")
+                    raise
+                if attempt < self.retries:
+                    delay = self._get_retry_delay(attempt)
+                    log_warning(
+                        f"Model provider error during stream (attempt {attempt + 1}/{self.retries + 1}): {e}. "
+                        f"Retrying in {delay}s..."
+                    )
+                    await asyncio.sleep(delay)
+                else:
+                    if self.retries > 0:
+                        log_error(f"Model provider error after {self.retries + 1} attempts: {e}")
+            except RetryableModelProviderError as e:
+                current_count = retries_with_guidance_count
+                if current_count >= self.retry_with_guidance_limit:
+                    raise ModelProviderError(
+                        message=f"Max retries with guidance reached. Error: {e.original_error}",
+                        model_name=self.name,
+                        model_id=self.id,
+                    )
+
+                kwargs.pop("retry_with_guidance", None)
+                kwargs["retries_with_guidance_count"] = current_count + 1
+
+                # Append the guidance message to help the model avoid the error in the next invoke.
+                kwargs["messages"].append(Message(role="user", content=e.retry_guidance_message, temporary=True))
+
+                async for response in self._ainvoke_stream_with_retry(**kwargs, retry_with_guidance=True):
+                    yield response
+                return  # Success, exit after regeneration
+
+        # If we've exhausted all retries, raise the last exception
+        raise last_exception  # type: ignore
+
     def to_dict(self) -> Dict[str, Any]:
         fields = {"name", "id", "provider"}
         _dict = {field: getattr(self, field) for field in fields if getattr(self, field) is not None}
         return _dict
 
+    def _remove_temporary_messages(self, messages: List[Message]) -> None:
+        """Remove temporary messages from the given list.
+
+        Args:
+            messages: The list of messages to filter (modified in place).
+        """
+        messages[:] = [m for m in messages if not m.temporary]
+
     def get_provider(self) -> str:
         return self.provider or self.name or self.__class__.__name__
 
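Note: the retry knobs above are ordinary dataclass fields on Model, so every provider subclass inherits them. A minimal sketch of enabling them, assuming a provider class such as OpenAIChat accepts the inherited fields as keyword arguments:

# Sketch: opting into transient-error retries. OpenAIChat is illustrative;
# any Model subclass should inherit the same dataclass fields.
from agno.models.openai import OpenAIChat

model = OpenAIChat(
    id="gpt-4o-mini",
    retries=3,                 # up to 3 retries after the first attempt
    delay_between_retries=1,   # base delay in seconds
    exponential_backoff=True,  # waits of 1s, 2s, 4s between attempts
)

Per _is_retryable_error, rate limits (429) and server errors (5xx) are retried, while client errors (400/401/403/404/413/422) and context-window or payload-size failures raise immediately.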
@@ -303,6 +578,29 @@ class Model(ABC):
             _tool_dicts.append(tool)
         return _tool_dicts
 
+    def count_tokens(
+        self,
+        messages: List[Message],
+        tools: Optional[Sequence[Union[Function, Dict[str, Any]]]] = None,
+        output_schema: Optional[Union[Dict, Type[BaseModel]]] = None,
+    ) -> int:
+        from agno.utils.tokens import count_tokens
+
+        return count_tokens(
+            messages,
+            tools=list(tools) if tools else None,
+            model_id=self.id,
+            output_schema=output_schema,
+        )
+
+    async def acount_tokens(
+        self,
+        messages: List[Message],
+        tools: Optional[Sequence[Union[Function, Dict[str, Any]]]] = None,
+        output_schema: Optional[Union[Dict, Type[BaseModel]]] = None,
+    ) -> int:
+        return self.count_tokens(messages, tools, output_schema=output_schema)
+
     def response(
         self,
         messages: List[Message],
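Note: count_tokens delegates to the new agno.utils.tokens module listed above. A quick sketch of estimating prompt size before a run (the model choice is illustrative):

# Sketch: estimating prompt tokens with the new Model.count_tokens helper.
from agno.models.message import Message
from agno.models.openai import OpenAIChat

model = OpenAIChat(id="gpt-4o-mini")
messages = [
    Message(role="system", content="You are a concise assistant."),
    Message(role="user", content="Summarize the plot of Hamlet in two sentences."),
]
# tools and output_schema are optional; both feed into the estimate.
print(model.count_tokens(messages))

acount_tokens simply reuses the synchronous path, so it can be called from async code without a thread hop.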
@@ -312,6 +610,7 @@ class Model(ABC):
         tool_call_limit: Optional[int] = None,
         run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
         send_media_to_model: bool = True,
+        compression_manager: Optional["CompressionManager"] = None,
     ) -> ModelResponse:
         """
         Generate a response from the model.
@@ -325,155 +624,194 @@ class Model(ABC):
             run_response: Run response to use
             send_media_to_model: Whether to send media to the model
         """
+        try:
+            # Check cache if enabled
+            if self.cache_response:
+                cache_key = self._get_model_cache_key(
+                    messages, stream=False, response_format=response_format, tools=tools
+                )
+                cached_data = self._get_cached_model_response(cache_key)
 
-        # Check cache if enabled
-        if self.cache_response:
-            cache_key = self._get_model_cache_key(messages, stream=False, response_format=response_format, tools=tools)
-            cached_data = self._get_cached_model_response(cache_key)
-
-            if cached_data:
-                log_info("Cache hit for model response")
-                return self._model_response_from_cache(cached_data)
-
-        log_debug(f"{self.get_provider()} Response Start", center=True, symbol="-")
-        log_debug(f"Model: {self.id}", center=True, symbol="-")
-
-        _log_messages(messages)
-        model_response = ModelResponse()
-
-        function_call_count = 0
-
-        _tool_dicts = self._format_tools(tools) if tools is not None else []
-        _functions = {tool.name: tool for tool in tools if isinstance(tool, Function)} if tools is not None else {}
-
-        while True:
-            # Get response from model
-            assistant_message = Message(role=self.assistant_message_role)
-            self._process_model_response(
-                messages=messages,
-                assistant_message=assistant_message,
-                model_response=model_response,
-                response_format=response_format,
-                tools=_tool_dicts,
-                tool_choice=tool_choice or self._tool_choice,
-                run_response=run_response,
-            )
+                if cached_data:
+                    log_info("Cache hit for model response")
+                    return self._model_response_from_cache(cached_data)
 
-            # Add assistant message to messages
-            messages.append(assistant_message)
+            log_debug(f"{self.get_provider()} Response Start", center=True, symbol="-")
+            log_debug(f"Model: {self.id}", center=True, symbol="-")
 
-            # Log response and metrics
-            assistant_message.log(metrics=True)
+            _log_messages(messages)
+            model_response = ModelResponse()
 
-            # Handle tool calls if present
-            if assistant_message.tool_calls:
-                # Prepare function calls
-                function_calls_to_run = self._prepare_function_calls(
-                    assistant_message=assistant_message,
+            function_call_count = 0
+
+            _tool_dicts = self._format_tools(tools) if tools is not None else []
+            _functions = {tool.name: tool for tool in tools if isinstance(tool, Function)} if tools is not None else {}
+
+            _compress_tool_results = compression_manager is not None and compression_manager.compress_tool_results
+            _compression_manager = compression_manager if _compress_tool_results else None
+
+            while True:
+                # Compress tool results if compression is enabled and threshold is met
+                if _compression_manager is not None and _compression_manager.should_compress(
+                    messages, tools, model=self, response_format=response_format
+                ):
+                    _compression_manager.compress(messages)
+
+                # Get response from model
+                assistant_message = Message(role=self.assistant_message_role)
+                self._process_model_response(
                     messages=messages,
+                    assistant_message=assistant_message,
                     model_response=model_response,
-                    functions=_functions,
+                    response_format=response_format,
+                    tools=_tool_dicts,
+                    tool_choice=tool_choice or self._tool_choice,
+                    run_response=run_response,
+                    compress_tool_results=_compress_tool_results,
                 )
-                function_call_results: List[Message] = []
-
-                # Execute function calls
-                for function_call_response in self.run_function_calls(
-                    function_calls=function_calls_to_run,
-                    function_call_results=function_call_results,
-                    current_function_call_count=function_call_count,
-                    function_call_limit=tool_call_limit,
-                ):
-                    if isinstance(function_call_response, ModelResponse):
-                        # The session state is updated by the function call
-                        if function_call_response.updated_session_state is not None:
-                            model_response.updated_session_state = function_call_response.updated_session_state
-
-                        # Media artifacts are generated by the function call
-                        if function_call_response.images is not None:
-                            if model_response.images is None:
-                                model_response.images = []
-                            model_response.images.extend(function_call_response.images)
-
-                        if function_call_response.audios is not None:
-                            if model_response.audios is None:
-                                model_response.audios = []
-                            model_response.audios.extend(function_call_response.audios)
-
-                        if function_call_response.videos is not None:
-                            if model_response.videos is None:
-                                model_response.videos = []
-                            model_response.videos.extend(function_call_response.videos)
-
-                        if function_call_response.files is not None:
-                            if model_response.files is None:
-                                model_response.files = []
-                            model_response.files.extend(function_call_response.files)
-
-                        if (
-                            function_call_response.event
-                            in [
+
+                # Add assistant message to messages
+                messages.append(assistant_message)
+
+                # Log response and metrics
+                assistant_message.log(metrics=True, use_compressed_content=_compress_tool_results)
+
+                # Handle tool calls if present
+                if assistant_message.tool_calls:
+                    # Prepare function calls
+                    function_calls_to_run = self._prepare_function_calls(
+                        assistant_message=assistant_message,
+                        messages=messages,
+                        model_response=model_response,
+                        functions=_functions,
+                    )
+                    function_call_results: List[Message] = []
+
+                    # Execute function calls
+                    for function_call_response in self.run_function_calls(
+                        function_calls=function_calls_to_run,
+                        function_call_results=function_call_results,
+                        current_function_call_count=function_call_count,
+                        function_call_limit=tool_call_limit,
+                    ):
+                        if isinstance(function_call_response, ModelResponse):
+                            # The session state is updated by the function call
+                            if function_call_response.updated_session_state is not None:
+                                model_response.updated_session_state = function_call_response.updated_session_state
+
+                            # Media artifacts are generated by the function call
+                            if function_call_response.images is not None:
+                                if model_response.images is None:
+                                    model_response.images = []
+                                model_response.images.extend(function_call_response.images)
+
+                            if function_call_response.audios is not None:
+                                if model_response.audios is None:
+                                    model_response.audios = []
+                                model_response.audios.extend(function_call_response.audios)
+
+                            if function_call_response.videos is not None:
+                                if model_response.videos is None:
+                                    model_response.videos = []
+                                model_response.videos.extend(function_call_response.videos)
+
+                            if function_call_response.files is not None:
+                                if model_response.files is None:
+                                    model_response.files = []
+                                model_response.files.extend(function_call_response.files)
+
+                            if (
+                                function_call_response.event
+                                in [
+                                    ModelResponseEvent.tool_call_completed.value,
+                                    ModelResponseEvent.tool_call_paused.value,
+                                ]
+                                and function_call_response.tool_executions is not None
+                            ):
+                                # Record the tool execution in the model response
+                                if model_response.tool_executions is None:
+                                    model_response.tool_executions = []
+                                model_response.tool_executions.extend(function_call_response.tool_executions)
+
+                                # If the tool is currently paused (HITL flow), add the requirement to the run response
+                                if (
+                                    function_call_response.event == ModelResponseEvent.tool_call_paused.value
+                                    and run_response is not None
+                                ):
+                                    current_tool_execution = function_call_response.tool_executions[-1]
+                                    if run_response.requirements is None:
+                                        run_response.requirements = []
+                                    run_response.requirements.append(
+                                        RunRequirement(tool_execution=current_tool_execution)
+                                    )
+
+                            elif function_call_response.event not in [
+                                ModelResponseEvent.tool_call_started.value,
                                 ModelResponseEvent.tool_call_completed.value,
-                                ModelResponseEvent.tool_call_paused.value,
-                            ]
-                            and function_call_response.tool_executions is not None
-                        ):
-                            if model_response.tool_executions is None:
-                                model_response.tool_executions = []
-                            model_response.tool_executions.extend(function_call_response.tool_executions)
-
-                        elif function_call_response.event not in [
-                            ModelResponseEvent.tool_call_started.value,
-                            ModelResponseEvent.tool_call_completed.value,
-                        ]:
-                            if function_call_response.content:
-                                model_response.content += function_call_response.content  # type: ignore
-
-                # Add a function call for each successful execution
-                function_call_count += len(function_call_results)
-
-                # Format and add results to messages
-                self.format_function_call_results(
-                    messages=messages, function_call_results=function_call_results, **model_response.extra or {}
-                )
+                            ]:
+                                if function_call_response.content:
+                                    model_response.content += function_call_response.content  # type: ignore
 
-                if any(msg.images or msg.videos or msg.audio or msg.files for msg in function_call_results):
-                    # Handle function call media
-                    self._handle_function_call_media(
+                    # Add a function call for each successful execution
+                    function_call_count += len(function_call_results)
+
+                    # Format and add results to messages
+                    self.format_function_call_results(
                         messages=messages,
                         function_call_results=function_call_results,
-                        send_media_to_model=send_media_to_model,
+                        compress_tool_results=_compress_tool_results,
+                        **model_response.extra or {},
                     )
 
-                for function_call_result in function_call_results:
-                    function_call_result.log(metrics=True)
+                    if any(msg.images or msg.videos or msg.audio or msg.files for msg in function_call_results):
+                        # Handle function call media
+                        self._handle_function_call_media(
+                            messages=messages,
+                            function_call_results=function_call_results,
+                            send_media_to_model=send_media_to_model,
+                        )
+
+                    for function_call_result in function_call_results:
+                        function_call_result.log(metrics=True, use_compressed_content=_compress_tool_results)
 
-                # Check if we should stop after tool calls
-                if any(m.stop_after_tool_call for m in function_call_results):
-                    break
+                    # Check if we should stop after tool calls
+                    if any(m.stop_after_tool_call for m in function_call_results):
+                        break
 
-                # If we have any tool calls that require confirmation, break the loop
-                if any(tc.requires_confirmation for tc in model_response.tool_executions or []):
-                    break
+                    # If we have any tool calls that require confirmation, break the loop
+                    if any(tc.requires_confirmation for tc in model_response.tool_executions or []):
+                        break
 
-                # If we have any tool calls that require external execution, break the loop
-                if any(tc.external_execution_required for tc in model_response.tool_executions or []):
-                    break
+                    # If we have any tool calls that require external execution, break the loop
+                    if any(tc.external_execution_required for tc in model_response.tool_executions or []):
+                        break
 
-                # If we have any tool calls that require user input, break the loop
-                if any(tc.requires_user_input for tc in model_response.tool_executions or []):
-                    break
+                    # If we have any tool calls that require user input, break the loop
+                    if any(tc.requires_user_input for tc in model_response.tool_executions or []):
+                        break
 
-                # Continue loop to get next response
-                continue
+                    # Continue loop to get next response
+                    continue
 
-            # No tool calls or finished processing them
-            break
+                # No tool calls or finished processing them
+                break
 
-        log_debug(f"{self.get_provider()} Response End", center=True, symbol="-")
+            log_debug(f"{self.get_provider()} Response End", center=True, symbol="-")
 
-        # Save to cache if enabled
-        if self.cache_response:
-            self._save_model_response_to_cache(cache_key, model_response, is_streaming=False)
+            # Save to cache if enabled
+            if self.cache_response:
+                self._save_model_response_to_cache(cache_key, model_response, is_streaming=False)
+        finally:
+            # Close the Gemini client
+            if self.__class__.__name__ == "Gemini" and self.client is not None:  # type: ignore
+                try:
+                    self.client.close()  # type: ignore
+                    self.client = None
+                except AttributeError:
+                    log_warning(
+                        "Your Gemini client is outdated. For Agno to properly handle the lifecycle of the client,"
+                        " please upgrade Gemini to the latest version: pip install -U google-genai"
+                    )
 
         return model_response
 
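Note: the loop now consults an optional CompressionManager before every provider call. A sketch of wiring one in; only the members used above (compress_tool_results, should_compress, compress) are confirmed by this diff, and the construction details are assumptions:

# Sketch: passing a CompressionManager so oversized tool results are
# compressed before each invoke. Construction is a hypothetical default;
# see agno/compression/manager.py for the real signature.
from agno.compression.manager import CompressionManager

compression = CompressionManager()        # hypothetical default construction
compression.compress_tool_results = True  # the flag Model.response() checks

model_response = model.response(
    messages=messages,
    compression_manager=compression,  # should_compress()/compress() run each loop turn
)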
@@ -486,157 +824,198 @@ class Model(ABC):
486
824
  tool_call_limit: Optional[int] = None,
487
825
  run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
488
826
  send_media_to_model: bool = True,
827
+ compression_manager: Optional["CompressionManager"] = None,
489
828
  ) -> ModelResponse:
490
829
  """
491
830
  Generate an asynchronous response from the model.
492
831
  """
493
832
 
494
- # Check cache if enabled
495
- if self.cache_response:
496
- cache_key = self._get_model_cache_key(messages, stream=False, response_format=response_format, tools=tools)
497
- cached_data = self._get_cached_model_response(cache_key)
498
-
499
- if cached_data:
500
- log_info("Cache hit for model response")
501
- return self._model_response_from_cache(cached_data)
502
-
503
- log_debug(f"{self.get_provider()} Async Response Start", center=True, symbol="-")
504
- log_debug(f"Model: {self.id}", center=True, symbol="-")
505
- _log_messages(messages)
506
- model_response = ModelResponse()
507
-
508
- _tool_dicts = self._format_tools(tools) if tools is not None else []
509
- _functions = {tool.name: tool for tool in tools if isinstance(tool, Function)} if tools is not None else {}
510
-
511
- function_call_count = 0
512
-
513
- while True:
514
- # Get response from model
515
- assistant_message = Message(role=self.assistant_message_role)
516
- await self._aprocess_model_response(
517
- messages=messages,
518
- assistant_message=assistant_message,
519
- model_response=model_response,
520
- response_format=response_format,
521
- tools=_tool_dicts,
522
- tool_choice=tool_choice or self._tool_choice,
523
- run_response=run_response,
524
- )
833
+ try:
834
+ # Check cache if enabled
835
+ if self.cache_response:
836
+ cache_key = self._get_model_cache_key(
837
+ messages, stream=False, response_format=response_format, tools=tools
838
+ )
839
+ cached_data = self._get_cached_model_response(cache_key)
525
840
 
526
- # Add assistant message to messages
527
- messages.append(assistant_message)
841
+ if cached_data:
842
+ log_info("Cache hit for model response")
843
+ return self._model_response_from_cache(cached_data)
528
844
 
529
- # Log response and metrics
530
- assistant_message.log(metrics=True)
845
+ log_debug(f"{self.get_provider()} Async Response Start", center=True, symbol="-")
846
+ log_debug(f"Model: {self.id}", center=True, symbol="-")
847
+ _log_messages(messages)
848
+ model_response = ModelResponse()
531
849
 
532
- # Handle tool calls if present
533
- if assistant_message.tool_calls:
534
- # Prepare function calls
535
- function_calls_to_run = self._prepare_function_calls(
536
- assistant_message=assistant_message,
850
+ _tool_dicts = self._format_tools(tools) if tools is not None else []
851
+ _functions = {tool.name: tool for tool in tools if isinstance(tool, Function)} if tools is not None else {}
852
+
853
+ _compress_tool_results = compression_manager is not None and compression_manager.compress_tool_results
854
+ _compression_manager = compression_manager if _compress_tool_results else None
855
+
856
+ function_call_count = 0
857
+
858
+ while True:
859
+ # Compress existing tool results BEFORE making API call to avoid context overflow
860
+ if _compression_manager is not None and await _compression_manager.ashould_compress(
861
+ messages, tools, model=self, response_format=response_format
862
+ ):
863
+ await _compression_manager.acompress(messages)
864
+
865
+ # Get response from model
866
+ assistant_message = Message(role=self.assistant_message_role)
867
+ await self._aprocess_model_response(
537
868
  messages=messages,
869
+ assistant_message=assistant_message,
538
870
  model_response=model_response,
539
- functions=_functions,
871
+ response_format=response_format,
872
+ tools=_tool_dicts,
873
+ tool_choice=tool_choice or self._tool_choice,
874
+ run_response=run_response,
875
+ compress_tool_results=_compress_tool_results,
540
876
  )
541
- function_call_results: List[Message] = []
542
-
543
- # Execute function calls
544
- async for function_call_response in self.arun_function_calls(
545
- function_calls=function_calls_to_run,
546
- function_call_results=function_call_results,
547
- current_function_call_count=function_call_count,
548
- function_call_limit=tool_call_limit,
549
- ):
550
- if isinstance(function_call_response, ModelResponse):
551
- # The session state is updated by the function call
552
- if function_call_response.updated_session_state is not None:
553
- model_response.updated_session_state = function_call_response.updated_session_state
554
-
555
- # Media artifacts are generated by the function call
556
- if function_call_response.images is not None:
557
- if model_response.images is None:
558
- model_response.images = []
559
- model_response.images.extend(function_call_response.images)
560
-
561
- if function_call_response.audios is not None:
562
- if model_response.audios is None:
563
- model_response.audios = []
564
- model_response.audios.extend(function_call_response.audios)
565
-
566
- if function_call_response.videos is not None:
567
- if model_response.videos is None:
568
- model_response.videos = []
569
- model_response.videos.extend(function_call_response.videos)
570
-
571
- if function_call_response.files is not None:
572
- if model_response.files is None:
573
- model_response.files = []
574
- model_response.files.extend(function_call_response.files)
575
-
576
- if (
577
- function_call_response.event
578
- in [
877
+
878
+ # Add assistant message to messages
879
+ messages.append(assistant_message)
880
+
881
+ # Log response and metrics
882
+ assistant_message.log(metrics=True)
883
+
884
+ # Handle tool calls if present
885
+ if assistant_message.tool_calls:
886
+ # Prepare function calls
887
+ function_calls_to_run = self._prepare_function_calls(
888
+ assistant_message=assistant_message,
889
+ messages=messages,
890
+ model_response=model_response,
891
+ functions=_functions,
892
+ )
893
+ function_call_results: List[Message] = []
894
+
895
+ # Execute function calls
896
+ async for function_call_response in self.arun_function_calls(
897
+ function_calls=function_calls_to_run,
898
+ function_call_results=function_call_results,
899
+ current_function_call_count=function_call_count,
900
+ function_call_limit=tool_call_limit,
901
+ ):
902
+ if isinstance(function_call_response, ModelResponse):
903
+ # The session state is updated by the function call
904
+ if function_call_response.updated_session_state is not None:
905
+ model_response.updated_session_state = function_call_response.updated_session_state
906
+
907
+ # Media artifacts are generated by the function call
908
+ if function_call_response.images is not None:
909
+ if model_response.images is None:
910
+ model_response.images = []
911
+ model_response.images.extend(function_call_response.images)
912
+
913
+ if function_call_response.audios is not None:
914
+ if model_response.audios is None:
915
+ model_response.audios = []
916
+ model_response.audios.extend(function_call_response.audios)
917
+
918
+ if function_call_response.videos is not None:
919
+ if model_response.videos is None:
920
+ model_response.videos = []
921
+ model_response.videos.extend(function_call_response.videos)
922
+
923
+ if function_call_response.files is not None:
924
+ if model_response.files is None:
925
+ model_response.files = []
926
+ model_response.files.extend(function_call_response.files)
927
+
928
+ if (
929
+ function_call_response.event
930
+ in [
931
+ ModelResponseEvent.tool_call_completed.value,
932
+ ModelResponseEvent.tool_call_paused.value,
933
+ ]
934
+ and function_call_response.tool_executions is not None
935
+ ):
936
+ if model_response.tool_executions is None:
937
+ model_response.tool_executions = []
938
+ model_response.tool_executions.extend(function_call_response.tool_executions)
939
+
940
+ # If the tool is currently paused (HITL flow), add the requirement to the run response
941
+ if (
942
+ function_call_response.event == ModelResponseEvent.tool_call_paused.value
943
+ and run_response is not None
944
+ ):
945
+ current_tool_execution = function_call_response.tool_executions[-1]
946
+ if run_response.requirements is None:
947
+ run_response.requirements = []
948
+ run_response.requirements.append(
949
+ RunRequirement(tool_execution=current_tool_execution)
950
+ )
951
+
952
+ elif function_call_response.event not in [
953
+ ModelResponseEvent.tool_call_started.value,
579
954
  ModelResponseEvent.tool_call_completed.value,
580
- ModelResponseEvent.tool_call_paused.value,
581
- ]
582
- and function_call_response.tool_executions is not None
583
- ):
584
- if model_response.tool_executions is None:
585
- model_response.tool_executions = []
586
- model_response.tool_executions.extend(function_call_response.tool_executions)
587
- elif function_call_response.event not in [
588
-                        ModelResponseEvent.tool_call_started.value,
-                        ModelResponseEvent.tool_call_completed.value,
-                    ]:
-                        if function_call_response.content:
-                            model_response.content += function_call_response.content  # type: ignore
-
-                # Add a function call for each successful execution
-                function_call_count += len(function_call_results)
-
-                # Format and add results to messages
-                self.format_function_call_results(
-                    messages=messages, function_call_results=function_call_results, **model_response.extra or {}
-                )
+                        ]:
+                            if function_call_response.content:
+                                model_response.content += function_call_response.content  # type: ignore
 
-                if any(msg.images or msg.videos or msg.audio or msg.files for msg in function_call_results):
-                    # Handle function call media
-                    self._handle_function_call_media(
+                    # Add a function call for each successful execution
+                    function_call_count += len(function_call_results)
+
+                    # Format and add results to messages
+                    self.format_function_call_results(
                         messages=messages,
                         function_call_results=function_call_results,
-                        send_media_to_model=send_media_to_model,
+                        compress_tool_results=_compress_tool_results,
+                        **model_response.extra or {},
                     )
 
-                for function_call_result in function_call_results:
-                    function_call_result.log(metrics=True)
+                    if any(msg.images or msg.videos or msg.audio or msg.files for msg in function_call_results):
+                        # Handle function call media
+                        self._handle_function_call_media(
+                            messages=messages,
+                            function_call_results=function_call_results,
+                            send_media_to_model=send_media_to_model,
+                        )
 
-                # Check if we should stop after tool calls
-                if any(m.stop_after_tool_call for m in function_call_results):
-                    break
+                    for function_call_result in function_call_results:
+                        function_call_result.log(metrics=True, use_compressed_content=_compress_tool_results)
 
-                # If we have any tool calls that require confirmation, break the loop
-                if any(tc.requires_confirmation for tc in model_response.tool_executions or []):
-                    break
+                    # Check if we should stop after tool calls
+                    if any(m.stop_after_tool_call for m in function_call_results):
+                        break
 
-                # If we have any tool calls that require external execution, break the loop
-                if any(tc.external_execution_required for tc in model_response.tool_executions or []):
-                    break
+                    # If we have any tool calls that require confirmation, break the loop
+                    if any(tc.requires_confirmation for tc in model_response.tool_executions or []):
+                        break
 
-                # If we have any tool calls that require user input, break the loop
-                if any(tc.requires_user_input for tc in model_response.tool_executions or []):
-                    break
+                    # If we have any tool calls that require external execution, break the loop
+                    if any(tc.external_execution_required for tc in model_response.tool_executions or []):
+                        break
 
-                # Continue loop to get next response
-                continue
+                    # If we have any tool calls that require user input, break the loop
+                    if any(tc.requires_user_input for tc in model_response.tool_executions or []):
+                        break
 
-            # No tool calls or finished processing them
-            break
+                    # Continue loop to get next response
+                    continue
 
-        log_debug(f"{self.get_provider()} Async Response End", center=True, symbol="-")
+                # No tool calls or finished processing them
+                break
 
-        # Save to cache if enabled
-        if self.cache_response:
-            self._save_model_response_to_cache(cache_key, model_response, is_streaming=False)
+            log_debug(f"{self.get_provider()} Async Response End", center=True, symbol="-")
+
+            # Save to cache if enabled
+            if self.cache_response:
+                self._save_model_response_to_cache(cache_key, model_response, is_streaming=False)
+        finally:
+            # Close the Gemini client
+            if self.__class__.__name__ == "Gemini" and self.client is not None:
+                try:
+                    await self.client.aio.aclose()  # type: ignore
+                    self.client = None
+                except AttributeError:
+                    log_warning(
+                        "Your Gemini client is outdated. For Agno to properly handle the lifecycle of the client,"
+                        " please upgrade Gemini to the latest version: pip install -U google-genai"
+                    )
 
         return model_response
 
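The `try`/`finally` added above ties the Gemini client's lifetime to the request: the client is closed after each call, and older `google-genai` SDKs that lack `aclose()` produce a warning instead of a crash. A rough sketch of the same pattern in isolation (the `make_client` factory is hypothetical; agno keeps the client on `self.client`):

```python
# Sketch: close an async google-genai client after use, tolerating old SDKs.
from google import genai

async def call_and_cleanup(contents: str):
    client = genai.Client()  # hypothetical standalone client; agno reuses self.client
    try:
        return await client.aio.models.generate_content(
            model="gemini-2.0-flash", contents=contents
        )
    finally:
        try:
            await client.aio.aclose()  # present in recent google-genai releases
        except AttributeError:
            print("google-genai too old to close explicitly; upgrade: pip install -U google-genai")
```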
@@ -649,6 +1028,7 @@ class Model(ABC):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
+        compress_tool_results: bool = False,
     ) -> None:
         """
         Process a single model response and return the assistant message and whether to continue.
@@ -656,14 +1036,15 @@ class Model(ABC):
         Returns:
             Tuple[Message, bool]: (assistant_message, should_continue)
         """
-        # Generate response
-        provider_response = self.invoke(
+        # Generate response with retry logic for ModelProviderError
+        provider_response = self._invoke_with_retry(
             assistant_message=assistant_message,
             messages=messages,
             response_format=response_format,
             tools=tools,
             tool_choice=tool_choice or self._tool_choice,
             run_response=run_response,
+            compress_tool_results=compress_tool_results,
         )
 
         # Populate the assistant message
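The bare `invoke` call is now routed through `_invoke_with_retry`, so transient `ModelProviderError`s no longer fail the run on the first attempt. The wrapper's implementation is outside this hunk; a minimal sketch of what such a wrapper typically looks like (attempt count and backoff values here are illustrative assumptions, not agno's actual defaults):

```python
import time

class ModelProviderError(Exception):
    """Stand-in for agno.exceptions.ModelProviderError."""

def invoke_with_retry(invoke, *, max_attempts: int = 3, base_delay: float = 1.0, **kwargs):
    # Retry the provider call on ModelProviderError with exponential backoff.
    for attempt in range(1, max_attempts + 1):
        try:
            return invoke(**kwargs)
        except ModelProviderError:
            if attempt == max_attempts:
                raise  # out of attempts; surface the provider error
            time.sleep(base_delay * 2 ** (attempt - 1))
```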
@@ -694,6 +1075,8 @@ class Model(ABC):
                 model_response.extra.update(provider_response.extra)
             if provider_response.provider_data is not None:
                 model_response.provider_data = provider_response.provider_data
+            if provider_response.response_usage is not None:
+                model_response.response_usage = provider_response.response_usage
 
     async def _aprocess_model_response(
         self,
@@ -704,6 +1087,7 @@ class Model(ABC):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
+        compress_tool_results: bool = False,
     ) -> None:
         """
         Process a single async model response and return the assistant message and whether to continue.
@@ -711,14 +1095,15 @@ class Model(ABC):
         Returns:
             Tuple[Message, bool]: (assistant_message, should_continue)
         """
-        # Generate response
-        provider_response = await self.ainvoke(
+        # Generate response with retry logic for ModelProviderError
+        provider_response = await self._ainvoke_with_retry(
             messages=messages,
             response_format=response_format,
             tools=tools,
             tool_choice=tool_choice or self._tool_choice,
             assistant_message=assistant_message,
             run_response=run_response,
+            compress_tool_results=compress_tool_results,
         )
 
         # Populate the assistant message
@@ -749,6 +1134,8 @@ class Model(ABC):
                 model_response.extra.update(provider_response.extra)
             if provider_response.provider_data is not None:
                 model_response.provider_data = provider_response.provider_data
+            if provider_response.response_usage is not None:
+                model_response.response_usage = provider_response.response_usage
 
     def _populate_assistant_message(
         self,
@@ -829,18 +1216,20 @@ class Model(ABC):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
+        compress_tool_results: bool = False,
     ) -> Iterator[ModelResponse]:
         """
-        Process a streaming response from the model.
+        Process a streaming response from the model with retry logic for ModelProviderError.
         """
 
-        for response_delta in self.invoke_stream(
+        for response_delta in self._invoke_stream_with_retry(
             messages=messages,
             assistant_message=assistant_message,
             response_format=response_format,
             tools=tools,
             tool_choice=tool_choice or self._tool_choice,
             run_response=run_response,
+            compress_tool_results=compress_tool_results,
         ):
             for model_response_delta in self._populate_stream_data(
                 stream_data=stream_data,
@@ -861,147 +1250,207 @@ class Model(ABC):
         stream_model_response: bool = True,
         run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
         send_media_to_model: bool = True,
+        compression_manager: Optional["CompressionManager"] = None,
     ) -> Iterator[Union[ModelResponse, RunOutputEvent, TeamRunOutputEvent]]:
         """
         Generate a streaming response from the model.
         """
+        try:
+            # Check cache if enabled - capture key BEFORE streaming to avoid mismatch
+            cache_key = None
+            if self.cache_response:
+                cache_key = self._get_model_cache_key(
+                    messages, stream=True, response_format=response_format, tools=tools
+                )
+                cached_data = self._get_cached_model_response(cache_key)
 
-        # Check cache if enabled - capture key BEFORE streaming to avoid mismatch
-        cache_key = None
-        if self.cache_response:
-            cache_key = self._get_model_cache_key(messages, stream=True, response_format=response_format, tools=tools)
-            cached_data = self._get_cached_model_response(cache_key)
+                if cached_data:
+                    log_info("Cache hit for streaming model response")
+                    # Yield cached responses
+                    for response in self._streaming_responses_from_cache(cached_data["streaming_responses"]):
+                        yield response
+                    return
 
-            if cached_data:
-                log_info("Cache hit for streaming model response")
-                # Yield cached responses
-                for response in self._streaming_responses_from_cache(cached_data["streaming_responses"]):
-                    yield response
-                return
+            log_info("Cache miss for streaming model response")
 
-            log_info("Cache miss for streaming model response")
+            # Track streaming responses for caching
+            streaming_responses: List[ModelResponse] = []
 
-        # Track streaming responses for caching
-        streaming_responses: List[ModelResponse] = []
+            log_debug(f"{self.get_provider()} Response Stream Start", center=True, symbol="-")
+            log_debug(f"Model: {self.id}", center=True, symbol="-")
+            _log_messages(messages)
 
-        log_debug(f"{self.get_provider()} Response Stream Start", center=True, symbol="-")
-        log_debug(f"Model: {self.id}", center=True, symbol="-")
-        _log_messages(messages)
+            _tool_dicts = self._format_tools(tools) if tools is not None else []
+            _functions = {tool.name: tool for tool in tools if isinstance(tool, Function)} if tools is not None else {}
 
-        _tool_dicts = self._format_tools(tools) if tools is not None else []
-        _functions = {tool.name: tool for tool in tools if isinstance(tool, Function)} if tools is not None else {}
+            _compress_tool_results = compression_manager is not None and compression_manager.compress_tool_results
+            _compression_manager = compression_manager if _compress_tool_results else None
 
-        function_call_count = 0
+            function_call_count = 0
 
-        while True:
-            assistant_message = Message(role=self.assistant_message_role)
-            # Create assistant message and stream data
-            stream_data = MessageData()
-            model_response = ModelResponse()
-            if stream_model_response:
-                # Generate response
-                for response in self.process_response_stream(
-                    messages=messages,
-                    assistant_message=assistant_message,
-                    stream_data=stream_data,
-                    response_format=response_format,
-                    tools=_tool_dicts,
-                    tool_choice=tool_choice or self._tool_choice,
-                    run_response=run_response,
+            while True:
+                # Compress existing tool results BEFORE invoke
+                if _compression_manager is not None and _compression_manager.should_compress(
+                    messages, tools, model=self, response_format=response_format
                 ):
-                    if self.cache_response and isinstance(response, ModelResponse):
-                        streaming_responses.append(response)
-                    yield response
+                    # Emit compression started event
+                    yield ModelResponse(event=ModelResponseEvent.compression_started.value)
+                    _compression_manager.compress(messages)
+                    # Emit compression completed event with stats
+                    yield ModelResponse(
+                        event=ModelResponseEvent.compression_completed.value,
+                        compression_stats=_compression_manager.stats.copy(),
+                    )
 
-            else:
-                self._process_model_response(
-                    messages=messages,
-                    assistant_message=assistant_message,
-                    model_response=model_response,
-                    response_format=response_format,
-                    tools=_tool_dicts,
-                    tool_choice=tool_choice or self._tool_choice,
-                )
-                if self.cache_response:
-                    streaming_responses.append(model_response)
-                yield model_response
-
-            # Add assistant message to messages
-            messages.append(assistant_message)
-            assistant_message.log(metrics=True)
-
-            # Handle tool calls if present
-            if assistant_message.tool_calls is not None:
-                # Prepare function calls
-                function_calls_to_run: List[FunctionCall] = self.get_function_calls_to_run(
-                    assistant_message=assistant_message, messages=messages, functions=_functions
-                )
-                function_call_results: List[Message] = []
-
-                # Execute function calls
-                for function_call_response in self.run_function_calls(
-                    function_calls=function_calls_to_run,
-                    function_call_results=function_call_results,
-                    current_function_call_count=function_call_count,
-                    function_call_limit=tool_call_limit,
-                ):
-                    if self.cache_response and isinstance(function_call_response, ModelResponse):
-                        streaming_responses.append(function_call_response)
-                    yield function_call_response
+                assistant_message = Message(role=self.assistant_message_role)
+                # Create assistant message and stream data
+                stream_data = MessageData()
+                model_response = ModelResponse()
 
-                # Add a function call for each successful execution
-                function_call_count += len(function_call_results)
+                # Emit LLM request started event
+                yield ModelResponse(event=ModelResponseEvent.model_request_started.value)
 
-                # Format and add results to messages
-                if stream_data and stream_data.extra is not None:
-                    self.format_function_call_results(
-                        messages=messages, function_call_results=function_call_results, **stream_data.extra
+                if stream_model_response:
+                    # Generate response
+                    for response in self.process_response_stream(
+                        messages=messages,
+                        assistant_message=assistant_message,
+                        stream_data=stream_data,
+                        response_format=response_format,
+                        tools=_tool_dicts,
+                        tool_choice=tool_choice or self._tool_choice,
+                        run_response=run_response,
+                        compress_tool_results=_compress_tool_results,
+                    ):
+                        if self.cache_response and isinstance(response, ModelResponse):
+                            streaming_responses.append(response)
+                        yield response
+
+                else:
+                    self._process_model_response(
+                        messages=messages,
+                        assistant_message=assistant_message,
+                        model_response=model_response,
+                        response_format=response_format,
+                        tools=_tool_dicts,
+                        tool_choice=tool_choice or self._tool_choice,
+                        run_response=run_response,
+                        compress_tool_results=_compress_tool_results,
                     )
-                elif model_response and model_response.extra is not None:
-                    self.format_function_call_results(
-                        messages=messages, function_call_results=function_call_results, **model_response.extra
+                    if self.cache_response:
+                        streaming_responses.append(model_response)
+                    yield model_response
+
+                # Add assistant message to messages
+                messages.append(assistant_message)
+                assistant_message.log(metrics=True)
+
+                # Emit LLM request completed event with metrics
+                llm_metrics = assistant_message.metrics
+                yield ModelResponse(
+                    event=ModelResponseEvent.model_request_completed.value,
+                    input_tokens=llm_metrics.input_tokens if llm_metrics else None,
+                    output_tokens=llm_metrics.output_tokens if llm_metrics else None,
+                    total_tokens=llm_metrics.total_tokens if llm_metrics else None,
+                    time_to_first_token=llm_metrics.time_to_first_token if llm_metrics else None,
+                    reasoning_tokens=llm_metrics.reasoning_tokens if llm_metrics else None,
+                    cache_read_tokens=llm_metrics.cache_read_tokens if llm_metrics else None,
+                    cache_write_tokens=llm_metrics.cache_write_tokens if llm_metrics else None,
+                )
+
+                # Handle tool calls if present
+                if assistant_message.tool_calls is not None:
+                    # Prepare function calls
+                    function_calls_to_run: List[FunctionCall] = self.get_function_calls_to_run(
+                        assistant_message=assistant_message, messages=messages, functions=_functions
                     )
-                else:
-                    self.format_function_call_results(messages=messages, function_call_results=function_call_results)
+                    function_call_results: List[Message] = []
 
-                # Handle function call media
-                if any(msg.images or msg.videos or msg.audio or msg.files for msg in function_call_results):
-                    self._handle_function_call_media(
-                        messages=messages,
+                    # Execute function calls
+                    for function_call_response in self.run_function_calls(
+                        function_calls=function_calls_to_run,
                         function_call_results=function_call_results,
-                        send_media_to_model=send_media_to_model,
-                    )
+                        current_function_call_count=function_call_count,
+                        function_call_limit=tool_call_limit,
+                    ):
+                        if self.cache_response and isinstance(function_call_response, ModelResponse):
+                            streaming_responses.append(function_call_response)
+                        yield function_call_response
+
+                    # Add a function call for each successful execution
+                    function_call_count += len(function_call_results)
+
+                    # Format and add results to messages
+                    if stream_data and stream_data.extra is not None:
+                        self.format_function_call_results(
+                            messages=messages,
+                            function_call_results=function_call_results,
+                            compress_tool_results=_compress_tool_results,
+                            **stream_data.extra,
+                        )
+                    elif model_response and model_response.extra is not None:
+                        self.format_function_call_results(
+                            messages=messages,
+                            function_call_results=function_call_results,
+                            compress_tool_results=_compress_tool_results,
+                            **model_response.extra,
+                        )
+                    else:
+                        self.format_function_call_results(
+                            messages=messages,
+                            function_call_results=function_call_results,
+                            compress_tool_results=_compress_tool_results,
+                        )
 
-                for function_call_result in function_call_results:
-                    function_call_result.log(metrics=True)
+                    # Handle function call media
+                    if any(msg.images or msg.videos or msg.audio or msg.files for msg in function_call_results):
+                        self._handle_function_call_media(
+                            messages=messages,
+                            function_call_results=function_call_results,
+                            send_media_to_model=send_media_to_model,
+                        )
 
-                # Check if we should stop after tool calls
-                if any(m.stop_after_tool_call for m in function_call_results):
-                    break
+                    for function_call_result in function_call_results:
+                        function_call_result.log(metrics=True, use_compressed_content=_compress_tool_results)
 
-                # If we have any tool calls that require confirmation, break the loop
-                if any(fc.function.requires_confirmation for fc in function_calls_to_run):
-                    break
+                    # Check if we should stop after tool calls
+                    if any(m.stop_after_tool_call for m in function_call_results):
+                        break
 
-                # If we have any tool calls that require external execution, break the loop
-                if any(fc.function.external_execution for fc in function_calls_to_run):
-                    break
+                    # If we have any tool calls that require confirmation, break the loop
+                    if any(fc.function.requires_confirmation for fc in function_calls_to_run):
+                        break
 
-                # If we have any tool calls that require user input, break the loop
-                if any(fc.function.requires_user_input for fc in function_calls_to_run):
-                    break
+                    # If we have any tool calls that require external execution, break the loop
+                    if any(fc.function.external_execution for fc in function_calls_to_run):
+                        break
 
-                # Continue loop to get next response
-                continue
+                    # If we have any tool calls that require user input, break the loop
+                    if any(fc.function.requires_user_input for fc in function_calls_to_run):
+                        break
 
-            # No tool calls or finished processing them
-            break
+                    # Continue loop to get next response
+                    continue
 
-        log_debug(f"{self.get_provider()} Response Stream End", center=True, symbol="-")
+                # No tool calls or finished processing them
+                break
 
-        # Save streaming responses to cache if enabled
-        if self.cache_response and cache_key and streaming_responses:
-            self._save_streaming_responses_to_cache(cache_key, streaming_responses)
+            log_debug(f"{self.get_provider()} Response Stream End", center=True, symbol="-")
+
+            # Save streaming responses to cache if enabled
+            if self.cache_response and cache_key and streaming_responses:
+                self._save_streaming_responses_to_cache(cache_key, streaming_responses)
+        finally:
+            # Close the Gemini client
+            if self.__class__.__name__ == "Gemini" and self.client is not None:
+                try:
+                    self.client.close()  # type: ignore
+                    self.client = None
+                except AttributeError:
+                    log_warning(
+                        "Your Gemini client is outdated. For Agno to properly handle the lifecycle of the client,"
+                        " please upgrade Gemini to the latest version: pip install -U google-genai"
+                    )
 
     async def aprocess_response_stream(
         self,
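After this change the streaming generator interleaves bookkeeping events (`compression_started`/`compression_completed` and `model_request_started`/`model_request_completed`, the latter carrying token metrics) with ordinary content deltas. A caller can branch on `ModelResponse.event`; a hedged sketch, assuming `ModelResponseEvent` is importable from `agno.models.response` and that the streaming entry point is the generator shown above:

```python
from agno.models.response import ModelResponseEvent  # assumed import path

def report(stream):
    # `stream` is the iterator produced by the streaming generator above.
    for response in stream:
        event = getattr(response, "event", None)
        if event == ModelResponseEvent.compression_completed.value:
            print("tool results compressed:", response.compression_stats)
        elif event == ModelResponseEvent.model_request_completed.value:
            print("model request done, total tokens:", response.total_tokens)
        elif getattr(response, "content", None):
            print(response.content, end="")
```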
@@ -1012,18 +1461,20 @@ class Model(ABC):
         tools: Optional[List[Dict[str, Any]]] = None,
         tool_choice: Optional[Union[str, Dict[str, Any]]] = None,
         run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
+        compress_tool_results: bool = False,
     ) -> AsyncIterator[ModelResponse]:
         """
-        Process a streaming response from the model.
+        Process a streaming response from the model with retry logic for ModelProviderError.
         """
-        async for response_delta in self.ainvoke_stream(
+        async for response_delta in self._ainvoke_stream_with_retry(
             messages=messages,
             assistant_message=assistant_message,
             response_format=response_format,
             tools=tools,
             tool_choice=tool_choice or self._tool_choice,
             run_response=run_response,
-        ):  # type: ignore
+            compress_tool_results=compress_tool_results,
+        ):
             for model_response_delta in self._populate_stream_data(
                 stream_data=stream_data,
                 model_response_delta=response_delta,
@@ -1043,148 +1494,208 @@ class Model(ABC):
         stream_model_response: bool = True,
         run_response: Optional[Union[RunOutput, TeamRunOutput]] = None,
         send_media_to_model: bool = True,
+        compression_manager: Optional["CompressionManager"] = None,
     ) -> AsyncIterator[Union[ModelResponse, RunOutputEvent, TeamRunOutputEvent]]:
         """
         Generate an asynchronous streaming response from the model.
         """
+        try:
+            # Check cache if enabled - capture key BEFORE streaming to avoid mismatch
+            cache_key = None
+            if self.cache_response:
+                cache_key = self._get_model_cache_key(
+                    messages, stream=True, response_format=response_format, tools=tools
+                )
+                cached_data = self._get_cached_model_response(cache_key)
 
-        # Check cache if enabled - capture key BEFORE streaming to avoid mismatch
-        cache_key = None
-        if self.cache_response:
-            cache_key = self._get_model_cache_key(messages, stream=True, response_format=response_format, tools=tools)
-            cached_data = self._get_cached_model_response(cache_key)
+                if cached_data:
+                    log_info("Cache hit for async streaming model response")
+                    # Yield cached responses
+                    for response in self._streaming_responses_from_cache(cached_data["streaming_responses"]):
+                        yield response
+                    return
 
-        if cached_data:
-            log_info("Cache hit for async streaming model response")
-            # Yield cached responses
-            for response in self._streaming_responses_from_cache(cached_data["streaming_responses"]):
-                yield response
-            return
+            log_info("Cache miss for async streaming model response")
 
-        log_info("Cache miss for async streaming model response")
+            # Track streaming responses for caching
+            streaming_responses: List[ModelResponse] = []
 
-        # Track streaming responses for caching
-        streaming_responses: List[ModelResponse] = []
+            log_debug(f"{self.get_provider()} Async Response Stream Start", center=True, symbol="-")
+            log_debug(f"Model: {self.id}", center=True, symbol="-")
+            _log_messages(messages)
 
-        log_debug(f"{self.get_provider()} Async Response Stream Start", center=True, symbol="-")
-        log_debug(f"Model: {self.id}", center=True, symbol="-")
-        _log_messages(messages)
+            _tool_dicts = self._format_tools(tools) if tools is not None else []
+            _functions = {tool.name: tool for tool in tools if isinstance(tool, Function)} if tools is not None else {}
 
-        _tool_dicts = self._format_tools(tools) if tools is not None else []
-        _functions = {tool.name: tool for tool in tools if isinstance(tool, Function)} if tools is not None else {}
+            _compress_tool_results = compression_manager is not None and compression_manager.compress_tool_results
+            _compression_manager = compression_manager if _compress_tool_results else None
 
-        function_call_count = 0
+            function_call_count = 0
 
-        while True:
-            # Create assistant message and stream data
-            assistant_message = Message(role=self.assistant_message_role)
-            stream_data = MessageData()
-            model_response = ModelResponse()
-            if stream_model_response:
-                # Generate response
-                async for model_response in self.aprocess_response_stream(
-                    messages=messages,
-                    assistant_message=assistant_message,
-                    stream_data=stream_data,
-                    response_format=response_format,
-                    tools=_tool_dicts,
-                    tool_choice=tool_choice or self._tool_choice,
-                    run_response=run_response,
+            while True:
+                # Compress existing tool results BEFORE making API call to avoid context overflow
+                if _compression_manager is not None and await _compression_manager.ashould_compress(
+                    messages, tools, model=self, response_format=response_format
                 ):
-                    if self.cache_response and isinstance(model_response, ModelResponse):
+                    # Emit compression started event
+                    yield ModelResponse(event=ModelResponseEvent.compression_started.value)
+                    await _compression_manager.acompress(messages)
+                    # Emit compression completed event with stats
+                    yield ModelResponse(
+                        event=ModelResponseEvent.compression_completed.value,
+                        compression_stats=_compression_manager.stats.copy(),
+                    )
+
+                # Create assistant message and stream data
+                assistant_message = Message(role=self.assistant_message_role)
+                stream_data = MessageData()
+                model_response = ModelResponse()
+
+                # Emit LLM request started event
+                yield ModelResponse(event=ModelResponseEvent.model_request_started.value)
+
+                if stream_model_response:
+                    # Generate response
+                    async for model_response in self.aprocess_response_stream(
+                        messages=messages,
+                        assistant_message=assistant_message,
+                        stream_data=stream_data,
+                        response_format=response_format,
+                        tools=_tool_dicts,
+                        tool_choice=tool_choice or self._tool_choice,
+                        run_response=run_response,
+                        compress_tool_results=_compress_tool_results,
+                    ):
+                        if self.cache_response and isinstance(model_response, ModelResponse):
+                            streaming_responses.append(model_response)
+                        yield model_response
+
+                else:
+                    await self._aprocess_model_response(
+                        messages=messages,
+                        assistant_message=assistant_message,
+                        model_response=model_response,
+                        response_format=response_format,
+                        tools=_tool_dicts,
+                        tool_choice=tool_choice or self._tool_choice,
+                        run_response=run_response,
+                        compress_tool_results=_compress_tool_results,
+                    )
+                    if self.cache_response:
                         streaming_responses.append(model_response)
                     yield model_response
 
-            else:
-                await self._aprocess_model_response(
-                    messages=messages,
-                    assistant_message=assistant_message,
-                    model_response=model_response,
-                    response_format=response_format,
-                    tools=_tool_dicts,
-                    tool_choice=tool_choice or self._tool_choice,
-                    run_response=run_response,
-                )
-                if self.cache_response:
-                    streaming_responses.append(model_response)
-                yield model_response
-
-            # Add assistant message to messages
-            messages.append(assistant_message)
-            assistant_message.log(metrics=True)
-
-            # Handle tool calls if present
-            if assistant_message.tool_calls is not None:
-                # Prepare function calls
-                function_calls_to_run: List[FunctionCall] = self.get_function_calls_to_run(
-                    assistant_message=assistant_message, messages=messages, functions=_functions
-                )
-                function_call_results: List[Message] = []
-
-                # Execute function calls
-                async for function_call_response in self.arun_function_calls(
-                    function_calls=function_calls_to_run,
-                    function_call_results=function_call_results,
-                    current_function_call_count=function_call_count,
-                    function_call_limit=tool_call_limit,
-                ):
-                    if self.cache_response and isinstance(function_call_response, ModelResponse):
-                        streaming_responses.append(function_call_response)
-                    yield function_call_response
+                # Add assistant message to messages
+                messages.append(assistant_message)
+                assistant_message.log(metrics=True)
 
-                # Add a function call for each successful execution
-                function_call_count += len(function_call_results)
+                # Emit LLM request completed event with metrics
+                llm_metrics = assistant_message.metrics
+                yield ModelResponse(
+                    event=ModelResponseEvent.model_request_completed.value,
+                    input_tokens=llm_metrics.input_tokens if llm_metrics else None,
+                    output_tokens=llm_metrics.output_tokens if llm_metrics else None,
+                    total_tokens=llm_metrics.total_tokens if llm_metrics else None,
+                    time_to_first_token=llm_metrics.time_to_first_token if llm_metrics else None,
+                    reasoning_tokens=llm_metrics.reasoning_tokens if llm_metrics else None,
+                    cache_read_tokens=llm_metrics.cache_read_tokens if llm_metrics else None,
+                    cache_write_tokens=llm_metrics.cache_write_tokens if llm_metrics else None,
+                )
 
-                # Format and add results to messages
-                if stream_data and stream_data.extra is not None:
-                    self.format_function_call_results(
-                        messages=messages, function_call_results=function_call_results, **stream_data.extra
-                    )
-                elif model_response and model_response.extra is not None:
-                    self.format_function_call_results(
-                        messages=messages, function_call_results=function_call_results, **model_response.extra or {}
+                # Handle tool calls if present
+                if assistant_message.tool_calls is not None:
+                    # Prepare function calls
+                    function_calls_to_run: List[FunctionCall] = self.get_function_calls_to_run(
+                        assistant_message=assistant_message, messages=messages, functions=_functions
                     )
-                else:
-                    self.format_function_call_results(messages=messages, function_call_results=function_call_results)
+                    function_call_results: List[Message] = []
 
-                # Handle function call media
-                if any(msg.images or msg.videos or msg.audio or msg.files for msg in function_call_results):
-                    self._handle_function_call_media(
-                        messages=messages,
+                    # Execute function calls
+                    async for function_call_response in self.arun_function_calls(
+                        function_calls=function_calls_to_run,
                         function_call_results=function_call_results,
-                        send_media_to_model=send_media_to_model,
-                    )
+                        current_function_call_count=function_call_count,
+                        function_call_limit=tool_call_limit,
+                    ):
+                        if self.cache_response and isinstance(function_call_response, ModelResponse):
+                            streaming_responses.append(function_call_response)
+                        yield function_call_response
+
+                    # Add a function call for each successful execution
+                    function_call_count += len(function_call_results)
+
+                    # Format and add results to messages
+                    if stream_data and stream_data.extra is not None:
+                        self.format_function_call_results(
+                            messages=messages,
+                            function_call_results=function_call_results,
+                            compress_tool_results=_compress_tool_results,
+                            **stream_data.extra,
+                        )
+                    elif model_response and model_response.extra is not None:
+                        self.format_function_call_results(
+                            messages=messages,
+                            function_call_results=function_call_results,
+                            compress_tool_results=_compress_tool_results,
+                            **model_response.extra or {},
+                        )
+                    else:
+                        self.format_function_call_results(
+                            messages=messages,
+                            function_call_results=function_call_results,
+                            compress_tool_results=_compress_tool_results,
+                        )
+
+                    # Handle function call media
+                    if any(msg.images or msg.videos or msg.audio or msg.files for msg in function_call_results):
+                        self._handle_function_call_media(
+                            messages=messages,
+                            function_call_results=function_call_results,
+                            send_media_to_model=send_media_to_model,
+                        )
 
-                for function_call_result in function_call_results:
-                    function_call_result.log(metrics=True)
+                    for function_call_result in function_call_results:
+                        function_call_result.log(metrics=True, use_compressed_content=_compress_tool_results)
 
-                # Check if we should stop after tool calls
-                if any(m.stop_after_tool_call for m in function_call_results):
-                    break
+                    # Check if we should stop after tool calls
+                    if any(m.stop_after_tool_call for m in function_call_results):
+                        break
 
-                # If we have any tool calls that require confirmation, break the loop
-                if any(fc.function.requires_confirmation for fc in function_calls_to_run):
-                    break
+                    # If we have any tool calls that require confirmation, break the loop
+                    if any(fc.function.requires_confirmation for fc in function_calls_to_run):
+                        break
 
-                # If we have any tool calls that require external execution, break the loop
-                if any(fc.function.external_execution for fc in function_calls_to_run):
-                    break
+                    # If we have any tool calls that require external execution, break the loop
+                    if any(fc.function.external_execution for fc in function_calls_to_run):
+                        break
 
-                # If we have any tool calls that require user input, break the loop
-                if any(fc.function.requires_user_input for fc in function_calls_to_run):
-                    break
+                    # If we have any tool calls that require user input, break the loop
+                    if any(fc.function.requires_user_input for fc in function_calls_to_run):
+                        break
 
-                # Continue loop to get next response
-                continue
+                    # Continue loop to get next response
+                    continue
 
-            # No tool calls or finished processing them
-            break
+                # No tool calls or finished processing them
+                break
 
-        log_debug(f"{self.get_provider()} Async Response Stream End", center=True, symbol="-")
+                log_debug(f"{self.get_provider()} Async Response Stream End", center=True, symbol="-")
 
-        # Save streaming responses to cache if enabled
-        if self.cache_response and cache_key and streaming_responses:
-            self._save_streaming_responses_to_cache(cache_key, streaming_responses)
+            # Save streaming responses to cache if enabled
+            if self.cache_response and cache_key and streaming_responses:
+                self._save_streaming_responses_to_cache(cache_key, streaming_responses)
+
+        finally:
+            # Close the Gemini client
+            if self.__class__.__name__ == "Gemini" and self.client is not None:
+                try:
+                    await self.client.aio.aclose()  # type: ignore
+                    self.client = None
+                except AttributeError:
+                    log_warning(
+                        "Your Gemini client is outdated. For Agno to properly handle the lifecycle of the client,"
+                        " please upgrade Gemini to the latest version: pip install -U google-genai"
+                    )
 
     def _populate_assistant_message_from_stream_data(
         self, assistant_message: Message, stream_data: MessageData
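The async path mirrors the sync one but awaits `ashould_compress`/`acompress`. Only a few members of `CompressionManager` are exercised by these loops: `compress_tool_results`, `should_compress`/`ashould_compress`, `compress`/`acompress`, and `stats`. A skeletal stand-in that satisfies that contract (the character-count trigger and truncation strategy are illustrative assumptions, not agno's actual token-aware implementation in `agno/compression/manager.py`):

```python
class MiniCompressionManager:
    """Illustrative stand-in for agno.compression.manager.CompressionManager."""

    def __init__(self, compress_tool_results: bool = True, max_chars: int = 20_000):
        self.compress_tool_results = compress_tool_results
        self.max_chars = max_chars  # assumed trigger; the real manager is token-aware
        self.stats: dict = {"compressed_messages": 0}

    def should_compress(self, messages, tools, model=None, response_format=None) -> bool:
        return sum(len(str(m.content or "")) for m in messages) > self.max_chars

    def compress(self, messages) -> None:
        # Shrink oversized tool results in place; real logic would summarize.
        for m in messages:
            if m.role == "tool" and m.content and len(str(m.content)) > 2_000:
                m.content = str(m.content)[:2_000] + " ...[truncated]"
                self.stats["compressed_messages"] += 1

    async def ashould_compress(self, messages, tools, model=None, response_format=None) -> bool:
        return self.should_compress(messages, tools, model=model, response_format=response_format)

    async def acompress(self, messages) -> None:
        self.compress(messages)
```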
@@ -1433,11 +1944,15 @@ class Model(ABC):
 
             # Run function calls sequentially
             function_execution_result: FunctionExecutionResult = FunctionExecutionResult(status="failure")
+            stop_after_tool_call_from_exception = False
             try:
                 function_execution_result = function_call.execute()
             except AgentRunException as a_exc:
                 # Update additional messages from function call
                 _handle_agent_exception(a_exc, additional_input)
+                # If stop_execution is True, mark that we should stop after this tool call
+                if a_exc.stop_execution:
+                    stop_after_tool_call_from_exception = True
             # Set function call success to False if an exception occurred
             except Exception as e:
                 log_error(f"Error executing function {function_call.function.name}: {e}")
@@ -1452,44 +1967,59 @@ class Model(ABC):
             function_call_output: str = ""
 
             if isinstance(function_execution_result.result, (GeneratorType, collections.abc.Iterator)):
-                for item in function_execution_result.result:
-                    # This function yields agent/team/workflow run events
-                    if (
-                        isinstance(item, tuple(get_args(RunOutputEvent)))
-                        or isinstance(item, tuple(get_args(TeamRunOutputEvent)))
-                        or isinstance(item, tuple(get_args(WorkflowRunOutputEvent)))
-                    ):
-                        # We only capture content events for output accumulation
-                        if isinstance(item, RunContentEvent) or isinstance(item, TeamRunContentEvent):
-                            if item.content is not None and isinstance(item.content, BaseModel):
-                                function_call_output += item.content.model_dump_json()
-                            else:
-                                # Capture output
-                                function_call_output += item.content or ""
+                try:
+                    for item in function_execution_result.result:
+                        # This function yields agent/team/workflow run events
+                        if (
+                            isinstance(item, tuple(get_args(RunOutputEvent)))
+                            or isinstance(item, tuple(get_args(TeamRunOutputEvent)))
+                            or isinstance(item, tuple(get_args(WorkflowRunOutputEvent)))
+                        ):
+                            # We only capture content events for output accumulation
+                            if isinstance(item, RunContentEvent) or isinstance(item, TeamRunContentEvent):
+                                if item.content is not None and isinstance(item.content, BaseModel):
+                                    function_call_output += item.content.model_dump_json()
+                                else:
+                                    # Capture output
+                                    function_call_output += item.content or ""
+
+                            if function_call.function.show_result and item.content is not None:
+                                yield ModelResponse(content=item.content)
 
-                        if function_call.function.show_result and item.content is not None:
-                            yield ModelResponse(content=item.content)
+                            if isinstance(item, CustomEvent):
+                                function_call_output += str(item)
+                                item.tool_call_id = function_call.call_id
 
-                    if isinstance(item, CustomEvent):
-                        function_call_output += str(item)
+                            # For WorkflowCompletedEvent, extract content for final output
+                            from agno.run.workflow import WorkflowCompletedEvent
 
-                    # For WorkflowCompletedEvent, extract content for final output
-                    from agno.run.workflow import WorkflowCompletedEvent
+                            if isinstance(item, WorkflowCompletedEvent):
+                                if item.content is not None:
+                                    if isinstance(item.content, BaseModel):
+                                        function_call_output += item.content.model_dump_json()
+                                    else:
+                                        function_call_output += str(item.content)
 
-                    if isinstance(item, WorkflowCompletedEvent):
-                        if item.content is not None:
-                            if isinstance(item.content, BaseModel):
-                                function_call_output += item.content.model_dump_json()
-                            else:
-                                function_call_output += str(item.content)
+                            # Yield the event itself to bubble it up
+                            yield item
 
-                    # Yield the event itself to bubble it up
-                    yield item
+                        else:
+                            function_call_output += str(item)
+                            if function_call.function.show_result and item is not None:
+                                yield ModelResponse(content=str(item))
+                except Exception as e:
+                    log_error(f"Error while iterating function result generator for {function_call.function.name}: {e}")
+                    function_call.error = str(e)
+                    function_call_success = False
 
-                    else:
-                        function_call_output += str(item)
-                        if function_call.function.show_result and item is not None:
-                            yield ModelResponse(content=str(item))
+                # For generators, re-capture updated_session_state after consumption
+                # since session_state modifications were made during iteration
+                if function_execution_result.updated_session_state is None:
+                    if (
+                        function_call.function._run_context is not None
+                        and function_call.function._run_context.session_state is not None
+                    ):
+                        function_execution_result.updated_session_state = function_call.function._run_context.session_state
             else:
                 from agno.tools.function import ToolResult
 
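Two fixes land in this hunk: a generator tool that raises mid-iteration no longer crashes the run (the error is recorded on the `FunctionCall` instead), and `updated_session_state` is re-read after the generator is fully consumed, since a generator body only mutates `session_state` while it is being iterated. A self-contained toy showing why the re-capture is necessary:

```python
# A generator tool mutates state only as it is consumed, so state captured
# when the generator object is created is stale.
def counting_tool(session_state: dict):
    for i in range(3):
        session_state["steps"] = i + 1  # happens lazily, during iteration
        yield f"step {i + 1}\n"

state = {"steps": 0}
gen = counting_tool(state)
print(state["steps"])   # 0 -- nothing has run yet
output = "".join(gen)   # draining the generator applies the mutations
print(state["steps"])   # 3 -- state must be re-read *after* consumption
```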
@@ -1521,6 +2051,9 @@ class Model(ABC):
                     timer=function_call_timer,
                     function_execution_result=function_execution_result,
                 )
+                # Override stop_after_tool_call if set by exception
+                if stop_after_tool_call_from_exception:
+                    function_call_result.stop_after_tool_call = True
                 yield ModelResponse(
                     content=f"{function_call.get_call_str()} completed in {function_call_timer.elapsed:.4f}s. ",
                     tool_executions=[
@@ -1568,7 +2101,7 @@ class Model(ABC):
 
         paused_tool_executions = []
 
-        # The function cannot be executed without user confirmation
+        # The function requires user confirmation (HITL)
         if fc.function.requires_confirmation:
             paused_tool_executions.append(
                 ToolExecution(
@@ -1578,7 +2111,8 @@ class Model(ABC):
                     requires_confirmation=True,
                 )
             )
-        # If the function requires user input, we yield a message to the user
+
+        # The function requires user input (HITL)
         if fc.function.requires_user_input:
             user_input_schema = fc.function.user_input_schema
             if fc.arguments and user_input_schema:
@@ -1596,15 +2130,26 @@ class Model(ABC):
                         user_input_schema=user_input_schema,
                     )
                 )
-        # If the function is from the user control flow tools, we handle it here
+
+        # If the function is from the user control flow (HITL) tools, we handle it here
         if fc.function.name == "get_user_input" and fc.arguments and fc.arguments.get("user_input_fields"):
             user_input_schema = []
             for input_field in fc.arguments.get("user_input_fields", []):
                 field_type = input_field.get("field_type")
-                try:
-                    python_type = eval(field_type) if isinstance(field_type, str) else field_type
-                except (NameError, SyntaxError):
-                    python_type = str  # Default to str if type is invalid
+                if isinstance(field_type, str):
+                    type_mapping = {
+                        "str": str,
+                        "int": int,
+                        "float": float,
+                        "bool": bool,
+                        "list": list,
+                        "dict": dict,
+                    }
+                    python_type = type_mapping.get(field_type, str)
+                elif isinstance(field_type, type):
+                    python_type = field_type
+                else:
+                    python_type = str
                 user_input_schema.append(
                     UserInputField(
                         name=input_field.get("field_name"),
@@ -1622,7 +2167,8 @@ class Model(ABC):
                         user_input_schema=user_input_schema,
                     )
                 )
-        # If the function requires external execution, we yield a message to the user
+
+        # The function requires external execution (HITL)
         if fc.function.external_execution:
             paused_tool_executions.append(
                 ToolExecution(
@@ -1755,10 +2301,20 @@ class Model(ABC):
             user_input_schema = []
             for input_field in fc.arguments.get("user_input_fields", []):
                 field_type = input_field.get("field_type")
-                try:
-                    python_type = eval(field_type) if isinstance(field_type, str) else field_type
-                except (NameError, SyntaxError):
-                    python_type = str  # Default to str if type is invalid
+                if isinstance(field_type, str):
+                    type_mapping = {
+                        "str": str,
+                        "int": int,
+                        "float": float,
+                        "bool": bool,
+                        "list": list,
+                        "dict": dict,
+                    }
+                    python_type = type_mapping.get(field_type, str)
+                elif isinstance(field_type, type):
+                    python_type = field_type
+                else:
+                    python_type = str
                 user_input_schema.append(
                     UserInputField(
                         name=input_field.get("field_name"),
@@ -1875,6 +2431,7 @@ class Model(ABC):
 
                         if isinstance(item, CustomEvent):
                             function_call_output += str(item)
+                            item.tool_call_id = function_call.call_id
 
                         # For WorkflowCompletedEvent, extract content for final output
                         from agno.run.workflow import WorkflowCompletedEvent
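`CustomEvent`s yielded from a tool are now stamped with the originating `tool_call_id`, so downstream consumers can correlate each event with the tool call that produced it. A hedged sketch of such a consumer (the `run_events` iterable is hypothetical; only the `tool_call_id` attribute is confirmed by this hunk):

```python
# Sketch: group custom events by the tool call that emitted them.
events_by_tool_call: dict[str, list] = {}
for event in run_events:  # hypothetical iterable of yielded events
    tool_call_id = getattr(event, "tool_call_id", None)
    if tool_call_id is not None:
        events_by_tool_call.setdefault(tool_call_id, []).append(event)
```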
@@ -1952,18 +2509,26 @@ class Model(ABC):
                 if async_gen_index in async_generator_outputs:
                     _, async_function_call_output, error = async_generator_outputs[async_gen_index]
                     if error:
-                        log_error(f"Error in async generator: {error}")
-                        raise error
+                        # Handle async generator exceptions gracefully like sync generators
+                        log_error(
+                            f"Error while iterating async generator for {function_call.function.name}: {error}"
+                        )
+                        function_call.error = str(error)
+                        function_call_success = False
                         break
                 async_gen_index += 1
 
             updated_session_state = function_execution_result.updated_session_state
 
             # Handle AgentRunException
+            stop_after_tool_call_from_exception = False
             if isinstance(function_call_success, AgentRunException):
                 a_exc = function_call_success
                 # Update additional messages from function call
                 _handle_agent_exception(a_exc, additional_input)
+                # If stop_execution is True, mark that we should stop after this tool call
+                if a_exc.stop_execution:
+                    stop_after_tool_call_from_exception = True
                 # Set function call success to False if an exception occurred
                 function_call_success = False
@@ -1975,33 +2540,62 @@ class Model(ABC):
                 function_call_output = async_function_call_output
                 # Events from async generators were already yielded in real-time above
             elif isinstance(function_call.result, (GeneratorType, collections.abc.Iterator)):
-                for item in function_call.result:
-                    # This function yields agent/team/workflow run events
-                    if isinstance(
-                        item,
-                        tuple(get_args(RunOutputEvent))
-                        + tuple(get_args(TeamRunOutputEvent))
-                        + tuple(get_args(WorkflowRunOutputEvent)),
+                try:
+                    for item in function_call.result:
+                        # This function yields agent/team/workflow run events
+                        if isinstance(
+                            item,
+                            tuple(get_args(RunOutputEvent))
+                            + tuple(get_args(TeamRunOutputEvent))
+                            + tuple(get_args(WorkflowRunOutputEvent)),
+                        ):
+                            # We only capture content events
+                            if isinstance(item, RunContentEvent) or isinstance(item, TeamRunContentEvent):
+                                if item.content is not None and isinstance(item.content, BaseModel):
+                                    function_call_output += item.content.model_dump_json()
+                                else:
+                                    # Capture output
+                                    function_call_output += item.content or ""
+
+                                if function_call.function.show_result and item.content is not None:
+                                    yield ModelResponse(content=item.content)
+                                    continue
+
+                        elif isinstance(item, CustomEvent):
+                            function_call_output += str(item)
+                            item.tool_call_id = function_call.call_id
+
+                            # Yield the event itself to bubble it up
+                            yield item
+                        else:
+                            function_call_output += str(item)
+                            if function_call.function.show_result and item is not None:
+                                yield ModelResponse(content=str(item))
+                except Exception as e:
+                    log_error(f"Error while iterating function result generator for {function_call.function.name}: {e}")
+                    function_call.error = str(e)
+                    function_call_success = False
+
+            # For generators (sync or async), re-capture updated_session_state after consumption
+            # since session_state modifications were made during iteration
+            if async_function_call_output is not None or isinstance(
+                function_call.result,
+                (GeneratorType, collections.abc.Iterator, AsyncGeneratorType, collections.abc.AsyncIterator),
+            ):
+                if updated_session_state is None:
+                    if (
+                        function_call.function._run_context is not None
+                        and function_call.function._run_context.session_state is not None
                     ):
-                        # We only capture content events
-                        if isinstance(item, RunContentEvent) or isinstance(item, TeamRunContentEvent):
-                            if item.content is not None and isinstance(item.content, BaseModel):
-                                function_call_output += item.content.model_dump_json()
-                            else:
-                                # Capture output
-                                function_call_output += item.content or ""
-
-                            if function_call.function.show_result and item.content is not None:
-                                yield ModelResponse(content=item.content)
-                                continue
+                        updated_session_state = function_call.function._run_context.session_state
 
-                        # Yield the event itself to bubble it up
-                        yield item
-                    else:
-                        function_call_output += str(item)
-                        if function_call.function.show_result and item is not None:
-                            yield ModelResponse(content=str(item))
-            else:
+            if not (
+                async_function_call_output is not None
+                or isinstance(
+                    function_call.result,
+                    (GeneratorType, collections.abc.Iterator, AsyncGeneratorType, collections.abc.AsyncIterator),
+                )
+            ):
                 from agno.tools.function import ToolResult
 
                 if isinstance(function_execution_result.result, ToolResult):
@@ -2030,6 +2624,9 @@ class Model(ABC):
                     timer=function_call_timer,
                     function_execution_result=function_execution_result,
                 )
+                # Override stop_after_tool_call if set by exception
+                if stop_after_tool_call_from_exception:
+                    function_call_result.stop_after_tool_call = True
                 yield ModelResponse(
                     content=f"{function_call.get_call_str()} completed in {function_call_timer.elapsed:.4f}s. ",
                     tool_executions=[
@@ -2079,7 +2676,11 @@ class Model(ABC):
         return function_calls_to_run
 
     def format_function_call_results(
-        self, messages: List[Message], function_call_results: List[Message], **kwargs
+        self,
+        messages: List[Message],
+        function_call_results: List[Message],
+        compress_tool_results: bool = False,
+        **kwargs,
     ) -> None:
         """
         Format function call results.
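`format_function_call_results` now takes an explicit `compress_tool_results` keyword instead of relying only on `**kwargs`, so provider subclasses that override it should accept and forward the new flag. A minimal sketch of a compatible override, assuming `Message` imports from `agno.models.message` and using `OpenAIChat` only as an example base class:

```python
from typing import List

from agno.models.message import Message  # assumed import path
from agno.models.openai import OpenAIChat  # example base; any provider model works

class MyChat(OpenAIChat):
    def format_function_call_results(
        self,
        messages: List[Message],
        function_call_results: List[Message],
        compress_tool_results: bool = False,
        **kwargs,
    ) -> None:
        # Accept the new flag and defer to the base implementation.
        super().format_function_call_results(
            messages,
            function_call_results,
            compress_tool_results=compress_tool_results,
            **kwargs,
        )
```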