agno 2.1.2__py3-none-any.whl → 2.3.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (314)
  1. agno/agent/agent.py +5540 -2273
  2. agno/api/api.py +2 -0
  3. agno/api/os.py +1 -1
  4. agno/compression/__init__.py +3 -0
  5. agno/compression/manager.py +247 -0
  6. agno/culture/__init__.py +3 -0
  7. agno/culture/manager.py +956 -0
  8. agno/db/async_postgres/__init__.py +3 -0
  9. agno/db/base.py +689 -6
  10. agno/db/dynamo/dynamo.py +933 -37
  11. agno/db/dynamo/schemas.py +174 -10
  12. agno/db/dynamo/utils.py +63 -4
  13. agno/db/firestore/firestore.py +831 -9
  14. agno/db/firestore/schemas.py +51 -0
  15. agno/db/firestore/utils.py +102 -4
  16. agno/db/gcs_json/gcs_json_db.py +660 -12
  17. agno/db/gcs_json/utils.py +60 -26
  18. agno/db/in_memory/in_memory_db.py +287 -14
  19. agno/db/in_memory/utils.py +60 -2
  20. agno/db/json/json_db.py +590 -14
  21. agno/db/json/utils.py +60 -26
  22. agno/db/migrations/manager.py +199 -0
  23. agno/db/migrations/v1_to_v2.py +43 -13
  24. agno/db/migrations/versions/__init__.py +0 -0
  25. agno/db/migrations/versions/v2_3_0.py +938 -0
  26. agno/db/mongo/__init__.py +15 -1
  27. agno/db/mongo/async_mongo.py +2760 -0
  28. agno/db/mongo/mongo.py +879 -11
  29. agno/db/mongo/schemas.py +42 -0
  30. agno/db/mongo/utils.py +80 -8
  31. agno/db/mysql/__init__.py +2 -1
  32. agno/db/mysql/async_mysql.py +2912 -0
  33. agno/db/mysql/mysql.py +946 -68
  34. agno/db/mysql/schemas.py +72 -10
  35. agno/db/mysql/utils.py +198 -7
  36. agno/db/postgres/__init__.py +2 -1
  37. agno/db/postgres/async_postgres.py +2579 -0
  38. agno/db/postgres/postgres.py +942 -57
  39. agno/db/postgres/schemas.py +81 -18
  40. agno/db/postgres/utils.py +164 -2
  41. agno/db/redis/redis.py +671 -7
  42. agno/db/redis/schemas.py +50 -0
  43. agno/db/redis/utils.py +65 -7
  44. agno/db/schemas/__init__.py +2 -1
  45. agno/db/schemas/culture.py +120 -0
  46. agno/db/schemas/evals.py +1 -0
  47. agno/db/schemas/memory.py +17 -2
  48. agno/db/singlestore/schemas.py +63 -0
  49. agno/db/singlestore/singlestore.py +949 -83
  50. agno/db/singlestore/utils.py +60 -2
  51. agno/db/sqlite/__init__.py +2 -1
  52. agno/db/sqlite/async_sqlite.py +2911 -0
  53. agno/db/sqlite/schemas.py +62 -0
  54. agno/db/sqlite/sqlite.py +965 -46
  55. agno/db/sqlite/utils.py +169 -8
  56. agno/db/surrealdb/__init__.py +3 -0
  57. agno/db/surrealdb/metrics.py +292 -0
  58. agno/db/surrealdb/models.py +334 -0
  59. agno/db/surrealdb/queries.py +71 -0
  60. agno/db/surrealdb/surrealdb.py +1908 -0
  61. agno/db/surrealdb/utils.py +147 -0
  62. agno/db/utils.py +2 -0
  63. agno/eval/__init__.py +10 -0
  64. agno/eval/accuracy.py +75 -55
  65. agno/eval/agent_as_judge.py +861 -0
  66. agno/eval/base.py +29 -0
  67. agno/eval/performance.py +16 -7
  68. agno/eval/reliability.py +28 -16
  69. agno/eval/utils.py +35 -17
  70. agno/exceptions.py +27 -2
  71. agno/filters.py +354 -0
  72. agno/guardrails/prompt_injection.py +1 -0
  73. agno/hooks/__init__.py +3 -0
  74. agno/hooks/decorator.py +164 -0
  75. agno/integrations/discord/client.py +1 -1
  76. agno/knowledge/chunking/agentic.py +13 -10
  77. agno/knowledge/chunking/fixed.py +4 -1
  78. agno/knowledge/chunking/semantic.py +9 -4
  79. agno/knowledge/chunking/strategy.py +59 -15
  80. agno/knowledge/embedder/fastembed.py +1 -1
  81. agno/knowledge/embedder/nebius.py +1 -1
  82. agno/knowledge/embedder/ollama.py +8 -0
  83. agno/knowledge/embedder/openai.py +8 -8
  84. agno/knowledge/embedder/sentence_transformer.py +6 -2
  85. agno/knowledge/embedder/vllm.py +262 -0
  86. agno/knowledge/knowledge.py +1618 -318
  87. agno/knowledge/reader/base.py +6 -2
  88. agno/knowledge/reader/csv_reader.py +8 -10
  89. agno/knowledge/reader/docx_reader.py +5 -6
  90. agno/knowledge/reader/field_labeled_csv_reader.py +16 -20
  91. agno/knowledge/reader/json_reader.py +5 -4
  92. agno/knowledge/reader/markdown_reader.py +8 -8
  93. agno/knowledge/reader/pdf_reader.py +17 -19
  94. agno/knowledge/reader/pptx_reader.py +101 -0
  95. agno/knowledge/reader/reader_factory.py +32 -3
  96. agno/knowledge/reader/s3_reader.py +3 -3
  97. agno/knowledge/reader/tavily_reader.py +193 -0
  98. agno/knowledge/reader/text_reader.py +22 -10
  99. agno/knowledge/reader/web_search_reader.py +1 -48
  100. agno/knowledge/reader/website_reader.py +10 -10
  101. agno/knowledge/reader/wikipedia_reader.py +33 -1
  102. agno/knowledge/types.py +1 -0
  103. agno/knowledge/utils.py +72 -7
  104. agno/media.py +22 -6
  105. agno/memory/__init__.py +14 -1
  106. agno/memory/manager.py +544 -83
  107. agno/memory/strategies/__init__.py +15 -0
  108. agno/memory/strategies/base.py +66 -0
  109. agno/memory/strategies/summarize.py +196 -0
  110. agno/memory/strategies/types.py +37 -0
  111. agno/models/aimlapi/aimlapi.py +17 -0
  112. agno/models/anthropic/claude.py +515 -40
  113. agno/models/aws/bedrock.py +102 -21
  114. agno/models/aws/claude.py +131 -274
  115. agno/models/azure/ai_foundry.py +41 -19
  116. agno/models/azure/openai_chat.py +39 -8
  117. agno/models/base.py +1249 -525
  118. agno/models/cerebras/cerebras.py +91 -21
  119. agno/models/cerebras/cerebras_openai.py +21 -2
  120. agno/models/cohere/chat.py +40 -6
  121. agno/models/cometapi/cometapi.py +18 -1
  122. agno/models/dashscope/dashscope.py +2 -3
  123. agno/models/deepinfra/deepinfra.py +18 -1
  124. agno/models/deepseek/deepseek.py +69 -3
  125. agno/models/fireworks/fireworks.py +18 -1
  126. agno/models/google/gemini.py +877 -80
  127. agno/models/google/utils.py +22 -0
  128. agno/models/groq/groq.py +51 -18
  129. agno/models/huggingface/huggingface.py +17 -6
  130. agno/models/ibm/watsonx.py +16 -6
  131. agno/models/internlm/internlm.py +18 -1
  132. agno/models/langdb/langdb.py +13 -1
  133. agno/models/litellm/chat.py +44 -9
  134. agno/models/litellm/litellm_openai.py +18 -1
  135. agno/models/message.py +28 -5
  136. agno/models/meta/llama.py +47 -14
  137. agno/models/meta/llama_openai.py +22 -17
  138. agno/models/mistral/mistral.py +8 -4
  139. agno/models/nebius/nebius.py +6 -7
  140. agno/models/nvidia/nvidia.py +20 -3
  141. agno/models/ollama/chat.py +24 -8
  142. agno/models/openai/chat.py +104 -29
  143. agno/models/openai/responses.py +101 -81
  144. agno/models/openrouter/openrouter.py +60 -3
  145. agno/models/perplexity/perplexity.py +17 -1
  146. agno/models/portkey/portkey.py +7 -6
  147. agno/models/requesty/requesty.py +24 -4
  148. agno/models/response.py +73 -2
  149. agno/models/sambanova/sambanova.py +20 -3
  150. agno/models/siliconflow/siliconflow.py +19 -2
  151. agno/models/together/together.py +20 -3
  152. agno/models/utils.py +254 -8
  153. agno/models/vercel/v0.py +20 -3
  154. agno/models/vertexai/__init__.py +0 -0
  155. agno/models/vertexai/claude.py +190 -0
  156. agno/models/vllm/vllm.py +19 -14
  157. agno/models/xai/xai.py +19 -2
  158. agno/os/app.py +549 -152
  159. agno/os/auth.py +190 -3
  160. agno/os/config.py +23 -0
  161. agno/os/interfaces/a2a/router.py +8 -11
  162. agno/os/interfaces/a2a/utils.py +1 -1
  163. agno/os/interfaces/agui/router.py +18 -3
  164. agno/os/interfaces/agui/utils.py +152 -39
  165. agno/os/interfaces/slack/router.py +55 -37
  166. agno/os/interfaces/slack/slack.py +9 -1
  167. agno/os/interfaces/whatsapp/router.py +0 -1
  168. agno/os/interfaces/whatsapp/security.py +3 -1
  169. agno/os/mcp.py +110 -52
  170. agno/os/middleware/__init__.py +2 -0
  171. agno/os/middleware/jwt.py +676 -112
  172. agno/os/router.py +40 -1478
  173. agno/os/routers/agents/__init__.py +3 -0
  174. agno/os/routers/agents/router.py +599 -0
  175. agno/os/routers/agents/schema.py +261 -0
  176. agno/os/routers/evals/evals.py +96 -39
  177. agno/os/routers/evals/schemas.py +65 -33
  178. agno/os/routers/evals/utils.py +80 -10
  179. agno/os/routers/health.py +10 -4
  180. agno/os/routers/knowledge/knowledge.py +196 -38
  181. agno/os/routers/knowledge/schemas.py +82 -22
  182. agno/os/routers/memory/memory.py +279 -52
  183. agno/os/routers/memory/schemas.py +46 -17
  184. agno/os/routers/metrics/metrics.py +20 -8
  185. agno/os/routers/metrics/schemas.py +16 -16
  186. agno/os/routers/session/session.py +462 -34
  187. agno/os/routers/teams/__init__.py +3 -0
  188. agno/os/routers/teams/router.py +512 -0
  189. agno/os/routers/teams/schema.py +257 -0
  190. agno/os/routers/traces/__init__.py +3 -0
  191. agno/os/routers/traces/schemas.py +414 -0
  192. agno/os/routers/traces/traces.py +499 -0
  193. agno/os/routers/workflows/__init__.py +3 -0
  194. agno/os/routers/workflows/router.py +624 -0
  195. agno/os/routers/workflows/schema.py +75 -0
  196. agno/os/schema.py +256 -693
  197. agno/os/scopes.py +469 -0
  198. agno/os/utils.py +514 -36
  199. agno/reasoning/anthropic.py +80 -0
  200. agno/reasoning/gemini.py +73 -0
  201. agno/reasoning/openai.py +5 -0
  202. agno/reasoning/vertexai.py +76 -0
  203. agno/run/__init__.py +6 -0
  204. agno/run/agent.py +155 -32
  205. agno/run/base.py +55 -3
  206. agno/run/requirement.py +181 -0
  207. agno/run/team.py +125 -38
  208. agno/run/workflow.py +72 -18
  209. agno/session/agent.py +102 -89
  210. agno/session/summary.py +56 -15
  211. agno/session/team.py +164 -90
  212. agno/session/workflow.py +405 -40
  213. agno/table.py +10 -0
  214. agno/team/team.py +3974 -1903
  215. agno/tools/dalle.py +2 -4
  216. agno/tools/eleven_labs.py +23 -25
  217. agno/tools/exa.py +21 -16
  218. agno/tools/file.py +153 -23
  219. agno/tools/file_generation.py +16 -10
  220. agno/tools/firecrawl.py +15 -7
  221. agno/tools/function.py +193 -38
  222. agno/tools/gmail.py +238 -14
  223. agno/tools/google_drive.py +271 -0
  224. agno/tools/googlecalendar.py +36 -8
  225. agno/tools/googlesheets.py +20 -5
  226. agno/tools/jira.py +20 -0
  227. agno/tools/mcp/__init__.py +10 -0
  228. agno/tools/mcp/mcp.py +331 -0
  229. agno/tools/mcp/multi_mcp.py +347 -0
  230. agno/tools/mcp/params.py +24 -0
  231. agno/tools/mcp_toolbox.py +3 -3
  232. agno/tools/models/nebius.py +5 -5
  233. agno/tools/models_labs.py +20 -10
  234. agno/tools/nano_banana.py +151 -0
  235. agno/tools/notion.py +204 -0
  236. agno/tools/parallel.py +314 -0
  237. agno/tools/postgres.py +76 -36
  238. agno/tools/redshift.py +406 -0
  239. agno/tools/scrapegraph.py +1 -1
  240. agno/tools/shopify.py +1519 -0
  241. agno/tools/slack.py +18 -3
  242. agno/tools/spotify.py +919 -0
  243. agno/tools/tavily.py +146 -0
  244. agno/tools/toolkit.py +25 -0
  245. agno/tools/workflow.py +8 -1
  246. agno/tools/yfinance.py +12 -11
  247. agno/tracing/__init__.py +12 -0
  248. agno/tracing/exporter.py +157 -0
  249. agno/tracing/schemas.py +276 -0
  250. agno/tracing/setup.py +111 -0
  251. agno/utils/agent.py +938 -0
  252. agno/utils/cryptography.py +22 -0
  253. agno/utils/dttm.py +33 -0
  254. agno/utils/events.py +151 -3
  255. agno/utils/gemini.py +15 -5
  256. agno/utils/hooks.py +118 -4
  257. agno/utils/http.py +113 -2
  258. agno/utils/knowledge.py +12 -5
  259. agno/utils/log.py +1 -0
  260. agno/utils/mcp.py +92 -2
  261. agno/utils/media.py +187 -1
  262. agno/utils/merge_dict.py +3 -3
  263. agno/utils/message.py +60 -0
  264. agno/utils/models/ai_foundry.py +9 -2
  265. agno/utils/models/claude.py +49 -14
  266. agno/utils/models/cohere.py +9 -2
  267. agno/utils/models/llama.py +9 -2
  268. agno/utils/models/mistral.py +4 -2
  269. agno/utils/print_response/agent.py +109 -16
  270. agno/utils/print_response/team.py +223 -30
  271. agno/utils/print_response/workflow.py +251 -34
  272. agno/utils/streamlit.py +1 -1
  273. agno/utils/team.py +98 -9
  274. agno/utils/tokens.py +657 -0
  275. agno/vectordb/base.py +39 -7
  276. agno/vectordb/cassandra/cassandra.py +21 -5
  277. agno/vectordb/chroma/chromadb.py +43 -12
  278. agno/vectordb/clickhouse/clickhousedb.py +21 -5
  279. agno/vectordb/couchbase/couchbase.py +29 -5
  280. agno/vectordb/lancedb/lance_db.py +92 -181
  281. agno/vectordb/langchaindb/langchaindb.py +24 -4
  282. agno/vectordb/lightrag/lightrag.py +17 -3
  283. agno/vectordb/llamaindex/llamaindexdb.py +25 -5
  284. agno/vectordb/milvus/milvus.py +50 -37
  285. agno/vectordb/mongodb/__init__.py +7 -1
  286. agno/vectordb/mongodb/mongodb.py +36 -30
  287. agno/vectordb/pgvector/pgvector.py +201 -77
  288. agno/vectordb/pineconedb/pineconedb.py +41 -23
  289. agno/vectordb/qdrant/qdrant.py +67 -54
  290. agno/vectordb/redis/__init__.py +9 -0
  291. agno/vectordb/redis/redisdb.py +682 -0
  292. agno/vectordb/singlestore/singlestore.py +50 -29
  293. agno/vectordb/surrealdb/surrealdb.py +31 -41
  294. agno/vectordb/upstashdb/upstashdb.py +34 -6
  295. agno/vectordb/weaviate/weaviate.py +53 -14
  296. agno/workflow/__init__.py +2 -0
  297. agno/workflow/agent.py +299 -0
  298. agno/workflow/condition.py +120 -18
  299. agno/workflow/loop.py +77 -10
  300. agno/workflow/parallel.py +231 -143
  301. agno/workflow/router.py +118 -17
  302. agno/workflow/step.py +609 -170
  303. agno/workflow/steps.py +73 -6
  304. agno/workflow/types.py +96 -21
  305. agno/workflow/workflow.py +2039 -262
  306. {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/METADATA +201 -66
  307. agno-2.3.13.dist-info/RECORD +613 -0
  308. agno/tools/googlesearch.py +0 -98
  309. agno/tools/mcp.py +0 -679
  310. agno/tools/memori.py +0 -339
  311. agno-2.1.2.dist-info/RECORD +0 -543
  312. {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/WHEEL +0 -0
  313. {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/licenses/LICENSE +0 -0
  314. {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/top_level.txt +0 -0
agno/eval/base.py ADDED
@@ -0,0 +1,29 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import Union
3
+
4
+ from agno.run.agent import RunInput, RunOutput
5
+ from agno.run.team import TeamRunInput, TeamRunOutput
6
+
7
+
8
class BaseEval(ABC):
    """Abstract interface every evaluation must implement.

    Concrete evaluations provide four checks: a synchronous and an
    asynchronous variant for the run input ("pre") and for the run
    output ("post"). All four are abstract, so a subclass cannot be
    instantiated until it overrides each of them.
    """

    @abstractmethod
    def pre_check(self, run_input: Union[RunInput, TeamRunInput]) -> None:
        """Run the synchronous pre-evaluation on ``run_input``.

        Args:
            run_input: The agent or team run input to evaluate.
        """

    @abstractmethod
    async def async_pre_check(self, run_input: Union[RunInput, TeamRunInput]) -> None:
        """Run the asynchronous pre-evaluation on ``run_input``.

        Args:
            run_input: The agent or team run input to evaluate.
        """

    @abstractmethod
    def post_check(self, run_output: Union[RunOutput, TeamRunOutput]) -> None:
        """Run the synchronous post-evaluation on ``run_output``.

        Args:
            run_output: The agent or team run output to evaluate.
        """

    @abstractmethod
    async def async_post_check(self, run_output: Union[RunOutput, TeamRunOutput]) -> None:
        """Run the asynchronous post-evaluation on ``run_output``.

        Args:
            run_output: The agent or team run output to evaluate.
        """
agno/eval/performance.py CHANGED
@@ -3,10 +3,10 @@ import gc
3
3
  import tracemalloc
4
4
  from dataclasses import dataclass, field
5
5
  from os import getenv
6
- from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional
6
+ from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Union
7
7
  from uuid import uuid4
8
8
 
9
- from agno.db.base import BaseDb
9
+ from agno.db.base import AsyncBaseDb, BaseDb
10
10
  from agno.db.schemas.evals import EvalType
11
11
  from agno.eval.utils import async_log_eval, log_eval_run, store_result_in_file
12
12
  from agno.utils.log import log_debug, set_log_level_to_debug, set_log_level_to_info
@@ -222,7 +222,7 @@ class PerformanceEval:
222
222
  # Enable debug logs
223
223
  debug_mode: bool = getenv("AGNO_DEBUG", "false").lower() == "true"
224
224
  # The database to store Evaluation results
225
- db: Optional[BaseDb] = None
225
+ db: Optional[Union[BaseDb, AsyncBaseDb]] = None
226
226
 
227
227
  # Telemetry settings
228
228
  # telemetry=True logs minimal telemetry for analytics
@@ -491,17 +491,23 @@ class PerformanceEval:
491
491
  6. Print results as requested
492
492
  7. Log results to the Agno platform if requested
493
493
  """
494
+ if isinstance(self.db, AsyncBaseDb):
495
+ raise ValueError("run() is not supported with an async DB. Please use arun() instead.")
496
+
494
497
  from rich.console import Console
495
498
  from rich.live import Live
496
499
  from rich.status import Status
497
500
 
501
+ # Generate unique run_id for this execution (don't modify self.eval_id due to concurrency)
502
+ run_id = str(uuid4())
503
+
498
504
  run_times = []
499
505
  memory_usages = []
500
506
  previous_snapshot = None
501
507
 
502
508
  self._set_log_level()
503
509
 
504
- log_debug(f"************ Evaluation Start: {self.eval_id} ************")
510
+ log_debug(f"************ Evaluation Start: {run_id} ************")
505
511
 
506
512
  # Add a spinner while running the evaluations
507
513
  console = Console()
@@ -612,7 +618,7 @@ class PerformanceEval:
612
618
  ),
613
619
  )
614
620
 
615
- log_debug(f"*********** Evaluation End: {self.eval_id} ***********")
621
+ log_debug(f"*********** Evaluation End: {run_id} ***********")
616
622
  return self.result
617
623
 
618
624
  async def arun(
@@ -638,13 +644,16 @@ class PerformanceEval:
638
644
  from rich.live import Live
639
645
  from rich.status import Status
640
646
 
647
+ # Generate unique run_id for this execution (don't modify self.eval_id due to concurrency)
648
+ run_id = str(uuid4())
649
+
641
650
  run_times = []
642
651
  memory_usages = []
643
652
  previous_snapshot = None
644
653
 
645
654
  self._set_log_level()
646
655
 
647
- log_debug(f"************ Evaluation Start: {self.eval_id} ************")
656
+ log_debug(f"************ Evaluation Start: {run_id} ************")
648
657
 
649
658
  # Add a spinner while running the evaluations
650
659
  console = Console()
@@ -755,7 +764,7 @@ class PerformanceEval:
755
764
  ),
756
765
  )
757
766
 
758
- log_debug(f"*********** Evaluation End: {self.eval_id} ***********")
767
+ log_debug(f"*********** Evaluation End: {run_id} ***********")
759
768
  return self.result
760
769
 
761
770
  def _get_telemetry_data(self) -> Dict[str, Any]:
agno/eval/reliability.py CHANGED
@@ -1,9 +1,10 @@
1
1
  from dataclasses import asdict, dataclass, field
2
2
  from os import getenv
3
- from typing import TYPE_CHECKING, Any, Dict, List, Optional
3
+ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
4
4
  from uuid import uuid4
5
5
 
6
- from agno.db.base import BaseDb
6
+ from agno.db.base import AsyncBaseDb, BaseDb
7
+ from agno.run.team import TeamRunOutput
7
8
 
8
9
  if TYPE_CHECKING:
9
10
  from rich.console import Console
@@ -11,7 +12,6 @@ if TYPE_CHECKING:
11
12
  from agno.agent import RunOutput
12
13
  from agno.db.schemas.evals import EvalType
13
14
  from agno.eval.utils import async_log_eval, log_eval_run, store_result_in_file
14
- from agno.run.team import TeamRunOutput
15
15
  from agno.utils.log import logger
16
16
 
17
17
 
@@ -63,7 +63,7 @@ class ReliabilityEval:
63
63
  # Enable debug logs
64
64
  debug_mode: bool = getenv("AGNO_DEBUG", "false").lower() == "true"
65
65
  # The database to store Evaluation results
66
- db: Optional[BaseDb] = None
66
+ db: Optional[Union[BaseDb, AsyncBaseDb]] = None
67
67
 
68
68
  # Telemetry settings
69
69
  # telemetry=True logs minimal telemetry for analytics
@@ -71,6 +71,9 @@ class ReliabilityEval:
71
71
  telemetry: bool = True
72
72
 
73
73
  def run(self, *, print_results: bool = False) -> Optional[ReliabilityResult]:
74
+ if isinstance(self.db, AsyncBaseDb):
75
+ raise ValueError("run() is not supported with an async DB. Please use arun() instead.")
76
+
74
77
  if self.agent_response is None and self.team_response is None:
75
78
  raise ValueError("You need to provide 'agent_response' or 'team_response' to run the evaluation.")
76
79
 
@@ -83,6 +86,9 @@ class ReliabilityEval:
83
86
  from rich.live import Live
84
87
  from rich.status import Status
85
88
 
89
+ # Generate unique run_id for this execution (don't modify self.eval_id due to concurrency)
90
+ run_id = str(uuid4())
91
+
86
92
  # Add a spinner while running the evaluations
87
93
  console = Console()
88
94
  with Live(console=console, transient=True) as live_log:
@@ -115,7 +121,7 @@ class ReliabilityEval:
115
121
  if not tool_name:
116
122
  continue
117
123
  else:
118
- if tool_name not in self.expected_tool_calls: # type: ignore
124
+ if self.expected_tool_calls is not None and tool_name not in self.expected_tool_calls:
119
125
  failed_tool_calls.append(tool_call.get("function", {}).get("name"))
120
126
  else:
121
127
  passed_tool_calls.append(tool_call.get("function", {}).get("name"))
@@ -180,7 +186,7 @@ class ReliabilityEval:
180
186
  ),
181
187
  )
182
188
 
183
- logger.debug(f"*********** Evaluation End: {self.eval_id} ***********")
189
+ logger.debug(f"*********** Evaluation End: {run_id} ***********")
184
190
  return self.result
185
191
 
186
192
  async def arun(self, *, print_results: bool = False) -> Optional[ReliabilityResult]:
@@ -196,6 +202,9 @@ class ReliabilityEval:
196
202
  from rich.live import Live
197
203
  from rich.status import Status
198
204
 
205
+ # Generate unique run_id for this execution (don't modify self.eval_id due to concurrency)
206
+ run_id = str(uuid4())
207
+
199
208
  # Add a spinner while running the evaluations
200
209
  console = Console()
201
210
  with Live(console=console, transient=True) as live_log:
@@ -220,15 +229,18 @@ class ReliabilityEval:
220
229
 
221
230
  failed_tool_calls = []
222
231
  passed_tool_calls = []
223
- for tool_call in actual_tool_calls: # type: ignore
224
- tool_name = tool_call.get("function", {}).get("name")
225
- if not tool_name:
226
- continue
227
- else:
228
- if tool_name not in self.expected_tool_calls: # type: ignore
229
- failed_tool_calls.append(tool_call.get("function", {}).get("name"))
232
+ if not actual_tool_calls:
233
+ failed_tool_calls = self.expected_tool_calls or []
234
+ else:
235
+ for tool_call in actual_tool_calls: # type: ignore
236
+ tool_name = tool_call.get("function", {}).get("name")
237
+ if not tool_name:
238
+ continue
230
239
  else:
231
- passed_tool_calls.append(tool_call.get("function", {}).get("name"))
240
+ if self.expected_tool_calls is not None and tool_name not in self.expected_tool_calls:
241
+ failed_tool_calls.append(tool_call.get("function", {}).get("name"))
242
+ else:
243
+ passed_tool_calls.append(tool_call.get("function", {}).get("name"))
232
244
 
233
245
  self.result = ReliabilityResult(
234
246
  eval_status="PASSED" if len(failed_tool_calls) == 0 else "FAILED",
@@ -241,7 +253,7 @@ class ReliabilityEval:
241
253
  store_result_in_file(
242
254
  file_path=self.file_path_to_save_results,
243
255
  name=self.name,
244
- eval_id=self.eval_id,
256
+ eval_id=run_id,
245
257
  result=self.result,
246
258
  )
247
259
 
@@ -290,7 +302,7 @@ class ReliabilityEval:
290
302
  ),
291
303
  )
292
304
 
293
- logger.debug(f"*********** Evaluation End: {self.eval_id} ***********")
305
+ logger.debug(f"*********** Evaluation End: {run_id} ***********")
294
306
  return self.result
295
307
 
296
308
  def _get_telemetry_data(self) -> Dict[str, Any]:
agno/eval/utils.py CHANGED
@@ -2,12 +2,13 @@ from dataclasses import asdict
2
2
  from pathlib import Path
3
3
  from typing import TYPE_CHECKING, Optional, Union
4
4
 
5
- from agno.db.base import BaseDb
5
+ from agno.db.base import AsyncBaseDb, BaseDb
6
6
  from agno.db.schemas.evals import EvalRunRecord, EvalType
7
7
  from agno.utils.log import log_debug, logger
8
8
 
9
9
  if TYPE_CHECKING:
10
10
  from agno.eval.accuracy import AccuracyResult
11
+ from agno.eval.agent_as_judge import AgentAsJudgeResult
11
12
  from agno.eval.performance import PerformanceResult
12
13
  from agno.eval.reliability import ReliabilityResult
13
14
 
@@ -49,7 +50,7 @@ def log_eval_run(
49
50
 
50
51
 
51
52
  async def async_log_eval(
52
- db: BaseDb,
53
+ db: Union[BaseDb, AsyncBaseDb],
53
54
  run_id: str,
54
55
  run_data: dict,
55
56
  eval_type: EvalType,
@@ -65,28 +66,45 @@ async def async_log_eval(
65
66
  """Call the API to create an evaluation run."""
66
67
 
67
68
  try:
68
- db.create_eval_run(
69
- EvalRunRecord(
70
- run_id=run_id,
71
- eval_type=eval_type,
72
- eval_data=run_data,
73
- eval_input=eval_input,
74
- agent_id=agent_id,
75
- model_id=model_id,
76
- model_provider=model_provider,
77
- name=name,
78
- evaluated_component_name=evaluated_component_name,
79
- team_id=team_id,
80
- workflow_id=workflow_id,
69
+ if isinstance(db, AsyncBaseDb):
70
+ await db.create_eval_run(
71
+ EvalRunRecord(
72
+ run_id=run_id,
73
+ eval_type=eval_type,
74
+ eval_data=run_data,
75
+ eval_input=eval_input,
76
+ agent_id=agent_id,
77
+ model_id=model_id,
78
+ model_provider=model_provider,
79
+ name=name,
80
+ evaluated_component_name=evaluated_component_name,
81
+ team_id=team_id,
82
+ workflow_id=workflow_id,
83
+ )
84
+ )
85
+ else:
86
+ db.create_eval_run(
87
+ EvalRunRecord(
88
+ run_id=run_id,
89
+ eval_type=eval_type,
90
+ eval_data=run_data,
91
+ eval_input=eval_input,
92
+ agent_id=agent_id,
93
+ model_id=model_id,
94
+ model_provider=model_provider,
95
+ name=name,
96
+ evaluated_component_name=evaluated_component_name,
97
+ team_id=team_id,
98
+ workflow_id=workflow_id,
99
+ )
81
100
  )
82
- )
83
101
  except Exception as e:
84
102
  log_debug(f"Could not create agent event: {e}")
85
103
 
86
104
 
87
105
  def store_result_in_file(
88
106
  file_path: str,
89
- result: Union["AccuracyResult", "PerformanceResult", "ReliabilityResult"],
107
+ result: Union["AccuracyResult", "AgentAsJudgeResult", "PerformanceResult", "ReliabilityResult"],
90
108
  eval_id: Optional[str] = None,
91
109
  name: Optional[str] = None,
92
110
  ):
agno/exceptions.py CHANGED
@@ -1,3 +1,4 @@
1
+ from dataclasses import dataclass
1
2
  from enum import Enum
2
3
  from typing import Any, Dict, List, Optional, Union
3
4
 
@@ -77,6 +78,17 @@ class AgnoError(Exception):
77
78
  return str(self.message)
78
79
 
79
80
 
81
class ModelAuthenticationError(AgnoError):
    """Error raised when authenticating against a model fails.

    Args:
        message: Human-readable description of the failure.
        status_code: Status code forwarded to ``AgnoError``; defaults to 401.
        model_name: Optional name of the model the failure relates to.
    """

    def __init__(self, message: str, status_code: int = 401, model_name: Optional[str] = None):
        super().__init__(message, status_code)

        # Assigned after the base initializer has run, so these values are
        # the ones left on the instance.
        self.type = "model_authentication_error"
        self.error_id = "model_authentication_error"
        self.model_name = model_name
90
+
91
+
80
92
  class ModelProviderError(AgnoError):
81
93
  """Exception raised when a model provider returns an error."""
82
94
 
@@ -130,7 +142,10 @@ class InputCheckError(Exception):
130
142
  ):
131
143
  super().__init__(message)
132
144
  self.type = "input_check_error"
133
- self.error_id = check_trigger.value
145
+ if isinstance(check_trigger, CheckTrigger):
146
+ self.error_id = check_trigger.value
147
+ else:
148
+ self.error_id = str(check_trigger)
134
149
 
135
150
  self.message = message
136
151
  self.check_trigger = check_trigger
@@ -148,8 +163,18 @@ class OutputCheckError(Exception):
148
163
  ):
149
164
  super().__init__(message)
150
165
  self.type = "output_check_error"
151
- self.error_id = check_trigger.value
166
+ if isinstance(check_trigger, CheckTrigger):
167
+ self.error_id = check_trigger.value
168
+ else:
169
+ self.error_id = str(check_trigger)
152
170
 
153
171
  self.message = message
154
172
  self.check_trigger = check_trigger
155
173
  self.additional_data = additional_data
174
+
175
+
176
@dataclass
class RetryableModelProviderError(Exception):
    """Exception carrying retry guidance for a failed model invocation.

    Attributes:
        original_error: Text of the error that prompted the retry, if any.
        retry_guidance_message: Guidance message to retry a model invocation
            after an error, if any.
    """

    original_error: Optional[str] = None
    # Guidance message to retry a model invocation after an error
    retry_guidance_message: Optional[str] = None

    def __post_init__(self) -> None:
        """Forward ``original_error`` to ``Exception`` so ``str(exc)`` is populated."""
        # The dataclass-generated __init__ never calls Exception.__init__, so
        # constructing with keywords would leave ``args`` empty and the
        # exception would render as a blank string in logs and tracebacks.
        if self.original_error is not None:
            super().__init__(self.original_error)
        else:
            super().__init__()