agno 2.1.2__py3-none-any.whl → 2.3.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (314) hide show
  1. agno/agent/agent.py +5540 -2273
  2. agno/api/api.py +2 -0
  3. agno/api/os.py +1 -1
  4. agno/compression/__init__.py +3 -0
  5. agno/compression/manager.py +247 -0
  6. agno/culture/__init__.py +3 -0
  7. agno/culture/manager.py +956 -0
  8. agno/db/async_postgres/__init__.py +3 -0
  9. agno/db/base.py +689 -6
  10. agno/db/dynamo/dynamo.py +933 -37
  11. agno/db/dynamo/schemas.py +174 -10
  12. agno/db/dynamo/utils.py +63 -4
  13. agno/db/firestore/firestore.py +831 -9
  14. agno/db/firestore/schemas.py +51 -0
  15. agno/db/firestore/utils.py +102 -4
  16. agno/db/gcs_json/gcs_json_db.py +660 -12
  17. agno/db/gcs_json/utils.py +60 -26
  18. agno/db/in_memory/in_memory_db.py +287 -14
  19. agno/db/in_memory/utils.py +60 -2
  20. agno/db/json/json_db.py +590 -14
  21. agno/db/json/utils.py +60 -26
  22. agno/db/migrations/manager.py +199 -0
  23. agno/db/migrations/v1_to_v2.py +43 -13
  24. agno/db/migrations/versions/__init__.py +0 -0
  25. agno/db/migrations/versions/v2_3_0.py +938 -0
  26. agno/db/mongo/__init__.py +15 -1
  27. agno/db/mongo/async_mongo.py +2760 -0
  28. agno/db/mongo/mongo.py +879 -11
  29. agno/db/mongo/schemas.py +42 -0
  30. agno/db/mongo/utils.py +80 -8
  31. agno/db/mysql/__init__.py +2 -1
  32. agno/db/mysql/async_mysql.py +2912 -0
  33. agno/db/mysql/mysql.py +946 -68
  34. agno/db/mysql/schemas.py +72 -10
  35. agno/db/mysql/utils.py +198 -7
  36. agno/db/postgres/__init__.py +2 -1
  37. agno/db/postgres/async_postgres.py +2579 -0
  38. agno/db/postgres/postgres.py +942 -57
  39. agno/db/postgres/schemas.py +81 -18
  40. agno/db/postgres/utils.py +164 -2
  41. agno/db/redis/redis.py +671 -7
  42. agno/db/redis/schemas.py +50 -0
  43. agno/db/redis/utils.py +65 -7
  44. agno/db/schemas/__init__.py +2 -1
  45. agno/db/schemas/culture.py +120 -0
  46. agno/db/schemas/evals.py +1 -0
  47. agno/db/schemas/memory.py +17 -2
  48. agno/db/singlestore/schemas.py +63 -0
  49. agno/db/singlestore/singlestore.py +949 -83
  50. agno/db/singlestore/utils.py +60 -2
  51. agno/db/sqlite/__init__.py +2 -1
  52. agno/db/sqlite/async_sqlite.py +2911 -0
  53. agno/db/sqlite/schemas.py +62 -0
  54. agno/db/sqlite/sqlite.py +965 -46
  55. agno/db/sqlite/utils.py +169 -8
  56. agno/db/surrealdb/__init__.py +3 -0
  57. agno/db/surrealdb/metrics.py +292 -0
  58. agno/db/surrealdb/models.py +334 -0
  59. agno/db/surrealdb/queries.py +71 -0
  60. agno/db/surrealdb/surrealdb.py +1908 -0
  61. agno/db/surrealdb/utils.py +147 -0
  62. agno/db/utils.py +2 -0
  63. agno/eval/__init__.py +10 -0
  64. agno/eval/accuracy.py +75 -55
  65. agno/eval/agent_as_judge.py +861 -0
  66. agno/eval/base.py +29 -0
  67. agno/eval/performance.py +16 -7
  68. agno/eval/reliability.py +28 -16
  69. agno/eval/utils.py +35 -17
  70. agno/exceptions.py +27 -2
  71. agno/filters.py +354 -0
  72. agno/guardrails/prompt_injection.py +1 -0
  73. agno/hooks/__init__.py +3 -0
  74. agno/hooks/decorator.py +164 -0
  75. agno/integrations/discord/client.py +1 -1
  76. agno/knowledge/chunking/agentic.py +13 -10
  77. agno/knowledge/chunking/fixed.py +4 -1
  78. agno/knowledge/chunking/semantic.py +9 -4
  79. agno/knowledge/chunking/strategy.py +59 -15
  80. agno/knowledge/embedder/fastembed.py +1 -1
  81. agno/knowledge/embedder/nebius.py +1 -1
  82. agno/knowledge/embedder/ollama.py +8 -0
  83. agno/knowledge/embedder/openai.py +8 -8
  84. agno/knowledge/embedder/sentence_transformer.py +6 -2
  85. agno/knowledge/embedder/vllm.py +262 -0
  86. agno/knowledge/knowledge.py +1618 -318
  87. agno/knowledge/reader/base.py +6 -2
  88. agno/knowledge/reader/csv_reader.py +8 -10
  89. agno/knowledge/reader/docx_reader.py +5 -6
  90. agno/knowledge/reader/field_labeled_csv_reader.py +16 -20
  91. agno/knowledge/reader/json_reader.py +5 -4
  92. agno/knowledge/reader/markdown_reader.py +8 -8
  93. agno/knowledge/reader/pdf_reader.py +17 -19
  94. agno/knowledge/reader/pptx_reader.py +101 -0
  95. agno/knowledge/reader/reader_factory.py +32 -3
  96. agno/knowledge/reader/s3_reader.py +3 -3
  97. agno/knowledge/reader/tavily_reader.py +193 -0
  98. agno/knowledge/reader/text_reader.py +22 -10
  99. agno/knowledge/reader/web_search_reader.py +1 -48
  100. agno/knowledge/reader/website_reader.py +10 -10
  101. agno/knowledge/reader/wikipedia_reader.py +33 -1
  102. agno/knowledge/types.py +1 -0
  103. agno/knowledge/utils.py +72 -7
  104. agno/media.py +22 -6
  105. agno/memory/__init__.py +14 -1
  106. agno/memory/manager.py +544 -83
  107. agno/memory/strategies/__init__.py +15 -0
  108. agno/memory/strategies/base.py +66 -0
  109. agno/memory/strategies/summarize.py +196 -0
  110. agno/memory/strategies/types.py +37 -0
  111. agno/models/aimlapi/aimlapi.py +17 -0
  112. agno/models/anthropic/claude.py +515 -40
  113. agno/models/aws/bedrock.py +102 -21
  114. agno/models/aws/claude.py +131 -274
  115. agno/models/azure/ai_foundry.py +41 -19
  116. agno/models/azure/openai_chat.py +39 -8
  117. agno/models/base.py +1249 -525
  118. agno/models/cerebras/cerebras.py +91 -21
  119. agno/models/cerebras/cerebras_openai.py +21 -2
  120. agno/models/cohere/chat.py +40 -6
  121. agno/models/cometapi/cometapi.py +18 -1
  122. agno/models/dashscope/dashscope.py +2 -3
  123. agno/models/deepinfra/deepinfra.py +18 -1
  124. agno/models/deepseek/deepseek.py +69 -3
  125. agno/models/fireworks/fireworks.py +18 -1
  126. agno/models/google/gemini.py +877 -80
  127. agno/models/google/utils.py +22 -0
  128. agno/models/groq/groq.py +51 -18
  129. agno/models/huggingface/huggingface.py +17 -6
  130. agno/models/ibm/watsonx.py +16 -6
  131. agno/models/internlm/internlm.py +18 -1
  132. agno/models/langdb/langdb.py +13 -1
  133. agno/models/litellm/chat.py +44 -9
  134. agno/models/litellm/litellm_openai.py +18 -1
  135. agno/models/message.py +28 -5
  136. agno/models/meta/llama.py +47 -14
  137. agno/models/meta/llama_openai.py +22 -17
  138. agno/models/mistral/mistral.py +8 -4
  139. agno/models/nebius/nebius.py +6 -7
  140. agno/models/nvidia/nvidia.py +20 -3
  141. agno/models/ollama/chat.py +24 -8
  142. agno/models/openai/chat.py +104 -29
  143. agno/models/openai/responses.py +101 -81
  144. agno/models/openrouter/openrouter.py +60 -3
  145. agno/models/perplexity/perplexity.py +17 -1
  146. agno/models/portkey/portkey.py +7 -6
  147. agno/models/requesty/requesty.py +24 -4
  148. agno/models/response.py +73 -2
  149. agno/models/sambanova/sambanova.py +20 -3
  150. agno/models/siliconflow/siliconflow.py +19 -2
  151. agno/models/together/together.py +20 -3
  152. agno/models/utils.py +254 -8
  153. agno/models/vercel/v0.py +20 -3
  154. agno/models/vertexai/__init__.py +0 -0
  155. agno/models/vertexai/claude.py +190 -0
  156. agno/models/vllm/vllm.py +19 -14
  157. agno/models/xai/xai.py +19 -2
  158. agno/os/app.py +549 -152
  159. agno/os/auth.py +190 -3
  160. agno/os/config.py +23 -0
  161. agno/os/interfaces/a2a/router.py +8 -11
  162. agno/os/interfaces/a2a/utils.py +1 -1
  163. agno/os/interfaces/agui/router.py +18 -3
  164. agno/os/interfaces/agui/utils.py +152 -39
  165. agno/os/interfaces/slack/router.py +55 -37
  166. agno/os/interfaces/slack/slack.py +9 -1
  167. agno/os/interfaces/whatsapp/router.py +0 -1
  168. agno/os/interfaces/whatsapp/security.py +3 -1
  169. agno/os/mcp.py +110 -52
  170. agno/os/middleware/__init__.py +2 -0
  171. agno/os/middleware/jwt.py +676 -112
  172. agno/os/router.py +40 -1478
  173. agno/os/routers/agents/__init__.py +3 -0
  174. agno/os/routers/agents/router.py +599 -0
  175. agno/os/routers/agents/schema.py +261 -0
  176. agno/os/routers/evals/evals.py +96 -39
  177. agno/os/routers/evals/schemas.py +65 -33
  178. agno/os/routers/evals/utils.py +80 -10
  179. agno/os/routers/health.py +10 -4
  180. agno/os/routers/knowledge/knowledge.py +196 -38
  181. agno/os/routers/knowledge/schemas.py +82 -22
  182. agno/os/routers/memory/memory.py +279 -52
  183. agno/os/routers/memory/schemas.py +46 -17
  184. agno/os/routers/metrics/metrics.py +20 -8
  185. agno/os/routers/metrics/schemas.py +16 -16
  186. agno/os/routers/session/session.py +462 -34
  187. agno/os/routers/teams/__init__.py +3 -0
  188. agno/os/routers/teams/router.py +512 -0
  189. agno/os/routers/teams/schema.py +257 -0
  190. agno/os/routers/traces/__init__.py +3 -0
  191. agno/os/routers/traces/schemas.py +414 -0
  192. agno/os/routers/traces/traces.py +499 -0
  193. agno/os/routers/workflows/__init__.py +3 -0
  194. agno/os/routers/workflows/router.py +624 -0
  195. agno/os/routers/workflows/schema.py +75 -0
  196. agno/os/schema.py +256 -693
  197. agno/os/scopes.py +469 -0
  198. agno/os/utils.py +514 -36
  199. agno/reasoning/anthropic.py +80 -0
  200. agno/reasoning/gemini.py +73 -0
  201. agno/reasoning/openai.py +5 -0
  202. agno/reasoning/vertexai.py +76 -0
  203. agno/run/__init__.py +6 -0
  204. agno/run/agent.py +155 -32
  205. agno/run/base.py +55 -3
  206. agno/run/requirement.py +181 -0
  207. agno/run/team.py +125 -38
  208. agno/run/workflow.py +72 -18
  209. agno/session/agent.py +102 -89
  210. agno/session/summary.py +56 -15
  211. agno/session/team.py +164 -90
  212. agno/session/workflow.py +405 -40
  213. agno/table.py +10 -0
  214. agno/team/team.py +3974 -1903
  215. agno/tools/dalle.py +2 -4
  216. agno/tools/eleven_labs.py +23 -25
  217. agno/tools/exa.py +21 -16
  218. agno/tools/file.py +153 -23
  219. agno/tools/file_generation.py +16 -10
  220. agno/tools/firecrawl.py +15 -7
  221. agno/tools/function.py +193 -38
  222. agno/tools/gmail.py +238 -14
  223. agno/tools/google_drive.py +271 -0
  224. agno/tools/googlecalendar.py +36 -8
  225. agno/tools/googlesheets.py +20 -5
  226. agno/tools/jira.py +20 -0
  227. agno/tools/mcp/__init__.py +10 -0
  228. agno/tools/mcp/mcp.py +331 -0
  229. agno/tools/mcp/multi_mcp.py +347 -0
  230. agno/tools/mcp/params.py +24 -0
  231. agno/tools/mcp_toolbox.py +3 -3
  232. agno/tools/models/nebius.py +5 -5
  233. agno/tools/models_labs.py +20 -10
  234. agno/tools/nano_banana.py +151 -0
  235. agno/tools/notion.py +204 -0
  236. agno/tools/parallel.py +314 -0
  237. agno/tools/postgres.py +76 -36
  238. agno/tools/redshift.py +406 -0
  239. agno/tools/scrapegraph.py +1 -1
  240. agno/tools/shopify.py +1519 -0
  241. agno/tools/slack.py +18 -3
  242. agno/tools/spotify.py +919 -0
  243. agno/tools/tavily.py +146 -0
  244. agno/tools/toolkit.py +25 -0
  245. agno/tools/workflow.py +8 -1
  246. agno/tools/yfinance.py +12 -11
  247. agno/tracing/__init__.py +12 -0
  248. agno/tracing/exporter.py +157 -0
  249. agno/tracing/schemas.py +276 -0
  250. agno/tracing/setup.py +111 -0
  251. agno/utils/agent.py +938 -0
  252. agno/utils/cryptography.py +22 -0
  253. agno/utils/dttm.py +33 -0
  254. agno/utils/events.py +151 -3
  255. agno/utils/gemini.py +15 -5
  256. agno/utils/hooks.py +118 -4
  257. agno/utils/http.py +113 -2
  258. agno/utils/knowledge.py +12 -5
  259. agno/utils/log.py +1 -0
  260. agno/utils/mcp.py +92 -2
  261. agno/utils/media.py +187 -1
  262. agno/utils/merge_dict.py +3 -3
  263. agno/utils/message.py +60 -0
  264. agno/utils/models/ai_foundry.py +9 -2
  265. agno/utils/models/claude.py +49 -14
  266. agno/utils/models/cohere.py +9 -2
  267. agno/utils/models/llama.py +9 -2
  268. agno/utils/models/mistral.py +4 -2
  269. agno/utils/print_response/agent.py +109 -16
  270. agno/utils/print_response/team.py +223 -30
  271. agno/utils/print_response/workflow.py +251 -34
  272. agno/utils/streamlit.py +1 -1
  273. agno/utils/team.py +98 -9
  274. agno/utils/tokens.py +657 -0
  275. agno/vectordb/base.py +39 -7
  276. agno/vectordb/cassandra/cassandra.py +21 -5
  277. agno/vectordb/chroma/chromadb.py +43 -12
  278. agno/vectordb/clickhouse/clickhousedb.py +21 -5
  279. agno/vectordb/couchbase/couchbase.py +29 -5
  280. agno/vectordb/lancedb/lance_db.py +92 -181
  281. agno/vectordb/langchaindb/langchaindb.py +24 -4
  282. agno/vectordb/lightrag/lightrag.py +17 -3
  283. agno/vectordb/llamaindex/llamaindexdb.py +25 -5
  284. agno/vectordb/milvus/milvus.py +50 -37
  285. agno/vectordb/mongodb/__init__.py +7 -1
  286. agno/vectordb/mongodb/mongodb.py +36 -30
  287. agno/vectordb/pgvector/pgvector.py +201 -77
  288. agno/vectordb/pineconedb/pineconedb.py +41 -23
  289. agno/vectordb/qdrant/qdrant.py +67 -54
  290. agno/vectordb/redis/__init__.py +9 -0
  291. agno/vectordb/redis/redisdb.py +682 -0
  292. agno/vectordb/singlestore/singlestore.py +50 -29
  293. agno/vectordb/surrealdb/surrealdb.py +31 -41
  294. agno/vectordb/upstashdb/upstashdb.py +34 -6
  295. agno/vectordb/weaviate/weaviate.py +53 -14
  296. agno/workflow/__init__.py +2 -0
  297. agno/workflow/agent.py +299 -0
  298. agno/workflow/condition.py +120 -18
  299. agno/workflow/loop.py +77 -10
  300. agno/workflow/parallel.py +231 -143
  301. agno/workflow/router.py +118 -17
  302. agno/workflow/step.py +609 -170
  303. agno/workflow/steps.py +73 -6
  304. agno/workflow/types.py +96 -21
  305. agno/workflow/workflow.py +2039 -262
  306. {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/METADATA +201 -66
  307. agno-2.3.13.dist-info/RECORD +613 -0
  308. agno/tools/googlesearch.py +0 -98
  309. agno/tools/mcp.py +0 -679
  310. agno/tools/memori.py +0 -339
  311. agno-2.1.2.dist-info/RECORD +0 -543
  312. {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/WHEEL +0 -0
  313. {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/licenses/LICENSE +0 -0
  314. {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,147 @@
1
+ import dataclasses
2
+ from typing import Any, Dict, Optional, Sequence, TypeVar, Union, cast
3
+
4
+ from surrealdb import BlockingHttpSurrealConnection, BlockingWsSurrealConnection, Surreal
5
+
6
+ from agno.db.schemas.culture import CulturalKnowledge
7
+ from agno.utils.log import logger
8
+
9
+ RecordType = TypeVar("RecordType")
10
+
11
+
12
def build_client(
    url: str, creds: dict[str, str], ns: str, db: str
) -> Union[BlockingWsSurrealConnection, BlockingHttpSurrealConnection]:
    """Create a SurrealDB connection that is signed in and scoped to a database.

    Args:
        url (str): Connection URL handed to ``Surreal``.
        creds (dict[str, str]): Credentials handed to ``signin``.
        ns (str): Namespace to select on the connection.
        db (str): Database to select on the connection.

    Returns:
        The connection object produced by ``Surreal(url=url)`` after ``signin``
        and ``use`` have been called on it.
    """
    connection = Surreal(url=url)
    # Authenticate first, then pin the namespace/database for later queries.
    connection.signin(creds)
    connection.use(namespace=ns, database=db)
    return connection
19
+
20
+
21
def _query_aux(
    client: Union[BlockingWsSurrealConnection, BlockingHttpSurrealConnection],
    query: str,
    vars: dict[str, Any],
) -> Union[list, dict, str, int]:
    """Run a query on the client and return the raw response.

    Args:
        client: Connection on which ``query`` is executed.
        query (str): Query text.
        vars (dict[str, Any]): Variables passed alongside the query.

    Returns:
        Whatever ``client.query`` returns, unmodified.

    Raises:
        RuntimeError: If ``client.query`` raises. The message is logged and the
            original exception is attached as ``__cause__``.
    """
    try:
        response = client.query(query, vars)
    except Exception as e:
        msg = f"!! Query execution error: {query} with {vars}, Error: {e}"
        logger.error(msg)
        # Chain explicitly so the driver error stays visible as the direct cause.
        raise RuntimeError(msg) from e
    return response
33
+
34
+
35
def query(
    client: Union[BlockingWsSurrealConnection, BlockingHttpSurrealConnection],
    query: str,
    vars: dict[str, Any],
    record_type: type[RecordType],
) -> Sequence[RecordType]:
    """Run a query and convert every returned row into ``record_type``.

    Args:
        client: Connection on which the query is executed.
        query (str): Query text.
        vars (dict[str, Any]): Variables passed alongside the query.
        record_type (type[RecordType]): Type used to build each record.

    Returns:
        Sequence[RecordType]: One converted record per row, in response order.

    Raises:
        ValueError: If the response is not a list.
        RuntimeError: Propagated from ``_query_aux`` when execution fails.
    """
    response = _query_aux(client, query, vars)
    if not isinstance(response, list):
        raise ValueError(f"Unexpected response type: {type(response)}")

    # Dataclasses that expose ``from_dict`` are built through it.
    if dataclasses.is_dataclass(record_type) and hasattr(record_type, "from_dict"):
        from_dict = getattr(record_type, "from_dict")
        return [from_dict(row) for row in response]

    # Call the type directly: ``record_type.__call__(row)`` would resolve a
    # ``__call__`` defined on the record class itself and run it with ``row``
    # as ``self`` instead of constructing an instance.
    factory: Any = record_type
    return [factory(**row) if isinstance(row, dict) else factory(row) for row in response]
55
+
56
+
57
def query_one(
    client: Union[BlockingWsSurrealConnection, BlockingHttpSurrealConnection],
    query: str,
    vars: dict[str, Any],
    record_type: type[RecordType],
) -> Optional[RecordType]:
    """Run a query expected to yield at most one record and convert it.

    Args:
        client: Connection on which the query is executed.
        query (str): Query text.
        vars (dict[str, Any]): Variables passed alongside the query.
        record_type (type[RecordType]): Type used to build the record.

    Returns:
        Optional[RecordType]: The converted record, or None when the response
        is None or an empty list.

    Raises:
        ValueError: If the response is a list that is neither empty nor a
            single dict.
        RuntimeError: Propagated from ``_query_aux`` when execution fails.
    """
    response = _query_aux(client, query, vars)
    if response is None:
        return None

    uses_from_dict = dataclasses.is_dataclass(record_type) and hasattr(record_type, "from_dict")
    # Call the type directly: going through ``record_type.__call__`` would pick
    # up a ``__call__`` defined on the record class and run it with the payload
    # as ``self`` instead of constructing an instance.
    factory: Any = record_type

    if not isinstance(response, list):
        if uses_from_dict:
            return getattr(record_type, "from_dict")(response)
        if isinstance(response, dict):
            return factory(**response)
        return factory(response)

    # List responses: a single-row result may arrive wrapped in a list.
    if not response:
        return None
    if len(response) == 1 and isinstance(response[0], dict):
        record = response[0]
        if uses_from_dict:
            return getattr(record_type, "from_dict")(record)
        if record_type is dict:
            # The row is already the requested representation.
            return cast(RecordType, record)
        return factory(**record)
    raise ValueError(f"Expected single record, got {len(response)} records: {response}")
89
+
90
+
91
+ # -- Cultural Knowledge util methods --
92
+
93
+
94
def serialize_cultural_knowledge_for_db(cultural_knowledge: CulturalKnowledge) -> Dict[str, Any]:
    """Pack the content-related fields of a CulturalKnowledge into one dict.

    The ``content``, ``categories`` and ``notes`` attributes are gathered under
    keys of the same name; attributes that are None are left out.

    Args:
        cultural_knowledge (CulturalKnowledge): The object to serialize.

    Returns:
        Dict[str, Any]: The non-None fields, or an empty dict if all are None.
    """
    candidates = (
        ("content", cultural_knowledge.content),
        ("categories", cultural_knowledge.categories),
        ("notes", cultural_knowledge.notes),
    )
    return {key: value for key, value in candidates if value is not None}
115
+
116
+
117
def deserialize_cultural_knowledge_from_db(db_row: Dict[str, Any]) -> CulturalKnowledge:
    """Rebuild a CulturalKnowledge object from a database row.

    The row's ``content`` entry is read as a dict holding ``content``,
    ``categories`` and ``notes``; these are lifted back to top-level fields
    next to the remaining row columns before calling
    ``CulturalKnowledge.from_dict``.

    Args:
        db_row (Dict[str, Any]): The database row as a dictionary.

    Returns:
        CulturalKnowledge: The reconstructed object.
    """
    # A missing or falsy content column is treated as an empty mapping.
    nested = db_row.get("content", {}) or {}

    payload: Dict[str, Any] = {key: db_row.get(key) for key in ("id", "name", "summary")}
    # These three come from the nested content dict, not from the row itself.
    payload.update((key, nested.get(key)) for key in ("content", "categories", "notes"))
    payload.update(
        (key, db_row.get(key))
        for key in ("metadata", "input", "created_at", "updated_at", "agent_id", "team_id")
    )
    return CulturalKnowledge.from_dict(payload)
agno/db/utils.py CHANGED
@@ -20,6 +20,8 @@ class CustomJSONEncoder(json.JSONEncoder):
20
20
  return obj.to_dict()
21
21
  elif isinstance(obj, Metrics):
22
22
  return obj.to_dict()
23
+ elif isinstance(obj, type):
24
+ return str(obj)
23
25
 
24
26
  return super().default(obj)
25
27
 
agno/eval/__init__.py CHANGED
@@ -1,4 +1,10 @@
1
1
  from agno.eval.accuracy import AccuracyAgentResponse, AccuracyEval, AccuracyEvaluation, AccuracyResult
2
+ from agno.eval.agent_as_judge import (
3
+ AgentAsJudgeEval,
4
+ AgentAsJudgeEvaluation,
5
+ AgentAsJudgeResult,
6
+ )
7
+ from agno.eval.base import BaseEval
2
8
  from agno.eval.performance import PerformanceEval, PerformanceResult
3
9
  from agno.eval.reliability import ReliabilityEval, ReliabilityResult
4
10
 
@@ -7,6 +13,10 @@ __all__ = [
7
13
  "AccuracyEvaluation",
8
14
  "AccuracyResult",
9
15
  "AccuracyEval",
16
+ "AgentAsJudgeEval",
17
+ "AgentAsJudgeEvaluation",
18
+ "AgentAsJudgeResult",
19
+ "BaseEval",
10
20
  "PerformanceEval",
11
21
  "PerformanceResult",
12
22
  "ReliabilityEval",
agno/eval/accuracy.py CHANGED
@@ -7,13 +7,13 @@ from uuid import uuid4
7
7
  from pydantic import BaseModel, Field
8
8
 
9
9
  from agno.agent import Agent
10
- from agno.db.base import BaseDb
10
+ from agno.db.base import AsyncBaseDb, BaseDb
11
11
  from agno.db.schemas.evals import EvalType
12
12
  from agno.eval.utils import async_log_eval, log_eval_run, store_result_in_file
13
13
  from agno.exceptions import EvalError
14
14
  from agno.models.base import Model
15
15
  from agno.team.team import Team
16
- from agno.utils.log import logger, set_log_level_to_debug, set_log_level_to_info
16
+ from agno.utils.log import log_error, logger, set_log_level_to_debug, set_log_level_to_info
17
17
 
18
18
  if TYPE_CHECKING:
19
19
  from rich.console import Console
@@ -176,7 +176,7 @@ class AccuracyEval:
176
176
  # Enable debug logs
177
177
  debug_mode: bool = getenv("AGNO_DEBUG", "false").lower() == "true"
178
178
  # The database to store Evaluation results
179
- db: Optional[BaseDb] = None
179
+ db: Optional[Union[BaseDb, AsyncBaseDb]] = None
180
180
 
181
181
  # Telemetry settings
182
182
  # telemetry=True logs minimal telemetry for analytics
@@ -282,7 +282,8 @@ Remember: You must only compare the agent_output to the expected_output. The exp
282
282
  ) -> Optional[AccuracyEvaluation]:
283
283
  """Orchestrate the evaluation process."""
284
284
  try:
285
- accuracy_agent_response = evaluator_agent.run(evaluation_input).content
285
+ response = evaluator_agent.run(evaluation_input, stream=False)
286
+ accuracy_agent_response = response.content
286
287
  if accuracy_agent_response is None or not isinstance(accuracy_agent_response, AccuracyAgentResponse):
287
288
  raise EvalError(f"Evaluator Agent returned an invalid response: {accuracy_agent_response}")
288
289
  return AccuracyEvaluation(
@@ -306,7 +307,7 @@ Remember: You must only compare the agent_output to the expected_output. The exp
306
307
  ) -> Optional[AccuracyEvaluation]:
307
308
  """Orchestrate the evaluation process asynchronously."""
308
309
  try:
309
- response = await evaluator_agent.arun(evaluation_input)
310
+ response = await evaluator_agent.arun(evaluation_input, stream=False)
310
311
  accuracy_agent_response = response.content
311
312
  if accuracy_agent_response is None or not isinstance(accuracy_agent_response, AccuracyAgentResponse):
312
313
  raise EvalError(f"Evaluator Agent returned an invalid response: {accuracy_agent_response}")
@@ -327,6 +328,9 @@ Remember: You must only compare the agent_output to the expected_output. The exp
327
328
  print_summary: bool = True,
328
329
  print_results: bool = True,
329
330
  ) -> Optional[AccuracyResult]:
331
+ if isinstance(self.db, AsyncBaseDb):
332
+ raise ValueError("run() is not supported with an async DB. Please use arun() instead.")
333
+
330
334
  if self.agent is None and self.team is None:
331
335
  logger.error("You need to provide one of 'agent' or 'team' to run the evaluation.")
332
336
  return None
@@ -356,10 +360,14 @@ Remember: You must only compare the agent_output to the expected_output. The exp
356
360
  status = Status(f"Running evaluation {i + 1}...", spinner="dots", speed=1.0, refresh_per_second=10)
357
361
  live_log.update(status)
358
362
 
363
+ agent_session_id = f"eval_{self.eval_id}_{i + 1}"
364
+
359
365
  if self.agent is not None:
360
- output = self.agent.run(input=eval_input).content
366
+ agent_response = self.agent.run(input=eval_input, session_id=agent_session_id, stream=False)
367
+ output = agent_response.content
361
368
  elif self.team is not None:
362
- output = self.team.run(input=eval_input).content
369
+ team_response = self.team.run(input=eval_input, session_id=agent_session_id, stream=False)
370
+ output = team_response.content
363
371
 
364
372
  if not output:
365
373
  logger.error(f"Failed to generate a valid answer on iteration {i + 1}: {output}")
@@ -497,12 +505,14 @@ Remember: You must only compare the agent_output to the expected_output. The exp
497
505
  status = Status(f"Running evaluation {i + 1}...", spinner="dots", speed=1.0, refresh_per_second=10)
498
506
  live_log.update(status)
499
507
 
508
+ agent_session_id = f"eval_{self.eval_id}_{i + 1}"
509
+
500
510
  if self.agent is not None:
501
- response = await self.agent.arun(input=eval_input)
502
- output = response.content
511
+ agent_response = await self.agent.arun(input=eval_input, session_id=agent_session_id, stream=False)
512
+ output = agent_response.content
503
513
  elif self.team is not None:
504
- response = await self.team.arun(input=eval_input) # type: ignore
505
- output = response.content
514
+ team_response = await self.team.arun(input=eval_input, session_id=agent_session_id, stream=False)
515
+ output = team_response.content
506
516
 
507
517
  if not output:
508
518
  logger.error(f"Failed to generate a valid answer on iteration {i + 1}: {output}")
@@ -609,11 +619,14 @@ Remember: You must only compare the agent_output to the expected_output. The exp
609
619
  print_results: bool = True,
610
620
  ) -> Optional[AccuracyResult]:
611
621
  """Run the evaluation logic against the given answer, instead of generating an answer with the Agent"""
622
+ # Generate unique run_id for this execution (don't modify self.eval_id due to concurrency)
623
+ run_id = str(uuid4())
624
+
612
625
  set_log_level_to_debug() if self.debug_mode else set_log_level_to_info()
613
626
 
614
627
  self.result = AccuracyResult()
615
628
 
616
- logger.debug(f"************ Evaluation Start: {self.eval_id} ************")
629
+ logger.debug(f"************ Evaluation Start: {run_id} ************")
617
630
 
618
631
  evaluator_agent = self.get_evaluator_agent()
619
632
  eval_input = self.get_eval_input()
@@ -661,47 +674,51 @@ Remember: You must only compare the agent_output to the expected_output. The exp
661
674
  )
662
675
  # Log results to the Agno DB if requested
663
676
  if self.db:
664
- if self.agent is not None:
665
- agent_id = self.agent.id
666
- team_id = None
667
- model_id = self.agent.model.id if self.agent.model is not None else None
668
- model_provider = self.agent.model.provider if self.agent.model is not None else None
669
- evaluated_component_name = self.agent.name
670
- elif self.team is not None:
671
- agent_id = None
672
- team_id = self.team.id
673
- model_id = self.team.model.id if self.team.model is not None else None
674
- model_provider = self.team.model.provider if self.team.model is not None else None
675
- evaluated_component_name = self.team.name
676
- else:
677
- agent_id = None
678
- team_id = None
679
- model_id = None
680
- model_provider = None
681
- evaluated_component_name = None
682
-
683
- log_eval_input = {
684
- "additional_guidelines": self.additional_guidelines,
685
- "additional_context": self.additional_context,
686
- "num_iterations": self.num_iterations,
687
- "expected_output": self.expected_output,
688
- "input": self.input,
689
- }
677
+ if isinstance(self.db, AsyncBaseDb):
678
+ log_error("You are using an async DB in a non-async method. The evaluation won't be stored in the DB.")
690
679
 
691
- log_eval_run(
692
- db=self.db,
693
- run_id=self.eval_id, # type: ignore
694
- run_data=asdict(self.result),
695
- eval_type=EvalType.ACCURACY,
696
- name=self.name if self.name is not None else None,
697
- agent_id=agent_id,
698
- team_id=team_id,
699
- model_id=model_id,
700
- model_provider=model_provider,
701
- evaluated_component_name=evaluated_component_name,
702
- workflow_id=None,
703
- eval_input=log_eval_input,
704
- )
680
+ else:
681
+ if self.agent is not None:
682
+ agent_id = self.agent.id
683
+ team_id = None
684
+ model_id = self.agent.model.id if self.agent.model is not None else None
685
+ model_provider = self.agent.model.provider if self.agent.model is not None else None
686
+ evaluated_component_name = self.agent.name
687
+ elif self.team is not None:
688
+ agent_id = None
689
+ team_id = self.team.id
690
+ model_id = self.team.model.id if self.team.model is not None else None
691
+ model_provider = self.team.model.provider if self.team.model is not None else None
692
+ evaluated_component_name = self.team.name
693
+ else:
694
+ agent_id = None
695
+ team_id = None
696
+ model_id = None
697
+ model_provider = None
698
+ evaluated_component_name = None
699
+
700
+ log_eval_input = {
701
+ "additional_guidelines": self.additional_guidelines,
702
+ "additional_context": self.additional_context,
703
+ "num_iterations": self.num_iterations,
704
+ "expected_output": self.expected_output,
705
+ "input": self.input,
706
+ }
707
+
708
+ log_eval_run(
709
+ db=self.db,
710
+ run_id=self.eval_id, # type: ignore
711
+ run_data=asdict(self.result),
712
+ eval_type=EvalType.ACCURACY,
713
+ name=self.name if self.name is not None else None,
714
+ agent_id=agent_id,
715
+ team_id=team_id,
716
+ model_id=model_id,
717
+ model_provider=model_provider,
718
+ evaluated_component_name=evaluated_component_name,
719
+ workflow_id=None,
720
+ eval_input=log_eval_input,
721
+ )
705
722
 
706
723
  if self.telemetry:
707
724
  from agno.api.evals import EvalRunCreate, create_eval_run_telemetry
@@ -714,7 +731,7 @@ Remember: You must only compare the agent_output to the expected_output. The exp
714
731
  ),
715
732
  )
716
733
 
717
- logger.debug(f"*********** Evaluation End: {self.eval_id} ***********")
734
+ logger.debug(f"*********** Evaluation End: {run_id} ***********")
718
735
  return self.result
719
736
 
720
737
  async def arun_with_output(
@@ -725,11 +742,14 @@ Remember: You must only compare the agent_output to the expected_output. The exp
725
742
  print_results: bool = True,
726
743
  ) -> Optional[AccuracyResult]:
727
744
  """Run the evaluation logic against the given answer, instead of generating an answer with the Agent"""
745
+ # Generate unique run_id for this execution (don't modify self.eval_id due to concurrency)
746
+ run_id = str(uuid4())
747
+
728
748
  set_log_level_to_debug() if self.debug_mode else set_log_level_to_info()
729
749
 
730
750
  self.result = AccuracyResult()
731
751
 
732
- logger.debug(f"************ Evaluation Start: {self.eval_id} ************")
752
+ logger.debug(f"************ Evaluation Start: {run_id} ************")
733
753
 
734
754
  evaluator_agent = self.get_evaluator_agent()
735
755
  eval_input = self.get_eval_input()
@@ -813,7 +833,7 @@ Remember: You must only compare the agent_output to the expected_output. The exp
813
833
  eval_input=log_eval_input,
814
834
  )
815
835
 
816
- logger.debug(f"*********** Evaluation End: {self.eval_id} ***********")
836
+ logger.debug(f"*********** Evaluation End: {run_id} ***********")
817
837
  return self.result
818
838
 
819
839
  def _get_telemetry_data(self) -> Dict[str, Any]: