agno 0.1.2__py3-none-any.whl → 2.3.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (723)
  1. agno/__init__.py +8 -0
  2. agno/agent/__init__.py +44 -5
  3. agno/agent/agent.py +10531 -2975
  4. agno/api/agent.py +14 -53
  5. agno/api/api.py +7 -46
  6. agno/api/evals.py +22 -0
  7. agno/api/os.py +17 -0
  8. agno/api/routes.py +6 -25
  9. agno/api/schemas/__init__.py +9 -0
  10. agno/api/schemas/agent.py +6 -9
  11. agno/api/schemas/evals.py +16 -0
  12. agno/api/schemas/os.py +14 -0
  13. agno/api/schemas/team.py +10 -10
  14. agno/api/schemas/utils.py +21 -0
  15. agno/api/schemas/workflows.py +16 -0
  16. agno/api/settings.py +53 -0
  17. agno/api/team.py +22 -26
  18. agno/api/workflow.py +28 -0
  19. agno/cloud/aws/base.py +214 -0
  20. agno/cloud/aws/s3/__init__.py +2 -0
  21. agno/cloud/aws/s3/api_client.py +43 -0
  22. agno/cloud/aws/s3/bucket.py +195 -0
  23. agno/cloud/aws/s3/object.py +57 -0
  24. agno/compression/__init__.py +3 -0
  25. agno/compression/manager.py +247 -0
  26. agno/culture/__init__.py +3 -0
  27. agno/culture/manager.py +956 -0
  28. agno/db/__init__.py +24 -0
  29. agno/db/async_postgres/__init__.py +3 -0
  30. agno/db/base.py +946 -0
  31. agno/db/dynamo/__init__.py +3 -0
  32. agno/db/dynamo/dynamo.py +2781 -0
  33. agno/db/dynamo/schemas.py +442 -0
  34. agno/db/dynamo/utils.py +743 -0
  35. agno/db/firestore/__init__.py +3 -0
  36. agno/db/firestore/firestore.py +2379 -0
  37. agno/db/firestore/schemas.py +181 -0
  38. agno/db/firestore/utils.py +376 -0
  39. agno/db/gcs_json/__init__.py +3 -0
  40. agno/db/gcs_json/gcs_json_db.py +1791 -0
  41. agno/db/gcs_json/utils.py +228 -0
  42. agno/db/in_memory/__init__.py +3 -0
  43. agno/db/in_memory/in_memory_db.py +1312 -0
  44. agno/db/in_memory/utils.py +230 -0
  45. agno/db/json/__init__.py +3 -0
  46. agno/db/json/json_db.py +1777 -0
  47. agno/db/json/utils.py +230 -0
  48. agno/db/migrations/manager.py +199 -0
  49. agno/db/migrations/v1_to_v2.py +635 -0
  50. agno/db/migrations/versions/v2_3_0.py +938 -0
  51. agno/db/mongo/__init__.py +17 -0
  52. agno/db/mongo/async_mongo.py +2760 -0
  53. agno/db/mongo/mongo.py +2597 -0
  54. agno/db/mongo/schemas.py +119 -0
  55. agno/db/mongo/utils.py +276 -0
  56. agno/db/mysql/__init__.py +4 -0
  57. agno/db/mysql/async_mysql.py +2912 -0
  58. agno/db/mysql/mysql.py +2923 -0
  59. agno/db/mysql/schemas.py +186 -0
  60. agno/db/mysql/utils.py +488 -0
  61. agno/db/postgres/__init__.py +4 -0
  62. agno/db/postgres/async_postgres.py +2579 -0
  63. agno/db/postgres/postgres.py +2870 -0
  64. agno/db/postgres/schemas.py +187 -0
  65. agno/db/postgres/utils.py +442 -0
  66. agno/db/redis/__init__.py +3 -0
  67. agno/db/redis/redis.py +2141 -0
  68. agno/db/redis/schemas.py +159 -0
  69. agno/db/redis/utils.py +346 -0
  70. agno/db/schemas/__init__.py +4 -0
  71. agno/db/schemas/culture.py +120 -0
  72. agno/db/schemas/evals.py +34 -0
  73. agno/db/schemas/knowledge.py +40 -0
  74. agno/db/schemas/memory.py +61 -0
  75. agno/db/singlestore/__init__.py +3 -0
  76. agno/db/singlestore/schemas.py +179 -0
  77. agno/db/singlestore/singlestore.py +2877 -0
  78. agno/db/singlestore/utils.py +384 -0
  79. agno/db/sqlite/__init__.py +4 -0
  80. agno/db/sqlite/async_sqlite.py +2911 -0
  81. agno/db/sqlite/schemas.py +181 -0
  82. agno/db/sqlite/sqlite.py +2908 -0
  83. agno/db/sqlite/utils.py +429 -0
  84. agno/db/surrealdb/__init__.py +3 -0
  85. agno/db/surrealdb/metrics.py +292 -0
  86. agno/db/surrealdb/models.py +334 -0
  87. agno/db/surrealdb/queries.py +71 -0
  88. agno/db/surrealdb/surrealdb.py +1908 -0
  89. agno/db/surrealdb/utils.py +147 -0
  90. agno/db/utils.py +118 -0
  91. agno/eval/__init__.py +24 -0
  92. agno/eval/accuracy.py +666 -276
  93. agno/eval/agent_as_judge.py +861 -0
  94. agno/eval/base.py +29 -0
  95. agno/eval/performance.py +779 -0
  96. agno/eval/reliability.py +241 -62
  97. agno/eval/utils.py +120 -0
  98. agno/exceptions.py +143 -1
  99. agno/filters.py +354 -0
  100. agno/guardrails/__init__.py +6 -0
  101. agno/guardrails/base.py +19 -0
  102. agno/guardrails/openai.py +144 -0
  103. agno/guardrails/pii.py +94 -0
  104. agno/guardrails/prompt_injection.py +52 -0
  105. agno/hooks/__init__.py +3 -0
  106. agno/hooks/decorator.py +164 -0
  107. agno/integrations/discord/__init__.py +3 -0
  108. agno/integrations/discord/client.py +203 -0
  109. agno/knowledge/__init__.py +5 -1
  110. agno/{document → knowledge}/chunking/agentic.py +22 -14
  111. agno/{document → knowledge}/chunking/document.py +2 -2
  112. agno/{document → knowledge}/chunking/fixed.py +7 -6
  113. agno/knowledge/chunking/markdown.py +151 -0
  114. agno/{document → knowledge}/chunking/recursive.py +15 -3
  115. agno/knowledge/chunking/row.py +39 -0
  116. agno/knowledge/chunking/semantic.py +91 -0
  117. agno/knowledge/chunking/strategy.py +165 -0
  118. agno/knowledge/content.py +74 -0
  119. agno/knowledge/document/__init__.py +5 -0
  120. agno/{document → knowledge/document}/base.py +12 -2
  121. agno/knowledge/embedder/__init__.py +5 -0
  122. agno/knowledge/embedder/aws_bedrock.py +343 -0
  123. agno/knowledge/embedder/azure_openai.py +210 -0
  124. agno/{embedder → knowledge/embedder}/base.py +8 -0
  125. agno/knowledge/embedder/cohere.py +323 -0
  126. agno/knowledge/embedder/fastembed.py +62 -0
  127. agno/{embedder → knowledge/embedder}/fireworks.py +1 -1
  128. agno/knowledge/embedder/google.py +258 -0
  129. agno/knowledge/embedder/huggingface.py +94 -0
  130. agno/knowledge/embedder/jina.py +182 -0
  131. agno/knowledge/embedder/langdb.py +22 -0
  132. agno/knowledge/embedder/mistral.py +206 -0
  133. agno/knowledge/embedder/nebius.py +13 -0
  134. agno/knowledge/embedder/ollama.py +154 -0
  135. agno/knowledge/embedder/openai.py +195 -0
  136. agno/knowledge/embedder/sentence_transformer.py +63 -0
  137. agno/{embedder → knowledge/embedder}/together.py +1 -1
  138. agno/knowledge/embedder/vllm.py +262 -0
  139. agno/knowledge/embedder/voyageai.py +165 -0
  140. agno/knowledge/knowledge.py +3006 -0
  141. agno/knowledge/reader/__init__.py +7 -0
  142. agno/knowledge/reader/arxiv_reader.py +81 -0
  143. agno/knowledge/reader/base.py +95 -0
  144. agno/knowledge/reader/csv_reader.py +164 -0
  145. agno/knowledge/reader/docx_reader.py +82 -0
  146. agno/knowledge/reader/field_labeled_csv_reader.py +290 -0
  147. agno/knowledge/reader/firecrawl_reader.py +201 -0
  148. agno/knowledge/reader/json_reader.py +88 -0
  149. agno/knowledge/reader/markdown_reader.py +137 -0
  150. agno/knowledge/reader/pdf_reader.py +431 -0
  151. agno/knowledge/reader/pptx_reader.py +101 -0
  152. agno/knowledge/reader/reader_factory.py +313 -0
  153. agno/knowledge/reader/s3_reader.py +89 -0
  154. agno/knowledge/reader/tavily_reader.py +193 -0
  155. agno/knowledge/reader/text_reader.py +127 -0
  156. agno/knowledge/reader/web_search_reader.py +325 -0
  157. agno/knowledge/reader/website_reader.py +455 -0
  158. agno/knowledge/reader/wikipedia_reader.py +91 -0
  159. agno/knowledge/reader/youtube_reader.py +78 -0
  160. agno/knowledge/remote_content/remote_content.py +88 -0
  161. agno/knowledge/reranker/__init__.py +3 -0
  162. agno/{reranker → knowledge/reranker}/base.py +1 -1
  163. agno/{reranker → knowledge/reranker}/cohere.py +2 -2
  164. agno/knowledge/reranker/infinity.py +195 -0
  165. agno/knowledge/reranker/sentence_transformer.py +54 -0
  166. agno/knowledge/types.py +39 -0
  167. agno/knowledge/utils.py +234 -0
  168. agno/media.py +439 -95
  169. agno/memory/__init__.py +16 -3
  170. agno/memory/manager.py +1474 -123
  171. agno/memory/strategies/__init__.py +15 -0
  172. agno/memory/strategies/base.py +66 -0
  173. agno/memory/strategies/summarize.py +196 -0
  174. agno/memory/strategies/types.py +37 -0
  175. agno/models/aimlapi/__init__.py +5 -0
  176. agno/models/aimlapi/aimlapi.py +62 -0
  177. agno/models/anthropic/__init__.py +4 -0
  178. agno/models/anthropic/claude.py +960 -496
  179. agno/models/aws/__init__.py +15 -0
  180. agno/models/aws/bedrock.py +686 -451
  181. agno/models/aws/claude.py +190 -183
  182. agno/models/azure/__init__.py +18 -1
  183. agno/models/azure/ai_foundry.py +489 -0
  184. agno/models/azure/openai_chat.py +89 -40
  185. agno/models/base.py +2477 -550
  186. agno/models/cerebras/__init__.py +12 -0
  187. agno/models/cerebras/cerebras.py +565 -0
  188. agno/models/cerebras/cerebras_openai.py +131 -0
  189. agno/models/cohere/__init__.py +4 -0
  190. agno/models/cohere/chat.py +306 -492
  191. agno/models/cometapi/__init__.py +5 -0
  192. agno/models/cometapi/cometapi.py +74 -0
  193. agno/models/dashscope/__init__.py +5 -0
  194. agno/models/dashscope/dashscope.py +90 -0
  195. agno/models/deepinfra/__init__.py +5 -0
  196. agno/models/deepinfra/deepinfra.py +45 -0
  197. agno/models/deepseek/__init__.py +4 -0
  198. agno/models/deepseek/deepseek.py +110 -9
  199. agno/models/fireworks/__init__.py +4 -0
  200. agno/models/fireworks/fireworks.py +19 -22
  201. agno/models/google/__init__.py +3 -7
  202. agno/models/google/gemini.py +1717 -662
  203. agno/models/google/utils.py +22 -0
  204. agno/models/groq/__init__.py +4 -0
  205. agno/models/groq/groq.py +391 -666
  206. agno/models/huggingface/__init__.py +4 -0
  207. agno/models/huggingface/huggingface.py +266 -538
  208. agno/models/ibm/__init__.py +5 -0
  209. agno/models/ibm/watsonx.py +432 -0
  210. agno/models/internlm/__init__.py +3 -0
  211. agno/models/internlm/internlm.py +20 -3
  212. agno/models/langdb/__init__.py +1 -0
  213. agno/models/langdb/langdb.py +60 -0
  214. agno/models/litellm/__init__.py +14 -0
  215. agno/models/litellm/chat.py +503 -0
  216. agno/models/litellm/litellm_openai.py +42 -0
  217. agno/models/llama_cpp/__init__.py +5 -0
  218. agno/models/llama_cpp/llama_cpp.py +22 -0
  219. agno/models/lmstudio/__init__.py +5 -0
  220. agno/models/lmstudio/lmstudio.py +25 -0
  221. agno/models/message.py +361 -39
  222. agno/models/meta/__init__.py +12 -0
  223. agno/models/meta/llama.py +502 -0
  224. agno/models/meta/llama_openai.py +79 -0
  225. agno/models/metrics.py +120 -0
  226. agno/models/mistral/__init__.py +4 -0
  227. agno/models/mistral/mistral.py +293 -393
  228. agno/models/nebius/__init__.py +3 -0
  229. agno/models/nebius/nebius.py +53 -0
  230. agno/models/nexus/__init__.py +3 -0
  231. agno/models/nexus/nexus.py +22 -0
  232. agno/models/nvidia/__init__.py +4 -0
  233. agno/models/nvidia/nvidia.py +22 -3
  234. agno/models/ollama/__init__.py +4 -2
  235. agno/models/ollama/chat.py +257 -492
  236. agno/models/openai/__init__.py +7 -0
  237. agno/models/openai/chat.py +725 -770
  238. agno/models/openai/like.py +16 -2
  239. agno/models/openai/responses.py +1121 -0
  240. agno/models/openrouter/__init__.py +4 -0
  241. agno/models/openrouter/openrouter.py +62 -5
  242. agno/models/perplexity/__init__.py +5 -0
  243. agno/models/perplexity/perplexity.py +203 -0
  244. agno/models/portkey/__init__.py +3 -0
  245. agno/models/portkey/portkey.py +82 -0
  246. agno/models/requesty/__init__.py +5 -0
  247. agno/models/requesty/requesty.py +69 -0
  248. agno/models/response.py +177 -7
  249. agno/models/sambanova/__init__.py +4 -0
  250. agno/models/sambanova/sambanova.py +23 -4
  251. agno/models/siliconflow/__init__.py +5 -0
  252. agno/models/siliconflow/siliconflow.py +42 -0
  253. agno/models/together/__init__.py +4 -0
  254. agno/models/together/together.py +21 -164
  255. agno/models/utils.py +266 -0
  256. agno/models/vercel/__init__.py +3 -0
  257. agno/models/vercel/v0.py +43 -0
  258. agno/models/vertexai/__init__.py +0 -1
  259. agno/models/vertexai/claude.py +190 -0
  260. agno/models/vllm/__init__.py +3 -0
  261. agno/models/vllm/vllm.py +83 -0
  262. agno/models/xai/__init__.py +2 -0
  263. agno/models/xai/xai.py +111 -7
  264. agno/os/__init__.py +3 -0
  265. agno/os/app.py +1027 -0
  266. agno/os/auth.py +244 -0
  267. agno/os/config.py +126 -0
  268. agno/os/interfaces/__init__.py +1 -0
  269. agno/os/interfaces/a2a/__init__.py +3 -0
  270. agno/os/interfaces/a2a/a2a.py +42 -0
  271. agno/os/interfaces/a2a/router.py +249 -0
  272. agno/os/interfaces/a2a/utils.py +924 -0
  273. agno/os/interfaces/agui/__init__.py +3 -0
  274. agno/os/interfaces/agui/agui.py +47 -0
  275. agno/os/interfaces/agui/router.py +147 -0
  276. agno/os/interfaces/agui/utils.py +574 -0
  277. agno/os/interfaces/base.py +25 -0
  278. agno/os/interfaces/slack/__init__.py +3 -0
  279. agno/os/interfaces/slack/router.py +148 -0
  280. agno/os/interfaces/slack/security.py +30 -0
  281. agno/os/interfaces/slack/slack.py +47 -0
  282. agno/os/interfaces/whatsapp/__init__.py +3 -0
  283. agno/os/interfaces/whatsapp/router.py +210 -0
  284. agno/os/interfaces/whatsapp/security.py +55 -0
  285. agno/os/interfaces/whatsapp/whatsapp.py +36 -0
  286. agno/os/mcp.py +293 -0
  287. agno/os/middleware/__init__.py +9 -0
  288. agno/os/middleware/jwt.py +797 -0
  289. agno/os/router.py +258 -0
  290. agno/os/routers/__init__.py +3 -0
  291. agno/os/routers/agents/__init__.py +3 -0
  292. agno/os/routers/agents/router.py +599 -0
  293. agno/os/routers/agents/schema.py +261 -0
  294. agno/os/routers/evals/__init__.py +3 -0
  295. agno/os/routers/evals/evals.py +450 -0
  296. agno/os/routers/evals/schemas.py +174 -0
  297. agno/os/routers/evals/utils.py +231 -0
  298. agno/os/routers/health.py +31 -0
  299. agno/os/routers/home.py +52 -0
  300. agno/os/routers/knowledge/__init__.py +3 -0
  301. agno/os/routers/knowledge/knowledge.py +1008 -0
  302. agno/os/routers/knowledge/schemas.py +178 -0
  303. agno/os/routers/memory/__init__.py +3 -0
  304. agno/os/routers/memory/memory.py +661 -0
  305. agno/os/routers/memory/schemas.py +88 -0
  306. agno/os/routers/metrics/__init__.py +3 -0
  307. agno/os/routers/metrics/metrics.py +190 -0
  308. agno/os/routers/metrics/schemas.py +47 -0
  309. agno/os/routers/session/__init__.py +3 -0
  310. agno/os/routers/session/session.py +997 -0
  311. agno/os/routers/teams/__init__.py +3 -0
  312. agno/os/routers/teams/router.py +512 -0
  313. agno/os/routers/teams/schema.py +257 -0
  314. agno/os/routers/traces/__init__.py +3 -0
  315. agno/os/routers/traces/schemas.py +414 -0
  316. agno/os/routers/traces/traces.py +499 -0
  317. agno/os/routers/workflows/__init__.py +3 -0
  318. agno/os/routers/workflows/router.py +624 -0
  319. agno/os/routers/workflows/schema.py +75 -0
  320. agno/os/schema.py +534 -0
  321. agno/os/scopes.py +469 -0
  322. agno/{playground → os}/settings.py +7 -15
  323. agno/os/utils.py +973 -0
  324. agno/reasoning/anthropic.py +80 -0
  325. agno/reasoning/azure_ai_foundry.py +67 -0
  326. agno/reasoning/deepseek.py +63 -0
  327. agno/reasoning/default.py +97 -0
  328. agno/reasoning/gemini.py +73 -0
  329. agno/reasoning/groq.py +71 -0
  330. agno/reasoning/helpers.py +24 -1
  331. agno/reasoning/ollama.py +67 -0
  332. agno/reasoning/openai.py +86 -0
  333. agno/reasoning/step.py +2 -1
  334. agno/reasoning/vertexai.py +76 -0
  335. agno/run/__init__.py +6 -0
  336. agno/run/agent.py +822 -0
  337. agno/run/base.py +247 -0
  338. agno/run/cancel.py +81 -0
  339. agno/run/requirement.py +181 -0
  340. agno/run/team.py +767 -0
  341. agno/run/workflow.py +708 -0
  342. agno/session/__init__.py +10 -0
  343. agno/session/agent.py +260 -0
  344. agno/session/summary.py +265 -0
  345. agno/session/team.py +342 -0
  346. agno/session/workflow.py +501 -0
  347. agno/table.py +10 -0
  348. agno/team/__init__.py +37 -0
  349. agno/team/team.py +9536 -0
  350. agno/tools/__init__.py +7 -0
  351. agno/tools/agentql.py +120 -0
  352. agno/tools/airflow.py +22 -12
  353. agno/tools/api.py +122 -0
  354. agno/tools/apify.py +276 -83
  355. agno/tools/{arxiv_toolkit.py → arxiv.py} +20 -12
  356. agno/tools/aws_lambda.py +28 -7
  357. agno/tools/aws_ses.py +66 -0
  358. agno/tools/baidusearch.py +11 -4
  359. agno/tools/bitbucket.py +292 -0
  360. agno/tools/brandfetch.py +213 -0
  361. agno/tools/bravesearch.py +106 -0
  362. agno/tools/brightdata.py +367 -0
  363. agno/tools/browserbase.py +209 -0
  364. agno/tools/calcom.py +32 -23
  365. agno/tools/calculator.py +24 -37
  366. agno/tools/cartesia.py +187 -0
  367. agno/tools/{clickup_tool.py → clickup.py} +17 -28
  368. agno/tools/confluence.py +91 -26
  369. agno/tools/crawl4ai.py +139 -43
  370. agno/tools/csv_toolkit.py +28 -22
  371. agno/tools/dalle.py +36 -22
  372. agno/tools/daytona.py +475 -0
  373. agno/tools/decorator.py +169 -14
  374. agno/tools/desi_vocal.py +23 -11
  375. agno/tools/discord.py +32 -29
  376. agno/tools/docker.py +716 -0
  377. agno/tools/duckdb.py +76 -81
  378. agno/tools/duckduckgo.py +43 -40
  379. agno/tools/e2b.py +703 -0
  380. agno/tools/eleven_labs.py +65 -54
  381. agno/tools/email.py +13 -5
  382. agno/tools/evm.py +129 -0
  383. agno/tools/exa.py +324 -42
  384. agno/tools/fal.py +39 -35
  385. agno/tools/file.py +196 -30
  386. agno/tools/file_generation.py +356 -0
  387. agno/tools/financial_datasets.py +288 -0
  388. agno/tools/firecrawl.py +108 -33
  389. agno/tools/function.py +960 -122
  390. agno/tools/giphy.py +34 -12
  391. agno/tools/github.py +1294 -97
  392. agno/tools/gmail.py +922 -0
  393. agno/tools/google_bigquery.py +117 -0
  394. agno/tools/google_drive.py +271 -0
  395. agno/tools/google_maps.py +253 -0
  396. agno/tools/googlecalendar.py +607 -107
  397. agno/tools/googlesheets.py +377 -0
  398. agno/tools/hackernews.py +20 -12
  399. agno/tools/jina.py +24 -14
  400. agno/tools/jira.py +48 -19
  401. agno/tools/knowledge.py +218 -0
  402. agno/tools/linear.py +82 -43
  403. agno/tools/linkup.py +58 -0
  404. agno/tools/local_file_system.py +15 -7
  405. agno/tools/lumalab.py +41 -26
  406. agno/tools/mcp/__init__.py +10 -0
  407. agno/tools/mcp/mcp.py +331 -0
  408. agno/tools/mcp/multi_mcp.py +347 -0
  409. agno/tools/mcp/params.py +24 -0
  410. agno/tools/mcp_toolbox.py +284 -0
  411. agno/tools/mem0.py +193 -0
  412. agno/tools/memory.py +419 -0
  413. agno/tools/mlx_transcribe.py +11 -9
  414. agno/tools/models/azure_openai.py +190 -0
  415. agno/tools/models/gemini.py +203 -0
  416. agno/tools/models/groq.py +158 -0
  417. agno/tools/models/morph.py +186 -0
  418. agno/tools/models/nebius.py +124 -0
  419. agno/tools/models_labs.py +163 -82
  420. agno/tools/moviepy_video.py +18 -13
  421. agno/tools/nano_banana.py +151 -0
  422. agno/tools/neo4j.py +134 -0
  423. agno/tools/newspaper.py +15 -4
  424. agno/tools/newspaper4k.py +19 -6
  425. agno/tools/notion.py +204 -0
  426. agno/tools/openai.py +181 -17
  427. agno/tools/openbb.py +27 -20
  428. agno/tools/opencv.py +321 -0
  429. agno/tools/openweather.py +233 -0
  430. agno/tools/oxylabs.py +385 -0
  431. agno/tools/pandas.py +25 -15
  432. agno/tools/parallel.py +314 -0
  433. agno/tools/postgres.py +238 -185
  434. agno/tools/pubmed.py +125 -13
  435. agno/tools/python.py +48 -35
  436. agno/tools/reasoning.py +283 -0
  437. agno/tools/reddit.py +207 -29
  438. agno/tools/redshift.py +406 -0
  439. agno/tools/replicate.py +69 -26
  440. agno/tools/resend.py +11 -6
  441. agno/tools/scrapegraph.py +179 -19
  442. agno/tools/searxng.py +23 -31
  443. agno/tools/serpapi.py +15 -10
  444. agno/tools/serper.py +255 -0
  445. agno/tools/shell.py +23 -12
  446. agno/tools/shopify.py +1519 -0
  447. agno/tools/slack.py +56 -14
  448. agno/tools/sleep.py +8 -6
  449. agno/tools/spider.py +35 -11
  450. agno/tools/spotify.py +919 -0
  451. agno/tools/sql.py +34 -19
  452. agno/tools/tavily.py +158 -8
  453. agno/tools/telegram.py +18 -8
  454. agno/tools/todoist.py +218 -0
  455. agno/tools/toolkit.py +134 -9
  456. agno/tools/trafilatura.py +388 -0
  457. agno/tools/trello.py +25 -28
  458. agno/tools/twilio.py +18 -9
  459. agno/tools/user_control_flow.py +78 -0
  460. agno/tools/valyu.py +228 -0
  461. agno/tools/visualization.py +467 -0
  462. agno/tools/webbrowser.py +28 -0
  463. agno/tools/webex.py +76 -0
  464. agno/tools/website.py +23 -19
  465. agno/tools/webtools.py +45 -0
  466. agno/tools/whatsapp.py +286 -0
  467. agno/tools/wikipedia.py +28 -19
  468. agno/tools/workflow.py +285 -0
  469. agno/tools/{twitter.py → x.py} +142 -46
  470. agno/tools/yfinance.py +41 -39
  471. agno/tools/youtube.py +34 -17
  472. agno/tools/zendesk.py +15 -5
  473. agno/tools/zep.py +454 -0
  474. agno/tools/zoom.py +86 -37
  475. agno/tracing/__init__.py +12 -0
  476. agno/tracing/exporter.py +157 -0
  477. agno/tracing/schemas.py +276 -0
  478. agno/tracing/setup.py +111 -0
  479. agno/utils/agent.py +938 -0
  480. agno/utils/audio.py +37 -1
  481. agno/utils/certs.py +27 -0
  482. agno/utils/code_execution.py +11 -0
  483. agno/utils/common.py +103 -20
  484. agno/utils/cryptography.py +22 -0
  485. agno/utils/dttm.py +33 -0
  486. agno/utils/events.py +700 -0
  487. agno/utils/functions.py +107 -37
  488. agno/utils/gemini.py +426 -0
  489. agno/utils/hooks.py +171 -0
  490. agno/utils/http.py +185 -0
  491. agno/utils/json_schema.py +159 -37
  492. agno/utils/knowledge.py +36 -0
  493. agno/utils/location.py +19 -0
  494. agno/utils/log.py +221 -8
  495. agno/utils/mcp.py +214 -0
  496. agno/utils/media.py +335 -14
  497. agno/utils/merge_dict.py +22 -1
  498. agno/utils/message.py +77 -2
  499. agno/utils/models/ai_foundry.py +50 -0
  500. agno/utils/models/claude.py +373 -0
  501. agno/utils/models/cohere.py +94 -0
  502. agno/utils/models/llama.py +85 -0
  503. agno/utils/models/mistral.py +100 -0
  504. agno/utils/models/openai_responses.py +140 -0
  505. agno/utils/models/schema_utils.py +153 -0
  506. agno/utils/models/watsonx.py +41 -0
  507. agno/utils/openai.py +257 -0
  508. agno/utils/pickle.py +1 -1
  509. agno/utils/pprint.py +124 -8
  510. agno/utils/print_response/agent.py +930 -0
  511. agno/utils/print_response/team.py +1914 -0
  512. agno/utils/print_response/workflow.py +1668 -0
  513. agno/utils/prompts.py +111 -0
  514. agno/utils/reasoning.py +108 -0
  515. agno/utils/response.py +163 -0
  516. agno/utils/serialize.py +32 -0
  517. agno/utils/shell.py +4 -4
  518. agno/utils/streamlit.py +487 -0
  519. agno/utils/string.py +204 -51
  520. agno/utils/team.py +139 -0
  521. agno/utils/timer.py +9 -2
  522. agno/utils/tokens.py +657 -0
  523. agno/utils/tools.py +19 -1
  524. agno/utils/whatsapp.py +305 -0
  525. agno/utils/yaml_io.py +3 -3
  526. agno/vectordb/__init__.py +2 -0
  527. agno/vectordb/base.py +87 -9
  528. agno/vectordb/cassandra/__init__.py +5 -1
  529. agno/vectordb/cassandra/cassandra.py +383 -27
  530. agno/vectordb/chroma/__init__.py +4 -0
  531. agno/vectordb/chroma/chromadb.py +748 -83
  532. agno/vectordb/clickhouse/__init__.py +7 -1
  533. agno/vectordb/clickhouse/clickhousedb.py +554 -53
  534. agno/vectordb/couchbase/__init__.py +3 -0
  535. agno/vectordb/couchbase/couchbase.py +1446 -0
  536. agno/vectordb/lancedb/__init__.py +5 -0
  537. agno/vectordb/lancedb/lance_db.py +730 -98
  538. agno/vectordb/langchaindb/__init__.py +5 -0
  539. agno/vectordb/langchaindb/langchaindb.py +163 -0
  540. agno/vectordb/lightrag/__init__.py +5 -0
  541. agno/vectordb/lightrag/lightrag.py +388 -0
  542. agno/vectordb/llamaindex/__init__.py +3 -0
  543. agno/vectordb/llamaindex/llamaindexdb.py +166 -0
  544. agno/vectordb/milvus/__init__.py +3 -0
  545. agno/vectordb/milvus/milvus.py +966 -78
  546. agno/vectordb/mongodb/__init__.py +9 -1
  547. agno/vectordb/mongodb/mongodb.py +1175 -172
  548. agno/vectordb/pgvector/__init__.py +8 -0
  549. agno/vectordb/pgvector/pgvector.py +599 -115
  550. agno/vectordb/pineconedb/__init__.py +5 -1
  551. agno/vectordb/pineconedb/pineconedb.py +406 -43
  552. agno/vectordb/qdrant/__init__.py +4 -0
  553. agno/vectordb/qdrant/qdrant.py +914 -61
  554. agno/vectordb/redis/__init__.py +9 -0
  555. agno/vectordb/redis/redisdb.py +682 -0
  556. agno/vectordb/singlestore/__init__.py +8 -1
  557. agno/vectordb/singlestore/singlestore.py +771 -0
  558. agno/vectordb/surrealdb/__init__.py +3 -0
  559. agno/vectordb/surrealdb/surrealdb.py +663 -0
  560. agno/vectordb/upstashdb/__init__.py +5 -0
  561. agno/vectordb/upstashdb/upstashdb.py +718 -0
  562. agno/vectordb/weaviate/__init__.py +8 -0
  563. agno/vectordb/weaviate/index.py +15 -0
  564. agno/vectordb/weaviate/weaviate.py +1009 -0
  565. agno/workflow/__init__.py +23 -1
  566. agno/workflow/agent.py +299 -0
  567. agno/workflow/condition.py +759 -0
  568. agno/workflow/loop.py +756 -0
  569. agno/workflow/parallel.py +853 -0
  570. agno/workflow/router.py +723 -0
  571. agno/workflow/step.py +1564 -0
  572. agno/workflow/steps.py +613 -0
  573. agno/workflow/types.py +556 -0
  574. agno/workflow/workflow.py +4327 -514
  575. agno-2.3.13.dist-info/METADATA +639 -0
  576. agno-2.3.13.dist-info/RECORD +613 -0
  577. {agno-0.1.2.dist-info → agno-2.3.13.dist-info}/WHEEL +1 -1
  578. agno-2.3.13.dist-info/licenses/LICENSE +201 -0
  579. agno/api/playground.py +0 -91
  580. agno/api/schemas/playground.py +0 -22
  581. agno/api/schemas/user.py +0 -22
  582. agno/api/schemas/workspace.py +0 -46
  583. agno/api/user.py +0 -160
  584. agno/api/workspace.py +0 -151
  585. agno/cli/auth_server.py +0 -118
  586. agno/cli/config.py +0 -275
  587. agno/cli/console.py +0 -88
  588. agno/cli/credentials.py +0 -23
  589. agno/cli/entrypoint.py +0 -571
  590. agno/cli/operator.py +0 -355
  591. agno/cli/settings.py +0 -85
  592. agno/cli/ws/ws_cli.py +0 -817
  593. agno/constants.py +0 -13
  594. agno/document/__init__.py +0 -1
  595. agno/document/chunking/semantic.py +0 -47
  596. agno/document/chunking/strategy.py +0 -31
  597. agno/document/reader/__init__.py +0 -1
  598. agno/document/reader/arxiv_reader.py +0 -41
  599. agno/document/reader/base.py +0 -22
  600. agno/document/reader/csv_reader.py +0 -84
  601. agno/document/reader/docx_reader.py +0 -46
  602. agno/document/reader/firecrawl_reader.py +0 -99
  603. agno/document/reader/json_reader.py +0 -43
  604. agno/document/reader/pdf_reader.py +0 -219
  605. agno/document/reader/s3/pdf_reader.py +0 -46
  606. agno/document/reader/s3/text_reader.py +0 -51
  607. agno/document/reader/text_reader.py +0 -41
  608. agno/document/reader/website_reader.py +0 -175
  609. agno/document/reader/youtube_reader.py +0 -50
  610. agno/embedder/__init__.py +0 -1
  611. agno/embedder/azure_openai.py +0 -86
  612. agno/embedder/cohere.py +0 -72
  613. agno/embedder/fastembed.py +0 -37
  614. agno/embedder/google.py +0 -73
  615. agno/embedder/huggingface.py +0 -54
  616. agno/embedder/mistral.py +0 -80
  617. agno/embedder/ollama.py +0 -57
  618. agno/embedder/openai.py +0 -74
  619. agno/embedder/sentence_transformer.py +0 -38
  620. agno/embedder/voyageai.py +0 -64
  621. agno/eval/perf.py +0 -201
  622. agno/file/__init__.py +0 -1
  623. agno/file/file.py +0 -16
  624. agno/file/local/csv.py +0 -32
  625. agno/file/local/txt.py +0 -19
  626. agno/infra/app.py +0 -240
  627. agno/infra/base.py +0 -144
  628. agno/infra/context.py +0 -20
  629. agno/infra/db_app.py +0 -52
  630. agno/infra/resource.py +0 -205
  631. agno/infra/resources.py +0 -55
  632. agno/knowledge/agent.py +0 -230
  633. agno/knowledge/arxiv.py +0 -22
  634. agno/knowledge/combined.py +0 -22
  635. agno/knowledge/csv.py +0 -28
  636. agno/knowledge/csv_url.py +0 -19
  637. agno/knowledge/document.py +0 -20
  638. agno/knowledge/docx.py +0 -30
  639. agno/knowledge/json.py +0 -28
  640. agno/knowledge/langchain.py +0 -71
  641. agno/knowledge/llamaindex.py +0 -66
  642. agno/knowledge/pdf.py +0 -28
  643. agno/knowledge/pdf_url.py +0 -26
  644. agno/knowledge/s3/base.py +0 -60
  645. agno/knowledge/s3/pdf.py +0 -21
  646. agno/knowledge/s3/text.py +0 -23
  647. agno/knowledge/text.py +0 -30
  648. agno/knowledge/website.py +0 -88
  649. agno/knowledge/wikipedia.py +0 -31
  650. agno/knowledge/youtube.py +0 -22
  651. agno/memory/agent.py +0 -392
  652. agno/memory/classifier.py +0 -104
  653. agno/memory/db/__init__.py +0 -1
  654. agno/memory/db/base.py +0 -42
  655. agno/memory/db/mongodb.py +0 -189
  656. agno/memory/db/postgres.py +0 -203
  657. agno/memory/db/sqlite.py +0 -193
  658. agno/memory/memory.py +0 -15
  659. agno/memory/row.py +0 -36
  660. agno/memory/summarizer.py +0 -192
  661. agno/memory/summary.py +0 -19
  662. agno/memory/workflow.py +0 -38
  663. agno/models/google/gemini_openai.py +0 -26
  664. agno/models/ollama/hermes.py +0 -221
  665. agno/models/ollama/tools.py +0 -362
  666. agno/models/vertexai/gemini.py +0 -595
  667. agno/playground/__init__.py +0 -3
  668. agno/playground/async_router.py +0 -421
  669. agno/playground/deploy.py +0 -249
  670. agno/playground/operator.py +0 -92
  671. agno/playground/playground.py +0 -91
  672. agno/playground/schemas.py +0 -76
  673. agno/playground/serve.py +0 -55
  674. agno/playground/sync_router.py +0 -405
  675. agno/reasoning/agent.py +0 -68
  676. agno/run/response.py +0 -112
  677. agno/storage/agent/__init__.py +0 -0
  678. agno/storage/agent/base.py +0 -38
  679. agno/storage/agent/dynamodb.py +0 -350
  680. agno/storage/agent/json.py +0 -92
  681. agno/storage/agent/mongodb.py +0 -228
  682. agno/storage/agent/postgres.py +0 -367
  683. agno/storage/agent/session.py +0 -79
  684. agno/storage/agent/singlestore.py +0 -303
  685. agno/storage/agent/sqlite.py +0 -357
  686. agno/storage/agent/yaml.py +0 -93
  687. agno/storage/workflow/__init__.py +0 -0
  688. agno/storage/workflow/base.py +0 -40
  689. agno/storage/workflow/mongodb.py +0 -233
  690. agno/storage/workflow/postgres.py +0 -366
  691. agno/storage/workflow/session.py +0 -60
  692. agno/storage/workflow/sqlite.py +0 -359
  693. agno/tools/googlesearch.py +0 -88
  694. agno/utils/defaults.py +0 -57
  695. agno/utils/filesystem.py +0 -39
  696. agno/utils/git.py +0 -52
  697. agno/utils/json_io.py +0 -30
  698. agno/utils/load_env.py +0 -19
  699. agno/utils/py_io.py +0 -19
  700. agno/utils/pyproject.py +0 -18
  701. agno/utils/resource_filter.py +0 -31
  702. agno/vectordb/singlestore/s2vectordb.py +0 -390
  703. agno/vectordb/singlestore/s2vectordb2.py +0 -355
  704. agno/workspace/__init__.py +0 -0
  705. agno/workspace/config.py +0 -325
  706. agno/workspace/enums.py +0 -6
  707. agno/workspace/helpers.py +0 -48
  708. agno/workspace/operator.py +0 -758
  709. agno/workspace/settings.py +0 -63
  710. agno-0.1.2.dist-info/LICENSE +0 -375
  711. agno-0.1.2.dist-info/METADATA +0 -502
  712. agno-0.1.2.dist-info/RECORD +0 -352
  713. agno-0.1.2.dist-info/entry_points.txt +0 -3
  714. /agno/{cli → db/migrations}/__init__.py +0 -0
  715. /agno/{cli/ws → db/migrations/versions}/__init__.py +0 -0
  716. /agno/{document/chunking/__init__.py → db/schemas/metrics.py} +0 -0
  717. /agno/{document/reader/s3 → integrations}/__init__.py +0 -0
  718. /agno/{file/local → knowledge/chunking}/__init__.py +0 -0
  719. /agno/{infra → knowledge/remote_content}/__init__.py +0 -0
  720. /agno/{knowledge/s3 → tools/models}/__init__.py +0 -0
  721. /agno/{reranker → utils/models}/__init__.py +0 -0
  722. /agno/{storage → utils/print_response}/__init__.py +0 -0
  723. {agno-0.1.2.dist-info → agno-2.3.13.dist-info}/top_level.txt +0 -0
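Most of the relocations above follow a single pattern: readers, chunking, embedders, and rerankers moved under agno.knowledge, the playground/cli/workspace modules were removed in favour of agno.os, and a few toolkits were renamed (arxiv_toolkit.py → arxiv.py, clickup_tool.py → clickup.py, twitter.py → x.py). As a rough orientation aid, the sketch below maps a few old module paths to their new locations; only the module paths are taken from the file list above, and the helper itself (import_migrated) is illustrative, not part of agno.

import importlib

# Old (0.1.2) module path -> new (2.3.13) module path, per the rename entries above.
# Illustrative subset only, not an exhaustive migration table.
RENAMED_MODULES = {
    "agno.embedder.base": "agno.knowledge.embedder.base",
    "agno.document.chunking.fixed": "agno.knowledge.chunking.fixed",
    "agno.reranker.cohere": "agno.knowledge.reranker.cohere",
    "agno.tools.arxiv_toolkit": "agno.tools.arxiv",
    "agno.tools.twitter": "agno.tools.x",
}

def import_migrated(module_path: str):
    """Import a module by its 0.1.2 path, redirecting to its 2.3.13 location when it moved."""
    return importlib.import_module(RENAMED_MODULES.get(module_path, module_path))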
agno/eval/accuracy.py CHANGED
@@ -1,14 +1,19 @@
  from dataclasses import asdict, dataclass, field
  from os import getenv
- from pathlib import Path
- from typing import TYPE_CHECKING, Callable, List, Optional, Union
+ from textwrap import dedent
+ from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Union
  from uuid import uuid4

  from pydantic import BaseModel, Field

- from agno.agent import Agent, RunResponse
+ from agno.agent import Agent
+ from agno.db.base import AsyncBaseDb, BaseDb
+ from agno.db.schemas.evals import EvalType
+ from agno.eval.utils import async_log_eval, log_eval_run, store_result_in_file
+ from agno.exceptions import EvalError
  from agno.models.base import Model
- from agno.utils.log import logger, set_log_level_to_debug, set_log_level_to_info
+ from agno.team.team import Team
+ from agno.utils.log import log_error, logger, set_log_level_to_debug, set_log_level_to_info

  if TYPE_CHECKING:
  from rich.console import Console
@@ -21,9 +26,9 @@ class AccuracyAgentResponse(BaseModel):

  @dataclass
  class AccuracyEvaluation:
- question: str
- answer: str
- expected_answer: str
+ input: str
+ output: str
+ expected_output: str
  score: int
  reason: str

@@ -44,9 +49,9 @@ class AccuracyEvaluation:
  title_style="bold sky_blue1",
  title_justify="center",
  )
- results_table.add_row("Question", self.question)
- results_table.add_row("Answer", self.answer)
- results_table.add_row("Expected Answer", self.expected_answer)
+ results_table.add_row("Input", self.input)
+ results_table.add_row("Output", self.output)
+ results_table.add_row("Expected Output", self.expected_output)
  results_table.add_row("Accuracy Score", f"{str(self.score)}/10")
  results_table.add_row("Accuracy Reason", Markdown(self.reason))
  console.print(results_table)
@@ -92,11 +97,18 @@ class AccuracyResult:
  title_justify="center",
  )
  summary_table.add_row("Number of Runs", f"{len(self.results)}")
- summary_table.add_row("Average Score", f"{self.avg_score:.2f}")
- summary_table.add_row("Mean Score", f"{self.mean_score:.2f}")
- summary_table.add_row("Minimum Score", f"{self.min_score:.2f}")
- summary_table.add_row("Maximum Score", f"{self.max_score:.2f}")
- summary_table.add_row("Standard Deviation", f"{self.std_dev_score:.2f}")
+
+ if self.avg_score is not None:
+ summary_table.add_row("Average Score", f"{self.avg_score:.2f}")
+ if self.mean_score is not None:
+ summary_table.add_row("Mean Score", f"{self.mean_score:.2f}")
+ if self.min_score is not None:
+ summary_table.add_row("Minimum Score", f"{self.min_score:.2f}")
+ if self.max_score is not None:
+ summary_table.add_row("Maximum Score", f"{self.max_score:.2f}")
+ if self.std_dev_score is not None:
+ summary_table.add_row("Standard Deviation", f"{self.std_dev_score:.2f}")
+
  console.print(summary_table)

  def print_results(self, console: Optional["Console"] = None):
@@ -116,9 +128,9 @@ class AccuracyResult:
  title_justify="center",
  )
  for result in self.results:
- results_table.add_row("Question", result.question)
- results_table.add_row("Answer", result.answer)
- results_table.add_row("Expected Answer", result.expected_answer)
+ results_table.add_row("Input", result.input)
+ results_table.add_row("Output", result.output)
+ results_table.add_row("Expected Output", result.expected_output)
  results_table.add_row("Accuracy Score", f"{str(result.score)}/10")
  results_table.add_row("Accuracy Reason", result.reason)
  console.print(results_table)
@@ -126,61 +138,53 @@ class AccuracyResult:

  @dataclass
  class AccuracyEval:
- """Evaluate the accuracy of an agent's answer."""
+ """Interface to evaluate the accuracy of an Agent or Team, given a prompt and expected answer"""
+
+ # Input to evaluate
+ input: Union[str, Callable]
+ # Expected answer to the input
+ expected_output: Union[str, Callable]
+ # Agent to evaluate
+ agent: Optional[Agent] = None
+ # Team to evaluate
+ team: Optional[Team] = None

  # Evaluation name
  name: Optional[str] = None
- # Evaluation UUID (autogenerated if not set)
- eval_id: Optional[str] = None
+ # Evaluation UUID
+ eval_id: str = field(default_factory=lambda: str(uuid4()))
+ # Number of iterations to run
+ num_iterations: int = 1
+ # Result of the evaluation
+ result: Optional[AccuracyResult] = None

- # Model used to evaluate the answer
+ # Model for the evaluator agent
  model: Optional[Model] = None
-
- # Evaluate an Agent
- agent: Optional[Agent] = None
- # Question to evaluate (can also be provided with the run method)
- question: Optional[Union[str, Callable]] = None
- # Answer to evaluate (can also be provided with the run method)
- answer: Optional[Union[str, Callable]] = None
- # Expected Answer for the question (can also be provided with the run method)
- expected_answer: Optional[Union[str, Callable]] = None
-
+ # Agent used to evaluate the answer
  evaluator_agent: Optional[Agent] = None
  # Guidelines for the evaluator agent
- evaluator_guidelines: Optional[List[str]] = None
+ additional_guidelines: Optional[Union[str, List[str]]] = None
  # Additional context to the evaluator agent
- evaluator_context: Optional[str] = None
-
- # Number of iterations to run
- num_iterations: int = 3
- # Result of the evaluation
- result: Optional[AccuracyResult] = None
+ additional_context: Optional[str] = None

  # Print summary of results
  print_summary: bool = False
  # Print detailed results
  print_results: bool = False
- # Save the result to a file
- save_result_to_file: Optional[str] = None
-
- # debug_mode=True enables debug logs
- debug_mode: bool = False
-
- def set_eval_id(self) -> str:
- if self.eval_id is None:
- self.eval_id = str(uuid4())
- logger.debug(f"*********** Evaluation ID: {self.eval_id} ***********")
- return self.eval_id
-
- def set_debug_mode(self) -> None:
- if self.debug_mode or getenv("AGNO_DEBUG", "false").lower() == "true":
- self.debug_mode = True
- set_log_level_to_debug()
- logger.debug("Debug logs enabled")
- else:
- set_log_level_to_info()
-
- def get_evaluator_agent(self, question: str, expected_answer: str) -> Agent:
+ # If set, results will be saved in the given file path
+ file_path_to_save_results: Optional[str] = None
+ # Enable debug logs
+ debug_mode: bool = getenv("AGNO_DEBUG", "false").lower() == "true"
+ # The database to store Evaluation results
+ db: Optional[Union[BaseDb, AsyncBaseDb]] = None
+
+ # Telemetry settings
+ # telemetry=True logs minimal telemetry for analytics
+ # This helps us improve our Evals and provide better support
+ telemetry: bool = True
+
+ def get_evaluator_agent(self) -> Agent:
+ """Return the evaluator agent. If not provided, build it based on the evaluator fields and default instructions."""
  if self.evaluator_agent is not None:
  return self.evaluator_agent

@@ -189,269 +193,655 @@ class AccuracyEval:
189
193
  try:
190
194
  from agno.models.openai import OpenAIChat
191
195
 
192
- model = OpenAIChat(id="gpt-4o-mini")
196
+ model = OpenAIChat(id="o4-mini")
193
197
  except (ModuleNotFoundError, ImportError) as e:
194
198
  logger.exception(e)
195
- logger.error(
199
+ raise EvalError(
196
200
  "Agno uses `openai` as the default model provider. Please run `pip install openai` to use the default evaluator."
197
201
  )
198
- exit(1)
199
202
 
200
- evaluator_guidelines = ""
201
- if self.evaluator_guidelines is not None and len(self.evaluator_guidelines) > 0:
202
- evaluator_guidelines = "\n## Guidelines for the Agent's answer:\n"
203
- evaluator_guidelines += "\n- ".join(self.evaluator_guidelines)
204
- evaluator_guidelines += "\n"
205
-
206
- evaluator_context = ""
207
- if self.evaluator_context is not None and len(self.evaluator_context) > 0:
208
- evaluator_context = "## Additional Context:\n"
209
- evaluator_context += self.evaluator_context
210
- evaluator_context += "\n"
203
+ additional_guidelines = ""
204
+ if self.additional_guidelines is not None:
205
+ additional_guidelines = "\n## Additional Guidelines\n"
206
+ if isinstance(self.additional_guidelines, str):
207
+ additional_guidelines += self.additional_guidelines
208
+ else:
209
+ additional_guidelines += "\n- ".join(self.additional_guidelines)
210
+ additional_guidelines += "\n"
211
+
212
+ additional_context = ""
213
+ if self.additional_context is not None and len(self.additional_context) > 0:
214
+ additional_context = "\n## Additional Context\n"
215
+ additional_context += self.additional_context
216
+ additional_context += "\n"
211
217
 
212
218
  return Agent(
213
- model=OpenAIChat(id="gpt-4o-mini"),
219
+ model=model,
214
220
  description=f"""\
215
- You are an Agent Evaluator tasked with assessing the accuracy of an AI Agent's answer compared to an expected answer for a given question.
216
- Your task is to provide a detailed analysis and assign a score on a scale of 1 to 10, where 10 indicates a perfect match to the expected answer.
217
-
218
- ## Question:
219
- {question}
220
-
221
- ## Expected Answer:
222
- {expected_answer}
223
-
224
- ## Evaluation Criteria:
225
- 1. Accuracy of information
226
- 2. Completeness of the answer
227
- 3. Relevance to the question
228
- 4. Use of key concepts and ideas
229
- 5. Overall structure and clarity of presentation
230
- {evaluator_guidelines}{evaluator_context}
231
- ## Instructions:
232
- 1. Carefully compare the AI Agent's answer to the expected answer.
233
- 2. Provide a detailed analysis, highlighting:
234
- - Specific similarities and differences
235
- - Key points included or missed
236
- - Any inaccuracies or misconceptions
237
- 3. Explicitly reference the evaluation criteria and any provided guidelines in your reasoning.
238
- 4. Assign a score from 1 to 10 (use only whole numbers) based on the following scale:
239
- 1-2: Completely incorrect or irrelevant
240
- 3-4: Major inaccuracies or missing crucial information
241
- 5-6: Partially correct, but with significant omissions or errors
221
+ You are an expert judge tasked with comparing the quality of an AI Agents output to a user-provided expected output. You must assume the expected_output is correct - even if you personally disagree.
222
+
223
+ ## Evaluation Inputs
224
+ - agent_input: The original task or query given to the Agent.
225
+ - expected_output: The correct response to the task (provided by the user).
226
+ - NOTE: You must assume the expected_output is correct - even if you personally disagree.
227
+ - agent_output: The response generated by the Agent.
228
+
229
+ ## Evaluation Criteria
230
+ - Accuracy: How closely does the agent_output match the expected_output?
231
+ - Completeness: Does the agent_output include all the key elements of the expected_output?
232
+
233
+ ## Instructions
234
+ 1. Compare the agent_output only to the expected_output, not what you think the expected_output should be.
235
+ 2. Do not judge the correctness of the expected_output itself. Your role is only to compare the two outputs, the user provided expected_output is correct.
236
+ 3. Follow the additional guidelines if provided.
237
+ 4. Provide a detailed analysis including:
238
+ - Specific similarities and differences
239
+ - Important points included or omitted
240
+ - Any inaccuracies, paraphrasing errors, or structural differences
241
+ 5. Reference the criteria explicitly in your reasoning.
242
+ 6. Assign a score from 1 to 10 (whole numbers only):
243
+ 1-2: Completely incorrect or irrelevant.
244
+ 3-4: Major inaccuracies or missing key information.
245
+ 5-6: Partially correct, but with significant issues.
242
246
  7-8: Mostly accurate and complete, with minor issues
243
- 9-10: Highly accurate and complete, matching the expected answer closely
244
-
245
- Your evaluation should be objective, thorough, and well-reasoned. Provide specific examples from both answers to support your assessment.""",
246
- response_model=AccuracyAgentResponse,
247
+ 9-10: Highly accurate and complete, matching the expected answer and given guidelines closely.
248
+ {additional_guidelines}{additional_context}
249
+ Remember: You must only compare the agent_output to the expected_output. The expected_output is correct as it was provided by the user.
250
+ """,
251
+ output_schema=AccuracyAgentResponse,
247
252
  structured_outputs=True,
248
253
  )
249
254
 
250
- def get_question_to_evaluate(self, question: Optional[Union[str, Callable]] = None) -> Optional[str]:
251
- """Get the question to evaluate."""
252
- try:
253
- # Get question from the run method
254
- if question is not None:
255
- if isinstance(question, str):
256
- return question
257
- elif callable(question):
258
- _question = question()
259
- if isinstance(_question, str):
260
- return _question
261
- else:
262
- logger.error("Question is not a string")
263
- else:
264
- logger.error("Question is not a string or callable")
265
-
266
- # Get the question from the eval
267
- if self.question is not None:
268
- if isinstance(self.question, str):
269
- return self.question
270
- elif callable(self.question):
271
- _question = self.question()
272
- if isinstance(_question, str):
273
- return _question
274
- else:
275
- logger.error("Question is not a string")
276
- else:
277
- logger.error("Question is not a string or callable")
278
- except Exception as e:
279
- logger.error(f"Failed to get question to evaluate: {e}")
280
- return None
281
-
282
- def get_answer_to_evaluate(
283
- self, question: str, answer: Optional[Union[str, Callable]] = None
284
- ) -> Optional[RunResponse]:
285
- """Get the answer to evaluate.
286
-
287
- Priority:
288
- 1. Answer provided with the run method
289
- 2. Answer provided with the eval
290
- 3. Answer from the agent
291
- """
255
+ def get_eval_expected_output(self) -> str:
256
+ """Return the eval expected answer. If it is a callable, call it and return the resulting string"""
257
+ if callable(self.expected_output):
258
+ _output = self.expected_output()
259
+ if isinstance(_output, str):
260
+ return _output
261
+ else:
262
+ raise EvalError(f"The expected output needs to be or return a string, but it returned: {type(_output)}")
263
+ return self.expected_output
264
+
265
+ def get_eval_input(self) -> str:
266
+ """Return the evaluation input. If it is a callable, call it and return the resulting string"""
267
+ if callable(self.input):
268
+ _input = self.input()
269
+ if isinstance(_input, str):
270
+ return _input
271
+ else:
272
+ raise EvalError(f"The eval input needs to be or return a string, but it returned: {type(_input)}")
273
+ return self.input
274
+
275
+ def evaluate_answer(
276
+ self,
277
+ input: str,
278
+ evaluator_agent: Agent,
279
+ evaluation_input: str,
280
+ evaluator_expected_output: str,
281
+ agent_output: str,
282
+ ) -> Optional[AccuracyEvaluation]:
283
+ """Orchestrate the evaluation process."""
292
284
  try:
293
- # Get answer from the run method
294
- if answer is not None:
295
- if isinstance(answer, str):
296
- return RunResponse(content=answer)
297
- elif callable(answer):
298
- _answer = answer()
299
- if isinstance(_answer, str):
300
- return RunResponse(content=_answer)
301
- else:
302
- logger.error("Answer is not a string")
303
- else:
304
- logger.error("Answer is not a string or callable")
305
-
306
- # Get answer from the eval
307
- if self.answer is not None:
308
- if isinstance(self.answer, str):
309
- return RunResponse(content=self.answer)
310
- elif callable(self.answer):
311
- _answer = self.answer()
312
- if isinstance(_answer, str):
313
- return RunResponse(content=_answer)
314
- else:
315
- logger.error("Answer is not a string")
316
- else:
317
- logger.error("Answer is not a string or callable")
318
-
319
- # Get answer from the agent
320
- if self.agent is not None and question is not None:
321
- logger.debug("Getting answer from agent")
322
- return self.agent.run(question)
285
+ response = evaluator_agent.run(evaluation_input, stream=False)
286
+ accuracy_agent_response = response.content
287
+ if accuracy_agent_response is None or not isinstance(accuracy_agent_response, AccuracyAgentResponse):
288
+ raise EvalError(f"Evaluator Agent returned an invalid response: {accuracy_agent_response}")
289
+ return AccuracyEvaluation(
290
+ input=input,
291
+ output=agent_output,
292
+ expected_output=evaluator_expected_output,
293
+ score=accuracy_agent_response.accuracy_score,
294
+ reason=accuracy_agent_response.accuracy_reason,
295
+ )
323
296
  except Exception as e:
324
- logger.error(f"Failed to get answer to evaluate: {e}")
325
- return None
297
+ logger.exception(f"Failed to evaluate accuracy: {e}")
298
+ return None
326
299
 
327
- def get_expected_answer_to_evaluate(self, expected_answer: Optional[Union[str, Callable]] = None) -> Optional[str]:
328
- """Get the expected answer to evaluate."""
300
+ async def aevaluate_answer(
301
+ self,
302
+ input: str,
303
+ evaluator_agent: Agent,
304
+ evaluation_input: str,
305
+ evaluator_expected_output: str,
306
+ agent_output: str,
307
+ ) -> Optional[AccuracyEvaluation]:
308
+ """Orchestrate the evaluation process asynchronously."""
329
309
  try:
330
- # Get expected_answer from the run method
331
- if expected_answer is not None:
332
- if isinstance(expected_answer, str):
333
- return expected_answer
334
- elif callable(expected_answer):
335
- _expected_answer = expected_answer()
336
- if isinstance(_expected_answer, str):
337
- return _expected_answer
338
- else:
339
- logger.error("Expected Answer is not a string")
340
- else:
341
- logger.error("Expected Answer is not a string or callable")
342
-
343
- # Get the expected_answer from the eval
344
- if self.expected_answer is not None:
345
- if isinstance(self.expected_answer, str):
346
- return self.expected_answer
347
- elif callable(self.expected_answer):
348
- _expected_answer = self.expected_answer()
349
- if isinstance(_expected_answer, str):
350
- return _expected_answer
351
- else:
352
- logger.error("Expected Answer is not a string")
353
- else:
354
- logger.error("Expected Answer is not a string or callable")
310
+ response = await evaluator_agent.arun(evaluation_input, stream=False)
311
+ accuracy_agent_response = response.content
312
+ if accuracy_agent_response is None or not isinstance(accuracy_agent_response, AccuracyAgentResponse):
313
+ raise EvalError(f"Evaluator Agent returned an invalid response: {accuracy_agent_response}")
314
+ return AccuracyEvaluation(
315
+ input=input,
316
+ output=agent_output,
317
+ expected_output=evaluator_expected_output,
318
+ score=accuracy_agent_response.accuracy_score,
319
+ reason=accuracy_agent_response.accuracy_reason,
320
+ )
355
321
  except Exception as e:
356
- logger.error(f"Failed to get expected answer to evaluate: {e}")
357
- return None
322
+ logger.exception(f"Failed to evaluate accuracy asynchronously: {e}")
323
+ return None
358
324
 
359
325
  def run(
360
326
  self,
361
327
  *,
362
- question: Optional[Union[str, Callable]] = None,
363
- expected_answer: Optional[Union[str, Callable]] = None,
364
- answer: Optional[Union[str, Callable]] = None,
365
328
  print_summary: bool = True,
366
329
  print_results: bool = True,
367
330
  ) -> Optional[AccuracyResult]:
331
+ if isinstance(self.db, AsyncBaseDb):
332
+ raise ValueError("run() is not supported with an async DB. Please use arun() instead.")
333
+
334
+ if self.agent is None and self.team is None:
335
+ logger.error("You need to provide one of 'agent' or 'team' to run the evaluation.")
336
+ return None
337
+
338
+ if self.agent is not None and self.team is not None:
339
+ logger.error("Provide only one of 'agent' or 'team' to run the evaluation.")
340
+ return None
341
+
368
342
  from rich.console import Console
369
343
  from rich.live import Live
370
344
  from rich.status import Status
371
345
 
372
- self.set_eval_id()
373
- self.set_debug_mode()
346
+ set_log_level_to_debug() if self.debug_mode else set_log_level_to_info()
347
+
374
348
  self.result = AccuracyResult()
375
- self.print_results = print_results
376
- self.print_summary = print_summary
377
349
 
378
- question_to_evaluate: Optional[str] = self.get_question_to_evaluate(question=question)
379
- if question_to_evaluate is None:
380
- logger.error("No Question to evaluate.")
350
+ logger.debug(f"************ Evaluation Start: {self.eval_id} ************")
351
+
352
+ # Add a spinner while running the evaluations
353
+ console = Console()
354
+ with Live(console=console, transient=True) as live_log:
355
+ evaluator_agent = self.get_evaluator_agent()
356
+ eval_input = self.get_eval_input()
357
+ eval_expected_output = self.get_eval_expected_output()
358
+
359
+ for i in range(self.num_iterations):
360
+ status = Status(f"Running evaluation {i + 1}...", spinner="dots", speed=1.0, refresh_per_second=10)
361
+ live_log.update(status)
362
+
363
+ agent_session_id = f"eval_{self.eval_id}_{i + 1}"
364
+
365
+ if self.agent is not None:
366
+ agent_response = self.agent.run(input=eval_input, session_id=agent_session_id, stream=False)
367
+ output = agent_response.content
368
+ elif self.team is not None:
369
+ team_response = self.team.run(input=eval_input, session_id=agent_session_id, stream=False)
370
+ output = team_response.content
371
+
372
+ if not output:
373
+ logger.error(f"Failed to generate a valid answer on iteration {i + 1}: {output}")
374
+ continue
375
+
376
+ evaluation_input = dedent(f"""\
377
+ <agent_input>
378
+ {eval_input}
379
+ </agent_input>
380
+
381
+ <expected_output>
382
+ {eval_expected_output}
383
+ </expected_output>
384
+
385
+ <agent_output>
386
+ {output}
387
+ </agent_output>\
388
+ """)
389
+ logger.debug(f"Agent output #{i + 1}: {output}")
390
+ result = self.evaluate_answer(
391
+ input=eval_input,
392
+ evaluator_agent=evaluator_agent,
393
+ evaluation_input=evaluation_input,
394
+ evaluator_expected_output=eval_expected_output,
395
+ agent_output=output,
396
+ )
397
+ if result is None:
398
+ logger.error(f"Failed to evaluate accuracy on iteration {i + 1}")
399
+ continue
400
+
401
+ self.result.results.append(result)
402
+ self.result.compute_stats()
403
+ status.update(f"Eval iteration {i + 1} finished")
404
+
405
+ status.stop()
406
+
407
+ # Save result to file if requested
408
+ if self.file_path_to_save_results is not None and self.result is not None:
409
+ store_result_in_file(
410
+ file_path=self.file_path_to_save_results,
411
+ name=self.name,
412
+ eval_id=self.eval_id,
413
+ result=self.result,
414
+ )
415
+
416
+ # Print results if requested
417
+ if self.print_results or print_results:
418
+ self.result.print_results(console)
419
+ if self.print_summary or print_summary:
420
+ self.result.print_summary(console)
421
+
422
+ # Log results to the Agno DB if requested
423
+ if self.agent is not None:
424
+ agent_id = self.agent.id
425
+ team_id = None
426
+ model_id = self.agent.model.id if self.agent.model is not None else None
427
+ model_provider = self.agent.model.provider if self.agent.model is not None else None
428
+ evaluated_component_name = self.agent.name
429
+ elif self.team is not None:
430
+ agent_id = None
431
+ team_id = self.team.id
432
+ model_id = self.team.model.id if self.team.model is not None else None
433
+ model_provider = self.team.model.provider if self.team.model is not None else None
434
+ evaluated_component_name = self.team.name
435
+
436
+ if self.db:
437
+ log_eval_input = {
438
+ "additional_guidelines": self.additional_guidelines,
439
+ "additional_context": self.additional_context,
440
+ "num_iterations": self.num_iterations,
441
+ "expected_output": self.expected_output,
442
+ "input": self.input,
443
+ }
444
+
445
+ log_eval_run(
446
+ db=self.db,
447
+ run_id=self.eval_id, # type: ignore
448
+ run_data=asdict(self.result),
449
+ eval_type=EvalType.ACCURACY,
450
+ agent_id=agent_id,
451
+ team_id=team_id,
452
+ model_id=model_id,
453
+ model_provider=model_provider,
454
+ name=self.name if self.name is not None else None,
455
+ evaluated_component_name=evaluated_component_name,
456
+ eval_input=log_eval_input,
457
+ )
458
+
459
+ if self.telemetry:
460
+ from agno.api.evals import EvalRunCreate, create_eval_run_telemetry
461
+
462
+ create_eval_run_telemetry(
463
+ eval_run=EvalRunCreate(
464
+ run_id=self.eval_id,
465
+ eval_type=EvalType.ACCURACY,
466
+ data=self._get_telemetry_data(),
467
+ ),
468
+ )
469
+
470
+ logger.debug(f"*********** Evaluation {self.eval_id} Finished ***********")
471
+ return self.result
472
+
473
+    async def arun(
+        self,
+        *,
+        print_summary: bool = True,
+        print_results: bool = True,
+    ) -> Optional[AccuracyResult]:
+        if self.agent is None and self.team is None:
+            logger.error("You need to provide one of 'agent' or 'team' to run the evaluation.")
             return None

-        expected_answer_to_evaluate: Optional[str] = self.get_expected_answer_to_evaluate(
-            expected_answer=expected_answer
-        )
-        if expected_answer_to_evaluate is None:
-            logger.error("No Expected Answer to evaluate.")
+        if self.agent is not None and self.team is not None:
+            logger.error("Provide only one of 'agent' or 'team' to run the evaluation.")
             return None

-        logger.debug(f"************ Evaluation Start: {self.eval_id} ************")
-        logger.debug(f"Question: {question_to_evaluate}")
-        logger.debug(f"Expected Answer: {expected_answer_to_evaluate}")
-        logger.debug("***********************************************************")
+        from rich.console import Console
+        from rich.live import Live
+        from rich.status import Status

-        evaluator_agent: Agent = self.get_evaluator_agent(
-            question=question_to_evaluate, expected_answer=expected_answer_to_evaluate
-        )
+        set_log_level_to_debug() if self.debug_mode else set_log_level_to_info()
+
+        self.result = AccuracyResult()
+
+        logger.debug(f"************ Evaluation Start: {self.eval_id} ************")

         # Add a spinner while running the evaluations
         console = Console()
         with Live(console=console, transient=True) as live_log:
+            evaluator_agent = self.get_evaluator_agent()
+            eval_input = self.get_eval_input()
+            eval_expected_output = self.get_eval_expected_output()
+
             for i in range(self.num_iterations):
                 status = Status(f"Running evaluation {i + 1}...", spinner="dots", speed=1.0, refresh_per_second=10)
                 live_log.update(status)

-                answer_to_evaluate: Optional[RunResponse] = self.get_answer_to_evaluate(
-                    question=question_to_evaluate, answer=answer
+                agent_session_id = f"eval_{self.eval_id}_{i + 1}"
+
+                if self.agent is not None:
+                    agent_response = await self.agent.arun(input=eval_input, session_id=agent_session_id, stream=False)
+                    output = agent_response.content
+                elif self.team is not None:
+                    team_response = await self.team.arun(input=eval_input, session_id=agent_session_id, stream=False)
+                    output = team_response.content
+
+                if not output:
+                    logger.error(f"Failed to generate a valid answer on iteration {i + 1}: {output}")
+                    continue
+
+                evaluation_input = dedent(f"""\
+                    <agent_input>
+                    {eval_input}
+                    </agent_input>
+
+                    <expected_output>
+                    {eval_expected_output}
+                    </expected_output>
+
+                    <agent_output>
+                    {output}
+                    </agent_output>\
+                    """)
+                logger.debug(f"Agent output #{i + 1}: {output}")
+                result = await self.aevaluate_answer(
+                    input=eval_input,
+                    evaluator_agent=evaluator_agent,
+                    evaluation_input=evaluation_input,
+                    evaluator_expected_output=eval_expected_output,
+                    agent_output=output,
                 )
-                if answer_to_evaluate is None:
-                    logger.error("No Answer to evaluate.")
+                if result is None:
+                    logger.error(f"Failed to evaluate accuracy on iteration {i + 1}")
                     continue

-                try:
-                    logger.debug(f"Answer #{i + 1}: {answer_to_evaluate.content}")
-                    accuracy_agent_response = evaluator_agent.run(answer_to_evaluate.content).content
-                    if accuracy_agent_response is None or not isinstance(
-                        accuracy_agent_response, AccuracyAgentResponse
-                    ):
-                        logger.error("Evaluator Agent returned an invalid response")
-                        continue
-
-                    accuracy_evaluation = AccuracyEvaluation(
-                        question=question_to_evaluate,
-                        answer=answer_to_evaluate.content,  # type: ignore
-                        expected_answer=expected_answer_to_evaluate,
-                        score=accuracy_agent_response.accuracy_score,
-                        reason=accuracy_agent_response.accuracy_reason,
-                    )
-                    if self.print_results:
-                        accuracy_evaluation.print_eval(console)
-                    self.result.results.append(accuracy_evaluation)
-                    self.result.compute_stats()
-                    status.update(f"Running evaluation {i + 1}... Done")
-                except Exception as e:
-                    logger.exception(f"Failed to evaluate accuracy, run #{i + 1}: {e}")
-                    return None
-
-            status.stop()
-
-        # -*- Save result to file if save_result_to_file is set
-        if self.save_result_to_file is not None and self.result is not None:
-            try:
-                import json
+                self.result.results.append(result)
+                self.result.compute_stats()
+                status.update(f"Eval iteration {i + 1} finished")
+
+            status.stop()
+
+        # Save result to file if requested
+        if self.file_path_to_save_results is not None and self.result is not None:
+            store_result_in_file(
+                file_path=self.file_path_to_save_results,
+                name=self.name,
+                eval_id=self.eval_id,
+                result=self.result,
+            )
+
+        # Print results if requested
+        if self.print_results or print_results:
+            self.result.print_results(console)
+        if self.print_summary or print_summary:
+            self.result.print_summary(console)

-                fn_path = Path(self.save_result_to_file.format(name=self.name, eval_id=self.eval_id))
-                if not fn_path.parent.exists():
-                    fn_path.parent.mkdir(parents=True, exist_ok=True)
-                fn_path.write_text(json.dumps(asdict(self.result), indent=4))
-            except Exception as e:
-                logger.warning(f"Failed to save result to file: {e}")
+        if self.agent is not None:
+            agent_id = self.agent.id
+            team_id = None
+            model_id = self.agent.model.id if self.agent.model is not None else None
+            model_provider = self.agent.model.provider if self.agent.model is not None else None
+            evaluated_component_name = self.agent.name
+        elif self.team is not None:
+            agent_id = None
+            team_id = self.team.id
+            model_id = self.team.model.id if self.team.model is not None else None
+            model_provider = self.team.model.provider if self.team.model is not None else None
+            evaluated_component_name = self.team.name
+
+        # Log results to the Agno DB if requested
+        if self.db:
+            log_eval_input = {
+                "additional_guidelines": self.additional_guidelines,
+                "additional_context": self.additional_context,
+                "num_iterations": self.num_iterations,
+                "expected_output": self.expected_output,
+                "input": self.input,
+            }
+            await async_log_eval(
+                db=self.db,
+                run_id=self.eval_id,  # type: ignore
+                run_data=asdict(self.result),
+                eval_type=EvalType.ACCURACY,
+                agent_id=agent_id,
+                model_id=model_id,
+                model_provider=model_provider,
+                name=self.name if self.name is not None else None,
+                evaluated_component_name=evaluated_component_name,
+                team_id=team_id,
+                workflow_id=None,
+                eval_input=log_eval_input,
+            )
+
+        if self.telemetry:
+            from agno.api.evals import EvalRunCreate, async_create_eval_run_telemetry
+
+            await async_create_eval_run_telemetry(
+                eval_run=EvalRunCreate(run_id=self.eval_id, eval_type=EvalType.ACCURACY),
+            )
+
+        logger.debug(f"*********** Evaluation {self.eval_id} Finished ***********")
+        return self.result

-        # Show results
-        if self.print_summary or self.print_results:
-            self.result.print_summary(console)
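The async `arun` added above mirrors the synchronous flow, awaiting `self.agent.arun(...)` / `self.team.arun(...)` and `self.aevaluate_answer(...)` instead of blocking. A minimal sketch, reusing the hypothetical `evaluation` instance from the earlier example:

import asyncio

async def main() -> None:
    # `evaluation` is the AccuracyEval instance constructed in the earlier sketch.
    result = await evaluation.arun(print_results=False, print_summary=True)
    print(result)

asyncio.run(main())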
+    def run_with_output(
+        self,
+        *,
+        output: str,
+        print_summary: bool = True,
+        print_results: bool = True,
+    ) -> Optional[AccuracyResult]:
+        """Run the evaluation logic against the given answer, instead of generating an answer with the Agent"""
+        # Generate unique run_id for this execution (don't modify self.eval_id due to concurrency)
+        run_id = str(uuid4())
+
+        set_log_level_to_debug() if self.debug_mode else set_log_level_to_info()
+
+        self.result = AccuracyResult()
+
+        logger.debug(f"************ Evaluation Start: {run_id} ************")
+
+        evaluator_agent = self.get_evaluator_agent()
+        eval_input = self.get_eval_input()
+        eval_expected_output = self.get_eval_expected_output()
+
+        evaluation_input = dedent(f"""\
+            <agent_input>
+            {eval_input}
+            </agent_input>

-        logger.debug(f"*********** Evaluation End: {self.eval_id} ***********")
+            <expected_output>
+            {eval_expected_output}
+            </expected_output>
+
+            <agent_output>
+            {output}
+            </agent_output>\
+            """)
+
+        result = self.evaluate_answer(
+            input=eval_input,
+            evaluator_agent=evaluator_agent,
+            evaluation_input=evaluation_input,
+            evaluator_expected_output=eval_expected_output,
+            agent_output=output,
+        )
+
+        if result is not None:
+            self.result.results.append(result)
+            self.result.compute_stats()
+
+        # Print results if requested
+        if self.print_results or print_results:
+            self.result.print_results()
+        if self.print_summary or print_summary:
+            self.result.print_summary()
+
+        # Save result to file if requested
+        if self.file_path_to_save_results is not None:
+            store_result_in_file(
+                file_path=self.file_path_to_save_results,
+                name=self.name,
+                eval_id=self.eval_id,
+                result=self.result,
+            )
+        # Log results to the Agno DB if requested
+        if self.db:
+            if isinstance(self.db, AsyncBaseDb):
+                log_error("You are using an async DB in a non-async method. The evaluation won't be stored in the DB.")
+
+            else:
+                if self.agent is not None:
+                    agent_id = self.agent.id
+                    team_id = None
+                    model_id = self.agent.model.id if self.agent.model is not None else None
+                    model_provider = self.agent.model.provider if self.agent.model is not None else None
+                    evaluated_component_name = self.agent.name
+                elif self.team is not None:
+                    agent_id = None
+                    team_id = self.team.id
+                    model_id = self.team.model.id if self.team.model is not None else None
+                    model_provider = self.team.model.provider if self.team.model is not None else None
+                    evaluated_component_name = self.team.name
+                else:
+                    agent_id = None
+                    team_id = None
+                    model_id = None
+                    model_provider = None
+                    evaluated_component_name = None
+
+                log_eval_input = {
+                    "additional_guidelines": self.additional_guidelines,
+                    "additional_context": self.additional_context,
+                    "num_iterations": self.num_iterations,
+                    "expected_output": self.expected_output,
+                    "input": self.input,
+                }
+
+                log_eval_run(
+                    db=self.db,
+                    run_id=self.eval_id,  # type: ignore
+                    run_data=asdict(self.result),
+                    eval_type=EvalType.ACCURACY,
+                    name=self.name if self.name is not None else None,
+                    agent_id=agent_id,
+                    team_id=team_id,
+                    model_id=model_id,
+                    model_provider=model_provider,
+                    evaluated_component_name=evaluated_component_name,
+                    workflow_id=None,
+                    eval_input=log_eval_input,
+                )
+
+        if self.telemetry:
+            from agno.api.evals import EvalRunCreate, create_eval_run_telemetry
+
+            create_eval_run_telemetry(
+                eval_run=EvalRunCreate(
+                    run_id=self.eval_id,
+                    eval_type=EvalType.ACCURACY,
+                    data=self._get_telemetry_data(),
+                ),
+            )
+
+        logger.debug(f"*********** Evaluation End: {run_id} ***********")
+        return self.result
+
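`run_with_output` (above) skips the agent/team call entirely and scores a pre-generated answer against `expected_output`. A minimal sketch, again assuming the `AccuracyEval` instance from the first example:

# Score an answer that was produced elsewhere; no agent or team is invoked here.
result = evaluation.run_with_output(
    output="2500",  # the candidate answer to judge against expected_output
    print_results=True,
    print_summary=False,
)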
+    async def arun_with_output(
+        self,
+        *,
+        output: str,
+        print_summary: bool = True,
+        print_results: bool = True,
+    ) -> Optional[AccuracyResult]:
+        """Run the evaluation logic against the given answer, instead of generating an answer with the Agent"""
+        # Generate unique run_id for this execution (don't modify self.eval_id due to concurrency)
+        run_id = str(uuid4())
+
+        set_log_level_to_debug() if self.debug_mode else set_log_level_to_info()
+
+        self.result = AccuracyResult()
+
+        logger.debug(f"************ Evaluation Start: {run_id} ************")
+
+        evaluator_agent = self.get_evaluator_agent()
+        eval_input = self.get_eval_input()
+        eval_expected_output = self.get_eval_expected_output()
+
+        evaluation_input = dedent(f"""\
+            <agent_input>
+            {eval_input}
+            </agent_input>
+
+            <expected_output>
+            {eval_expected_output}
+            </expected_output>
+
+            <agent_output>
+            {output}
+            </agent_output>\
+            """)
+
+        result = await self.aevaluate_answer(
+            input=eval_input,
+            evaluator_agent=evaluator_agent,
+            evaluation_input=evaluation_input,
+            evaluator_expected_output=eval_expected_output,
+            agent_output=output,
+        )
+
+        if result is not None:
+            self.result.results.append(result)
+            self.result.compute_stats()
+
+        # Print results if requested
+        if self.print_results or print_results:
+            self.result.print_results()
+        if self.print_summary or print_summary:
+            self.result.print_summary()
+
+        # Save result to file if requested
+        if self.file_path_to_save_results is not None:
+            store_result_in_file(
+                file_path=self.file_path_to_save_results,
+                name=self.name,
+                eval_id=self.eval_id,
+                result=self.result,
+            )
+        # Log results to the Agno DB if requested
+        if self.db:
+            if self.agent is not None:
+                agent_id = self.agent.id
+                team_id = None
+                model_id = self.agent.model.id if self.agent.model is not None else None
+                model_provider = self.agent.model.provider if self.agent.model is not None else None
+                evaluated_component_name = self.agent.name
+            elif self.team is not None:
+                agent_id = None
+                team_id = self.team.id
+                model_id = self.team.model.id if self.team.model is not None else None
+                model_provider = self.team.model.provider if self.team.model is not None else None
+                evaluated_component_name = self.team.name
+
+            log_eval_input = {
+                "additional_guidelines": self.additional_guidelines,
+                "additional_context": self.additional_context,
+                "num_iterations": self.num_iterations,
+                "expected_output": self.expected_output,
+                "input": self.input,
+            }
+
+            await async_log_eval(
+                db=self.db,
+                run_id=self.eval_id,  # type: ignore
+                run_data=asdict(self.result),
+                eval_type=EvalType.ACCURACY,
+                name=self.name if self.name is not None else None,
+                agent_id=agent_id,
+                team_id=team_id,
+                model_id=model_id,
+                model_provider=model_provider,
+                evaluated_component_name=evaluated_component_name,
+                workflow_id=None,
+                eval_input=log_eval_input,
+            )
+
+        logger.debug(f"*********** Evaluation End: {run_id} ***********")
         return self.result
+
+    def _get_telemetry_data(self) -> Dict[str, Any]:
+        """Get the telemetry data for the evaluation"""
+        return {
+            "agent_id": self.agent.id if self.agent else None,
+            "team_id": self.team.id if self.team else None,
+            "model_id": self.agent.model.id if self.agent and self.agent.model else None,
+            "model_provider": self.agent.model.provider if self.agent and self.agent.model else None,
+            "num_iterations": self.num_iterations,
+        }
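For reference, the payload built by `_get_telemetry_data` above is a plain dictionary keyed as shown in the diff; for an agent-backed eval it would look roughly like this (the values below are illustrative, not taken from the diff):

{
    "agent_id": "my-agent-id",
    "team_id": None,
    "model_id": "gpt-4o",
    "model_provider": "OpenAI",
    "num_iterations": 3,
}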