agno 2.2.13__py3-none-any.whl → 2.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (383)
  1. agno/agent/__init__.py +6 -0
  2. agno/agent/agent.py +5252 -3145
  3. agno/agent/remote.py +525 -0
  4. agno/api/api.py +2 -0
  5. agno/client/__init__.py +3 -0
  6. agno/client/a2a/__init__.py +10 -0
  7. agno/client/a2a/client.py +554 -0
  8. agno/client/a2a/schemas.py +112 -0
  9. agno/client/a2a/utils.py +369 -0
  10. agno/client/os.py +2669 -0
  11. agno/compression/__init__.py +3 -0
  12. agno/compression/manager.py +247 -0
  13. agno/culture/manager.py +2 -2
  14. agno/db/base.py +927 -6
  15. agno/db/dynamo/dynamo.py +788 -2
  16. agno/db/dynamo/schemas.py +128 -0
  17. agno/db/dynamo/utils.py +26 -3
  18. agno/db/firestore/firestore.py +674 -50
  19. agno/db/firestore/schemas.py +41 -0
  20. agno/db/firestore/utils.py +25 -10
  21. agno/db/gcs_json/gcs_json_db.py +506 -3
  22. agno/db/gcs_json/utils.py +14 -2
  23. agno/db/in_memory/in_memory_db.py +203 -4
  24. agno/db/in_memory/utils.py +14 -2
  25. agno/db/json/json_db.py +498 -2
  26. agno/db/json/utils.py +14 -2
  27. agno/db/migrations/manager.py +199 -0
  28. agno/db/migrations/utils.py +19 -0
  29. agno/db/migrations/v1_to_v2.py +54 -16
  30. agno/db/migrations/versions/__init__.py +0 -0
  31. agno/db/migrations/versions/v2_3_0.py +977 -0
  32. agno/db/mongo/async_mongo.py +1013 -39
  33. agno/db/mongo/mongo.py +684 -4
  34. agno/db/mongo/schemas.py +48 -0
  35. agno/db/mongo/utils.py +17 -0
  36. agno/db/mysql/__init__.py +2 -1
  37. agno/db/mysql/async_mysql.py +2958 -0
  38. agno/db/mysql/mysql.py +722 -53
  39. agno/db/mysql/schemas.py +77 -11
  40. agno/db/mysql/utils.py +151 -8
  41. agno/db/postgres/async_postgres.py +1254 -137
  42. agno/db/postgres/postgres.py +2316 -93
  43. agno/db/postgres/schemas.py +153 -21
  44. agno/db/postgres/utils.py +22 -7
  45. agno/db/redis/redis.py +531 -3
  46. agno/db/redis/schemas.py +36 -0
  47. agno/db/redis/utils.py +31 -15
  48. agno/db/schemas/evals.py +1 -0
  49. agno/db/schemas/memory.py +20 -9
  50. agno/db/singlestore/schemas.py +70 -1
  51. agno/db/singlestore/singlestore.py +737 -74
  52. agno/db/singlestore/utils.py +13 -3
  53. agno/db/sqlite/async_sqlite.py +1069 -89
  54. agno/db/sqlite/schemas.py +133 -1
  55. agno/db/sqlite/sqlite.py +2203 -165
  56. agno/db/sqlite/utils.py +21 -11
  57. agno/db/surrealdb/models.py +25 -0
  58. agno/db/surrealdb/surrealdb.py +603 -1
  59. agno/db/utils.py +60 -0
  60. agno/eval/__init__.py +26 -3
  61. agno/eval/accuracy.py +25 -12
  62. agno/eval/agent_as_judge.py +871 -0
  63. agno/eval/base.py +29 -0
  64. agno/eval/performance.py +10 -4
  65. agno/eval/reliability.py +22 -13
  66. agno/eval/utils.py +2 -1
  67. agno/exceptions.py +42 -0
  68. agno/hooks/__init__.py +3 -0
  69. agno/hooks/decorator.py +164 -0
  70. agno/integrations/discord/client.py +13 -2
  71. agno/knowledge/__init__.py +4 -0
  72. agno/knowledge/chunking/code.py +90 -0
  73. agno/knowledge/chunking/document.py +65 -4
  74. agno/knowledge/chunking/fixed.py +4 -1
  75. agno/knowledge/chunking/markdown.py +102 -11
  76. agno/knowledge/chunking/recursive.py +2 -2
  77. agno/knowledge/chunking/semantic.py +130 -48
  78. agno/knowledge/chunking/strategy.py +18 -0
  79. agno/knowledge/embedder/azure_openai.py +0 -1
  80. agno/knowledge/embedder/google.py +1 -1
  81. agno/knowledge/embedder/mistral.py +1 -1
  82. agno/knowledge/embedder/nebius.py +1 -1
  83. agno/knowledge/embedder/openai.py +16 -12
  84. agno/knowledge/filesystem.py +412 -0
  85. agno/knowledge/knowledge.py +4261 -1199
  86. agno/knowledge/protocol.py +134 -0
  87. agno/knowledge/reader/arxiv_reader.py +3 -2
  88. agno/knowledge/reader/base.py +9 -7
  89. agno/knowledge/reader/csv_reader.py +91 -42
  90. agno/knowledge/reader/docx_reader.py +9 -10
  91. agno/knowledge/reader/excel_reader.py +225 -0
  92. agno/knowledge/reader/field_labeled_csv_reader.py +38 -48
  93. agno/knowledge/reader/firecrawl_reader.py +3 -2
  94. agno/knowledge/reader/json_reader.py +16 -22
  95. agno/knowledge/reader/markdown_reader.py +15 -14
  96. agno/knowledge/reader/pdf_reader.py +33 -28
  97. agno/knowledge/reader/pptx_reader.py +9 -10
  98. agno/knowledge/reader/reader_factory.py +135 -1
  99. agno/knowledge/reader/s3_reader.py +8 -16
  100. agno/knowledge/reader/tavily_reader.py +3 -3
  101. agno/knowledge/reader/text_reader.py +15 -14
  102. agno/knowledge/reader/utils/__init__.py +17 -0
  103. agno/knowledge/reader/utils/spreadsheet.py +114 -0
  104. agno/knowledge/reader/web_search_reader.py +8 -65
  105. agno/knowledge/reader/website_reader.py +16 -13
  106. agno/knowledge/reader/wikipedia_reader.py +36 -3
  107. agno/knowledge/reader/youtube_reader.py +3 -2
  108. agno/knowledge/remote_content/__init__.py +33 -0
  109. agno/knowledge/remote_content/config.py +266 -0
  110. agno/knowledge/remote_content/remote_content.py +105 -17
  111. agno/knowledge/utils.py +76 -22
  112. agno/learn/__init__.py +71 -0
  113. agno/learn/config.py +463 -0
  114. agno/learn/curate.py +185 -0
  115. agno/learn/machine.py +725 -0
  116. agno/learn/schemas.py +1114 -0
  117. agno/learn/stores/__init__.py +38 -0
  118. agno/learn/stores/decision_log.py +1156 -0
  119. agno/learn/stores/entity_memory.py +3275 -0
  120. agno/learn/stores/learned_knowledge.py +1583 -0
  121. agno/learn/stores/protocol.py +117 -0
  122. agno/learn/stores/session_context.py +1217 -0
  123. agno/learn/stores/user_memory.py +1495 -0
  124. agno/learn/stores/user_profile.py +1220 -0
  125. agno/learn/utils.py +209 -0
  126. agno/media.py +22 -6
  127. agno/memory/__init__.py +14 -1
  128. agno/memory/manager.py +223 -8
  129. agno/memory/strategies/__init__.py +15 -0
  130. agno/memory/strategies/base.py +66 -0
  131. agno/memory/strategies/summarize.py +196 -0
  132. agno/memory/strategies/types.py +37 -0
  133. agno/models/aimlapi/aimlapi.py +17 -0
  134. agno/models/anthropic/claude.py +434 -59
  135. agno/models/aws/bedrock.py +121 -20
  136. agno/models/aws/claude.py +131 -274
  137. agno/models/azure/ai_foundry.py +10 -6
  138. agno/models/azure/openai_chat.py +33 -10
  139. agno/models/base.py +1162 -561
  140. agno/models/cerebras/cerebras.py +120 -24
  141. agno/models/cerebras/cerebras_openai.py +21 -2
  142. agno/models/cohere/chat.py +65 -6
  143. agno/models/cometapi/cometapi.py +18 -1
  144. agno/models/dashscope/dashscope.py +2 -3
  145. agno/models/deepinfra/deepinfra.py +18 -1
  146. agno/models/deepseek/deepseek.py +69 -3
  147. agno/models/fireworks/fireworks.py +18 -1
  148. agno/models/google/gemini.py +959 -89
  149. agno/models/google/utils.py +22 -0
  150. agno/models/groq/groq.py +48 -18
  151. agno/models/huggingface/huggingface.py +17 -6
  152. agno/models/ibm/watsonx.py +16 -6
  153. agno/models/internlm/internlm.py +18 -1
  154. agno/models/langdb/langdb.py +13 -1
  155. agno/models/litellm/chat.py +88 -9
  156. agno/models/litellm/litellm_openai.py +18 -1
  157. agno/models/message.py +24 -5
  158. agno/models/meta/llama.py +40 -13
  159. agno/models/meta/llama_openai.py +22 -21
  160. agno/models/metrics.py +12 -0
  161. agno/models/mistral/mistral.py +8 -4
  162. agno/models/n1n/__init__.py +3 -0
  163. agno/models/n1n/n1n.py +57 -0
  164. agno/models/nebius/nebius.py +6 -7
  165. agno/models/nvidia/nvidia.py +20 -3
  166. agno/models/ollama/__init__.py +2 -0
  167. agno/models/ollama/chat.py +17 -6
  168. agno/models/ollama/responses.py +100 -0
  169. agno/models/openai/__init__.py +2 -0
  170. agno/models/openai/chat.py +117 -26
  171. agno/models/openai/open_responses.py +46 -0
  172. agno/models/openai/responses.py +110 -32
  173. agno/models/openrouter/__init__.py +2 -0
  174. agno/models/openrouter/openrouter.py +67 -2
  175. agno/models/openrouter/responses.py +146 -0
  176. agno/models/perplexity/perplexity.py +19 -1
  177. agno/models/portkey/portkey.py +7 -6
  178. agno/models/requesty/requesty.py +19 -2
  179. agno/models/response.py +20 -2
  180. agno/models/sambanova/sambanova.py +20 -3
  181. agno/models/siliconflow/siliconflow.py +19 -2
  182. agno/models/together/together.py +20 -3
  183. agno/models/vercel/v0.py +20 -3
  184. agno/models/vertexai/claude.py +124 -4
  185. agno/models/vllm/vllm.py +19 -14
  186. agno/models/xai/xai.py +19 -2
  187. agno/os/app.py +467 -137
  188. agno/os/auth.py +253 -5
  189. agno/os/config.py +22 -0
  190. agno/os/interfaces/a2a/a2a.py +7 -6
  191. agno/os/interfaces/a2a/router.py +635 -26
  192. agno/os/interfaces/a2a/utils.py +32 -33
  193. agno/os/interfaces/agui/agui.py +5 -3
  194. agno/os/interfaces/agui/router.py +26 -16
  195. agno/os/interfaces/agui/utils.py +97 -57
  196. agno/os/interfaces/base.py +7 -7
  197. agno/os/interfaces/slack/router.py +16 -7
  198. agno/os/interfaces/slack/slack.py +7 -7
  199. agno/os/interfaces/whatsapp/router.py +35 -7
  200. agno/os/interfaces/whatsapp/security.py +3 -1
  201. agno/os/interfaces/whatsapp/whatsapp.py +11 -8
  202. agno/os/managers.py +326 -0
  203. agno/os/mcp.py +652 -79
  204. agno/os/middleware/__init__.py +4 -0
  205. agno/os/middleware/jwt.py +718 -115
  206. agno/os/middleware/trailing_slash.py +27 -0
  207. agno/os/router.py +105 -1558
  208. agno/os/routers/agents/__init__.py +3 -0
  209. agno/os/routers/agents/router.py +655 -0
  210. agno/os/routers/agents/schema.py +288 -0
  211. agno/os/routers/components/__init__.py +3 -0
  212. agno/os/routers/components/components.py +475 -0
  213. agno/os/routers/database.py +155 -0
  214. agno/os/routers/evals/evals.py +111 -18
  215. agno/os/routers/evals/schemas.py +38 -5
  216. agno/os/routers/evals/utils.py +80 -11
  217. agno/os/routers/health.py +3 -3
  218. agno/os/routers/knowledge/knowledge.py +284 -35
  219. agno/os/routers/knowledge/schemas.py +14 -2
  220. agno/os/routers/memory/memory.py +274 -11
  221. agno/os/routers/memory/schemas.py +44 -3
  222. agno/os/routers/metrics/metrics.py +30 -15
  223. agno/os/routers/metrics/schemas.py +10 -6
  224. agno/os/routers/registry/__init__.py +3 -0
  225. agno/os/routers/registry/registry.py +337 -0
  226. agno/os/routers/session/session.py +143 -14
  227. agno/os/routers/teams/__init__.py +3 -0
  228. agno/os/routers/teams/router.py +550 -0
  229. agno/os/routers/teams/schema.py +280 -0
  230. agno/os/routers/traces/__init__.py +3 -0
  231. agno/os/routers/traces/schemas.py +414 -0
  232. agno/os/routers/traces/traces.py +549 -0
  233. agno/os/routers/workflows/__init__.py +3 -0
  234. agno/os/routers/workflows/router.py +757 -0
  235. agno/os/routers/workflows/schema.py +139 -0
  236. agno/os/schema.py +157 -584
  237. agno/os/scopes.py +469 -0
  238. agno/os/settings.py +3 -0
  239. agno/os/utils.py +574 -185
  240. agno/reasoning/anthropic.py +85 -1
  241. agno/reasoning/azure_ai_foundry.py +93 -1
  242. agno/reasoning/deepseek.py +102 -2
  243. agno/reasoning/default.py +6 -7
  244. agno/reasoning/gemini.py +87 -3
  245. agno/reasoning/groq.py +109 -2
  246. agno/reasoning/helpers.py +6 -7
  247. agno/reasoning/manager.py +1238 -0
  248. agno/reasoning/ollama.py +93 -1
  249. agno/reasoning/openai.py +115 -1
  250. agno/reasoning/vertexai.py +85 -1
  251. agno/registry/__init__.py +3 -0
  252. agno/registry/registry.py +68 -0
  253. agno/remote/__init__.py +3 -0
  254. agno/remote/base.py +581 -0
  255. agno/run/__init__.py +2 -4
  256. agno/run/agent.py +134 -19
  257. agno/run/base.py +49 -1
  258. agno/run/cancel.py +65 -52
  259. agno/run/cancellation_management/__init__.py +9 -0
  260. agno/run/cancellation_management/base.py +78 -0
  261. agno/run/cancellation_management/in_memory_cancellation_manager.py +100 -0
  262. agno/run/cancellation_management/redis_cancellation_manager.py +236 -0
  263. agno/run/requirement.py +181 -0
  264. agno/run/team.py +111 -19
  265. agno/run/workflow.py +2 -1
  266. agno/session/agent.py +57 -92
  267. agno/session/summary.py +1 -1
  268. agno/session/team.py +62 -115
  269. agno/session/workflow.py +353 -57
  270. agno/skills/__init__.py +17 -0
  271. agno/skills/agent_skills.py +377 -0
  272. agno/skills/errors.py +32 -0
  273. agno/skills/loaders/__init__.py +4 -0
  274. agno/skills/loaders/base.py +27 -0
  275. agno/skills/loaders/local.py +216 -0
  276. agno/skills/skill.py +65 -0
  277. agno/skills/utils.py +107 -0
  278. agno/skills/validator.py +277 -0
  279. agno/table.py +10 -0
  280. agno/team/__init__.py +5 -1
  281. agno/team/remote.py +447 -0
  282. agno/team/team.py +3769 -2202
  283. agno/tools/brandfetch.py +27 -18
  284. agno/tools/browserbase.py +225 -16
  285. agno/tools/crawl4ai.py +3 -0
  286. agno/tools/duckduckgo.py +25 -71
  287. agno/tools/exa.py +0 -21
  288. agno/tools/file.py +14 -13
  289. agno/tools/file_generation.py +12 -6
  290. agno/tools/firecrawl.py +15 -7
  291. agno/tools/function.py +94 -113
  292. agno/tools/google_bigquery.py +11 -2
  293. agno/tools/google_drive.py +4 -3
  294. agno/tools/knowledge.py +9 -4
  295. agno/tools/mcp/mcp.py +301 -18
  296. agno/tools/mcp/multi_mcp.py +269 -14
  297. agno/tools/mem0.py +11 -10
  298. agno/tools/memory.py +47 -46
  299. agno/tools/mlx_transcribe.py +10 -7
  300. agno/tools/models/nebius.py +5 -5
  301. agno/tools/models_labs.py +20 -10
  302. agno/tools/nano_banana.py +151 -0
  303. agno/tools/parallel.py +0 -7
  304. agno/tools/postgres.py +76 -36
  305. agno/tools/python.py +14 -6
  306. agno/tools/reasoning.py +30 -23
  307. agno/tools/redshift.py +406 -0
  308. agno/tools/shopify.py +1519 -0
  309. agno/tools/spotify.py +919 -0
  310. agno/tools/tavily.py +4 -1
  311. agno/tools/toolkit.py +253 -18
  312. agno/tools/websearch.py +93 -0
  313. agno/tools/website.py +1 -1
  314. agno/tools/wikipedia.py +1 -1
  315. agno/tools/workflow.py +56 -48
  316. agno/tools/yfinance.py +12 -11
  317. agno/tracing/__init__.py +12 -0
  318. agno/tracing/exporter.py +161 -0
  319. agno/tracing/schemas.py +276 -0
  320. agno/tracing/setup.py +112 -0
  321. agno/utils/agent.py +251 -10
  322. agno/utils/cryptography.py +22 -0
  323. agno/utils/dttm.py +33 -0
  324. agno/utils/events.py +264 -7
  325. agno/utils/hooks.py +111 -3
  326. agno/utils/http.py +161 -2
  327. agno/utils/mcp.py +49 -8
  328. agno/utils/media.py +22 -1
  329. agno/utils/models/ai_foundry.py +9 -2
  330. agno/utils/models/claude.py +20 -5
  331. agno/utils/models/cohere.py +9 -2
  332. agno/utils/models/llama.py +9 -2
  333. agno/utils/models/mistral.py +4 -2
  334. agno/utils/os.py +0 -0
  335. agno/utils/print_response/agent.py +99 -16
  336. agno/utils/print_response/team.py +223 -24
  337. agno/utils/print_response/workflow.py +0 -2
  338. agno/utils/prompts.py +8 -6
  339. agno/utils/remote.py +23 -0
  340. agno/utils/response.py +1 -13
  341. agno/utils/string.py +91 -2
  342. agno/utils/team.py +62 -12
  343. agno/utils/tokens.py +657 -0
  344. agno/vectordb/base.py +15 -2
  345. agno/vectordb/cassandra/cassandra.py +1 -1
  346. agno/vectordb/chroma/__init__.py +2 -1
  347. agno/vectordb/chroma/chromadb.py +468 -23
  348. agno/vectordb/clickhouse/clickhousedb.py +1 -1
  349. agno/vectordb/couchbase/couchbase.py +6 -2
  350. agno/vectordb/lancedb/lance_db.py +7 -38
  351. agno/vectordb/lightrag/lightrag.py +7 -6
  352. agno/vectordb/milvus/milvus.py +118 -84
  353. agno/vectordb/mongodb/__init__.py +2 -1
  354. agno/vectordb/mongodb/mongodb.py +14 -31
  355. agno/vectordb/pgvector/pgvector.py +120 -66
  356. agno/vectordb/pineconedb/pineconedb.py +2 -19
  357. agno/vectordb/qdrant/__init__.py +2 -1
  358. agno/vectordb/qdrant/qdrant.py +33 -56
  359. agno/vectordb/redis/__init__.py +2 -1
  360. agno/vectordb/redis/redisdb.py +19 -31
  361. agno/vectordb/singlestore/singlestore.py +17 -9
  362. agno/vectordb/surrealdb/surrealdb.py +2 -38
  363. agno/vectordb/weaviate/__init__.py +2 -1
  364. agno/vectordb/weaviate/weaviate.py +7 -3
  365. agno/workflow/__init__.py +5 -1
  366. agno/workflow/agent.py +2 -2
  367. agno/workflow/condition.py +12 -10
  368. agno/workflow/loop.py +28 -9
  369. agno/workflow/parallel.py +21 -13
  370. agno/workflow/remote.py +362 -0
  371. agno/workflow/router.py +12 -9
  372. agno/workflow/step.py +261 -36
  373. agno/workflow/steps.py +12 -8
  374. agno/workflow/types.py +40 -77
  375. agno/workflow/workflow.py +939 -213
  376. {agno-2.2.13.dist-info → agno-2.4.3.dist-info}/METADATA +134 -181
  377. agno-2.4.3.dist-info/RECORD +677 -0
  378. {agno-2.2.13.dist-info → agno-2.4.3.dist-info}/WHEEL +1 -1
  379. agno/tools/googlesearch.py +0 -98
  380. agno/tools/memori.py +0 -339
  381. agno-2.2.13.dist-info/RECORD +0 -575
  382. {agno-2.2.13.dist-info → agno-2.4.3.dist-info}/licenses/LICENSE +0 -0
  383. {agno-2.2.13.dist-info → agno-2.4.3.dist-info}/top_level.txt +0 -0
@@ -2,20 +2,25 @@ import logging
2
2
  from copy import deepcopy
3
3
  from typing import List, Optional, Union, cast
4
4
 
5
- from fastapi import APIRouter, Depends, HTTPException, Query
5
+ from fastapi import APIRouter, Depends, HTTPException, Query, Request
6
6
 
7
- from agno.agent.agent import Agent
7
+ from agno.agent import Agent, RemoteAgent
8
8
  from agno.db.base import AsyncBaseDb, BaseDb
9
9
  from agno.db.schemas.evals import EvalFilterType, EvalType
10
10
  from agno.models.utils import get_model
11
- from agno.os.auth import get_authentication_dependency
11
+ from agno.os.auth import get_auth_token_from_request, get_authentication_dependency
12
12
  from agno.os.routers.evals.schemas import (
13
13
  DeleteEvalRunsRequest,
14
14
  EvalRunInput,
15
15
  EvalSchema,
16
16
  UpdateEvalRunRequest,
17
17
  )
18
- from agno.os.routers.evals.utils import run_accuracy_eval, run_performance_eval, run_reliability_eval
18
+ from agno.os.routers.evals.utils import (
19
+ run_accuracy_eval,
20
+ run_agent_as_judge_eval,
21
+ run_performance_eval,
22
+ run_reliability_eval,
23
+ )
19
24
  from agno.os.schema import (
20
25
  BadRequestResponse,
21
26
  InternalServerErrorResponse,
@@ -28,15 +33,17 @@ from agno.os.schema import (
28
33
  )
29
34
  from agno.os.settings import AgnoAPISettings
30
35
  from agno.os.utils import get_agent_by_id, get_db, get_team_by_id
31
- from agno.team.team import Team
36
+ from agno.remote.base import RemoteDb
37
+ from agno.team import RemoteTeam, Team
38
+ from agno.utils.log import log_warning
32
39
 
33
40
  logger = logging.getLogger(__name__)
34
41
 
35
42
 
36
43
  def get_eval_router(
37
- dbs: dict[str, list[Union[BaseDb, AsyncBaseDb]]],
38
- agents: Optional[List[Agent]] = None,
39
- teams: Optional[List[Team]] = None,
44
+ dbs: dict[str, list[Union[BaseDb, AsyncBaseDb, RemoteDb]]],
45
+ agents: Optional[List[Union[Agent, RemoteAgent]]] = None,
46
+ teams: Optional[List[Union[Team, RemoteTeam]]] = None,
40
47
  settings: AgnoAPISettings = AgnoAPISettings(),
41
48
  ) -> APIRouter:
42
49
  """Create eval router with comprehensive OpenAPI documentation for agent/team evaluation endpoints."""
@@ -56,9 +63,9 @@ def get_eval_router(
56
63
 
57
64
  def attach_routes(
58
65
  router: APIRouter,
59
- dbs: dict[str, list[Union[BaseDb, AsyncBaseDb]]],
60
- agents: Optional[List[Agent]] = None,
61
- teams: Optional[List[Team]] = None,
66
+ dbs: dict[str, list[Union[BaseDb, AsyncBaseDb, RemoteDb]]],
67
+ agents: Optional[List[Union[Agent, RemoteAgent]]] = None,
68
+ teams: Optional[List[Union[Team, RemoteTeam]]] = None,
62
69
  ) -> APIRouter:
63
70
  @router.get(
64
71
  "/eval-runs",
@@ -104,14 +111,15 @@ def attach_routes(
104
111
  },
105
112
  )
106
113
  async def get_eval_runs(
114
+ request: Request,
107
115
  agent_id: Optional[str] = Query(default=None, description="Agent ID"),
108
116
  team_id: Optional[str] = Query(default=None, description="Team ID"),
109
117
  workflow_id: Optional[str] = Query(default=None, description="Workflow ID"),
110
118
  model_id: Optional[str] = Query(default=None, description="Model ID"),
111
119
  filter_type: Optional[EvalFilterType] = Query(default=None, description="Filter type", alias="type"),
112
120
  eval_types: Optional[List[EvalType]] = Depends(parse_eval_types_filter),
113
- limit: Optional[int] = Query(default=20, description="Number of eval runs to return"),
114
- page: Optional[int] = Query(default=1, description="Page number"),
121
+ limit: Optional[int] = Query(default=20, description="Number of eval runs to return", ge=1),
122
+ page: Optional[int] = Query(default=1, description="Page number", ge=0),
115
123
  sort_by: Optional[str] = Query(default="created_at", description="Field to sort by"),
116
124
  sort_order: Optional[SortOrder] = Query(default="desc", description="Sort order (asc or desc)"),
117
125
  db_id: Optional[str] = Query(default=None, description="The ID of the database to use"),
@@ -119,6 +127,23 @@ def attach_routes(
119
127
  ) -> PaginatedResponse[EvalSchema]:
120
128
  db = await get_db(dbs, db_id, table)
121
129
 
130
+ if isinstance(db, RemoteDb):
131
+ auth_token = get_auth_token_from_request(request)
132
+ headers = {"Authorization": f"Bearer {auth_token}"} if auth_token else None
133
+ return await db.get_eval_runs(
134
+ limit=limit,
135
+ page=page,
136
+ sort_by=sort_by,
137
+ sort_order=sort_order.value if sort_order else None,
138
+ agent_id=agent_id,
139
+ team_id=team_id,
140
+ workflow_id=workflow_id,
141
+ model_id=model_id,
142
+ eval_types=eval_types,
143
+ filter_type=filter_type.value if filter_type else None,
144
+ headers=headers,
145
+ )
146
+
122
147
  if isinstance(db, AsyncBaseDb):
123
148
  db = cast(AsyncBaseDb, db)
124
149
  eval_runs, total_count = await db.get_eval_runs(
@@ -197,11 +222,17 @@ def attach_routes(
197
222
  },
198
223
  )
199
224
  async def get_eval_run(
225
+ request: Request,
200
226
  eval_run_id: str,
201
227
  db_id: Optional[str] = Query(default=None, description="The ID of the database to use"),
202
228
  table: Optional[str] = Query(default=None, description="Table to query eval run from"),
203
229
  ) -> EvalSchema:
204
230
  db = await get_db(dbs, db_id, table)
231
+ if isinstance(db, RemoteDb):
232
+ auth_token = get_auth_token_from_request(request)
233
+ headers = {"Authorization": f"Bearer {auth_token}"} if auth_token else None
234
+ return await db.get_eval_run(eval_run_id=eval_run_id, db_id=db_id, table=table, headers=headers)
235
+
205
236
  if isinstance(db, AsyncBaseDb):
206
237
  db = cast(AsyncBaseDb, db)
207
238
  eval_run = await db.get_eval_run(eval_run_id=eval_run_id, deserialize=False)
@@ -224,12 +255,20 @@ def attach_routes(
224
255
  },
225
256
  )
226
257
  async def delete_eval_runs(
258
+ http_request: Request,
227
259
  request: DeleteEvalRunsRequest,
228
260
  db_id: Optional[str] = Query(default=None, description="Database ID to use for deletion"),
229
261
  table: Optional[str] = Query(default=None, description="Table to use for deletion"),
230
262
  ) -> None:
231
263
  try:
232
264
  db = await get_db(dbs, db_id, table)
265
+ if isinstance(db, RemoteDb):
266
+ auth_token = get_auth_token_from_request(http_request)
267
+ headers = {"Authorization": f"Bearer {auth_token}"} if auth_token else None
268
+ return await db.delete_eval_runs(
269
+ eval_run_ids=request.eval_run_ids, db_id=db_id, table=table, headers=headers
270
+ )
271
+
233
272
  if isinstance(db, AsyncBaseDb):
234
273
  db = cast(AsyncBaseDb, db)
235
274
  await db.delete_eval_runs(eval_run_ids=request.eval_run_ids)
@@ -277,6 +316,7 @@ def attach_routes(
277
316
  },
278
317
  )
279
318
  async def update_eval_run(
319
+ http_request: Request,
280
320
  eval_run_id: str,
281
321
  request: UpdateEvalRunRequest,
282
322
  db_id: Optional[str] = Query(default=None, description="The ID of the database to use"),
@@ -284,6 +324,13 @@ def attach_routes(
284
324
  ) -> EvalSchema:
285
325
  try:
286
326
  db = await get_db(dbs, db_id, table)
327
+ if isinstance(db, RemoteDb):
328
+ auth_token = get_auth_token_from_request(http_request)
329
+ headers = {"Authorization": f"Bearer {auth_token}"} if auth_token else None
330
+ return await db.update_eval_run(
331
+ eval_run_id=eval_run_id, name=request.name, db_id=db_id, table=table, headers=headers
332
+ )
333
+
287
334
  if isinstance(db, AsyncBaseDb):
288
335
  db = cast(AsyncBaseDb, db)
289
336
  eval_run = await db.rename_eval_run(eval_run_id=eval_run_id, name=request.name, deserialize=False)
@@ -304,7 +351,7 @@ def attach_routes(
304
351
  operation_id="run_eval",
305
352
  summary="Execute Evaluation",
306
353
  description=(
307
- "Run evaluation tests on agents or teams. Supports accuracy, performance, and reliability evaluations. "
354
+ "Run evaluation tests on agents or teams. Supports accuracy, agent-as-judge, performance, and reliability evaluations. "
308
355
  "Requires either agent_id or team_id, but not both."
309
356
  ),
310
357
  responses={
@@ -338,11 +385,29 @@ def attach_routes(
338
385
  },
339
386
  )
340
387
  async def run_eval(
388
+ request: Request,
341
389
  eval_run_input: EvalRunInput,
342
390
  db_id: Optional[str] = Query(default=None, description="Database ID to use for evaluation"),
343
391
  table: Optional[str] = Query(default=None, description="Table to use for evaluation"),
344
392
  ) -> Optional[EvalSchema]:
345
393
  db = await get_db(dbs, db_id, table)
394
+ if isinstance(db, RemoteDb):
395
+ auth_token = get_auth_token_from_request(request)
396
+ headers = {"Authorization": f"Bearer {auth_token}"} if auth_token else None
397
+ return await db.create_eval_run(
398
+ eval_type=eval_run_input.eval_type,
399
+ input_text=eval_run_input.input,
400
+ agent_id=eval_run_input.agent_id,
401
+ team_id=eval_run_input.team_id,
402
+ model_id=eval_run_input.model_id,
403
+ model_provider=eval_run_input.model_provider,
404
+ expected_output=eval_run_input.expected_output,
405
+ expected_tool_calls=eval_run_input.expected_tool_calls,
406
+ num_iterations=eval_run_input.num_iterations,
407
+ db_id=db_id,
408
+ table=table,
409
+ headers=headers,
410
+ )
346
411
 
347
412
  if eval_run_input.agent_id and eval_run_input.team_id:
348
413
  raise HTTPException(status_code=400, detail="Only one of agent_id or team_id must be provided")
@@ -351,6 +416,9 @@ def attach_routes(
351
416
  agent = get_agent_by_id(agent_id=eval_run_input.agent_id, agents=agents)
352
417
  if not agent:
353
418
  raise HTTPException(status_code=404, detail=f"Agent with id '{eval_run_input.agent_id}' not found")
419
+ if isinstance(agent, RemoteAgent):
420
+ log_warning("Evaluation against remote agents are not supported yet")
421
+ return None
354
422
 
355
423
  default_model = None
356
424
  if (
@@ -373,7 +441,11 @@ def attach_routes(
373
441
  team = get_team_by_id(team_id=eval_run_input.team_id, teams=teams)
374
442
  if not team:
375
443
  raise HTTPException(status_code=404, detail=f"Team with id '{eval_run_input.team_id}' not found")
444
+ if isinstance(team, RemoteTeam):
445
+ log_warning("Evaluation against remote teams are not supported yet")
446
+ return None
376
447
 
448
+ # If model_id/model_provider specified, override team's model temporarily
377
449
  default_model = None
378
450
  if (
379
451
  hasattr(team, "model")
@@ -381,13 +453,13 @@ def attach_routes(
381
453
  and eval_run_input.model_id is not None
382
454
  and eval_run_input.model_provider is not None
383
455
  ):
384
- default_model = deepcopy(team.model)
456
+ default_model = deepcopy(team.model) # Save original
385
457
  if eval_run_input.model_id != team.model.id or eval_run_input.model_provider != team.model.provider:
386
458
  model_provider = eval_run_input.model_provider.lower()
387
459
  model_id = eval_run_input.model_id.lower()
388
460
  model_string = f"{model_provider}:{model_id}"
389
461
  model = get_model(model_string)
390
- team.model = model
462
+ team.model = model # Override temporarily
391
463
 
392
464
  agent = None
393
465
 
@@ -396,16 +468,37 @@ def attach_routes(
396
468
 
397
469
  # Run the evaluation
398
470
  if eval_run_input.eval_type == EvalType.ACCURACY:
471
+ if isinstance(agent, RemoteAgent) or isinstance(team, RemoteTeam):
472
+ # TODO: Handle remote evaluation
473
+ log_warning("Evaluation against remote agents are not supported yet")
474
+ return None
399
475
  return await run_accuracy_eval(
400
476
  eval_run_input=eval_run_input, db=db, agent=agent, team=team, default_model=default_model
401
477
  )
402
478
 
479
+ elif eval_run_input.eval_type == EvalType.AGENT_AS_JUDGE:
480
+ return await run_agent_as_judge_eval(
481
+ eval_run_input=eval_run_input,
482
+ db=db,
483
+ agent=agent,
484
+ team=team,
485
+ default_model=default_model, # type: ignore
486
+ )
487
+
403
488
  elif eval_run_input.eval_type == EvalType.PERFORMANCE:
489
+ if isinstance(agent, RemoteAgent) or isinstance(team, RemoteTeam):
490
+ # TODO: Handle remote evaluation
491
+ log_warning("Evaluation against remote agents are not supported yet")
492
+ return None
404
493
  return await run_performance_eval(
405
494
  eval_run_input=eval_run_input, db=db, agent=agent, team=team, default_model=default_model
406
495
  )
407
496
 
408
497
  else:
498
+ if isinstance(agent, RemoteAgent) or isinstance(team, RemoteTeam):
499
+ # TODO: Handle remote evaluation
500
+ log_warning("Evaluation against remote agents are not supported yet")
501
+ return None
409
502
  return await run_reliability_eval(
410
503
  eval_run_input=eval_run_input, db=db, agent=agent, team=team, default_model=default_model
411
504
  )
@@ -416,8 +509,8 @@ def attach_routes(
416
509
  def parse_eval_types_filter(
417
510
  eval_types: Optional[str] = Query(
418
511
  default=None,
419
- description="Comma-separated eval types (accuracy,performance,reliability)",
420
- examples=["accuracy,performance"],
512
+ description="Comma-separated eval types (accuracy,agent_as_judge,performance,reliability)",
513
+ examples=["accuracy,agent_as_judge,performance,reliability"],
421
514
  ),
422
515
  ) -> Optional[List[EvalType]]:
423
516
  """Parse comma-separated eval types into EvalType enums for filtering evaluation runs."""
@@ -1,14 +1,16 @@
1
1
  from dataclasses import asdict
2
- from datetime import datetime, timezone
3
- from typing import Any, Dict, List, Optional
2
+ from datetime import datetime
3
+ from typing import Any, Dict, List, Literal, Optional
4
4
 
5
5
  from pydantic import BaseModel, Field
6
6
 
7
7
  from agno.db.schemas.evals import EvalType
8
- from agno.eval import AccuracyResult, PerformanceResult, ReliabilityResult
8
+ from agno.eval import AccuracyResult, AgentAsJudgeResult, PerformanceResult, ReliabilityResult
9
9
  from agno.eval.accuracy import AccuracyEval
10
+ from agno.eval.agent_as_judge import AgentAsJudgeEval
10
11
  from agno.eval.performance import PerformanceEval
11
12
  from agno.eval.reliability import ReliabilityEval
13
+ from agno.os.utils import to_utc_datetime
12
14
 
13
15
 
14
16
  class EvalRunInput(BaseModel):
@@ -27,6 +29,15 @@ class EvalRunInput(BaseModel):
27
29
  # Accuracy eval specific fields
28
30
  expected_output: Optional[str] = Field(None, description="Expected output for accuracy evaluation")
29
31
 
32
+ # AgentAsJudge eval specific fields
33
+ criteria: Optional[str] = Field(None, description="Evaluation criteria for agent-as-judge evaluation")
34
+ scoring_strategy: Optional[Literal["numeric", "binary"]] = Field(
35
+ "binary", description="Scoring strategy: 'numeric' (1-10 with threshold) or 'binary' (PASS/FAIL)"
36
+ )
37
+ threshold: Optional[int] = Field(
38
+ 7, description="Score threshold for pass/fail (1-10), only used with numeric scoring", ge=1, le=10
39
+ )
40
+
30
41
  # Performance eval specific fields
31
42
  warmup_runs: int = Field(0, description="Number of warmup runs before measuring performance", ge=0, le=10)
32
43
 
@@ -64,8 +75,8 @@ class EvalSchema(BaseModel):
64
75
  eval_type=eval_run["eval_type"],
65
76
  eval_data=eval_run["eval_data"],
66
77
  eval_input=eval_run.get("eval_input"),
67
- created_at=datetime.fromtimestamp(eval_run["created_at"], tz=timezone.utc),
68
- updated_at=datetime.fromtimestamp(eval_run["updated_at"], tz=timezone.utc),
78
+ created_at=to_utc_datetime(eval_run.get("created_at")),
79
+ updated_at=to_utc_datetime(eval_run.get("updated_at")),
69
80
  )
70
81
 
71
82
  @classmethod
@@ -89,6 +100,28 @@ class EvalSchema(BaseModel):
89
100
  eval_data=asdict(result),
90
101
  )
91
102
 
103
+ @classmethod
104
+ def from_agent_as_judge_eval(
105
+ cls,
106
+ agent_as_judge_eval: AgentAsJudgeEval,
107
+ result: AgentAsJudgeResult,
108
+ model_id: Optional[str] = None,
109
+ model_provider: Optional[str] = None,
110
+ agent_id: Optional[str] = None,
111
+ team_id: Optional[str] = None,
112
+ ) -> "EvalSchema":
113
+ return cls(
114
+ id=result.run_id,
115
+ name=agent_as_judge_eval.name,
116
+ agent_id=agent_id,
117
+ team_id=team_id,
118
+ workflow_id=None,
119
+ model_id=model_id,
120
+ model_provider=model_provider,
121
+ eval_type=EvalType.AGENT_AS_JUDGE,
122
+ eval_data=asdict(result),
123
+ )
124
+
92
125
  @classmethod
93
126
  def from_performance_eval(
94
127
  cls,
@@ -2,14 +2,15 @@ from typing import Optional, Union
2
2
 
3
3
  from fastapi import HTTPException
4
4
 
5
- from agno.agent.agent import Agent
5
+ from agno.agent import Agent, RemoteAgent
6
6
  from agno.db.base import AsyncBaseDb, BaseDb
7
7
  from agno.eval.accuracy import AccuracyEval
8
+ from agno.eval.agent_as_judge import AgentAsJudgeEval
8
9
  from agno.eval.performance import PerformanceEval
9
10
  from agno.eval.reliability import ReliabilityEval
10
11
  from agno.models.base import Model
11
12
  from agno.os.routers.evals.schemas import EvalRunInput, EvalSchema
12
- from agno.team.team import Team
13
+ from agno.team import RemoteTeam, Team
13
14
 
14
15
 
15
16
  async def run_accuracy_eval(
@@ -36,12 +37,13 @@ async def run_accuracy_eval(
36
37
  model=default_model,
37
38
  )
38
39
 
39
- result = accuracy_eval.run(print_results=False, print_summary=False)
40
+ result = await accuracy_eval.arun(print_results=False, print_summary=False)
40
41
  if not result:
41
42
  raise HTTPException(status_code=500, detail="Failed to run accuracy evaluation")
42
43
 
43
44
  eval_run = EvalSchema.from_accuracy_eval(accuracy_eval=accuracy_eval, result=result)
44
45
 
46
+ # Restore original model after eval
45
47
  if default_model is not None:
46
48
  if agent is not None:
47
49
  agent.model = default_model
@@ -51,6 +53,70 @@ async def run_accuracy_eval(
51
53
  return eval_run
52
54
 
53
55
 
56
+ async def run_agent_as_judge_eval(
57
+ eval_run_input: EvalRunInput,
58
+ db: Union[BaseDb, AsyncBaseDb],
59
+ agent: Optional[Union[Agent, RemoteAgent]] = None,
60
+ team: Optional[Union[Team, RemoteTeam]] = None,
61
+ default_model: Optional[Model] = None,
62
+ ) -> EvalSchema:
63
+ """Run an AgentAsJudge evaluation for the given agent or team"""
64
+ if not eval_run_input.criteria:
65
+ raise HTTPException(status_code=400, detail="criteria is required for agent-as-judge evaluation")
66
+
67
+ # Run agent/team to get output
68
+ if agent:
69
+ agent_response = await agent.arun(eval_run_input.input, stream=False)
70
+ output = str(agent_response.content) if agent_response.content else ""
71
+ agent_id = agent.id
72
+ team_id = None
73
+ elif team:
74
+ team_response = await team.arun(eval_run_input.input, stream=False)
75
+ output = str(team_response.content) if team_response.content else ""
76
+ agent_id = None
77
+ team_id = team.id
78
+ else:
79
+ raise HTTPException(status_code=400, detail="Either agent_id or team_id must be provided")
80
+
81
+ agent_as_judge_eval = AgentAsJudgeEval(
82
+ db=db,
83
+ criteria=eval_run_input.criteria,
84
+ scoring_strategy=eval_run_input.scoring_strategy or "binary",
85
+ threshold=eval_run_input.threshold or 7,
86
+ additional_guidelines=eval_run_input.additional_guidelines,
87
+ name=eval_run_input.name,
88
+ model=default_model,
89
+ )
90
+
91
+ result = await agent_as_judge_eval.arun(
92
+ input=eval_run_input.input, output=output, print_results=False, print_summary=False
93
+ )
94
+ if not result:
95
+ raise HTTPException(status_code=500, detail="Failed to run agent as judge evaluation")
96
+
97
+ # Use evaluator's model
98
+ eval_model_id = agent_as_judge_eval.model.id if agent_as_judge_eval.model is not None else None
99
+ eval_model_provider = agent_as_judge_eval.model.provider if agent_as_judge_eval.model is not None else None
100
+
101
+ eval_run = EvalSchema.from_agent_as_judge_eval(
102
+ agent_as_judge_eval=agent_as_judge_eval,
103
+ result=result,
104
+ agent_id=agent_id,
105
+ team_id=team_id,
106
+ model_id=eval_model_id,
107
+ model_provider=eval_model_provider,
108
+ )
109
+
110
+ # Restore original model after eval
111
+ if default_model is not None:
112
+ if agent is not None and isinstance(agent, Agent):
113
+ agent.model = default_model
114
+ elif team is not None and isinstance(team, Team):
115
+ team.model = default_model
116
+
117
+ return eval_run
118
+
119
+
54
120
  async def run_performance_eval(
55
121
  eval_run_input: EvalRunInput,
56
122
  db: Union[BaseDb, AsyncBaseDb],
@@ -61,16 +127,16 @@ async def run_performance_eval(
61
127
  """Run a performance evaluation for the given agent or team"""
62
128
  if agent:
63
129
 
64
- def run_component(): # type: ignore
65
- return agent.run(eval_run_input.input)
130
+ async def run_component(): # type: ignore
131
+ return await agent.arun(eval_run_input.input, stream=False)
66
132
 
67
133
  model_id = agent.model.id if agent and agent.model else None
68
134
  model_provider = agent.model.provider if agent and agent.model else None
69
135
 
70
136
  elif team:
71
137
 
72
- def run_component():
73
- return team.run(eval_run_input.input)
138
+ async def run_component(): # type: ignore
139
+ return await team.arun(eval_run_input.input, stream=False)
74
140
 
75
141
  model_id = team.model.id if team and team.model else None
76
142
  model_provider = team.model.provider if team and team.model else None
@@ -86,7 +152,8 @@ async def run_performance_eval(
86
152
  model_id=model_id,
87
153
  model_provider=model_provider,
88
154
  )
89
- result = performance_eval.run(print_results=False, print_summary=False)
155
+
156
+ result = await performance_eval.arun(print_results=False, print_summary=False)
90
157
  if not result:
91
158
  raise HTTPException(status_code=500, detail="Failed to run performance evaluation")
92
159
 
@@ -99,6 +166,7 @@ async def run_performance_eval(
99
166
  model_provider=model_provider,
100
167
  )
101
168
 
169
+ # Restore original model after eval
102
170
  if default_model is not None:
103
171
  if agent is not None:
104
172
  agent.model = default_model
@@ -120,7 +188,7 @@ async def run_reliability_eval(
120
188
  raise HTTPException(status_code=400, detail="expected_tool_calls is required for reliability evaluations")
121
189
 
122
190
  if agent:
123
- agent_response = agent.run(eval_run_input.input)
191
+ agent_response = await agent.arun(eval_run_input.input, stream=False)
124
192
  reliability_eval = ReliabilityEval(
125
193
  db=db,
126
194
  name=eval_run_input.name,
@@ -131,7 +199,7 @@ async def run_reliability_eval(
131
199
  model_provider = agent.model.provider if agent and agent.model else None
132
200
 
133
201
  elif team:
134
- team_response = team.run(eval_run_input.input)
202
+ team_response = await team.arun(eval_run_input.input, stream=False)
135
203
  reliability_eval = ReliabilityEval(
136
204
  db=db,
137
205
  name=eval_run_input.name,
@@ -141,7 +209,7 @@ async def run_reliability_eval(
141
209
  model_id = team.model.id if team and team.model else None
142
210
  model_provider = team.model.provider if team and team.model else None
143
211
 
144
- result = reliability_eval.run(print_results=False)
212
+ result = await reliability_eval.arun(print_results=False)
145
213
  if not result:
146
214
  raise HTTPException(status_code=500, detail="Failed to run reliability evaluation")
147
215
 
@@ -153,6 +221,7 @@ async def run_reliability_eval(
153
221
  model_provider=model_provider,
154
222
  )
155
223
 
224
+ # Restore original model after eval
156
225
  if default_model is not None:
157
226
  if agent is not None:
158
227
  agent.model = default_model
agno/os/routers/health.py CHANGED
@@ -8,7 +8,7 @@ from agno.os.schema import HealthResponse
8
8
  def get_health_router(health_endpoint: str = "/health") -> APIRouter:
9
9
  router = APIRouter(tags=["Health"])
10
10
 
11
- started_time_stamp = datetime.now(timezone.utc).timestamp()
11
+ started_at = datetime.now(timezone.utc)
12
12
 
13
13
  @router.get(
14
14
  health_endpoint,
@@ -20,12 +20,12 @@ def get_health_router(health_endpoint: str = "/health") -> APIRouter:
20
20
  200: {
21
21
  "description": "API is healthy and operational",
22
22
  "content": {
23
- "application/json": {"example": {"status": "ok", "instantiated_at": str(started_time_stamp)}}
23
+ "application/json": {"example": {"status": "ok", "instantiated_at": "2025-06-10T12:00:00Z"}}
24
24
  },
25
25
  }
26
26
  },
27
27
  )
28
28
  async def health_check() -> HealthResponse:
29
- return HealthResponse(status="ok", instantiated_at=str(started_time_stamp))
29
+ return HealthResponse(status="ok", instantiated_at=started_at)
30
30
 
31
31
  return router