agno 2.1.2__py3-none-any.whl → 2.3.13__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. The information is provided for informational purposes only.
Files changed (314)
  1. agno/agent/agent.py +5540 -2273
  2. agno/api/api.py +2 -0
  3. agno/api/os.py +1 -1
  4. agno/compression/__init__.py +3 -0
  5. agno/compression/manager.py +247 -0
  6. agno/culture/__init__.py +3 -0
  7. agno/culture/manager.py +956 -0
  8. agno/db/async_postgres/__init__.py +3 -0
  9. agno/db/base.py +689 -6
  10. agno/db/dynamo/dynamo.py +933 -37
  11. agno/db/dynamo/schemas.py +174 -10
  12. agno/db/dynamo/utils.py +63 -4
  13. agno/db/firestore/firestore.py +831 -9
  14. agno/db/firestore/schemas.py +51 -0
  15. agno/db/firestore/utils.py +102 -4
  16. agno/db/gcs_json/gcs_json_db.py +660 -12
  17. agno/db/gcs_json/utils.py +60 -26
  18. agno/db/in_memory/in_memory_db.py +287 -14
  19. agno/db/in_memory/utils.py +60 -2
  20. agno/db/json/json_db.py +590 -14
  21. agno/db/json/utils.py +60 -26
  22. agno/db/migrations/manager.py +199 -0
  23. agno/db/migrations/v1_to_v2.py +43 -13
  24. agno/db/migrations/versions/__init__.py +0 -0
  25. agno/db/migrations/versions/v2_3_0.py +938 -0
  26. agno/db/mongo/__init__.py +15 -1
  27. agno/db/mongo/async_mongo.py +2760 -0
  28. agno/db/mongo/mongo.py +879 -11
  29. agno/db/mongo/schemas.py +42 -0
  30. agno/db/mongo/utils.py +80 -8
  31. agno/db/mysql/__init__.py +2 -1
  32. agno/db/mysql/async_mysql.py +2912 -0
  33. agno/db/mysql/mysql.py +946 -68
  34. agno/db/mysql/schemas.py +72 -10
  35. agno/db/mysql/utils.py +198 -7
  36. agno/db/postgres/__init__.py +2 -1
  37. agno/db/postgres/async_postgres.py +2579 -0
  38. agno/db/postgres/postgres.py +942 -57
  39. agno/db/postgres/schemas.py +81 -18
  40. agno/db/postgres/utils.py +164 -2
  41. agno/db/redis/redis.py +671 -7
  42. agno/db/redis/schemas.py +50 -0
  43. agno/db/redis/utils.py +65 -7
  44. agno/db/schemas/__init__.py +2 -1
  45. agno/db/schemas/culture.py +120 -0
  46. agno/db/schemas/evals.py +1 -0
  47. agno/db/schemas/memory.py +17 -2
  48. agno/db/singlestore/schemas.py +63 -0
  49. agno/db/singlestore/singlestore.py +949 -83
  50. agno/db/singlestore/utils.py +60 -2
  51. agno/db/sqlite/__init__.py +2 -1
  52. agno/db/sqlite/async_sqlite.py +2911 -0
  53. agno/db/sqlite/schemas.py +62 -0
  54. agno/db/sqlite/sqlite.py +965 -46
  55. agno/db/sqlite/utils.py +169 -8
  56. agno/db/surrealdb/__init__.py +3 -0
  57. agno/db/surrealdb/metrics.py +292 -0
  58. agno/db/surrealdb/models.py +334 -0
  59. agno/db/surrealdb/queries.py +71 -0
  60. agno/db/surrealdb/surrealdb.py +1908 -0
  61. agno/db/surrealdb/utils.py +147 -0
  62. agno/db/utils.py +2 -0
  63. agno/eval/__init__.py +10 -0
  64. agno/eval/accuracy.py +75 -55
  65. agno/eval/agent_as_judge.py +861 -0
  66. agno/eval/base.py +29 -0
  67. agno/eval/performance.py +16 -7
  68. agno/eval/reliability.py +28 -16
  69. agno/eval/utils.py +35 -17
  70. agno/exceptions.py +27 -2
  71. agno/filters.py +354 -0
  72. agno/guardrails/prompt_injection.py +1 -0
  73. agno/hooks/__init__.py +3 -0
  74. agno/hooks/decorator.py +164 -0
  75. agno/integrations/discord/client.py +1 -1
  76. agno/knowledge/chunking/agentic.py +13 -10
  77. agno/knowledge/chunking/fixed.py +4 -1
  78. agno/knowledge/chunking/semantic.py +9 -4
  79. agno/knowledge/chunking/strategy.py +59 -15
  80. agno/knowledge/embedder/fastembed.py +1 -1
  81. agno/knowledge/embedder/nebius.py +1 -1
  82. agno/knowledge/embedder/ollama.py +8 -0
  83. agno/knowledge/embedder/openai.py +8 -8
  84. agno/knowledge/embedder/sentence_transformer.py +6 -2
  85. agno/knowledge/embedder/vllm.py +262 -0
  86. agno/knowledge/knowledge.py +1618 -318
  87. agno/knowledge/reader/base.py +6 -2
  88. agno/knowledge/reader/csv_reader.py +8 -10
  89. agno/knowledge/reader/docx_reader.py +5 -6
  90. agno/knowledge/reader/field_labeled_csv_reader.py +16 -20
  91. agno/knowledge/reader/json_reader.py +5 -4
  92. agno/knowledge/reader/markdown_reader.py +8 -8
  93. agno/knowledge/reader/pdf_reader.py +17 -19
  94. agno/knowledge/reader/pptx_reader.py +101 -0
  95. agno/knowledge/reader/reader_factory.py +32 -3
  96. agno/knowledge/reader/s3_reader.py +3 -3
  97. agno/knowledge/reader/tavily_reader.py +193 -0
  98. agno/knowledge/reader/text_reader.py +22 -10
  99. agno/knowledge/reader/web_search_reader.py +1 -48
  100. agno/knowledge/reader/website_reader.py +10 -10
  101. agno/knowledge/reader/wikipedia_reader.py +33 -1
  102. agno/knowledge/types.py +1 -0
  103. agno/knowledge/utils.py +72 -7
  104. agno/media.py +22 -6
  105. agno/memory/__init__.py +14 -1
  106. agno/memory/manager.py +544 -83
  107. agno/memory/strategies/__init__.py +15 -0
  108. agno/memory/strategies/base.py +66 -0
  109. agno/memory/strategies/summarize.py +196 -0
  110. agno/memory/strategies/types.py +37 -0
  111. agno/models/aimlapi/aimlapi.py +17 -0
  112. agno/models/anthropic/claude.py +515 -40
  113. agno/models/aws/bedrock.py +102 -21
  114. agno/models/aws/claude.py +131 -274
  115. agno/models/azure/ai_foundry.py +41 -19
  116. agno/models/azure/openai_chat.py +39 -8
  117. agno/models/base.py +1249 -525
  118. agno/models/cerebras/cerebras.py +91 -21
  119. agno/models/cerebras/cerebras_openai.py +21 -2
  120. agno/models/cohere/chat.py +40 -6
  121. agno/models/cometapi/cometapi.py +18 -1
  122. agno/models/dashscope/dashscope.py +2 -3
  123. agno/models/deepinfra/deepinfra.py +18 -1
  124. agno/models/deepseek/deepseek.py +69 -3
  125. agno/models/fireworks/fireworks.py +18 -1
  126. agno/models/google/gemini.py +877 -80
  127. agno/models/google/utils.py +22 -0
  128. agno/models/groq/groq.py +51 -18
  129. agno/models/huggingface/huggingface.py +17 -6
  130. agno/models/ibm/watsonx.py +16 -6
  131. agno/models/internlm/internlm.py +18 -1
  132. agno/models/langdb/langdb.py +13 -1
  133. agno/models/litellm/chat.py +44 -9
  134. agno/models/litellm/litellm_openai.py +18 -1
  135. agno/models/message.py +28 -5
  136. agno/models/meta/llama.py +47 -14
  137. agno/models/meta/llama_openai.py +22 -17
  138. agno/models/mistral/mistral.py +8 -4
  139. agno/models/nebius/nebius.py +6 -7
  140. agno/models/nvidia/nvidia.py +20 -3
  141. agno/models/ollama/chat.py +24 -8
  142. agno/models/openai/chat.py +104 -29
  143. agno/models/openai/responses.py +101 -81
  144. agno/models/openrouter/openrouter.py +60 -3
  145. agno/models/perplexity/perplexity.py +17 -1
  146. agno/models/portkey/portkey.py +7 -6
  147. agno/models/requesty/requesty.py +24 -4
  148. agno/models/response.py +73 -2
  149. agno/models/sambanova/sambanova.py +20 -3
  150. agno/models/siliconflow/siliconflow.py +19 -2
  151. agno/models/together/together.py +20 -3
  152. agno/models/utils.py +254 -8
  153. agno/models/vercel/v0.py +20 -3
  154. agno/models/vertexai/__init__.py +0 -0
  155. agno/models/vertexai/claude.py +190 -0
  156. agno/models/vllm/vllm.py +19 -14
  157. agno/models/xai/xai.py +19 -2
  158. agno/os/app.py +549 -152
  159. agno/os/auth.py +190 -3
  160. agno/os/config.py +23 -0
  161. agno/os/interfaces/a2a/router.py +8 -11
  162. agno/os/interfaces/a2a/utils.py +1 -1
  163. agno/os/interfaces/agui/router.py +18 -3
  164. agno/os/interfaces/agui/utils.py +152 -39
  165. agno/os/interfaces/slack/router.py +55 -37
  166. agno/os/interfaces/slack/slack.py +9 -1
  167. agno/os/interfaces/whatsapp/router.py +0 -1
  168. agno/os/interfaces/whatsapp/security.py +3 -1
  169. agno/os/mcp.py +110 -52
  170. agno/os/middleware/__init__.py +2 -0
  171. agno/os/middleware/jwt.py +676 -112
  172. agno/os/router.py +40 -1478
  173. agno/os/routers/agents/__init__.py +3 -0
  174. agno/os/routers/agents/router.py +599 -0
  175. agno/os/routers/agents/schema.py +261 -0
  176. agno/os/routers/evals/evals.py +96 -39
  177. agno/os/routers/evals/schemas.py +65 -33
  178. agno/os/routers/evals/utils.py +80 -10
  179. agno/os/routers/health.py +10 -4
  180. agno/os/routers/knowledge/knowledge.py +196 -38
  181. agno/os/routers/knowledge/schemas.py +82 -22
  182. agno/os/routers/memory/memory.py +279 -52
  183. agno/os/routers/memory/schemas.py +46 -17
  184. agno/os/routers/metrics/metrics.py +20 -8
  185. agno/os/routers/metrics/schemas.py +16 -16
  186. agno/os/routers/session/session.py +462 -34
  187. agno/os/routers/teams/__init__.py +3 -0
  188. agno/os/routers/teams/router.py +512 -0
  189. agno/os/routers/teams/schema.py +257 -0
  190. agno/os/routers/traces/__init__.py +3 -0
  191. agno/os/routers/traces/schemas.py +414 -0
  192. agno/os/routers/traces/traces.py +499 -0
  193. agno/os/routers/workflows/__init__.py +3 -0
  194. agno/os/routers/workflows/router.py +624 -0
  195. agno/os/routers/workflows/schema.py +75 -0
  196. agno/os/schema.py +256 -693
  197. agno/os/scopes.py +469 -0
  198. agno/os/utils.py +514 -36
  199. agno/reasoning/anthropic.py +80 -0
  200. agno/reasoning/gemini.py +73 -0
  201. agno/reasoning/openai.py +5 -0
  202. agno/reasoning/vertexai.py +76 -0
  203. agno/run/__init__.py +6 -0
  204. agno/run/agent.py +155 -32
  205. agno/run/base.py +55 -3
  206. agno/run/requirement.py +181 -0
  207. agno/run/team.py +125 -38
  208. agno/run/workflow.py +72 -18
  209. agno/session/agent.py +102 -89
  210. agno/session/summary.py +56 -15
  211. agno/session/team.py +164 -90
  212. agno/session/workflow.py +405 -40
  213. agno/table.py +10 -0
  214. agno/team/team.py +3974 -1903
  215. agno/tools/dalle.py +2 -4
  216. agno/tools/eleven_labs.py +23 -25
  217. agno/tools/exa.py +21 -16
  218. agno/tools/file.py +153 -23
  219. agno/tools/file_generation.py +16 -10
  220. agno/tools/firecrawl.py +15 -7
  221. agno/tools/function.py +193 -38
  222. agno/tools/gmail.py +238 -14
  223. agno/tools/google_drive.py +271 -0
  224. agno/tools/googlecalendar.py +36 -8
  225. agno/tools/googlesheets.py +20 -5
  226. agno/tools/jira.py +20 -0
  227. agno/tools/mcp/__init__.py +10 -0
  228. agno/tools/mcp/mcp.py +331 -0
  229. agno/tools/mcp/multi_mcp.py +347 -0
  230. agno/tools/mcp/params.py +24 -0
  231. agno/tools/mcp_toolbox.py +3 -3
  232. agno/tools/models/nebius.py +5 -5
  233. agno/tools/models_labs.py +20 -10
  234. agno/tools/nano_banana.py +151 -0
  235. agno/tools/notion.py +204 -0
  236. agno/tools/parallel.py +314 -0
  237. agno/tools/postgres.py +76 -36
  238. agno/tools/redshift.py +406 -0
  239. agno/tools/scrapegraph.py +1 -1
  240. agno/tools/shopify.py +1519 -0
  241. agno/tools/slack.py +18 -3
  242. agno/tools/spotify.py +919 -0
  243. agno/tools/tavily.py +146 -0
  244. agno/tools/toolkit.py +25 -0
  245. agno/tools/workflow.py +8 -1
  246. agno/tools/yfinance.py +12 -11
  247. agno/tracing/__init__.py +12 -0
  248. agno/tracing/exporter.py +157 -0
  249. agno/tracing/schemas.py +276 -0
  250. agno/tracing/setup.py +111 -0
  251. agno/utils/agent.py +938 -0
  252. agno/utils/cryptography.py +22 -0
  253. agno/utils/dttm.py +33 -0
  254. agno/utils/events.py +151 -3
  255. agno/utils/gemini.py +15 -5
  256. agno/utils/hooks.py +118 -4
  257. agno/utils/http.py +113 -2
  258. agno/utils/knowledge.py +12 -5
  259. agno/utils/log.py +1 -0
  260. agno/utils/mcp.py +92 -2
  261. agno/utils/media.py +187 -1
  262. agno/utils/merge_dict.py +3 -3
  263. agno/utils/message.py +60 -0
  264. agno/utils/models/ai_foundry.py +9 -2
  265. agno/utils/models/claude.py +49 -14
  266. agno/utils/models/cohere.py +9 -2
  267. agno/utils/models/llama.py +9 -2
  268. agno/utils/models/mistral.py +4 -2
  269. agno/utils/print_response/agent.py +109 -16
  270. agno/utils/print_response/team.py +223 -30
  271. agno/utils/print_response/workflow.py +251 -34
  272. agno/utils/streamlit.py +1 -1
  273. agno/utils/team.py +98 -9
  274. agno/utils/tokens.py +657 -0
  275. agno/vectordb/base.py +39 -7
  276. agno/vectordb/cassandra/cassandra.py +21 -5
  277. agno/vectordb/chroma/chromadb.py +43 -12
  278. agno/vectordb/clickhouse/clickhousedb.py +21 -5
  279. agno/vectordb/couchbase/couchbase.py +29 -5
  280. agno/vectordb/lancedb/lance_db.py +92 -181
  281. agno/vectordb/langchaindb/langchaindb.py +24 -4
  282. agno/vectordb/lightrag/lightrag.py +17 -3
  283. agno/vectordb/llamaindex/llamaindexdb.py +25 -5
  284. agno/vectordb/milvus/milvus.py +50 -37
  285. agno/vectordb/mongodb/__init__.py +7 -1
  286. agno/vectordb/mongodb/mongodb.py +36 -30
  287. agno/vectordb/pgvector/pgvector.py +201 -77
  288. agno/vectordb/pineconedb/pineconedb.py +41 -23
  289. agno/vectordb/qdrant/qdrant.py +67 -54
  290. agno/vectordb/redis/__init__.py +9 -0
  291. agno/vectordb/redis/redisdb.py +682 -0
  292. agno/vectordb/singlestore/singlestore.py +50 -29
  293. agno/vectordb/surrealdb/surrealdb.py +31 -41
  294. agno/vectordb/upstashdb/upstashdb.py +34 -6
  295. agno/vectordb/weaviate/weaviate.py +53 -14
  296. agno/workflow/__init__.py +2 -0
  297. agno/workflow/agent.py +299 -0
  298. agno/workflow/condition.py +120 -18
  299. agno/workflow/loop.py +77 -10
  300. agno/workflow/parallel.py +231 -143
  301. agno/workflow/router.py +118 -17
  302. agno/workflow/step.py +609 -170
  303. agno/workflow/steps.py +73 -6
  304. agno/workflow/types.py +96 -21
  305. agno/workflow/workflow.py +2039 -262
  306. {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/METADATA +201 -66
  307. agno-2.3.13.dist-info/RECORD +613 -0
  308. agno/tools/googlesearch.py +0 -98
  309. agno/tools/mcp.py +0 -679
  310. agno/tools/memori.py +0 -339
  311. agno-2.1.2.dist-info/RECORD +0 -543
  312. {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/WHEEL +0 -0
  313. {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/licenses/LICENSE +0 -0
  314. {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/top_level.txt +0 -0
agno/eval/agent_as_judge.py (new file)
@@ -0,0 +1,861 @@
+ from dataclasses import asdict, dataclass, field
+ from inspect import iscoroutinefunction
+ from os import getenv
+ from textwrap import dedent
+ from typing import TYPE_CHECKING, Any, Callable, Dict, List, Literal, Optional, Union
+ from uuid import uuid4
+
+ from pydantic import BaseModel, Field
+
+ from agno.agent import Agent
+ from agno.db.base import AsyncBaseDb, BaseDb
+ from agno.db.schemas.evals import EvalType
+ from agno.eval.base import BaseEval
+ from agno.eval.utils import async_log_eval, log_eval_run, store_result_in_file
+ from agno.exceptions import EvalError
+ from agno.models.base import Model
+ from agno.run.agent import RunInput, RunOutput
+ from agno.run.team import TeamRunInput, TeamRunOutput
+ from agno.utils.log import log_warning, logger, set_log_level_to_debug, set_log_level_to_info
+
+ if TYPE_CHECKING:
+     from rich.console import Console
+
+
+ class NumericJudgeResponse(BaseModel):
+     """Response schema for numeric scoring mode."""
+
+     score: int = Field(..., ge=1, le=10, description="Score between 1 and 10.")
+     reason: str = Field(..., description="Detailed reasoning for the evaluation.")
+
+
+ class BinaryJudgeResponse(BaseModel):
+     """Response schema for binary scoring mode."""
+
+     passed: bool = Field(..., description="Pass/fail result.")
+     reason: str = Field(..., description="Detailed reasoning for the evaluation.")
+
+
+ @dataclass
+ class AgentAsJudgeEvaluation:
+     """Result of a single agent-as-judge evaluation."""
+
+     input: str
+     output: str
+     criteria: str
+     score: Optional[int]
+     reason: str
+     passed: bool
+
+     def print_eval(self, console: Optional["Console"] = None):
+         from rich.box import ROUNDED
+         from rich.console import Console
+         from rich.markdown import Markdown
+         from rich.table import Table
+
+         if console is None:
+             console = Console()
+
+         status_style = "green" if self.passed else "red"
+         status_text = "PASSED" if self.passed else "FAILED"
+
+         results_table = Table(
+             box=ROUNDED,
+             border_style="blue",
+             show_header=False,
+             title="[ Agent As Judge Evaluation ]",
+             title_style="bold sky_blue1",
+             title_justify="center",
+         )
+         results_table.add_row("Input", self.input[:200] + "..." if len(self.input) > 200 else self.input)
+         results_table.add_row("Output", self.output[:200] + "..." if len(self.output) > 200 else self.output)
+         if self.score is not None:
+             results_table.add_row("Score", f"{self.score}/10")
+         results_table.add_row("Status", f"[{status_style}]{status_text}[/{status_style}]")
+         results_table.add_row("Reason", Markdown(self.reason))
+         console.print(results_table)
+
+
+ @dataclass
+ class AgentAsJudgeResult:
+     """Aggregated results from agent-as-judge evaluations."""
+
+     run_id: str
+     results: List[AgentAsJudgeEvaluation] = field(default_factory=list)
+     avg_score: Optional[float] = field(init=False)
+     min_score: Optional[float] = field(init=False)
+     max_score: Optional[float] = field(init=False)
+     std_dev_score: Optional[float] = field(init=False)
+     pass_rate: float = field(init=False)
+
+     def __post_init__(self):
+         self.compute_stats()
+
+     def compute_stats(self):
+         import statistics
+
+         if self.results and len(self.results) > 0:
+             passed = [r.passed for r in self.results]
+             self.pass_rate = sum(passed) / len(passed) * 100
+
+             # Compute score statistics only for numeric mode (where score is not None)
+             scores = [r.score for r in self.results if r.score is not None]
+             if scores:
+                 self.avg_score = statistics.mean(scores)
+                 self.min_score = min(scores)
+                 self.max_score = max(scores)
+                 self.std_dev_score = statistics.stdev(scores) if len(scores) > 1 else 0.0
+             else:
+                 # Binary mode - no scores
+                 self.avg_score = None
+                 self.min_score = None
+                 self.max_score = None
+                 self.std_dev_score = None
+         else:
+             self.avg_score = None
+             self.min_score = None
+             self.max_score = None
+             self.std_dev_score = None
+             self.pass_rate = 0.0
+
+     def print_summary(self, console: Optional["Console"] = None):
+         from rich.box import ROUNDED
+         from rich.console import Console
+         from rich.table import Table
+
+         if console is None:
+             console = Console()
+
+         summary_table = Table(
+             box=ROUNDED,
+             border_style="blue",
+             show_header=False,
+             title="[ Agent As Judge Evaluation Summary ]",
+             title_style="bold sky_blue1",
+             title_justify="center",
+             padding=(0, 2),  # Add horizontal padding to make table wider
+             min_width=45,  # Ensure table is wide enough for title
+         )
+
+         num_results = len(self.results)
+         summary_table.add_row("Number of Evaluations", f"{num_results}")
+         summary_table.add_row("Pass Rate", f"{self.pass_rate:.1f}%")
+
+         # Only show score statistics for numeric mode (when scores exist)
+         if self.avg_score is not None:
+             # For single evaluation, show "Score" instead of statistics
+             if num_results == 1:
+                 summary_table.add_row("Score", f"{self.avg_score:.2f}/10")
+             # For multiple evaluations, show full statistics
+             elif num_results > 1:
+                 summary_table.add_row("Average Score", f"{self.avg_score:.2f}/10")
+                 summary_table.add_row("Min Score", f"{self.min_score:.2f}/10")
+                 summary_table.add_row("Max Score", f"{self.max_score:.2f}/10")
+                 if self.std_dev_score and self.std_dev_score > 0:
+                     summary_table.add_row("Std Deviation", f"{self.std_dev_score:.2f}")
+
+         console.print(summary_table)
+
+     def print_results(self, console: Optional["Console"] = None):
+         for result in self.results:
+             result.print_eval(console)
+
+
+ @dataclass
+ class AgentAsJudgeEval(BaseEval):
+     """Evaluate agent outputs using custom criteria with an LLM judge."""
+
+     # Core evaluation fields
+     criteria: str = ""
+     scoring_strategy: Literal["numeric", "binary"] = "binary"
+     threshold: int = 7  # Only used for numeric strategy
+     on_fail: Optional[Callable[["AgentAsJudgeEvaluation"], None]] = None
+     additional_guidelines: Optional[Union[str, List[str]]] = None
+
+     # Evaluation metadata
+     name: Optional[str] = None
+
+     # Model configuration
+     model: Optional[Model] = None
+     evaluator_agent: Optional[Agent] = None
+
+     # Output options
+     print_summary: bool = False
+     print_results: bool = False
+     file_path_to_save_results: Optional[str] = None
+     debug_mode: bool = getenv("AGNO_DEBUG", "false").lower() == "true"
+     db: Optional[Union[BaseDb, AsyncBaseDb]] = None
+     telemetry: bool = True
+     run_in_background: bool = False
+
+     def __post_init__(self):
+         """Validate scoring_strategy and threshold."""
+         if self.scoring_strategy == "numeric" and not 1 <= self.threshold <= 10:
+             raise ValueError(f"threshold must be between 1 and 10, got {self.threshold}")
+
+     def get_evaluator_agent(self) -> Agent:
+         """Return the evaluator agent. If not provided, build it based on the model and criteria."""
+         # Select response schema based on scoring strategy
+         response_schema = NumericJudgeResponse if self.scoring_strategy == "numeric" else BinaryJudgeResponse
+
+         if self.evaluator_agent is not None:
+             # Ensure custom evaluator has the required output_schema for structured responses
+             self.evaluator_agent.output_schema = response_schema
+             return self.evaluator_agent
+
+         model = self.model
+         if model is None:
+             try:
+                 from agno.models.openai import OpenAIChat
+
+                 model = OpenAIChat(id="gpt-5-mini")
+             except (ModuleNotFoundError, ImportError) as e:
+                 logger.exception(e)
+                 raise EvalError(
+                     "Agno uses `openai` as the default model provider. Please run `pip install openai` to use the default evaluator."
+                 )
+
+         # Build instructions based on scoring strategy
+         instructions_parts = ["## Criteria", self.criteria, ""]
+
+         if self.scoring_strategy == "numeric":
+             instructions_parts.extend(
+                 [
+                     "## Scoring (1-10)",
+                     "- 1-2: Completely fails the criteria",
+                     "- 3-4: Major issues",
+                     "- 5-6: Partial success with significant issues",
+                     "- 7-8: Mostly meets criteria with minor issues",
+                     "- 9-10: Fully meets or exceeds criteria",
+                     "",
+                     "## Instructions",
+                     "1. Carefully evaluate the output against the criteria above",
+                     "2. Provide a score from 1-10",
+                     "3. Provide detailed reasoning that references specific parts of the output",
+                 ]
+             )
+         else:  # binary
+             instructions_parts.extend(
+                 [
+                     "## Evaluation",
+                     "Determine if the output PASSES or FAILS the criteria above.",
+                     "",
+                     "## Instructions",
+                     "1. Carefully evaluate the output against the criteria above",
+                     "2. Decide if it passes (true) or fails (false)",
+                     "3. Provide detailed reasoning that references specific parts of the output",
+                 ]
+             )
+
+         # Add additional guidelines if provided
+         if self.additional_guidelines:
+             instructions_parts.append("")
+             instructions_parts.append("## Additional Guidelines")
+             if isinstance(self.additional_guidelines, str):
+                 instructions_parts.append(self.additional_guidelines)
+             else:
+                 for guideline in self.additional_guidelines:
+                     instructions_parts.append(f"- {guideline}")
+
+         # Add closing instruction
+         instructions_parts.append("")
+         instructions_parts.append("Be objective and thorough in your evaluation.")
+
+         return Agent(
+             model=model,
+             description="You are an expert evaluator. Score outputs objectively based on the provided criteria.",
+             instructions="\n".join(instructions_parts),
+             output_schema=response_schema,
+         )
+
+     def _evaluate(self, input: str, output: str, evaluator_agent: Agent) -> Optional[AgentAsJudgeEvaluation]:
+         """Evaluate a single input/output pair."""
+         try:
+             prompt = dedent(f"""\
+                 <input>
+                 {input}
+                 </input>
+
+                 <output>
+                 {output}
+                 </output>
+                 """)
+
+             response = evaluator_agent.run(prompt, stream=False)
+             judge_response = response.content
+             if not isinstance(judge_response, (NumericJudgeResponse, BinaryJudgeResponse)):
+                 raise EvalError(f"Invalid response: {judge_response}")
+
+             # Determine pass/fail based on scoring strategy and response type
+             if isinstance(judge_response, NumericJudgeResponse):
+                 score = judge_response.score
+                 passed = score >= self.threshold
+             else:  # BinaryJudgeResponse
+                 score = None
+                 passed = judge_response.passed
+
+             evaluation = AgentAsJudgeEvaluation(
+                 input=input,
+                 output=output,
+                 criteria=self.criteria,
+                 score=score,
+                 reason=judge_response.reason,
+                 passed=passed,
+             )
+
+             # Trigger on_fail callback if evaluation failed
+             if not passed and self.on_fail:
+                 try:
+                     if iscoroutinefunction(self.on_fail):
+                         log_warning(
+                             f"Cannot use async on_fail callback with sync evaluation. Use arun() instead. Skipping callback: {self.on_fail.__name__}"
+                         )
+                     else:
+                         self.on_fail(evaluation)
+                 except Exception as e:
+                     logger.warning(f"on_fail callback error: {e}")
+
+             return evaluation
+         except Exception as e:
+             logger.exception(f"Evaluation failed: {e}")
+             return None
+
+     async def _aevaluate(self, input: str, output: str, evaluator_agent: Agent) -> Optional[AgentAsJudgeEvaluation]:
+         """Evaluate a single input/output pair asynchronously."""
+         try:
+             prompt = dedent(f"""\
+                 <input>
+                 {input}
+                 </input>
+
+                 <output>
+                 {output}
+                 </output>
+                 """)
+
+             response = await evaluator_agent.arun(prompt, stream=False)
+             judge_response = response.content
+             if not isinstance(judge_response, (NumericJudgeResponse, BinaryJudgeResponse)):
+                 raise EvalError(f"Invalid response: {judge_response}")
+
+             # Determine pass/fail based on response type
+             if isinstance(judge_response, NumericJudgeResponse):
+                 score = judge_response.score
+                 passed = score >= self.threshold
+             else:  # BinaryJudgeResponse
+                 score = None
+                 passed = judge_response.passed
+
+             evaluation = AgentAsJudgeEvaluation(
+                 input=input,
+                 output=output,
+                 criteria=self.criteria,
+                 score=score,
+                 reason=judge_response.reason,
+                 passed=passed,
+             )
+
+             # Trigger on_fail callback if evaluation failed
+             if not passed and self.on_fail:
+                 try:
+                     if iscoroutinefunction(self.on_fail):
+                         await self.on_fail(evaluation)
+                     else:
+                         self.on_fail(evaluation)
+                 except Exception as e:
+                     logger.warning(f"on_fail callback error: {e}")
+
+             return evaluation
+         except Exception as e:
+             logger.exception(f"Async evaluation failed: {e}")
+             return None
+
+     def _log_eval_to_db(
+         self,
+         run_id: str,
+         result: AgentAsJudgeResult,
+         agent_id: Optional[str] = None,
+         model_id: Optional[str] = None,
+         model_provider: Optional[str] = None,
+         team_id: Optional[str] = None,
+         evaluated_component_name: Optional[str] = None,
+     ) -> None:
+         """Helper to log evaluation to database."""
+         if not self.db:
+             return
+
+         log_eval_run(
+             db=self.db,  # type: ignore
+             run_id=run_id,
+             run_data=asdict(result),
+             eval_type=EvalType.AGENT_AS_JUDGE,
+             agent_id=agent_id,
+             model_id=model_id,
+             model_provider=model_provider,
+             name=self.name,
+             team_id=team_id,
+             evaluated_component_name=evaluated_component_name,
+             eval_input={
+                 "criteria": self.criteria,
+                 "scoring_strategy": self.scoring_strategy,
+                 "threshold": self.threshold if self.scoring_strategy == "numeric" else None,
+                 "additional_guidelines": self.additional_guidelines,
+             },
+         )
+
+     async def _async_log_eval_to_db(
+         self,
+         run_id: str,
+         result: AgentAsJudgeResult,
+         agent_id: Optional[str] = None,
+         model_id: Optional[str] = None,
+         model_provider: Optional[str] = None,
+         team_id: Optional[str] = None,
+         evaluated_component_name: Optional[str] = None,
+     ) -> None:
+         """Helper to log evaluation to database asynchronously."""
+         if not self.db:
+             return
+
+         await async_log_eval(
+             db=self.db,
+             run_id=run_id,
+             run_data=asdict(result),
+             eval_type=EvalType.AGENT_AS_JUDGE,
+             agent_id=agent_id,
+             model_id=model_id,
+             model_provider=model_provider,
+             name=self.name,
+             team_id=team_id,
+             evaluated_component_name=evaluated_component_name,
+             eval_input={
+                 "criteria": self.criteria,
+                 "scoring_strategy": self.scoring_strategy,
+                 "threshold": self.threshold if self.scoring_strategy == "numeric" else None,
+                 "additional_guidelines": self.additional_guidelines,
+             },
+         )
+
+     def run(
+         self,
+         *,
+         input: Optional[str] = None,
+         output: Optional[str] = None,
+         cases: Optional[List[Dict[str, str]]] = None,
+         print_summary: bool = False,
+         print_results: bool = False,
+     ) -> Optional[AgentAsJudgeResult]:
+         """Evaluate input/output against the criteria.
+
+         Supports both single evaluation and batch evaluation:
+
+         Args:
+             input: Input text for single evaluation
+             output: Output text for single evaluation
+             cases: List of input/output pairs for batch evaluation
+             print_summary: Whether to print summary
+             print_results: Whether to print detailed results
+         """
+         # Generate unique run_id for this execution
+         run_id = str(uuid4())
+
+         # Validate parameters
+         single_mode = input is not None or output is not None
+         batch_mode = cases is not None
+
+         if single_mode and batch_mode:
+             raise ValueError("Provide either (input, output) OR cases, not both")
+
+         if not single_mode and not batch_mode:
+             raise ValueError("Must provide either (input, output) OR cases")
+
+         # Batch mode if cases provided
+         if batch_mode and cases is not None:
+             return self._run_batch(cases=cases, run_id=run_id, print_summary=print_summary, print_results=print_results)
+
+         # Validate single mode has both input and output
+         if input is None or output is None:
+             raise ValueError("Both input and output are required for single evaluation")
+
+         # Single evaluation logic
+         from rich.console import Console
+         from rich.live import Live
+         from rich.status import Status
+
+         if isinstance(self.db, AsyncBaseDb):
+             raise ValueError("Use arun() with async DB.")
+
+         set_log_level_to_debug() if self.debug_mode else set_log_level_to_info()
+         result = AgentAsJudgeResult(run_id=run_id)
+
+         console = Console()
+         with Live(console=console, transient=True) as live_log:
+             evaluator = self.get_evaluator_agent()
+
+             status = Status("Running evaluation...", spinner="dots", speed=1.0, refresh_per_second=10)
+             live_log.update(status)
+
+             evaluation = self._evaluate(input=input, output=output, evaluator_agent=evaluator)
+
+             if evaluation:
+                 result.results.append(evaluation)
+                 result.compute_stats()
+
+             status.stop()
+
+         # Save result to file
+         if self.file_path_to_save_results:
+             store_result_in_file(
+                 file_path=self.file_path_to_save_results,
+                 result=result,
+                 eval_id=run_id,
+                 name=self.name,
+             )
+
+         # Print results
+         if self.print_results or print_results:
+             result.print_results(console)
+         if self.print_summary or print_summary:
+             result.print_summary(console)
+
+         # Log to DB
+         self._log_eval_to_db(run_id=run_id, result=result)
+
+         if self.telemetry:
+             from agno.api.evals import EvalRunCreate, create_eval_run_telemetry
+
+             create_eval_run_telemetry(
+                 eval_run=EvalRunCreate(
+                     run_id=run_id, eval_type=EvalType.AGENT_AS_JUDGE, data=self._get_telemetry_data(result)
+                 )
+             )
+
+         return result
+
+     async def arun(
+         self,
+         *,
+         input: Optional[str] = None,
+         output: Optional[str] = None,
+         cases: Optional[List[Dict[str, str]]] = None,
+         print_summary: bool = False,
+         print_results: bool = False,
+     ) -> Optional[AgentAsJudgeResult]:
+         """Evaluate input/output against the criteria asynchronously.
+
+         Supports both single evaluation and batch evaluation:
+
+         Args:
+             input: Input text for single evaluation
+             output: Output text for single evaluation
+             cases: List of input/output pairs for batch evaluation
+             print_summary: Whether to print summary
+             print_results: Whether to print detailed results
+         """
+         # Generate unique run_id for this execution
+         run_id = str(uuid4())
+
+         # Validate parameters
+         single_mode = input is not None or output is not None
+         batch_mode = cases is not None
+
+         if single_mode and batch_mode:
+             raise ValueError("Provide either (input, output) OR cases, not both")
+
+         if not single_mode and not batch_mode:
+             raise ValueError("Must provide either (input, output) OR cases")
+
+         # Batch mode if cases provided
+         if batch_mode and cases is not None:
+             return await self._arun_batch(
+                 cases=cases, run_id=run_id, print_summary=print_summary, print_results=print_results
+             )
+
+         # Validate single mode has both input and output
+         if input is None or output is None:
+             raise ValueError("Both input and output are required for single evaluation")
+
+         # Single evaluation logic
+         from rich.console import Console
+         from rich.live import Live
+         from rich.status import Status
+
+         set_log_level_to_debug() if self.debug_mode else set_log_level_to_info()
+         result = AgentAsJudgeResult(run_id=run_id)
+
+         console = Console()
+         with Live(console=console, transient=True) as live_log:
+             evaluator = self.get_evaluator_agent()
+
+             status = Status("Running evaluation...", spinner="dots", speed=1.0, refresh_per_second=10)
+             live_log.update(status)
+
+             evaluation = await self._aevaluate(input=input, output=output, evaluator_agent=evaluator)
+
+             if evaluation:
+                 result.results.append(evaluation)
+                 result.compute_stats()
+
+             status.stop()
+
+         # Save result to file
+         if self.file_path_to_save_results:
+             store_result_in_file(
+                 file_path=self.file_path_to_save_results,
+                 result=result,
+                 eval_id=run_id,
+                 name=self.name,
+             )
+
+         # Print results
+         if self.print_results or print_results:
+             result.print_results(console)
+         if self.print_summary or print_summary:
+             result.print_summary(console)
+
+         # Log to DB
+         await self._async_log_eval_to_db(run_id=run_id, result=result)
+
+         if self.telemetry:
+             from agno.api.evals import EvalRunCreate, async_create_eval_run_telemetry
+
+             await async_create_eval_run_telemetry(
+                 eval_run=EvalRunCreate(
+                     run_id=run_id, eval_type=EvalType.AGENT_AS_JUDGE, data=self._get_telemetry_data(result)
+                 )
+             )
+
+         return result
+
+     def _run_batch(
+         self,
+         cases: List[Dict[str, str]],
+         run_id: str,
+         *,
+         print_summary: bool = True,
+         print_results: bool = False,
+     ) -> Optional[AgentAsJudgeResult]:
+         """Private helper: Evaluate multiple input/output pairs.
+
+         Args:
+             cases: List of dicts with 'input' and 'output' keys
+             run_id: Unique ID for this evaluation run
+         """
+         from rich.console import Console
+         from rich.live import Live
+         from rich.status import Status
+
+         if isinstance(self.db, AsyncBaseDb):
+             raise ValueError("Use arun() with async DB.")
+
+         set_log_level_to_debug() if self.debug_mode else set_log_level_to_info()
+         result = AgentAsJudgeResult(run_id=run_id)
+
+         console = Console()
+         with Live(console=console, transient=True) as live_log:
+             evaluator = self.get_evaluator_agent()
+
+             for i, case in enumerate(cases):
+                 status = Status(f"Evaluating {i + 1}/{len(cases)}...", spinner="dots")
+                 live_log.update(status)
+
+                 evaluation = self._evaluate(input=case["input"], output=case["output"], evaluator_agent=evaluator)
+                 if evaluation:
+                     result.results.append(evaluation)
+                     result.compute_stats()
+
+                 status.stop()
+
+         # Save result to file
+         if self.file_path_to_save_results:
+             store_result_in_file(
+                 file_path=self.file_path_to_save_results,
+                 result=result,
+                 eval_id=run_id,
+                 name=self.name,
+             )
+
+         # Print results
+         if self.print_results or print_results:
+             result.print_results(console)
+         if self.print_summary or print_summary:
+             result.print_summary(console)
+
+         # Log to DB
+         self._log_eval_to_db(run_id=run_id, result=result)
+
+         if self.telemetry:
+             from agno.api.evals import EvalRunCreate, create_eval_run_telemetry
+
+             create_eval_run_telemetry(
+                 eval_run=EvalRunCreate(
+                     run_id=run_id, eval_type=EvalType.AGENT_AS_JUDGE, data=self._get_telemetry_data(result)
+                 )
+             )
+
+         return result
+
+     async def _arun_batch(
+         self,
+         cases: List[Dict[str, str]],
+         run_id: str,
+         *,
+         print_summary: bool = True,
+         print_results: bool = False,
+     ) -> Optional[AgentAsJudgeResult]:
+         """Private helper: Evaluate multiple input/output pairs asynchronously.
+
+         Args:
+             cases: List of dicts with 'input' and 'output' keys
+             run_id: Unique ID for this evaluation run
+         """
+         from rich.console import Console
+         from rich.live import Live
+         from rich.status import Status
+
+         set_log_level_to_debug() if self.debug_mode else set_log_level_to_info()
+         result = AgentAsJudgeResult(run_id=run_id)
+
+         console = Console()
+         with Live(console=console, transient=True) as live_log:
+             evaluator = self.get_evaluator_agent()
+
+             for i, case in enumerate(cases):
+                 status = Status(f"Evaluating {i + 1}/{len(cases)}...", spinner="dots")
+                 live_log.update(status)
+
+                 evaluation = await self._aevaluate(
+                     input=case["input"], output=case["output"], evaluator_agent=evaluator
+                 )
+                 if evaluation:
+                     result.results.append(evaluation)
+                     result.compute_stats()
+
+                 status.stop()
+
+         # Save result to file
+         if self.file_path_to_save_results:
+             store_result_in_file(
+                 file_path=self.file_path_to_save_results,
+                 result=result,
+                 eval_id=run_id,
+                 name=self.name,
+             )
+
+         # Print results
+         if self.print_results or print_results:
+             result.print_results(console)
+         if self.print_summary or print_summary:
+             result.print_summary(console)
+
+         # Log to DB
+         await self._async_log_eval_to_db(run_id=run_id, result=result)
+
+         if self.telemetry:
+             from agno.api.evals import EvalRunCreate, async_create_eval_run_telemetry
+
+             await async_create_eval_run_telemetry(
+                 eval_run=EvalRunCreate(
+                     run_id=run_id, eval_type=EvalType.AGENT_AS_JUDGE, data=self._get_telemetry_data(result)
+                 )
+             )
+
+         return result
+
+     def _get_telemetry_data(self, result: Optional[AgentAsJudgeResult] = None) -> Dict[str, Any]:
+         return {
+             "criteria_length": len(self.criteria) if self.criteria else 0,
+             "scoring_strategy": self.scoring_strategy,
+             "threshold": self.threshold if self.scoring_strategy == "numeric" else None,
+             "num_results": len(result.results) if result else 0,
+         }
+
+     # BaseEval hook methods
+     def pre_check(self, run_input: Union[RunInput, TeamRunInput]) -> None:
+         raise ValueError("Pre-hooks are not supported")
+
+     async def async_pre_check(self, run_input: Union[RunInput, TeamRunInput]) -> None:
+         raise ValueError("Pre-hooks are not supported")
+
+     def post_check(self, run_output: Union[RunOutput, TeamRunOutput]) -> None:
+         """Perform sync post-check to evaluate agent output."""
+         input_str = run_output.input.input_content_string() if run_output.input else ""
+         output_str = str(run_output.content) if run_output.content else ""
+
+         # Temporarily disable DB logging
+         original_db = self.db
+         self.db = None
+
+         # Run evaluation and capture result
+         result = self.run(
+             input=input_str, output=output_str, print_results=self.print_results, print_summary=self.print_summary
+         )
+
+         # Restore DB and log with context from run_output
+         self.db = original_db
+
+         if isinstance(self.db, AsyncBaseDb):
+             raise ValueError("post_check() requires sync DB. Use async_post_check() with async DB.")
+
+         # Extract metadata from run_output
+         if isinstance(run_output, RunOutput):
+             agent_id = run_output.agent_id
+             team_id = None
+             model_id = run_output.model
+             model_provider = run_output.model_provider
+         elif isinstance(run_output, TeamRunOutput):
+             agent_id = None
+             team_id = run_output.team_id
+             model_id = run_output.model
+             model_provider = run_output.model_provider
+
+         # Log to DB if we have a valid result (use run_id from result)
+         if result:
+             self._log_eval_to_db(
+                 run_id=result.run_id,
+                 result=result,
+                 agent_id=agent_id,
+                 model_id=model_id,
+                 model_provider=model_provider,
+                 team_id=team_id,
+             )
+
+     async def async_post_check(self, run_output: Union[RunOutput, TeamRunOutput]) -> None:
+         """Perform async post-check to evaluate agent output."""
+         input_str = run_output.input.input_content_string() if run_output.input else ""
+         output_str = str(run_output.content) if run_output.content else ""
+
+         # Temporarily disable DB logging
+         original_db = self.db
+         self.db = None
+
+         # Run evaluation and capture result
+         result = await self.arun(
+             input=input_str, output=output_str, print_results=self.print_results, print_summary=self.print_summary
+         )
+
+         # Restore DB and log with context from run_output
+         self.db = original_db
+
+         # Extract metadata from run_output
+         if isinstance(run_output, RunOutput):
+             agent_id = run_output.agent_id
+             team_id = None
+             model_id = run_output.model
+             model_provider = run_output.model_provider
+         elif isinstance(run_output, TeamRunOutput):
+             agent_id = None
+             team_id = run_output.team_id
+             model_id = run_output.model
+             model_provider = run_output.model_provider
+
+         # Log to DB if we have a valid result (use run_id from result)
+         if result:
+             await self._async_log_eval_to_db(
+                 run_id=result.run_id,
+                 result=result,
+                 agent_id=agent_id,
+                 model_id=model_id,
+                 model_provider=model_provider,
+                 team_id=team_id,
+             )
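
For orientation, a minimal usage sketch of the AgentAsJudgeEval API added by this file follows. It is inferred only from the code in the hunk above; the criteria strings, example inputs and outputs, and the on_fail callback are illustrative, and the default judge model (OpenAIChat "gpt-5-mini") requires the openai package and an API key.

# Hypothetical usage of the new AgentAsJudgeEval, based only on the API shown above.
from agno.eval.agent_as_judge import AgentAsJudgeEval

# Binary mode (the default): the judge returns pass/fail plus its reasoning.
binary_eval = AgentAsJudgeEval(
    criteria="The answer must address the question directly and contain no fabricated facts.",
)
binary_result = binary_eval.run(
    input="What is the capital of France?",
    output="The capital of France is Paris.",
    print_results=True,
)

# Numeric mode: the judge scores 1-10 and `threshold` decides pass/fail.
# `cases` runs a batch; `on_fail` fires for each failed evaluation.
numeric_eval = AgentAsJudgeEval(
    criteria="Responses should be concise and factually correct.",
    scoring_strategy="numeric",
    threshold=7,
    on_fail=lambda evaluation: print(f"Judge failed this output: {evaluation.reason}"),
)
batch_result = numeric_eval.run(
    cases=[
        {"input": "What is 2 + 2?", "output": "4"},
        {"input": "Summarize Hamlet in one sentence.", "output": "A Danish prince avenges his murdered father."},
    ],
    print_summary=True,
)

When wired in as a post-hook, post_check() and async_post_check() pull the input and output from the run and log the evaluation against the agent or team that produced it.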