agno 2.3.8__py3-none-any.whl → 2.3.9__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, exactly as they appear in their public registry. It is provided for informational purposes only.
Files changed (62)
  1. agno/agent/agent.py +134 -82
  2. agno/db/mysql/__init__.py +2 -1
  3. agno/db/mysql/async_mysql.py +2888 -0
  4. agno/db/mysql/mysql.py +17 -8
  5. agno/db/mysql/utils.py +139 -6
  6. agno/db/postgres/async_postgres.py +10 -5
  7. agno/db/postgres/postgres.py +7 -2
  8. agno/db/schemas/evals.py +1 -0
  9. agno/db/singlestore/singlestore.py +5 -1
  10. agno/db/sqlite/async_sqlite.py +2 -2
  11. agno/eval/__init__.py +10 -0
  12. agno/eval/agent_as_judge.py +860 -0
  13. agno/eval/base.py +29 -0
  14. agno/eval/utils.py +2 -1
  15. agno/exceptions.py +7 -0
  16. agno/knowledge/embedder/openai.py +8 -8
  17. agno/knowledge/knowledge.py +1142 -176
  18. agno/media.py +22 -6
  19. agno/models/aws/claude.py +8 -7
  20. agno/models/base.py +27 -1
  21. agno/models/deepseek/deepseek.py +67 -0
  22. agno/models/google/gemini.py +65 -11
  23. agno/models/google/utils.py +22 -0
  24. agno/models/message.py +2 -0
  25. agno/models/openai/chat.py +4 -0
  26. agno/os/app.py +64 -74
  27. agno/os/interfaces/a2a/router.py +3 -4
  28. agno/os/interfaces/agui/router.py +2 -0
  29. agno/os/router.py +3 -1607
  30. agno/os/routers/agents/__init__.py +3 -0
  31. agno/os/routers/agents/router.py +581 -0
  32. agno/os/routers/agents/schema.py +261 -0
  33. agno/os/routers/evals/evals.py +26 -6
  34. agno/os/routers/evals/schemas.py +34 -2
  35. agno/os/routers/evals/utils.py +101 -20
  36. agno/os/routers/knowledge/knowledge.py +1 -1
  37. agno/os/routers/teams/__init__.py +3 -0
  38. agno/os/routers/teams/router.py +496 -0
  39. agno/os/routers/teams/schema.py +257 -0
  40. agno/os/routers/workflows/__init__.py +3 -0
  41. agno/os/routers/workflows/router.py +545 -0
  42. agno/os/routers/workflows/schema.py +75 -0
  43. agno/os/schema.py +1 -559
  44. agno/os/utils.py +139 -2
  45. agno/team/team.py +73 -16
  46. agno/tools/file_generation.py +12 -6
  47. agno/tools/firecrawl.py +15 -7
  48. agno/utils/hooks.py +64 -5
  49. agno/utils/http.py +2 -2
  50. agno/utils/media.py +11 -1
  51. agno/utils/print_response/agent.py +8 -0
  52. agno/utils/print_response/team.py +8 -0
  53. agno/vectordb/pgvector/pgvector.py +88 -51
  54. agno/workflow/parallel.py +3 -3
  55. agno/workflow/step.py +14 -2
  56. agno/workflow/types.py +38 -2
  57. agno/workflow/workflow.py +12 -4
  58. {agno-2.3.8.dist-info → agno-2.3.9.dist-info}/METADATA +7 -2
  59. {agno-2.3.8.dist-info → agno-2.3.9.dist-info}/RECORD +62 -49
  60. {agno-2.3.8.dist-info → agno-2.3.9.dist-info}/WHEEL +0 -0
  61. {agno-2.3.8.dist-info → agno-2.3.9.dist-info}/licenses/LICENSE +0 -0
  62. {agno-2.3.8.dist-info → agno-2.3.9.dist-info}/top_level.txt +0 -0
agno/os/routers/agents/schema.py (new file)
@@ -0,0 +1,261 @@
+ from typing import Any, Dict, Optional
+ from uuid import uuid4
+
+ from pydantic import BaseModel
+
+ from agno.agent import Agent
+ from agno.models.message import Message
+ from agno.os.schema import ModelResponse
+ from agno.os.utils import (
+     format_tools,
+     get_agent_input_schema_dict,
+ )
+ from agno.run import RunContext
+ from agno.run.agent import RunOutput
+ from agno.session import AgentSession
+ from agno.utils.agent import aexecute_instructions, aexecute_system_message
+
+
+ class AgentResponse(BaseModel):
+     id: Optional[str] = None
+     name: Optional[str] = None
+     db_id: Optional[str] = None
+     model: Optional[ModelResponse] = None
+     tools: Optional[Dict[str, Any]] = None
+     sessions: Optional[Dict[str, Any]] = None
+     knowledge: Optional[Dict[str, Any]] = None
+     memory: Optional[Dict[str, Any]] = None
+     reasoning: Optional[Dict[str, Any]] = None
+     default_tools: Optional[Dict[str, Any]] = None
+     system_message: Optional[Dict[str, Any]] = None
+     extra_messages: Optional[Dict[str, Any]] = None
+     response_settings: Optional[Dict[str, Any]] = None
+     introduction: Optional[str] = None
+     streaming: Optional[Dict[str, Any]] = None
+     metadata: Optional[Dict[str, Any]] = None
+     input_schema: Optional[Dict[str, Any]] = None
+
+     @classmethod
+     async def from_agent(cls, agent: Agent) -> "AgentResponse":
+         def filter_meaningful_config(d: Dict[str, Any], defaults: Dict[str, Any]) -> Optional[Dict[str, Any]]:
+             """Filter out fields that match their default values, keeping only meaningful user configurations"""
+             filtered = {}
+             for key, value in d.items():
+                 if value is None:
+                     continue
+                 # Skip if value matches the default exactly
+                 if key in defaults and value == defaults[key]:
+                     continue
+                 # Keep non-default values
+                 filtered[key] = value
+             return filtered if filtered else None
+
+         # Define default values for filtering
+         agent_defaults = {
+             # Sessions defaults
+             "add_history_to_context": False,
+             "num_history_runs": 3,
+             "enable_session_summaries": False,
+             "search_session_history": False,
+             "cache_session": False,
+             # Knowledge defaults
+             "add_references": False,
+             "references_format": "json",
+             "enable_agentic_knowledge_filters": False,
+             # Memory defaults
+             "enable_agentic_memory": False,
+             "enable_user_memories": False,
+             # Reasoning defaults
+             "reasoning": False,
+             "reasoning_min_steps": 1,
+             "reasoning_max_steps": 10,
+             # Default tools defaults
+             "read_chat_history": False,
+             "search_knowledge": True,
+             "update_knowledge": False,
+             "read_tool_call_history": False,
+             # System message defaults
+             "system_message_role": "system",
+             "build_context": True,
+             "markdown": False,
+             "add_name_to_context": False,
+             "add_datetime_to_context": False,
+             "add_location_to_context": False,
+             "resolve_in_context": True,
+             # Extra messages defaults
+             "user_message_role": "user",
+             "build_user_context": True,
+             # Response settings defaults
+             "retries": 0,
+             "delay_between_retries": 1,
+             "exponential_backoff": False,
+             "parse_response": True,
+             "use_json_mode": False,
+             # Streaming defaults
+             "stream_events": False,
+             "stream_intermediate_steps": False,
+         }
+
+         session_id = str(uuid4())
+         run_id = str(uuid4())
+         agent_tools = await agent.aget_tools(
+             session=AgentSession(session_id=session_id, session_data={}),
+             run_response=RunOutput(run_id=run_id, session_id=session_id),
+             run_context=RunContext(run_id=run_id, session_id=session_id, user_id=agent.user_id),
+             check_mcp_tools=False,
+         )
+         formatted_tools = format_tools(agent_tools) if agent_tools else None
+
+         additional_input = agent.additional_input
+         if additional_input and isinstance(additional_input[0], Message):
+             additional_input = [message.to_dict() for message in additional_input]  # type: ignore
+
+         # Build model only if it has at least one non-null field
+         model_name = agent.model.name if (agent.model and agent.model.name) else None
+         model_provider = agent.model.provider if (agent.model and agent.model.provider) else None
+         model_id = agent.model.id if (agent.model and agent.model.id) else None
+         _agent_model_data: Dict[str, Any] = {}
+         if model_name is not None:
+             _agent_model_data["name"] = model_name
+         if model_id is not None:
+             _agent_model_data["model"] = model_id
+         if model_provider is not None:
+             _agent_model_data["provider"] = model_provider
+
+         session_table = agent.db.session_table_name if agent.db else None
+         knowledge_table = agent.db.knowledge_table_name if agent.db and agent.knowledge else None
+
+         tools_info = {
+             "tools": formatted_tools,
+             "tool_call_limit": agent.tool_call_limit,
+             "tool_choice": agent.tool_choice,
+         }
+
+         sessions_info = {
+             "session_table": session_table,
+             "add_history_to_context": agent.add_history_to_context,
+             "enable_session_summaries": agent.enable_session_summaries,
+             "num_history_runs": agent.num_history_runs,
+             "search_session_history": agent.search_session_history,
+             "num_history_sessions": agent.num_history_sessions,
+             "cache_session": agent.cache_session,
+         }
+
+         knowledge_info = {
+             "knowledge_table": knowledge_table,
+             "enable_agentic_knowledge_filters": agent.enable_agentic_knowledge_filters,
+             "knowledge_filters": agent.knowledge_filters,
+             "references_format": agent.references_format,
+         }
+
+         memory_info: Optional[Dict[str, Any]] = None
+         if agent.memory_manager is not None:
+             memory_info = {
+                 "enable_agentic_memory": agent.enable_agentic_memory,
+                 "enable_user_memories": agent.enable_user_memories,
+                 "metadata": agent.metadata,
+                 "memory_table": agent.db.memory_table_name if agent.db and agent.enable_user_memories else None,
+             }
+
+             if agent.memory_manager.model is not None:
+                 memory_info["model"] = ModelResponse(
+                     name=agent.memory_manager.model.name,
+                     model=agent.memory_manager.model.id,
+                     provider=agent.memory_manager.model.provider,
+                 ).model_dump()
+
+         reasoning_info: Dict[str, Any] = {
+             "reasoning": agent.reasoning,
+             "reasoning_agent_id": agent.reasoning_agent.id if agent.reasoning_agent else None,
+             "reasoning_min_steps": agent.reasoning_min_steps,
+             "reasoning_max_steps": agent.reasoning_max_steps,
+         }
+
+         if agent.reasoning_model:
+             reasoning_info["reasoning_model"] = ModelResponse(
+                 name=agent.reasoning_model.name,
+                 model=agent.reasoning_model.id,
+                 provider=agent.reasoning_model.provider,
+             ).model_dump()
+
+         default_tools_info = {
+             "read_chat_history": agent.read_chat_history,
+             "search_knowledge": agent.search_knowledge,
+             "update_knowledge": agent.update_knowledge,
+             "read_tool_call_history": agent.read_tool_call_history,
+         }
+
+         instructions = agent.instructions if agent.instructions else None
+         if instructions and callable(instructions):
+             instructions = await aexecute_instructions(instructions=instructions, agent=agent)
+
+         system_message = agent.system_message if agent.system_message else None
+         if system_message and callable(system_message):
+             system_message = await aexecute_system_message(system_message=system_message, agent=agent)
+
+         system_message_info = {
+             "system_message": str(system_message) if system_message else None,
+             "system_message_role": agent.system_message_role,
+             "build_context": agent.build_context,
+             "description": agent.description,
+             "instructions": instructions,
+             "expected_output": agent.expected_output,
+             "additional_context": agent.additional_context,
+             "markdown": agent.markdown,
+             "add_name_to_context": agent.add_name_to_context,
+             "add_datetime_to_context": agent.add_datetime_to_context,
+             "add_location_to_context": agent.add_location_to_context,
+             "timezone_identifier": agent.timezone_identifier,
+             "resolve_in_context": agent.resolve_in_context,
+         }
+
+         extra_messages_info = {
+             "additional_input": additional_input,  # type: ignore
+             "user_message_role": agent.user_message_role,
+             "build_user_context": agent.build_user_context,
+         }
+
+         response_settings_info: Dict[str, Any] = {
+             "retries": agent.retries,
+             "delay_between_retries": agent.delay_between_retries,
+             "exponential_backoff": agent.exponential_backoff,
+             "output_schema_name": agent.output_schema.__name__ if agent.output_schema else None,
+             "parser_model_prompt": agent.parser_model_prompt,
+             "parse_response": agent.parse_response,
+             "structured_outputs": agent.structured_outputs,
+             "use_json_mode": agent.use_json_mode,
+             "save_response_to_file": agent.save_response_to_file,
+         }
+
+         if agent.parser_model:
+             response_settings_info["parser_model"] = ModelResponse(
+                 name=agent.parser_model.name,
+                 model=agent.parser_model.id,
+                 provider=agent.parser_model.provider,
+             ).model_dump()
+
+         streaming_info = {
+             "stream": agent.stream,
+             "stream_events": agent.stream_events,
+             "stream_intermediate_steps": agent.stream_intermediate_steps,
+         }
+
+         return AgentResponse(
+             id=agent.id,
+             name=agent.name,
+             db_id=agent.db.id if agent.db else None,
+             model=ModelResponse(**_agent_model_data) if _agent_model_data else None,
+             tools=filter_meaningful_config(tools_info, {}),
+             sessions=filter_meaningful_config(sessions_info, agent_defaults),
+             knowledge=filter_meaningful_config(knowledge_info, agent_defaults),
+             memory=filter_meaningful_config(memory_info, agent_defaults) if memory_info else None,
+             reasoning=filter_meaningful_config(reasoning_info, agent_defaults),
+             default_tools=filter_meaningful_config(default_tools_info, agent_defaults),
+             system_message=filter_meaningful_config(system_message_info, agent_defaults),
+             extra_messages=filter_meaningful_config(extra_messages_info, agent_defaults),
+             response_settings=filter_meaningful_config(response_settings_info, agent_defaults),
+             streaming=filter_meaningful_config(streaming_info, agent_defaults),
+             introduction=agent.introduction,
+             metadata=agent.metadata,
+             input_schema=get_agent_input_schema_dict(agent),
+         )
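Note: the `filter_meaningful_config` helper above is what keeps the new `AgentResponse` payload small: each config section is compared against `agent_defaults` and serialized only where the user deviated from a default. A standalone sketch of that behavior (the helper is copied from the hunk; the `defaults` dict here is a trimmed illustration):

```python
from typing import Any, Dict, Optional


def filter_meaningful_config(d: Dict[str, Any], defaults: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    """Drop None values and values equal to their default; collapse empty sections to None."""
    filtered = {}
    for key, value in d.items():
        if value is None:
            continue
        if key in defaults and value == defaults[key]:
            continue
        filtered[key] = value
    return filtered if filtered else None


defaults = {"num_history_runs": 3, "markdown": False}

# Every value is unset or matches its default, so the whole section collapses to None
assert filter_meaningful_config({"num_history_runs": 3, "markdown": None}, defaults) is None

# Only the overridden value survives
assert filter_meaningful_config({"num_history_runs": 5, "markdown": False}, defaults) == {"num_history_runs": 5}
```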
agno/os/routers/evals/evals.py
@@ -15,7 +15,12 @@ from agno.os.routers.evals.schemas import (
      EvalSchema,
      UpdateEvalRunRequest,
  )
- from agno.os.routers.evals.utils import run_accuracy_eval, run_performance_eval, run_reliability_eval
+ from agno.os.routers.evals.utils import (
+     run_accuracy_eval,
+     run_agent_as_judge_eval,
+     run_performance_eval,
+     run_reliability_eval,
+ )
  from agno.os.schema import (
      BadRequestResponse,
      InternalServerErrorResponse,
@@ -119,6 +124,15 @@ def attach_routes(
      ) -> PaginatedResponse[EvalSchema]:
          db = await get_db(dbs, db_id, table)

+         # TODO: Delete me:
+         # Filtering out agent-as-judge by default for now,
+         # as they are not supported yet in the AgentOS UI.
+         eval_types = eval_types or [
+             EvalType.ACCURACY,
+             EvalType.PERFORMANCE,
+             EvalType.RELIABILITY,
+         ]
+
          if isinstance(db, AsyncBaseDb):
              db = cast(AsyncBaseDb, db)
              eval_runs, total_count = await db.get_eval_runs(
@@ -304,7 +318,7 @@
          operation_id="run_eval",
          summary="Execute Evaluation",
          description=(
-             "Run evaluation tests on agents or teams. Supports accuracy, performance, and reliability evaluations. "
+             "Run evaluation tests on agents or teams. Supports accuracy, agent-as-judge, performance, and reliability evaluations. "
              "Requires either agent_id or team_id, but not both."
          ),
          responses={
@@ -374,6 +388,7 @@
          if not team:
              raise HTTPException(status_code=404, detail=f"Team with id '{eval_run_input.team_id}' not found")

+         # If model_id/model_provider specified, override team's model temporarily
          default_model = None
          if (
              hasattr(team, "model")
@@ -381,13 +396,13 @@
              and eval_run_input.model_id is not None
              and eval_run_input.model_provider is not None
          ):
-             default_model = deepcopy(team.model)
+             default_model = deepcopy(team.model)  # Save original
              if eval_run_input.model_id != team.model.id or eval_run_input.model_provider != team.model.provider:
                  model_provider = eval_run_input.model_provider.lower()
                  model_id = eval_run_input.model_id.lower()
                  model_string = f"{model_provider}:{model_id}"
                  model = get_model(model_string)
-                 team.model = model
+                 team.model = model  # Override temporarily

          agent = None

@@ -400,6 +415,11 @@
                  eval_run_input=eval_run_input, db=db, agent=agent, team=team, default_model=default_model
              )

+         elif eval_run_input.eval_type == EvalType.AGENT_AS_JUDGE:
+             return await run_agent_as_judge_eval(
+                 eval_run_input=eval_run_input, db=db, agent=agent, team=team, default_model=default_model
+             )
+
          elif eval_run_input.eval_type == EvalType.PERFORMANCE:
              return await run_performance_eval(
                  eval_run_input=eval_run_input, db=db, agent=agent, team=team, default_model=default_model
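With this branch in place, the run-eval endpoint dispatches `agent_as_judge` alongside the existing types. A hedged sketch of a request body (only the field names come from `EvalRunInput` further below; the URL, route path, and IDs are illustrative, as the exact route is not shown in this diff):

```python
import httpx

# Illustrative payload; field names mirror EvalRunInput
payload = {
    "eval_type": "agent_as_judge",  # assumed serialized value of EvalType.AGENT_AS_JUDGE
    "agent_id": "my-agent",         # or "team_id", but not both
    "input": "Summarize the incident report in three bullets.",
    "criteria": "The summary is factually consistent and has exactly three bullets.",
    "scoring_strategy": "numeric",  # or "binary" (the default)
    "threshold": 7,                 # 1-10 cutoff, only used with numeric scoring
}

# Hypothetical AgentOS URL and route; adjust to your deployment
response = httpx.post("http://localhost:7777/eval-runs", json=payload)
print(response.status_code, response.json())
```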
@@ -416,8 +436,8 @@
  def parse_eval_types_filter(
      eval_types: Optional[str] = Query(
          default=None,
-         description="Comma-separated eval types (accuracy,performance,reliability)",
-         examples=["accuracy,performance"],
+         description="Comma-separated eval types (accuracy,agent_as_judge,performance,reliability)",
+         examples=["accuracy,agent_as_judge,performance,reliability"],
      ),
  ) -> Optional[List[EvalType]]:
      """Parse comma-separated eval types into EvalType enums for filtering evaluation runs."""
agno/os/routers/evals/schemas.py
@@ -1,12 +1,13 @@
  from dataclasses import asdict
  from datetime import datetime, timezone
- from typing import Any, Dict, List, Optional
+ from typing import Any, Dict, List, Literal, Optional

  from pydantic import BaseModel, Field

  from agno.db.schemas.evals import EvalType
- from agno.eval import AccuracyResult, PerformanceResult, ReliabilityResult
+ from agno.eval import AccuracyResult, AgentAsJudgeResult, PerformanceResult, ReliabilityResult
  from agno.eval.accuracy import AccuracyEval
+ from agno.eval.agent_as_judge import AgentAsJudgeEval
  from agno.eval.performance import PerformanceEval
  from agno.eval.reliability import ReliabilityEval

@@ -27,6 +28,15 @@ class EvalRunInput(BaseModel):
      # Accuracy eval specific fields
      expected_output: Optional[str] = Field(None, description="Expected output for accuracy evaluation")

+     # AgentAsJudge eval specific fields
+     criteria: Optional[str] = Field(None, description="Evaluation criteria for agent-as-judge evaluation")
+     scoring_strategy: Optional[Literal["numeric", "binary"]] = Field(
+         "binary", description="Scoring strategy: 'numeric' (1-10 with threshold) or 'binary' (PASS/FAIL)"
+     )
+     threshold: Optional[int] = Field(
+         7, description="Score threshold for pass/fail (1-10), only used with numeric scoring", ge=1, le=10
+     )
+
      # Performance eval specific fields
      warmup_runs: int = Field(0, description="Number of warmup runs before measuring performance", ge=0, le=10)

@@ -89,6 +99,28 @@ class EvalSchema(BaseModel):
              eval_data=asdict(result),
          )

+     @classmethod
+     def from_agent_as_judge_eval(
+         cls,
+         agent_as_judge_eval: AgentAsJudgeEval,
+         result: AgentAsJudgeResult,
+         model_id: Optional[str] = None,
+         model_provider: Optional[str] = None,
+         agent_id: Optional[str] = None,
+         team_id: Optional[str] = None,
+     ) -> "EvalSchema":
+         return cls(
+             id=result.run_id,
+             name=agent_as_judge_eval.name,
+             agent_id=agent_id,
+             team_id=team_id,
+             workflow_id=None,
+             model_id=model_id,
+             model_provider=model_provider,
+             eval_type=EvalType.AGENT_AS_JUDGE,
+             eval_data=asdict(result),
+         )
+
      @classmethod
      def from_performance_eval(
          cls,
agno/os/routers/evals/utils.py
@@ -5,6 +5,7 @@ from fastapi import HTTPException
  from agno.agent.agent import Agent
  from agno.db.base import AsyncBaseDb, BaseDb
  from agno.eval.accuracy import AccuracyEval
+ from agno.eval.agent_as_judge import AgentAsJudgeEval
  from agno.eval.performance import PerformanceEval
  from agno.eval.reliability import ReliabilityEval
  from agno.models.base import Model
@@ -36,15 +37,77 @@ async def run_accuracy_eval(
          model=default_model,
      )

-     if isinstance(db, AsyncBaseDb):
-         result = await accuracy_eval.arun(print_results=False, print_summary=False)
-     else:
-         result = accuracy_eval.run(print_results=False, print_summary=False)
+     result = await accuracy_eval.arun(print_results=False, print_summary=False)
      if not result:
          raise HTTPException(status_code=500, detail="Failed to run accuracy evaluation")

      eval_run = EvalSchema.from_accuracy_eval(accuracy_eval=accuracy_eval, result=result)

+     # Restore original model after eval
+     if default_model is not None:
+         if agent is not None:
+             agent.model = default_model
+         elif team is not None:
+             team.model = default_model
+
+     return eval_run
+
+
+ async def run_agent_as_judge_eval(
+     eval_run_input: EvalRunInput,
+     db: Union[BaseDb, AsyncBaseDb],
+     agent: Optional[Agent] = None,
+     team: Optional[Team] = None,
+     default_model: Optional[Model] = None,
+ ) -> EvalSchema:
+     """Run an AgentAsJudge evaluation for the given agent or team"""
+     if not eval_run_input.criteria:
+         raise HTTPException(status_code=400, detail="criteria is required for agent-as-judge evaluation")
+
+     # Run agent/team to get output
+     if agent:
+         agent_response = await agent.arun(eval_run_input.input)
+         output = str(agent_response.content) if agent_response.content else ""
+         model_id = agent.model.id if agent and agent.model else None
+         model_provider = agent.model.provider if agent and agent.model else None
+         agent_id = agent.id
+         team_id = None
+     elif team:
+         team_response = await team.arun(eval_run_input.input)
+         output = str(team_response.content) if team_response.content else ""
+         model_id = team.model.id if team and team.model else None
+         model_provider = team.model.provider if team and team.model else None
+         agent_id = None
+         team_id = team.id
+     else:
+         raise HTTPException(status_code=400, detail="Either agent_id or team_id must be provided")
+
+     agent_as_judge_eval = AgentAsJudgeEval(
+         db=db,
+         criteria=eval_run_input.criteria,
+         scoring_strategy=eval_run_input.scoring_strategy or "binary",
+         threshold=eval_run_input.threshold or 7,
+         additional_guidelines=eval_run_input.additional_guidelines,
+         name=eval_run_input.name,
+         model=default_model,
+     )
+
+     result = await agent_as_judge_eval.arun(
+         input=eval_run_input.input, output=output, print_results=False, print_summary=False
+     )
+     if not result:
+         raise HTTPException(status_code=500, detail="Failed to run agent as judge evaluation")
+
+     eval_run = EvalSchema.from_agent_as_judge_eval(
+         agent_as_judge_eval=agent_as_judge_eval,
+         result=result,
+         agent_id=agent_id,
+         team_id=team_id,
+         model_id=model_id,
+         model_provider=model_provider,
+     )
+
+     # Restore original model after eval
      if default_model is not None:
          if agent is not None:
              agent.model = default_model
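Everything `run_agent_as_judge_eval` does can also be driven directly. A usage sketch based on the constructor and `arun` call shown above (the `db` and `judge_model` arguments are placeholders for whatever the surrounding app already configures; no parameters beyond those shown are assumed):

```python
from agno.eval.agent_as_judge import AgentAsJudgeEval


async def judge_once(db, judge_model):
    # Construct the judge with the same parameters the router passes
    judge = AgentAsJudgeEval(
        db=db,
        name="politeness-check",
        criteria="The response is polite and answers the question directly.",
        scoring_strategy="binary",  # or "numeric", scored 1-10 against `threshold`
        threshold=7,
        model=judge_model,
    )
    # Judge a pre-computed input/output pair, just like the router does
    return await judge.arun(
        input="What is the capital of France?",
        output="Paris.",
        print_results=False,
        print_summary=False,
    )
```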
@@ -62,21 +125,39 @@ async def run_performance_eval(
      default_model: Optional[Model] = None,
  ) -> EvalSchema:
      """Run a performance evaluation for the given agent or team"""
-     if agent:
+     # Create sync or async function based on DB type
+     if isinstance(db, AsyncBaseDb):
+         if agent:

-         def run_component():  # type: ignore
-             return agent.run(eval_run_input.input)
+             async def run_component():  # type: ignore
+                 return await agent.arun(eval_run_input.input)

-         model_id = agent.model.id if agent and agent.model else None
-         model_provider = agent.model.provider if agent and agent.model else None
+             model_id = agent.model.id if agent and agent.model else None
+             model_provider = agent.model.provider if agent and agent.model else None

-     elif team:
+         elif team:

-         def run_component():
-             return team.run(eval_run_input.input)
+             async def run_component():  # type: ignore
+                 return await team.arun(eval_run_input.input)

-         model_id = team.model.id if team and team.model else None
-         model_provider = team.model.provider if team and team.model else None
+             model_id = team.model.id if team and team.model else None
+             model_provider = team.model.provider if team and team.model else None
+     else:
+         if agent:
+
+             def run_component():  # type: ignore
+                 return agent.run(eval_run_input.input)
+
+             model_id = agent.model.id if agent and agent.model else None
+             model_provider = agent.model.provider if agent and agent.model else None
+
+         elif team:
+
+             def run_component():
+                 return team.run(eval_run_input.input)
+
+             model_id = team.model.id if team and team.model else None
+             model_provider = team.model.provider if team and team.model else None

      performance_eval = PerformanceEval(
          db=db,
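The doubled branches exist because the measured callable must match how the eval is later executed: per the hunks below, an async DB leads to `performance_eval.arun` with an awaitable `run_component`, while the sync path keeps a plain function for `run`. A generic illustration of timing either form (this is the dispatch idea only, not agno's implementation):

```python
import asyncio
import inspect
import time


def time_component(run_component) -> float:
    """Time one call, awaiting it when given a coroutine function."""
    start = time.perf_counter()
    if inspect.iscoroutinefunction(run_component):
        asyncio.run(run_component())
    else:
        run_component()
    return time.perf_counter() - start


async def async_component():
    await asyncio.sleep(0.01)


def sync_component():
    time.sleep(0.01)


print(time_component(async_component))  # ~0.01s
print(time_component(sync_component))   # ~0.01s
```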
@@ -90,6 +171,7 @@ async def run_performance_eval(
          model_provider=model_provider,
      )

+     # PerformanceEval needs sync/async check because it wraps a function
      if isinstance(db, AsyncBaseDb):
          result = await performance_eval.arun(print_results=False, print_summary=False)
      else:
@@ -106,6 +188,7 @@ async def run_performance_eval(
          model_provider=model_provider,
      )

+     # Restore original model after eval
      if default_model is not None:
          if agent is not None:
              agent.model = default_model
@@ -127,7 +210,7 @@
          raise HTTPException(status_code=400, detail="expected_tool_calls is required for reliability evaluations")

      if agent:
-         agent_response = agent.run(eval_run_input.input)
+         agent_response = await agent.arun(eval_run_input.input)
          reliability_eval = ReliabilityEval(
              db=db,
              name=eval_run_input.name,
@@ -138,7 +221,7 @@
          model_provider = agent.model.provider if agent and agent.model else None

      elif team:
-         team_response = team.run(eval_run_input.input)
+         team_response = await team.arun(eval_run_input.input)
          reliability_eval = ReliabilityEval(
              db=db,
              name=eval_run_input.name,
@@ -148,10 +231,7 @@
          model_id = team.model.id if team and team.model else None
          model_provider = team.model.provider if team and team.model else None

-     if isinstance(db, AsyncBaseDb):
-         result = await reliability_eval.arun(print_results=False)
-     else:
-         result = reliability_eval.run(print_results=False)
+     result = await reliability_eval.arun(print_results=False)
      if not result:
          raise HTTPException(status_code=500, detail="Failed to run reliability evaluation")

@@ -163,6 +243,7 @@
          model_provider=model_provider,
      )

+     # Restore original model after eval
      if default_model is not None:
          if agent is not None:
              agent.model = default_model
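All of the `run_*_eval` helpers now close with this restore step, completing the pattern set up in the router: deep-copy the original model, swap in the requested one for the duration of the eval, then put the original back. A minimal sketch of the pattern with stand-in types (the `Model`/`Team` classes and model IDs below are illustrative, not the agno classes):

```python
from copy import deepcopy
from dataclasses import dataclass


@dataclass
class Model:
    id: str
    provider: str


@dataclass
class Team:
    model: Model


team = Team(model=Model(id="gpt-4o", provider="openai"))

default_model = deepcopy(team.model)                           # Save original (router)
team.model = Model(id="claude-sonnet", provider="anthropic")   # Override temporarily (router)

# ... run the evaluation against team.model here ...
print(team.model)  # Model(id='claude-sonnet', provider='anthropic')

if default_model is not None:  # Restore original, as the run_*_eval helpers do
    team.model = default_model
print(team.model)  # Model(id='gpt-4o', provider='openai')
```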
agno/os/routers/knowledge/knowledge.py
@@ -981,7 +981,7 @@ async def process_content(
              log_debug(f"Set chunking strategy: {chunker}")

          log_debug(f"Using reader: {content.reader.__class__.__name__}")
-         await knowledge._load_content(content, upsert=False, skip_if_exists=True)
+         await knowledge._load_content_async(content, upsert=False, skip_if_exists=True)
          log_info(f"Content {content.id} processed successfully")
      except Exception as e:
          log_info(f"Error processing content: {e}")
agno/os/routers/teams/__init__.py (new file)
@@ -0,0 +1,3 @@
+ from agno.os.routers.teams.router import get_team_router
+
+ __all__ = ["get_team_router"]