agno 2.3.8__py3-none-any.whl → 2.3.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. agno/agent/agent.py +134 -94
  2. agno/db/mysql/__init__.py +2 -1
  3. agno/db/mysql/async_mysql.py +2888 -0
  4. agno/db/mysql/mysql.py +17 -8
  5. agno/db/mysql/utils.py +139 -6
  6. agno/db/postgres/async_postgres.py +10 -5
  7. agno/db/postgres/postgres.py +7 -2
  8. agno/db/schemas/evals.py +1 -0
  9. agno/db/singlestore/singlestore.py +5 -1
  10. agno/db/sqlite/async_sqlite.py +3 -3
  11. agno/eval/__init__.py +10 -0
  12. agno/eval/accuracy.py +11 -8
  13. agno/eval/agent_as_judge.py +861 -0
  14. agno/eval/base.py +29 -0
  15. agno/eval/utils.py +2 -1
  16. agno/exceptions.py +7 -0
  17. agno/knowledge/embedder/openai.py +8 -8
  18. agno/knowledge/knowledge.py +1142 -176
  19. agno/media.py +22 -6
  20. agno/models/aws/claude.py +8 -7
  21. agno/models/base.py +61 -2
  22. agno/models/deepseek/deepseek.py +67 -0
  23. agno/models/google/gemini.py +134 -51
  24. agno/models/google/utils.py +22 -0
  25. agno/models/message.py +5 -0
  26. agno/models/openai/chat.py +4 -0
  27. agno/os/app.py +64 -74
  28. agno/os/interfaces/a2a/router.py +3 -4
  29. agno/os/interfaces/agui/router.py +2 -0
  30. agno/os/router.py +3 -1607
  31. agno/os/routers/agents/__init__.py +3 -0
  32. agno/os/routers/agents/router.py +581 -0
  33. agno/os/routers/agents/schema.py +261 -0
  34. agno/os/routers/evals/evals.py +26 -6
  35. agno/os/routers/evals/schemas.py +34 -2
  36. agno/os/routers/evals/utils.py +77 -18
  37. agno/os/routers/knowledge/knowledge.py +1 -1
  38. agno/os/routers/teams/__init__.py +3 -0
  39. agno/os/routers/teams/router.py +496 -0
  40. agno/os/routers/teams/schema.py +257 -0
  41. agno/os/routers/workflows/__init__.py +3 -0
  42. agno/os/routers/workflows/router.py +545 -0
  43. agno/os/routers/workflows/schema.py +75 -0
  44. agno/os/schema.py +1 -559
  45. agno/os/utils.py +139 -2
  46. agno/team/team.py +87 -24
  47. agno/tools/file_generation.py +12 -6
  48. agno/tools/firecrawl.py +15 -7
  49. agno/tools/function.py +37 -23
  50. agno/tools/shopify.py +1519 -0
  51. agno/tools/spotify.py +2 -5
  52. agno/utils/hooks.py +64 -5
  53. agno/utils/http.py +2 -2
  54. agno/utils/media.py +11 -1
  55. agno/utils/print_response/agent.py +8 -0
  56. agno/utils/print_response/team.py +8 -0
  57. agno/vectordb/pgvector/pgvector.py +88 -51
  58. agno/workflow/parallel.py +5 -3
  59. agno/workflow/step.py +14 -2
  60. agno/workflow/types.py +38 -2
  61. agno/workflow/workflow.py +12 -4
  62. {agno-2.3.8.dist-info → agno-2.3.10.dist-info}/METADATA +7 -2
  63. {agno-2.3.8.dist-info → agno-2.3.10.dist-info}/RECORD +66 -52
  64. {agno-2.3.8.dist-info → agno-2.3.10.dist-info}/WHEEL +0 -0
  65. {agno-2.3.8.dist-info → agno-2.3.10.dist-info}/licenses/LICENSE +0 -0
  66. {agno-2.3.8.dist-info → agno-2.3.10.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,261 @@
+ from typing import Any, Dict, Optional
+ from uuid import uuid4
+
+ from pydantic import BaseModel
+
+ from agno.agent import Agent
+ from agno.models.message import Message
+ from agno.os.schema import ModelResponse
+ from agno.os.utils import (
+     format_tools,
+     get_agent_input_schema_dict,
+ )
+ from agno.run import RunContext
+ from agno.run.agent import RunOutput
+ from agno.session import AgentSession
+ from agno.utils.agent import aexecute_instructions, aexecute_system_message
+
+
+ class AgentResponse(BaseModel):
+     id: Optional[str] = None
+     name: Optional[str] = None
+     db_id: Optional[str] = None
+     model: Optional[ModelResponse] = None
+     tools: Optional[Dict[str, Any]] = None
+     sessions: Optional[Dict[str, Any]] = None
+     knowledge: Optional[Dict[str, Any]] = None
+     memory: Optional[Dict[str, Any]] = None
+     reasoning: Optional[Dict[str, Any]] = None
+     default_tools: Optional[Dict[str, Any]] = None
+     system_message: Optional[Dict[str, Any]] = None
+     extra_messages: Optional[Dict[str, Any]] = None
+     response_settings: Optional[Dict[str, Any]] = None
+     introduction: Optional[str] = None
+     streaming: Optional[Dict[str, Any]] = None
+     metadata: Optional[Dict[str, Any]] = None
+     input_schema: Optional[Dict[str, Any]] = None
+
+     @classmethod
+     async def from_agent(cls, agent: Agent) -> "AgentResponse":
+         def filter_meaningful_config(d: Dict[str, Any], defaults: Dict[str, Any]) -> Optional[Dict[str, Any]]:
+             """Filter out fields that match their default values, keeping only meaningful user configurations"""
+             filtered = {}
+             for key, value in d.items():
+                 if value is None:
+                     continue
+                 # Skip if value matches the default exactly
+                 if key in defaults and value == defaults[key]:
+                     continue
+                 # Keep non-default values
+                 filtered[key] = value
+             return filtered if filtered else None
+
+         # Define default values for filtering
+         agent_defaults = {
+             # Sessions defaults
+             "add_history_to_context": False,
+             "num_history_runs": 3,
+             "enable_session_summaries": False,
+             "search_session_history": False,
+             "cache_session": False,
+             # Knowledge defaults
+             "add_references": False,
+             "references_format": "json",
+             "enable_agentic_knowledge_filters": False,
+             # Memory defaults
+             "enable_agentic_memory": False,
+             "enable_user_memories": False,
+             # Reasoning defaults
+             "reasoning": False,
+             "reasoning_min_steps": 1,
+             "reasoning_max_steps": 10,
+             # Default tools defaults
+             "read_chat_history": False,
+             "search_knowledge": True,
+             "update_knowledge": False,
+             "read_tool_call_history": False,
+             # System message defaults
+             "system_message_role": "system",
+             "build_context": True,
+             "markdown": False,
+             "add_name_to_context": False,
+             "add_datetime_to_context": False,
+             "add_location_to_context": False,
+             "resolve_in_context": True,
+             # Extra messages defaults
+             "user_message_role": "user",
+             "build_user_context": True,
+             # Response settings defaults
+             "retries": 0,
+             "delay_between_retries": 1,
+             "exponential_backoff": False,
+             "parse_response": True,
+             "use_json_mode": False,
+             # Streaming defaults
+             "stream_events": False,
+             "stream_intermediate_steps": False,
+         }
+
+         session_id = str(uuid4())
+         run_id = str(uuid4())
+         agent_tools = await agent.aget_tools(
+             session=AgentSession(session_id=session_id, session_data={}),
+             run_response=RunOutput(run_id=run_id, session_id=session_id),
+             run_context=RunContext(run_id=run_id, session_id=session_id, user_id=agent.user_id),
+             check_mcp_tools=False,
+         )
+         formatted_tools = format_tools(agent_tools) if agent_tools else None
+
+         additional_input = agent.additional_input
+         if additional_input and isinstance(additional_input[0], Message):
+             additional_input = [message.to_dict() for message in additional_input]  # type: ignore
+
+         # Build model only if it has at least one non-null field
+         model_name = agent.model.name if (agent.model and agent.model.name) else None
+         model_provider = agent.model.provider if (agent.model and agent.model.provider) else None
+         model_id = agent.model.id if (agent.model and agent.model.id) else None
+         _agent_model_data: Dict[str, Any] = {}
+         if model_name is not None:
+             _agent_model_data["name"] = model_name
+         if model_id is not None:
+             _agent_model_data["model"] = model_id
+         if model_provider is not None:
+             _agent_model_data["provider"] = model_provider
+
+         session_table = agent.db.session_table_name if agent.db else None
+         knowledge_table = agent.db.knowledge_table_name if agent.db and agent.knowledge else None
+
+         tools_info = {
+             "tools": formatted_tools,
+             "tool_call_limit": agent.tool_call_limit,
+             "tool_choice": agent.tool_choice,
+         }
+
+         sessions_info = {
+             "session_table": session_table,
+             "add_history_to_context": agent.add_history_to_context,
+             "enable_session_summaries": agent.enable_session_summaries,
+             "num_history_runs": agent.num_history_runs,
+             "search_session_history": agent.search_session_history,
+             "num_history_sessions": agent.num_history_sessions,
+             "cache_session": agent.cache_session,
+         }
+
+         knowledge_info = {
+             "knowledge_table": knowledge_table,
+             "enable_agentic_knowledge_filters": agent.enable_agentic_knowledge_filters,
+             "knowledge_filters": agent.knowledge_filters,
+             "references_format": agent.references_format,
+         }
+
+         memory_info: Optional[Dict[str, Any]] = None
+         if agent.memory_manager is not None:
+             memory_info = {
+                 "enable_agentic_memory": agent.enable_agentic_memory,
+                 "enable_user_memories": agent.enable_user_memories,
+                 "metadata": agent.metadata,
+                 "memory_table": agent.db.memory_table_name if agent.db and agent.enable_user_memories else None,
+             }
+
+             if agent.memory_manager.model is not None:
+                 memory_info["model"] = ModelResponse(
+                     name=agent.memory_manager.model.name,
+                     model=agent.memory_manager.model.id,
+                     provider=agent.memory_manager.model.provider,
+                 ).model_dump()
+
+         reasoning_info: Dict[str, Any] = {
+             "reasoning": agent.reasoning,
+             "reasoning_agent_id": agent.reasoning_agent.id if agent.reasoning_agent else None,
+             "reasoning_min_steps": agent.reasoning_min_steps,
+             "reasoning_max_steps": agent.reasoning_max_steps,
+         }
+
+         if agent.reasoning_model:
+             reasoning_info["reasoning_model"] = ModelResponse(
+                 name=agent.reasoning_model.name,
+                 model=agent.reasoning_model.id,
+                 provider=agent.reasoning_model.provider,
+             ).model_dump()
+
+         default_tools_info = {
+             "read_chat_history": agent.read_chat_history,
+             "search_knowledge": agent.search_knowledge,
+             "update_knowledge": agent.update_knowledge,
+             "read_tool_call_history": agent.read_tool_call_history,
+         }
+
+         instructions = agent.instructions if agent.instructions else None
+         if instructions and callable(instructions):
+             instructions = await aexecute_instructions(instructions=instructions, agent=agent)
+
+         system_message = agent.system_message if agent.system_message else None
+         if system_message and callable(system_message):
+             system_message = await aexecute_system_message(system_message=system_message, agent=agent)
+
+         system_message_info = {
+             "system_message": str(system_message) if system_message else None,
+             "system_message_role": agent.system_message_role,
+             "build_context": agent.build_context,
+             "description": agent.description,
+             "instructions": instructions,
+             "expected_output": agent.expected_output,
+             "additional_context": agent.additional_context,
+             "markdown": agent.markdown,
+             "add_name_to_context": agent.add_name_to_context,
+             "add_datetime_to_context": agent.add_datetime_to_context,
+             "add_location_to_context": agent.add_location_to_context,
+             "timezone_identifier": agent.timezone_identifier,
+             "resolve_in_context": agent.resolve_in_context,
+         }
+
+         extra_messages_info = {
+             "additional_input": additional_input,  # type: ignore
+             "user_message_role": agent.user_message_role,
+             "build_user_context": agent.build_user_context,
+         }
+
+         response_settings_info: Dict[str, Any] = {
+             "retries": agent.retries,
+             "delay_between_retries": agent.delay_between_retries,
+             "exponential_backoff": agent.exponential_backoff,
+             "output_schema_name": agent.output_schema.__name__ if agent.output_schema else None,
+             "parser_model_prompt": agent.parser_model_prompt,
+             "parse_response": agent.parse_response,
+             "structured_outputs": agent.structured_outputs,
+             "use_json_mode": agent.use_json_mode,
+             "save_response_to_file": agent.save_response_to_file,
+         }
+
+         if agent.parser_model:
+             response_settings_info["parser_model"] = ModelResponse(
+                 name=agent.parser_model.name,
+                 model=agent.parser_model.id,
+                 provider=agent.parser_model.provider,
+             ).model_dump()
+
+         streaming_info = {
+             "stream": agent.stream,
+             "stream_events": agent.stream_events,
+             "stream_intermediate_steps": agent.stream_intermediate_steps,
+         }
+
+         return AgentResponse(
+             id=agent.id,
+             name=agent.name,
+             db_id=agent.db.id if agent.db else None,
+             model=ModelResponse(**_agent_model_data) if _agent_model_data else None,
+             tools=filter_meaningful_config(tools_info, {}),
+             sessions=filter_meaningful_config(sessions_info, agent_defaults),
+             knowledge=filter_meaningful_config(knowledge_info, agent_defaults),
+             memory=filter_meaningful_config(memory_info, agent_defaults) if memory_info else None,
+             reasoning=filter_meaningful_config(reasoning_info, agent_defaults),
+             default_tools=filter_meaningful_config(default_tools_info, agent_defaults),
+             system_message=filter_meaningful_config(system_message_info, agent_defaults),
+             extra_messages=filter_meaningful_config(extra_messages_info, agent_defaults),
+             response_settings=filter_meaningful_config(response_settings_info, agent_defaults),
+             streaming=filter_meaningful_config(streaming_info, agent_defaults),
+             introduction=agent.introduction,
+             metadata=agent.metadata,
+             input_schema=get_agent_input_schema_dict(agent),
+         )
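Note (not part of the diff): AgentResponse.from_agent above is an async classmethod that resolves the agent's tools, instructions, and system message, then drops any setting still at its default. A minimal usage sketch under assumptions — the Agent constructor arguments below are illustrative, and a bare Agent (no db or model) is assumed to satisfy every attribute from_agent reads:

    import asyncio

    from agno.agent import Agent
    from agno.os.routers.agents.schema import AgentResponse

    async def main() -> None:
        # Illustrative agent; any configured Agent instance would be passed here.
        agent = Agent(name="demo-agent", markdown=True)
        # Only non-default settings (here: markdown=True) survive the filtering.
        summary = await AgentResponse.from_agent(agent)
        print(summary.model_dump(exclude_none=True))

    asyncio.run(main())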
@@ -15,7 +15,12 @@ from agno.os.routers.evals.schemas import (
      EvalSchema,
      UpdateEvalRunRequest,
  )
- from agno.os.routers.evals.utils import run_accuracy_eval, run_performance_eval, run_reliability_eval
+ from agno.os.routers.evals.utils import (
+     run_accuracy_eval,
+     run_agent_as_judge_eval,
+     run_performance_eval,
+     run_reliability_eval,
+ )
  from agno.os.schema import (
      BadRequestResponse,
      InternalServerErrorResponse,
@@ -119,6 +124,15 @@ def attach_routes(
      ) -> PaginatedResponse[EvalSchema]:
          db = await get_db(dbs, db_id, table)

+         # TODO: Delete me:
+         # Filtering out agent-as-judge by default for now,
+         # as they are not supported yet in the AgentOS UI.
+         eval_types = eval_types or [
+             EvalType.ACCURACY,
+             EvalType.PERFORMANCE,
+             EvalType.RELIABILITY,
+         ]
+
          if isinstance(db, AsyncBaseDb):
              db = cast(AsyncBaseDb, db)
              eval_runs, total_count = await db.get_eval_runs(
@@ -304,7 +318,7 @@ def attach_routes(
          operation_id="run_eval",
          summary="Execute Evaluation",
          description=(
-             "Run evaluation tests on agents or teams. Supports accuracy, performance, and reliability evaluations. "
+             "Run evaluation tests on agents or teams. Supports accuracy, agent-as-judge, performance, and reliability evaluations. "
              "Requires either agent_id or team_id, but not both."
          ),
          responses={
@@ -374,6 +388,7 @@ def attach_routes(
          if not team:
              raise HTTPException(status_code=404, detail=f"Team with id '{eval_run_input.team_id}' not found")

+         # If model_id/model_provider specified, override team's model temporarily
          default_model = None
          if (
              hasattr(team, "model")
@@ -381,13 +396,13 @@ def attach_routes(
              and eval_run_input.model_id is not None
              and eval_run_input.model_provider is not None
          ):
-             default_model = deepcopy(team.model)
+             default_model = deepcopy(team.model)  # Save original
              if eval_run_input.model_id != team.model.id or eval_run_input.model_provider != team.model.provider:
                  model_provider = eval_run_input.model_provider.lower()
                  model_id = eval_run_input.model_id.lower()
                  model_string = f"{model_provider}:{model_id}"
                  model = get_model(model_string)
-                 team.model = model
+                 team.model = model  # Override temporarily

          agent = None

@@ -400,6 +415,11 @@ def attach_routes(
                  eval_run_input=eval_run_input, db=db, agent=agent, team=team, default_model=default_model
              )

+         elif eval_run_input.eval_type == EvalType.AGENT_AS_JUDGE:
+             return await run_agent_as_judge_eval(
+                 eval_run_input=eval_run_input, db=db, agent=agent, team=team, default_model=default_model
+             )
+
          elif eval_run_input.eval_type == EvalType.PERFORMANCE:
              return await run_performance_eval(
                  eval_run_input=eval_run_input, db=db, agent=agent, team=team, default_model=default_model
@@ -416,8 +436,8 @@ def attach_routes(
  def parse_eval_types_filter(
      eval_types: Optional[str] = Query(
          default=None,
-         description="Comma-separated eval types (accuracy,performance,reliability)",
-         examples=["accuracy,performance"],
+         description="Comma-separated eval types (accuracy,agent_as_judge,performance,reliability)",
+         examples=["accuracy,agent_as_judge,performance,reliability"],
      ),
  ) -> Optional[List[EvalType]]:
      """Parse comma-separated eval types into EvalType enums for filtering evaluation runs."""
@@ -1,12 +1,13 @@
  from dataclasses import asdict
  from datetime import datetime, timezone
- from typing import Any, Dict, List, Optional
+ from typing import Any, Dict, List, Literal, Optional

  from pydantic import BaseModel, Field

  from agno.db.schemas.evals import EvalType
- from agno.eval import AccuracyResult, PerformanceResult, ReliabilityResult
+ from agno.eval import AccuracyResult, AgentAsJudgeResult, PerformanceResult, ReliabilityResult
  from agno.eval.accuracy import AccuracyEval
+ from agno.eval.agent_as_judge import AgentAsJudgeEval
  from agno.eval.performance import PerformanceEval
  from agno.eval.reliability import ReliabilityEval

@@ -27,6 +28,15 @@ class EvalRunInput(BaseModel):
      # Accuracy eval specific fields
      expected_output: Optional[str] = Field(None, description="Expected output for accuracy evaluation")

+     # AgentAsJudge eval specific fields
+     criteria: Optional[str] = Field(None, description="Evaluation criteria for agent-as-judge evaluation")
+     scoring_strategy: Optional[Literal["numeric", "binary"]] = Field(
+         "binary", description="Scoring strategy: 'numeric' (1-10 with threshold) or 'binary' (PASS/FAIL)"
+     )
+     threshold: Optional[int] = Field(
+         7, description="Score threshold for pass/fail (1-10), only used with numeric scoring", ge=1, le=10
+     )
+
      # Performance eval specific fields
      warmup_runs: int = Field(0, description="Number of warmup runs before measuring performance", ge=0, le=10)

@@ -89,6 +99,28 @@ class EvalSchema(BaseModel):
              eval_data=asdict(result),
          )

+     @classmethod
+     def from_agent_as_judge_eval(
+         cls,
+         agent_as_judge_eval: AgentAsJudgeEval,
+         result: AgentAsJudgeResult,
+         model_id: Optional[str] = None,
+         model_provider: Optional[str] = None,
+         agent_id: Optional[str] = None,
+         team_id: Optional[str] = None,
+     ) -> "EvalSchema":
+         return cls(
+             id=result.run_id,
+             name=agent_as_judge_eval.name,
+             agent_id=agent_id,
+             team_id=team_id,
+             workflow_id=None,
+             model_id=model_id,
+             model_provider=model_provider,
+             eval_type=EvalType.AGENT_AS_JUDGE,
+             eval_data=asdict(result),
+         )
+
      @classmethod
      def from_performance_eval(
          cls,
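Note (not part of the diff): the new EvalRunInput fields above (criteria, scoring_strategy, threshold) feed the agent-as-judge branch of the run_eval endpoint. A hedged sketch of a request body; the agent_id, eval_type, and input field names appear elsewhere in this diff, while the values and exact JSON shape are assumptions:

    # Illustrative body for the "Execute Evaluation" (run_eval) endpoint.
    payload = {
        "agent_id": "my-agent",          # or "team_id", but not both
        "eval_type": "agent_as_judge",
        "input": "Summarize the Q3 report in three bullet points.",
        "criteria": "The summary is factual and uses at most three bullets.",
        "scoring_strategy": "numeric",   # or "binary" (the default, PASS/FAIL)
        "threshold": 7,                  # pass/fail cutoff, numeric scoring only
    }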
@@ -5,6 +5,7 @@ from fastapi import HTTPException
  from agno.agent.agent import Agent
  from agno.db.base import AsyncBaseDb, BaseDb
  from agno.eval.accuracy import AccuracyEval
+ from agno.eval.agent_as_judge import AgentAsJudgeEval
  from agno.eval.performance import PerformanceEval
  from agno.eval.reliability import ReliabilityEval
  from agno.models.base import Model
@@ -36,15 +37,77 @@ async def run_accuracy_eval(
          model=default_model,
      )

-     if isinstance(db, AsyncBaseDb):
-         result = await accuracy_eval.arun(print_results=False, print_summary=False)
-     else:
-         result = accuracy_eval.run(print_results=False, print_summary=False)
+     result = await accuracy_eval.arun(print_results=False, print_summary=False)
      if not result:
          raise HTTPException(status_code=500, detail="Failed to run accuracy evaluation")

      eval_run = EvalSchema.from_accuracy_eval(accuracy_eval=accuracy_eval, result=result)

+     # Restore original model after eval
+     if default_model is not None:
+         if agent is not None:
+             agent.model = default_model
+         elif team is not None:
+             team.model = default_model
+
+     return eval_run
+
+
+ async def run_agent_as_judge_eval(
+     eval_run_input: EvalRunInput,
+     db: Union[BaseDb, AsyncBaseDb],
+     agent: Optional[Agent] = None,
+     team: Optional[Team] = None,
+     default_model: Optional[Model] = None,
+ ) -> EvalSchema:
+     """Run an AgentAsJudge evaluation for the given agent or team"""
+     if not eval_run_input.criteria:
+         raise HTTPException(status_code=400, detail="criteria is required for agent-as-judge evaluation")
+
+     # Run agent/team to get output
+     if agent:
+         agent_response = await agent.arun(eval_run_input.input, stream=False)
+         output = str(agent_response.content) if agent_response.content else ""
+         model_id = agent.model.id if agent and agent.model else None
+         model_provider = agent.model.provider if agent and agent.model else None
+         agent_id = agent.id
+         team_id = None
+     elif team:
+         team_response = await team.arun(eval_run_input.input, stream=False)
+         output = str(team_response.content) if team_response.content else ""
+         model_id = team.model.id if team and team.model else None
+         model_provider = team.model.provider if team and team.model else None
+         agent_id = None
+         team_id = team.id
+     else:
+         raise HTTPException(status_code=400, detail="Either agent_id or team_id must be provided")
+
+     agent_as_judge_eval = AgentAsJudgeEval(
+         db=db,
+         criteria=eval_run_input.criteria,
+         scoring_strategy=eval_run_input.scoring_strategy or "binary",
+         threshold=eval_run_input.threshold or 7,
+         additional_guidelines=eval_run_input.additional_guidelines,
+         name=eval_run_input.name,
+         model=default_model,
+     )
+
+     result = await agent_as_judge_eval.arun(
+         input=eval_run_input.input, output=output, print_results=False, print_summary=False
+     )
+     if not result:
+         raise HTTPException(status_code=500, detail="Failed to run agent as judge evaluation")
+
+     eval_run = EvalSchema.from_agent_as_judge_eval(
+         agent_as_judge_eval=agent_as_judge_eval,
+         result=result,
+         agent_id=agent_id,
+         team_id=team_id,
+         model_id=model_id,
+         model_provider=model_provider,
+     )
+
+     # Restore original model after eval
      if default_model is not None:
          if agent is not None:
              agent.model = default_model
@@ -64,16 +127,16 @@ async def run_performance_eval(
      """Run a performance evaluation for the given agent or team"""
      if agent:

-         def run_component():  # type: ignore
-             return agent.run(eval_run_input.input)
+         async def run_component():  # type: ignore
+             return await agent.arun(eval_run_input.input, stream=False)

          model_id = agent.model.id if agent and agent.model else None
          model_provider = agent.model.provider if agent and agent.model else None

      elif team:

-         def run_component():
-             return team.run(eval_run_input.input)
+         async def run_component():  # type: ignore
+             return await team.arun(eval_run_input.input, stream=False)

          model_id = team.model.id if team and team.model else None
          model_provider = team.model.provider if team and team.model else None
@@ -90,10 +153,7 @@ async def run_performance_eval(
          model_provider=model_provider,
      )

-     if isinstance(db, AsyncBaseDb):
-         result = await performance_eval.arun(print_results=False, print_summary=False)
-     else:
-         result = performance_eval.run(print_results=False, print_summary=False)
+     result = await performance_eval.arun(print_results=False, print_summary=False)
      if not result:
          raise HTTPException(status_code=500, detail="Failed to run performance evaluation")

@@ -106,6 +166,7 @@ async def run_performance_eval(
          model_provider=model_provider,
      )

+     # Restore original model after eval
      if default_model is not None:
          if agent is not None:
              agent.model = default_model
@@ -127,7 +188,7 @@ async def run_reliability_eval(
          raise HTTPException(status_code=400, detail="expected_tool_calls is required for reliability evaluations")

      if agent:
-         agent_response = agent.run(eval_run_input.input)
+         agent_response = await agent.arun(eval_run_input.input, stream=False)
          reliability_eval = ReliabilityEval(
              db=db,
              name=eval_run_input.name,
@@ -138,7 +199,7 @@ async def run_reliability_eval(
          )
          model_provider = agent.model.provider if agent and agent.model else None

-         team_response = team.run(eval_run_input.input)
+         team_response = await team.arun(eval_run_input.input, stream=False)
          reliability_eval = ReliabilityEval(
              db=db,
              name=eval_run_input.name,
@@ -148,10 +209,7 @@ async def run_reliability_eval(
          model_id = team.model.id if team and team.model else None
          model_provider = team.model.provider if team and team.model else None

-     if isinstance(db, AsyncBaseDb):
-         result = await reliability_eval.arun(print_results=False)
-     else:
-         result = reliability_eval.run(print_results=False)
+     result = await reliability_eval.arun(print_results=False)
      if not result:
          raise HTTPException(status_code=500, detail="Failed to run reliability evaluation")

@@ -163,6 +221,7 @@ async def run_reliability_eval(
          model_provider=model_provider,
      )

+     # Restore original model after eval
      if default_model is not None:
          if agent is not None:
              agent.model = default_model
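Note (not part of the diff): run_agent_as_judge_eval above shows the call pattern for the new AgentAsJudgeEval. A standalone sketch following that pattern; treating db and model as optional outside the router is an assumption, and the criteria and input/output strings are illustrative:

    import asyncio

    from agno.eval.agent_as_judge import AgentAsJudgeEval

    async def main() -> None:
        judge = AgentAsJudgeEval(
            name="citation-check",
            criteria="The answer must cite at least one source.",
            scoring_strategy="numeric",  # or "binary" (PASS/FAIL)
            threshold=7,                 # used only with numeric scoring
        )
        # Judge a single input/output pair, as the router code does above.
        result = await judge.arun(
            input="What is the capital of France?",
            output="Paris, according to Britannica.",
            print_results=False,
            print_summary=False,
        )
        print(result)

    asyncio.run(main())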
@@ -981,7 +981,7 @@ async def process_content(
          log_debug(f"Set chunking strategy: {chunker}")

          log_debug(f"Using reader: {content.reader.__class__.__name__}")
-         await knowledge._load_content(content, upsert=False, skip_if_exists=True)
+         await knowledge._load_content_async(content, upsert=False, skip_if_exists=True)
          log_info(f"Content {content.id} processed successfully")
      except Exception as e:
          log_info(f"Error processing content: {e}")
@@ -0,0 +1,3 @@
+ from agno.os.routers.teams.router import get_team_router
+
+ __all__ = ["get_team_router"]