agno-2.2.13-py3-none-any.whl → agno-2.4.3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/agent/__init__.py +6 -0
- agno/agent/agent.py +5252 -3145
- agno/agent/remote.py +525 -0
- agno/api/api.py +2 -0
- agno/client/__init__.py +3 -0
- agno/client/a2a/__init__.py +10 -0
- agno/client/a2a/client.py +554 -0
- agno/client/a2a/schemas.py +112 -0
- agno/client/a2a/utils.py +369 -0
- agno/client/os.py +2669 -0
- agno/compression/__init__.py +3 -0
- agno/compression/manager.py +247 -0
- agno/culture/manager.py +2 -2
- agno/db/base.py +927 -6
- agno/db/dynamo/dynamo.py +788 -2
- agno/db/dynamo/schemas.py +128 -0
- agno/db/dynamo/utils.py +26 -3
- agno/db/firestore/firestore.py +674 -50
- agno/db/firestore/schemas.py +41 -0
- agno/db/firestore/utils.py +25 -10
- agno/db/gcs_json/gcs_json_db.py +506 -3
- agno/db/gcs_json/utils.py +14 -2
- agno/db/in_memory/in_memory_db.py +203 -4
- agno/db/in_memory/utils.py +14 -2
- agno/db/json/json_db.py +498 -2
- agno/db/json/utils.py +14 -2
- agno/db/migrations/manager.py +199 -0
- agno/db/migrations/utils.py +19 -0
- agno/db/migrations/v1_to_v2.py +54 -16
- agno/db/migrations/versions/__init__.py +0 -0
- agno/db/migrations/versions/v2_3_0.py +977 -0
- agno/db/mongo/async_mongo.py +1013 -39
- agno/db/mongo/mongo.py +684 -4
- agno/db/mongo/schemas.py +48 -0
- agno/db/mongo/utils.py +17 -0
- agno/db/mysql/__init__.py +2 -1
- agno/db/mysql/async_mysql.py +2958 -0
- agno/db/mysql/mysql.py +722 -53
- agno/db/mysql/schemas.py +77 -11
- agno/db/mysql/utils.py +151 -8
- agno/db/postgres/async_postgres.py +1254 -137
- agno/db/postgres/postgres.py +2316 -93
- agno/db/postgres/schemas.py +153 -21
- agno/db/postgres/utils.py +22 -7
- agno/db/redis/redis.py +531 -3
- agno/db/redis/schemas.py +36 -0
- agno/db/redis/utils.py +31 -15
- agno/db/schemas/evals.py +1 -0
- agno/db/schemas/memory.py +20 -9
- agno/db/singlestore/schemas.py +70 -1
- agno/db/singlestore/singlestore.py +737 -74
- agno/db/singlestore/utils.py +13 -3
- agno/db/sqlite/async_sqlite.py +1069 -89
- agno/db/sqlite/schemas.py +133 -1
- agno/db/sqlite/sqlite.py +2203 -165
- agno/db/sqlite/utils.py +21 -11
- agno/db/surrealdb/models.py +25 -0
- agno/db/surrealdb/surrealdb.py +603 -1
- agno/db/utils.py +60 -0
- agno/eval/__init__.py +26 -3
- agno/eval/accuracy.py +25 -12
- agno/eval/agent_as_judge.py +871 -0
- agno/eval/base.py +29 -0
- agno/eval/performance.py +10 -4
- agno/eval/reliability.py +22 -13
- agno/eval/utils.py +2 -1
- agno/exceptions.py +42 -0
- agno/hooks/__init__.py +3 -0
- agno/hooks/decorator.py +164 -0
- agno/integrations/discord/client.py +13 -2
- agno/knowledge/__init__.py +4 -0
- agno/knowledge/chunking/code.py +90 -0
- agno/knowledge/chunking/document.py +65 -4
- agno/knowledge/chunking/fixed.py +4 -1
- agno/knowledge/chunking/markdown.py +102 -11
- agno/knowledge/chunking/recursive.py +2 -2
- agno/knowledge/chunking/semantic.py +130 -48
- agno/knowledge/chunking/strategy.py +18 -0
- agno/knowledge/embedder/azure_openai.py +0 -1
- agno/knowledge/embedder/google.py +1 -1
- agno/knowledge/embedder/mistral.py +1 -1
- agno/knowledge/embedder/nebius.py +1 -1
- agno/knowledge/embedder/openai.py +16 -12
- agno/knowledge/filesystem.py +412 -0
- agno/knowledge/knowledge.py +4261 -1199
- agno/knowledge/protocol.py +134 -0
- agno/knowledge/reader/arxiv_reader.py +3 -2
- agno/knowledge/reader/base.py +9 -7
- agno/knowledge/reader/csv_reader.py +91 -42
- agno/knowledge/reader/docx_reader.py +9 -10
- agno/knowledge/reader/excel_reader.py +225 -0
- agno/knowledge/reader/field_labeled_csv_reader.py +38 -48
- agno/knowledge/reader/firecrawl_reader.py +3 -2
- agno/knowledge/reader/json_reader.py +16 -22
- agno/knowledge/reader/markdown_reader.py +15 -14
- agno/knowledge/reader/pdf_reader.py +33 -28
- agno/knowledge/reader/pptx_reader.py +9 -10
- agno/knowledge/reader/reader_factory.py +135 -1
- agno/knowledge/reader/s3_reader.py +8 -16
- agno/knowledge/reader/tavily_reader.py +3 -3
- agno/knowledge/reader/text_reader.py +15 -14
- agno/knowledge/reader/utils/__init__.py +17 -0
- agno/knowledge/reader/utils/spreadsheet.py +114 -0
- agno/knowledge/reader/web_search_reader.py +8 -65
- agno/knowledge/reader/website_reader.py +16 -13
- agno/knowledge/reader/wikipedia_reader.py +36 -3
- agno/knowledge/reader/youtube_reader.py +3 -2
- agno/knowledge/remote_content/__init__.py +33 -0
- agno/knowledge/remote_content/config.py +266 -0
- agno/knowledge/remote_content/remote_content.py +105 -17
- agno/knowledge/utils.py +76 -22
- agno/learn/__init__.py +71 -0
- agno/learn/config.py +463 -0
- agno/learn/curate.py +185 -0
- agno/learn/machine.py +725 -0
- agno/learn/schemas.py +1114 -0
- agno/learn/stores/__init__.py +38 -0
- agno/learn/stores/decision_log.py +1156 -0
- agno/learn/stores/entity_memory.py +3275 -0
- agno/learn/stores/learned_knowledge.py +1583 -0
- agno/learn/stores/protocol.py +117 -0
- agno/learn/stores/session_context.py +1217 -0
- agno/learn/stores/user_memory.py +1495 -0
- agno/learn/stores/user_profile.py +1220 -0
- agno/learn/utils.py +209 -0
- agno/media.py +22 -6
- agno/memory/__init__.py +14 -1
- agno/memory/manager.py +223 -8
- agno/memory/strategies/__init__.py +15 -0
- agno/memory/strategies/base.py +66 -0
- agno/memory/strategies/summarize.py +196 -0
- agno/memory/strategies/types.py +37 -0
- agno/models/aimlapi/aimlapi.py +17 -0
- agno/models/anthropic/claude.py +434 -59
- agno/models/aws/bedrock.py +121 -20
- agno/models/aws/claude.py +131 -274
- agno/models/azure/ai_foundry.py +10 -6
- agno/models/azure/openai_chat.py +33 -10
- agno/models/base.py +1162 -561
- agno/models/cerebras/cerebras.py +120 -24
- agno/models/cerebras/cerebras_openai.py +21 -2
- agno/models/cohere/chat.py +65 -6
- agno/models/cometapi/cometapi.py +18 -1
- agno/models/dashscope/dashscope.py +2 -3
- agno/models/deepinfra/deepinfra.py +18 -1
- agno/models/deepseek/deepseek.py +69 -3
- agno/models/fireworks/fireworks.py +18 -1
- agno/models/google/gemini.py +959 -89
- agno/models/google/utils.py +22 -0
- agno/models/groq/groq.py +48 -18
- agno/models/huggingface/huggingface.py +17 -6
- agno/models/ibm/watsonx.py +16 -6
- agno/models/internlm/internlm.py +18 -1
- agno/models/langdb/langdb.py +13 -1
- agno/models/litellm/chat.py +88 -9
- agno/models/litellm/litellm_openai.py +18 -1
- agno/models/message.py +24 -5
- agno/models/meta/llama.py +40 -13
- agno/models/meta/llama_openai.py +22 -21
- agno/models/metrics.py +12 -0
- agno/models/mistral/mistral.py +8 -4
- agno/models/n1n/__init__.py +3 -0
- agno/models/n1n/n1n.py +57 -0
- agno/models/nebius/nebius.py +6 -7
- agno/models/nvidia/nvidia.py +20 -3
- agno/models/ollama/__init__.py +2 -0
- agno/models/ollama/chat.py +17 -6
- agno/models/ollama/responses.py +100 -0
- agno/models/openai/__init__.py +2 -0
- agno/models/openai/chat.py +117 -26
- agno/models/openai/open_responses.py +46 -0
- agno/models/openai/responses.py +110 -32
- agno/models/openrouter/__init__.py +2 -0
- agno/models/openrouter/openrouter.py +67 -2
- agno/models/openrouter/responses.py +146 -0
- agno/models/perplexity/perplexity.py +19 -1
- agno/models/portkey/portkey.py +7 -6
- agno/models/requesty/requesty.py +19 -2
- agno/models/response.py +20 -2
- agno/models/sambanova/sambanova.py +20 -3
- agno/models/siliconflow/siliconflow.py +19 -2
- agno/models/together/together.py +20 -3
- agno/models/vercel/v0.py +20 -3
- agno/models/vertexai/claude.py +124 -4
- agno/models/vllm/vllm.py +19 -14
- agno/models/xai/xai.py +19 -2
- agno/os/app.py +467 -137
- agno/os/auth.py +253 -5
- agno/os/config.py +22 -0
- agno/os/interfaces/a2a/a2a.py +7 -6
- agno/os/interfaces/a2a/router.py +635 -26
- agno/os/interfaces/a2a/utils.py +32 -33
- agno/os/interfaces/agui/agui.py +5 -3
- agno/os/interfaces/agui/router.py +26 -16
- agno/os/interfaces/agui/utils.py +97 -57
- agno/os/interfaces/base.py +7 -7
- agno/os/interfaces/slack/router.py +16 -7
- agno/os/interfaces/slack/slack.py +7 -7
- agno/os/interfaces/whatsapp/router.py +35 -7
- agno/os/interfaces/whatsapp/security.py +3 -1
- agno/os/interfaces/whatsapp/whatsapp.py +11 -8
- agno/os/managers.py +326 -0
- agno/os/mcp.py +652 -79
- agno/os/middleware/__init__.py +4 -0
- agno/os/middleware/jwt.py +718 -115
- agno/os/middleware/trailing_slash.py +27 -0
- agno/os/router.py +105 -1558
- agno/os/routers/agents/__init__.py +3 -0
- agno/os/routers/agents/router.py +655 -0
- agno/os/routers/agents/schema.py +288 -0
- agno/os/routers/components/__init__.py +3 -0
- agno/os/routers/components/components.py +475 -0
- agno/os/routers/database.py +155 -0
- agno/os/routers/evals/evals.py +111 -18
- agno/os/routers/evals/schemas.py +38 -5
- agno/os/routers/evals/utils.py +80 -11
- agno/os/routers/health.py +3 -3
- agno/os/routers/knowledge/knowledge.py +284 -35
- agno/os/routers/knowledge/schemas.py +14 -2
- agno/os/routers/memory/memory.py +274 -11
- agno/os/routers/memory/schemas.py +44 -3
- agno/os/routers/metrics/metrics.py +30 -15
- agno/os/routers/metrics/schemas.py +10 -6
- agno/os/routers/registry/__init__.py +3 -0
- agno/os/routers/registry/registry.py +337 -0
- agno/os/routers/session/session.py +143 -14
- agno/os/routers/teams/__init__.py +3 -0
- agno/os/routers/teams/router.py +550 -0
- agno/os/routers/teams/schema.py +280 -0
- agno/os/routers/traces/__init__.py +3 -0
- agno/os/routers/traces/schemas.py +414 -0
- agno/os/routers/traces/traces.py +549 -0
- agno/os/routers/workflows/__init__.py +3 -0
- agno/os/routers/workflows/router.py +757 -0
- agno/os/routers/workflows/schema.py +139 -0
- agno/os/schema.py +157 -584
- agno/os/scopes.py +469 -0
- agno/os/settings.py +3 -0
- agno/os/utils.py +574 -185
- agno/reasoning/anthropic.py +85 -1
- agno/reasoning/azure_ai_foundry.py +93 -1
- agno/reasoning/deepseek.py +102 -2
- agno/reasoning/default.py +6 -7
- agno/reasoning/gemini.py +87 -3
- agno/reasoning/groq.py +109 -2
- agno/reasoning/helpers.py +6 -7
- agno/reasoning/manager.py +1238 -0
- agno/reasoning/ollama.py +93 -1
- agno/reasoning/openai.py +115 -1
- agno/reasoning/vertexai.py +85 -1
- agno/registry/__init__.py +3 -0
- agno/registry/registry.py +68 -0
- agno/remote/__init__.py +3 -0
- agno/remote/base.py +581 -0
- agno/run/__init__.py +2 -4
- agno/run/agent.py +134 -19
- agno/run/base.py +49 -1
- agno/run/cancel.py +65 -52
- agno/run/cancellation_management/__init__.py +9 -0
- agno/run/cancellation_management/base.py +78 -0
- agno/run/cancellation_management/in_memory_cancellation_manager.py +100 -0
- agno/run/cancellation_management/redis_cancellation_manager.py +236 -0
- agno/run/requirement.py +181 -0
- agno/run/team.py +111 -19
- agno/run/workflow.py +2 -1
- agno/session/agent.py +57 -92
- agno/session/summary.py +1 -1
- agno/session/team.py +62 -115
- agno/session/workflow.py +353 -57
- agno/skills/__init__.py +17 -0
- agno/skills/agent_skills.py +377 -0
- agno/skills/errors.py +32 -0
- agno/skills/loaders/__init__.py +4 -0
- agno/skills/loaders/base.py +27 -0
- agno/skills/loaders/local.py +216 -0
- agno/skills/skill.py +65 -0
- agno/skills/utils.py +107 -0
- agno/skills/validator.py +277 -0
- agno/table.py +10 -0
- agno/team/__init__.py +5 -1
- agno/team/remote.py +447 -0
- agno/team/team.py +3769 -2202
- agno/tools/brandfetch.py +27 -18
- agno/tools/browserbase.py +225 -16
- agno/tools/crawl4ai.py +3 -0
- agno/tools/duckduckgo.py +25 -71
- agno/tools/exa.py +0 -21
- agno/tools/file.py +14 -13
- agno/tools/file_generation.py +12 -6
- agno/tools/firecrawl.py +15 -7
- agno/tools/function.py +94 -113
- agno/tools/google_bigquery.py +11 -2
- agno/tools/google_drive.py +4 -3
- agno/tools/knowledge.py +9 -4
- agno/tools/mcp/mcp.py +301 -18
- agno/tools/mcp/multi_mcp.py +269 -14
- agno/tools/mem0.py +11 -10
- agno/tools/memory.py +47 -46
- agno/tools/mlx_transcribe.py +10 -7
- agno/tools/models/nebius.py +5 -5
- agno/tools/models_labs.py +20 -10
- agno/tools/nano_banana.py +151 -0
- agno/tools/parallel.py +0 -7
- agno/tools/postgres.py +76 -36
- agno/tools/python.py +14 -6
- agno/tools/reasoning.py +30 -23
- agno/tools/redshift.py +406 -0
- agno/tools/shopify.py +1519 -0
- agno/tools/spotify.py +919 -0
- agno/tools/tavily.py +4 -1
- agno/tools/toolkit.py +253 -18
- agno/tools/websearch.py +93 -0
- agno/tools/website.py +1 -1
- agno/tools/wikipedia.py +1 -1
- agno/tools/workflow.py +56 -48
- agno/tools/yfinance.py +12 -11
- agno/tracing/__init__.py +12 -0
- agno/tracing/exporter.py +161 -0
- agno/tracing/schemas.py +276 -0
- agno/tracing/setup.py +112 -0
- agno/utils/agent.py +251 -10
- agno/utils/cryptography.py +22 -0
- agno/utils/dttm.py +33 -0
- agno/utils/events.py +264 -7
- agno/utils/hooks.py +111 -3
- agno/utils/http.py +161 -2
- agno/utils/mcp.py +49 -8
- agno/utils/media.py +22 -1
- agno/utils/models/ai_foundry.py +9 -2
- agno/utils/models/claude.py +20 -5
- agno/utils/models/cohere.py +9 -2
- agno/utils/models/llama.py +9 -2
- agno/utils/models/mistral.py +4 -2
- agno/utils/os.py +0 -0
- agno/utils/print_response/agent.py +99 -16
- agno/utils/print_response/team.py +223 -24
- agno/utils/print_response/workflow.py +0 -2
- agno/utils/prompts.py +8 -6
- agno/utils/remote.py +23 -0
- agno/utils/response.py +1 -13
- agno/utils/string.py +91 -2
- agno/utils/team.py +62 -12
- agno/utils/tokens.py +657 -0
- agno/vectordb/base.py +15 -2
- agno/vectordb/cassandra/cassandra.py +1 -1
- agno/vectordb/chroma/__init__.py +2 -1
- agno/vectordb/chroma/chromadb.py +468 -23
- agno/vectordb/clickhouse/clickhousedb.py +1 -1
- agno/vectordb/couchbase/couchbase.py +6 -2
- agno/vectordb/lancedb/lance_db.py +7 -38
- agno/vectordb/lightrag/lightrag.py +7 -6
- agno/vectordb/milvus/milvus.py +118 -84
- agno/vectordb/mongodb/__init__.py +2 -1
- agno/vectordb/mongodb/mongodb.py +14 -31
- agno/vectordb/pgvector/pgvector.py +120 -66
- agno/vectordb/pineconedb/pineconedb.py +2 -19
- agno/vectordb/qdrant/__init__.py +2 -1
- agno/vectordb/qdrant/qdrant.py +33 -56
- agno/vectordb/redis/__init__.py +2 -1
- agno/vectordb/redis/redisdb.py +19 -31
- agno/vectordb/singlestore/singlestore.py +17 -9
- agno/vectordb/surrealdb/surrealdb.py +2 -38
- agno/vectordb/weaviate/__init__.py +2 -1
- agno/vectordb/weaviate/weaviate.py +7 -3
- agno/workflow/__init__.py +5 -1
- agno/workflow/agent.py +2 -2
- agno/workflow/condition.py +12 -10
- agno/workflow/loop.py +28 -9
- agno/workflow/parallel.py +21 -13
- agno/workflow/remote.py +362 -0
- agno/workflow/router.py +12 -9
- agno/workflow/step.py +261 -36
- agno/workflow/steps.py +12 -8
- agno/workflow/types.py +40 -77
- agno/workflow/workflow.py +939 -213
- {agno-2.2.13.dist-info → agno-2.4.3.dist-info}/METADATA +134 -181
- agno-2.4.3.dist-info/RECORD +677 -0
- {agno-2.2.13.dist-info → agno-2.4.3.dist-info}/WHEEL +1 -1
- agno/tools/googlesearch.py +0 -98
- agno/tools/memori.py +0 -339
- agno-2.2.13.dist-info/RECORD +0 -575
- {agno-2.2.13.dist-info → agno-2.4.3.dist-info}/licenses/LICENSE +0 -0
- {agno-2.2.13.dist-info → agno-2.4.3.dist-info}/top_level.txt +0 -0
agno/os/routers/evals/evals.py
CHANGED
|
@@ -2,20 +2,25 @@ import logging
|
|
|
2
2
|
from copy import deepcopy
|
|
3
3
|
from typing import List, Optional, Union, cast
|
|
4
4
|
|
|
5
|
-
from fastapi import APIRouter, Depends, HTTPException, Query
|
|
5
|
+
from fastapi import APIRouter, Depends, HTTPException, Query, Request
|
|
6
6
|
|
|
7
|
-
from agno.agent
|
|
7
|
+
from agno.agent import Agent, RemoteAgent
|
|
8
8
|
from agno.db.base import AsyncBaseDb, BaseDb
|
|
9
9
|
from agno.db.schemas.evals import EvalFilterType, EvalType
|
|
10
10
|
from agno.models.utils import get_model
|
|
11
|
-
from agno.os.auth import get_authentication_dependency
|
|
11
|
+
from agno.os.auth import get_auth_token_from_request, get_authentication_dependency
|
|
12
12
|
from agno.os.routers.evals.schemas import (
|
|
13
13
|
DeleteEvalRunsRequest,
|
|
14
14
|
EvalRunInput,
|
|
15
15
|
EvalSchema,
|
|
16
16
|
UpdateEvalRunRequest,
|
|
17
17
|
)
|
|
18
|
-
from agno.os.routers.evals.utils import
|
|
18
|
+
from agno.os.routers.evals.utils import (
|
|
19
|
+
run_accuracy_eval,
|
|
20
|
+
run_agent_as_judge_eval,
|
|
21
|
+
run_performance_eval,
|
|
22
|
+
run_reliability_eval,
|
|
23
|
+
)
|
|
19
24
|
from agno.os.schema import (
|
|
20
25
|
BadRequestResponse,
|
|
21
26
|
InternalServerErrorResponse,
|
|
@@ -28,15 +33,17 @@ from agno.os.schema import (
|
|
|
28
33
|
)
|
|
29
34
|
from agno.os.settings import AgnoAPISettings
|
|
30
35
|
from agno.os.utils import get_agent_by_id, get_db, get_team_by_id
|
|
31
|
-
from agno.
|
|
36
|
+
from agno.remote.base import RemoteDb
|
|
37
|
+
from agno.team import RemoteTeam, Team
|
|
38
|
+
from agno.utils.log import log_warning
|
|
32
39
|
|
|
33
40
|
logger = logging.getLogger(__name__)
|
|
34
41
|
|
|
35
42
|
|
|
36
43
|
def get_eval_router(
|
|
37
|
-
dbs: dict[str, list[Union[BaseDb, AsyncBaseDb]]],
|
|
38
|
-
agents: Optional[List[Agent]] = None,
|
|
39
|
-
teams: Optional[List[Team]] = None,
|
|
44
|
+
dbs: dict[str, list[Union[BaseDb, AsyncBaseDb, RemoteDb]]],
|
|
45
|
+
agents: Optional[List[Union[Agent, RemoteAgent]]] = None,
|
|
46
|
+
teams: Optional[List[Union[Team, RemoteTeam]]] = None,
|
|
40
47
|
settings: AgnoAPISettings = AgnoAPISettings(),
|
|
41
48
|
) -> APIRouter:
|
|
42
49
|
"""Create eval router with comprehensive OpenAPI documentation for agent/team evaluation endpoints."""
|
|
@@ -56,9 +63,9 @@ def get_eval_router(
|
|
|
56
63
|
|
|
57
64
|
def attach_routes(
|
|
58
65
|
router: APIRouter,
|
|
59
|
-
dbs: dict[str, list[Union[BaseDb, AsyncBaseDb]]],
|
|
60
|
-
agents: Optional[List[Agent]] = None,
|
|
61
|
-
teams: Optional[List[Team]] = None,
|
|
66
|
+
dbs: dict[str, list[Union[BaseDb, AsyncBaseDb, RemoteDb]]],
|
|
67
|
+
agents: Optional[List[Union[Agent, RemoteAgent]]] = None,
|
|
68
|
+
teams: Optional[List[Union[Team, RemoteTeam]]] = None,
|
|
62
69
|
) -> APIRouter:
|
|
63
70
|
@router.get(
|
|
64
71
|
"/eval-runs",
|
|
@@ -104,14 +111,15 @@ def attach_routes(
|
|
|
104
111
|
},
|
|
105
112
|
)
|
|
106
113
|
async def get_eval_runs(
|
|
114
|
+
request: Request,
|
|
107
115
|
agent_id: Optional[str] = Query(default=None, description="Agent ID"),
|
|
108
116
|
team_id: Optional[str] = Query(default=None, description="Team ID"),
|
|
109
117
|
workflow_id: Optional[str] = Query(default=None, description="Workflow ID"),
|
|
110
118
|
model_id: Optional[str] = Query(default=None, description="Model ID"),
|
|
111
119
|
filter_type: Optional[EvalFilterType] = Query(default=None, description="Filter type", alias="type"),
|
|
112
120
|
eval_types: Optional[List[EvalType]] = Depends(parse_eval_types_filter),
|
|
113
|
-
limit: Optional[int] = Query(default=20, description="Number of eval runs to return"),
|
|
114
|
-
page: Optional[int] = Query(default=1, description="Page number"),
|
|
121
|
+
limit: Optional[int] = Query(default=20, description="Number of eval runs to return", ge=1),
|
|
122
|
+
page: Optional[int] = Query(default=1, description="Page number", ge=0),
|
|
115
123
|
sort_by: Optional[str] = Query(default="created_at", description="Field to sort by"),
|
|
116
124
|
sort_order: Optional[SortOrder] = Query(default="desc", description="Sort order (asc or desc)"),
|
|
117
125
|
db_id: Optional[str] = Query(default=None, description="The ID of the database to use"),
|
|
@@ -119,6 +127,23 @@ def attach_routes(
|
|
|
119
127
|
) -> PaginatedResponse[EvalSchema]:
|
|
120
128
|
db = await get_db(dbs, db_id, table)
|
|
121
129
|
|
|
130
|
+
if isinstance(db, RemoteDb):
|
|
131
|
+
auth_token = get_auth_token_from_request(request)
|
|
132
|
+
headers = {"Authorization": f"Bearer {auth_token}"} if auth_token else None
|
|
133
|
+
return await db.get_eval_runs(
|
|
134
|
+
limit=limit,
|
|
135
|
+
page=page,
|
|
136
|
+
sort_by=sort_by,
|
|
137
|
+
sort_order=sort_order.value if sort_order else None,
|
|
138
|
+
agent_id=agent_id,
|
|
139
|
+
team_id=team_id,
|
|
140
|
+
workflow_id=workflow_id,
|
|
141
|
+
model_id=model_id,
|
|
142
|
+
eval_types=eval_types,
|
|
143
|
+
filter_type=filter_type.value if filter_type else None,
|
|
144
|
+
headers=headers,
|
|
145
|
+
)
|
|
146
|
+
|
|
122
147
|
if isinstance(db, AsyncBaseDb):
|
|
123
148
|
db = cast(AsyncBaseDb, db)
|
|
124
149
|
eval_runs, total_count = await db.get_eval_runs(
|
|
@@ -197,11 +222,17 @@ def attach_routes(
|
|
|
197
222
|
},
|
|
198
223
|
)
|
|
199
224
|
async def get_eval_run(
|
|
225
|
+
request: Request,
|
|
200
226
|
eval_run_id: str,
|
|
201
227
|
db_id: Optional[str] = Query(default=None, description="The ID of the database to use"),
|
|
202
228
|
table: Optional[str] = Query(default=None, description="Table to query eval run from"),
|
|
203
229
|
) -> EvalSchema:
|
|
204
230
|
db = await get_db(dbs, db_id, table)
|
|
231
|
+
if isinstance(db, RemoteDb):
|
|
232
|
+
auth_token = get_auth_token_from_request(request)
|
|
233
|
+
headers = {"Authorization": f"Bearer {auth_token}"} if auth_token else None
|
|
234
|
+
return await db.get_eval_run(eval_run_id=eval_run_id, db_id=db_id, table=table, headers=headers)
|
|
235
|
+
|
|
205
236
|
if isinstance(db, AsyncBaseDb):
|
|
206
237
|
db = cast(AsyncBaseDb, db)
|
|
207
238
|
eval_run = await db.get_eval_run(eval_run_id=eval_run_id, deserialize=False)
|
|
@@ -224,12 +255,20 @@ def attach_routes(
|
|
|
224
255
|
},
|
|
225
256
|
)
|
|
226
257
|
async def delete_eval_runs(
|
|
258
|
+
http_request: Request,
|
|
227
259
|
request: DeleteEvalRunsRequest,
|
|
228
260
|
db_id: Optional[str] = Query(default=None, description="Database ID to use for deletion"),
|
|
229
261
|
table: Optional[str] = Query(default=None, description="Table to use for deletion"),
|
|
230
262
|
) -> None:
|
|
231
263
|
try:
|
|
232
264
|
db = await get_db(dbs, db_id, table)
|
|
265
|
+
if isinstance(db, RemoteDb):
|
|
266
|
+
auth_token = get_auth_token_from_request(http_request)
|
|
267
|
+
headers = {"Authorization": f"Bearer {auth_token}"} if auth_token else None
|
|
268
|
+
return await db.delete_eval_runs(
|
|
269
|
+
eval_run_ids=request.eval_run_ids, db_id=db_id, table=table, headers=headers
|
|
270
|
+
)
|
|
271
|
+
|
|
233
272
|
if isinstance(db, AsyncBaseDb):
|
|
234
273
|
db = cast(AsyncBaseDb, db)
|
|
235
274
|
await db.delete_eval_runs(eval_run_ids=request.eval_run_ids)
|
|
@@ -277,6 +316,7 @@ def attach_routes(
|
|
|
277
316
|
},
|
|
278
317
|
)
|
|
279
318
|
async def update_eval_run(
|
|
319
|
+
http_request: Request,
|
|
280
320
|
eval_run_id: str,
|
|
281
321
|
request: UpdateEvalRunRequest,
|
|
282
322
|
db_id: Optional[str] = Query(default=None, description="The ID of the database to use"),
|
|
@@ -284,6 +324,13 @@ def attach_routes(
|
|
|
284
324
|
) -> EvalSchema:
|
|
285
325
|
try:
|
|
286
326
|
db = await get_db(dbs, db_id, table)
|
|
327
|
+
if isinstance(db, RemoteDb):
|
|
328
|
+
auth_token = get_auth_token_from_request(http_request)
|
|
329
|
+
headers = {"Authorization": f"Bearer {auth_token}"} if auth_token else None
|
|
330
|
+
return await db.update_eval_run(
|
|
331
|
+
eval_run_id=eval_run_id, name=request.name, db_id=db_id, table=table, headers=headers
|
|
332
|
+
)
|
|
333
|
+
|
|
287
334
|
if isinstance(db, AsyncBaseDb):
|
|
288
335
|
db = cast(AsyncBaseDb, db)
|
|
289
336
|
eval_run = await db.rename_eval_run(eval_run_id=eval_run_id, name=request.name, deserialize=False)
|
|
@@ -304,7 +351,7 @@ def attach_routes(
|
|
|
304
351
|
operation_id="run_eval",
|
|
305
352
|
summary="Execute Evaluation",
|
|
306
353
|
description=(
|
|
307
|
-
"Run evaluation tests on agents or teams. Supports accuracy, performance, and reliability evaluations. "
|
|
354
|
+
"Run evaluation tests on agents or teams. Supports accuracy, agent-as-judge, performance, and reliability evaluations. "
|
|
308
355
|
"Requires either agent_id or team_id, but not both."
|
|
309
356
|
),
|
|
310
357
|
responses={
|
|
@@ -338,11 +385,29 @@ def attach_routes(
|
|
|
338
385
|
},
|
|
339
386
|
)
|
|
340
387
|
async def run_eval(
|
|
388
|
+
request: Request,
|
|
341
389
|
eval_run_input: EvalRunInput,
|
|
342
390
|
db_id: Optional[str] = Query(default=None, description="Database ID to use for evaluation"),
|
|
343
391
|
table: Optional[str] = Query(default=None, description="Table to use for evaluation"),
|
|
344
392
|
) -> Optional[EvalSchema]:
|
|
345
393
|
db = await get_db(dbs, db_id, table)
|
|
394
|
+
if isinstance(db, RemoteDb):
|
|
395
|
+
auth_token = get_auth_token_from_request(request)
|
|
396
|
+
headers = {"Authorization": f"Bearer {auth_token}"} if auth_token else None
|
|
397
|
+
return await db.create_eval_run(
|
|
398
|
+
eval_type=eval_run_input.eval_type,
|
|
399
|
+
input_text=eval_run_input.input,
|
|
400
|
+
agent_id=eval_run_input.agent_id,
|
|
401
|
+
team_id=eval_run_input.team_id,
|
|
402
|
+
model_id=eval_run_input.model_id,
|
|
403
|
+
model_provider=eval_run_input.model_provider,
|
|
404
|
+
expected_output=eval_run_input.expected_output,
|
|
405
|
+
expected_tool_calls=eval_run_input.expected_tool_calls,
|
|
406
|
+
num_iterations=eval_run_input.num_iterations,
|
|
407
|
+
db_id=db_id,
|
|
408
|
+
table=table,
|
|
409
|
+
headers=headers,
|
|
410
|
+
)
|
|
346
411
|
|
|
347
412
|
if eval_run_input.agent_id and eval_run_input.team_id:
|
|
348
413
|
raise HTTPException(status_code=400, detail="Only one of agent_id or team_id must be provided")
|
|
@@ -351,6 +416,9 @@ def attach_routes(
|
|
|
351
416
|
agent = get_agent_by_id(agent_id=eval_run_input.agent_id, agents=agents)
|
|
352
417
|
if not agent:
|
|
353
418
|
raise HTTPException(status_code=404, detail=f"Agent with id '{eval_run_input.agent_id}' not found")
|
|
419
|
+
if isinstance(agent, RemoteAgent):
|
|
420
|
+
log_warning("Evaluation against remote agents are not supported yet")
|
|
421
|
+
return None
|
|
354
422
|
|
|
355
423
|
default_model = None
|
|
356
424
|
if (
|
|
@@ -373,7 +441,11 @@ def attach_routes(
|
|
|
373
441
|
team = get_team_by_id(team_id=eval_run_input.team_id, teams=teams)
|
|
374
442
|
if not team:
|
|
375
443
|
raise HTTPException(status_code=404, detail=f"Team with id '{eval_run_input.team_id}' not found")
|
|
444
|
+
if isinstance(team, RemoteTeam):
|
|
445
|
+
log_warning("Evaluation against remote teams are not supported yet")
|
|
446
|
+
return None
|
|
376
447
|
|
|
448
|
+
# If model_id/model_provider specified, override team's model temporarily
|
|
377
449
|
default_model = None
|
|
378
450
|
if (
|
|
379
451
|
hasattr(team, "model")
|
|
@@ -381,13 +453,13 @@ def attach_routes(
|
|
|
381
453
|
and eval_run_input.model_id is not None
|
|
382
454
|
and eval_run_input.model_provider is not None
|
|
383
455
|
):
|
|
384
|
-
default_model = deepcopy(team.model)
|
|
456
|
+
default_model = deepcopy(team.model) # Save original
|
|
385
457
|
if eval_run_input.model_id != team.model.id or eval_run_input.model_provider != team.model.provider:
|
|
386
458
|
model_provider = eval_run_input.model_provider.lower()
|
|
387
459
|
model_id = eval_run_input.model_id.lower()
|
|
388
460
|
model_string = f"{model_provider}:{model_id}"
|
|
389
461
|
model = get_model(model_string)
|
|
390
|
-
team.model = model
|
|
462
|
+
team.model = model # Override temporarily
|
|
391
463
|
|
|
392
464
|
agent = None
|
|
393
465
|
|
|
@@ -396,16 +468,37 @@ def attach_routes(
|
|
|
396
468
|
|
|
397
469
|
# Run the evaluation
|
|
398
470
|
if eval_run_input.eval_type == EvalType.ACCURACY:
|
|
471
|
+
if isinstance(agent, RemoteAgent) or isinstance(team, RemoteTeam):
|
|
472
|
+
# TODO: Handle remote evaluation
|
|
473
|
+
log_warning("Evaluation against remote agents are not supported yet")
|
|
474
|
+
return None
|
|
399
475
|
return await run_accuracy_eval(
|
|
400
476
|
eval_run_input=eval_run_input, db=db, agent=agent, team=team, default_model=default_model
|
|
401
477
|
)
|
|
402
478
|
|
|
479
|
+
elif eval_run_input.eval_type == EvalType.AGENT_AS_JUDGE:
|
|
480
|
+
return await run_agent_as_judge_eval(
|
|
481
|
+
eval_run_input=eval_run_input,
|
|
482
|
+
db=db,
|
|
483
|
+
agent=agent,
|
|
484
|
+
team=team,
|
|
485
|
+
default_model=default_model, # type: ignore
|
|
486
|
+
)
|
|
487
|
+
|
|
403
488
|
elif eval_run_input.eval_type == EvalType.PERFORMANCE:
|
|
489
|
+
if isinstance(agent, RemoteAgent) or isinstance(team, RemoteTeam):
|
|
490
|
+
# TODO: Handle remote evaluation
|
|
491
|
+
log_warning("Evaluation against remote agents are not supported yet")
|
|
492
|
+
return None
|
|
404
493
|
return await run_performance_eval(
|
|
405
494
|
eval_run_input=eval_run_input, db=db, agent=agent, team=team, default_model=default_model
|
|
406
495
|
)
|
|
407
496
|
|
|
408
497
|
else:
|
|
498
|
+
if isinstance(agent, RemoteAgent) or isinstance(team, RemoteTeam):
|
|
499
|
+
# TODO: Handle remote evaluation
|
|
500
|
+
log_warning("Evaluation against remote agents are not supported yet")
|
|
501
|
+
return None
|
|
409
502
|
return await run_reliability_eval(
|
|
410
503
|
eval_run_input=eval_run_input, db=db, agent=agent, team=team, default_model=default_model
|
|
411
504
|
)
|
|
@@ -416,8 +509,8 @@ def attach_routes(
|
|
|
416
509
|
def parse_eval_types_filter(
|
|
417
510
|
eval_types: Optional[str] = Query(
|
|
418
511
|
default=None,
|
|
419
|
-
description="Comma-separated eval types (accuracy,performance,reliability)",
|
|
420
|
-
examples=["accuracy,performance"],
|
|
512
|
+
description="Comma-separated eval types (accuracy,agent_as_judge,performance,reliability)",
|
|
513
|
+
examples=["accuracy,agent_as_judge,performance,reliability"],
|
|
421
514
|
),
|
|
422
515
|
) -> Optional[List[EvalType]]:
|
|
423
516
|
"""Parse comma-separated eval types into EvalType enums for filtering evaluation runs."""
|
agno/os/routers/evals/schemas.py
CHANGED
|
@@ -1,14 +1,16 @@
|
|
|
1
1
|
from dataclasses import asdict
|
|
2
|
-
from datetime import datetime
|
|
3
|
-
from typing import Any, Dict, List, Optional
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
from typing import Any, Dict, List, Literal, Optional
|
|
4
4
|
|
|
5
5
|
from pydantic import BaseModel, Field
|
|
6
6
|
|
|
7
7
|
from agno.db.schemas.evals import EvalType
|
|
8
|
-
from agno.eval import AccuracyResult, PerformanceResult, ReliabilityResult
|
|
8
|
+
from agno.eval import AccuracyResult, AgentAsJudgeResult, PerformanceResult, ReliabilityResult
|
|
9
9
|
from agno.eval.accuracy import AccuracyEval
|
|
10
|
+
from agno.eval.agent_as_judge import AgentAsJudgeEval
|
|
10
11
|
from agno.eval.performance import PerformanceEval
|
|
11
12
|
from agno.eval.reliability import ReliabilityEval
|
|
13
|
+
from agno.os.utils import to_utc_datetime
|
|
12
14
|
|
|
13
15
|
|
|
14
16
|
class EvalRunInput(BaseModel):
|
|
@@ -27,6 +29,15 @@ class EvalRunInput(BaseModel):
|
|
|
27
29
|
# Accuracy eval specific fields
|
|
28
30
|
expected_output: Optional[str] = Field(None, description="Expected output for accuracy evaluation")
|
|
29
31
|
|
|
32
|
+
# AgentAsJudge eval specific fields
|
|
33
|
+
criteria: Optional[str] = Field(None, description="Evaluation criteria for agent-as-judge evaluation")
|
|
34
|
+
scoring_strategy: Optional[Literal["numeric", "binary"]] = Field(
|
|
35
|
+
"binary", description="Scoring strategy: 'numeric' (1-10 with threshold) or 'binary' (PASS/FAIL)"
|
|
36
|
+
)
|
|
37
|
+
threshold: Optional[int] = Field(
|
|
38
|
+
7, description="Score threshold for pass/fail (1-10), only used with numeric scoring", ge=1, le=10
|
|
39
|
+
)
|
|
40
|
+
|
|
30
41
|
# Performance eval specific fields
|
|
31
42
|
warmup_runs: int = Field(0, description="Number of warmup runs before measuring performance", ge=0, le=10)
|
|
32
43
|
|
|
@@ -64,8 +75,8 @@ class EvalSchema(BaseModel):
|
|
|
64
75
|
eval_type=eval_run["eval_type"],
|
|
65
76
|
eval_data=eval_run["eval_data"],
|
|
66
77
|
eval_input=eval_run.get("eval_input"),
|
|
67
|
-
created_at=
|
|
68
|
-
updated_at=
|
|
78
|
+
created_at=to_utc_datetime(eval_run.get("created_at")),
|
|
79
|
+
updated_at=to_utc_datetime(eval_run.get("updated_at")),
|
|
69
80
|
)
|
|
70
81
|
|
|
71
82
|
@classmethod
|
|
@@ -89,6 +100,28 @@ class EvalSchema(BaseModel):
|
|
|
89
100
|
eval_data=asdict(result),
|
|
90
101
|
)
|
|
91
102
|
|
|
103
|
+
@classmethod
def from_agent_as_judge_eval(
    cls,
    agent_as_judge_eval: AgentAsJudgeEval,
    result: AgentAsJudgeResult,
    model_id: Optional[str] = None,
    model_provider: Optional[str] = None,
    agent_id: Optional[str] = None,
    team_id: Optional[str] = None,
) -> "EvalSchema":
    """Build an EvalSchema from a finished agent-as-judge evaluation.

    Args:
        agent_as_judge_eval: The evaluation that was run; only its ``name`` is read.
        result: The evaluation result; ``run_id`` becomes the schema id and the
            whole object is serialized with ``asdict`` into ``eval_data``.
        model_id: Model id to record on the schema, if any.
        model_provider: Model provider to record on the schema, if any.
        agent_id: Id of the evaluated agent, if any.
        team_id: Id of the evaluated team, if any.

    Returns:
        A new schema instance whose ``eval_type`` is ``EvalType.AGENT_AS_JUDGE``
        and whose ``workflow_id`` is always ``None``.
    """
    # Collect the constructor arguments first so the mapping from inputs to
    # schema fields can be read in one place.
    schema_fields = {
        "id": result.run_id,
        "name": agent_as_judge_eval.name,
        "agent_id": agent_id,
        "team_id": team_id,
        "workflow_id": None,
        "model_id": model_id,
        "model_provider": model_provider,
        "eval_type": EvalType.AGENT_AS_JUDGE,
        "eval_data": asdict(result),
    }
    return cls(**schema_fields)
|
|
124
|
+
|
|
92
125
|
@classmethod
|
|
93
126
|
def from_performance_eval(
|
|
94
127
|
cls,
|
agno/os/routers/evals/utils.py
CHANGED
|
@@ -2,14 +2,15 @@ from typing import Optional, Union
|
|
|
2
2
|
|
|
3
3
|
from fastapi import HTTPException
|
|
4
4
|
|
|
5
|
-
from agno.agent
|
|
5
|
+
from agno.agent import Agent, RemoteAgent
|
|
6
6
|
from agno.db.base import AsyncBaseDb, BaseDb
|
|
7
7
|
from agno.eval.accuracy import AccuracyEval
|
|
8
|
+
from agno.eval.agent_as_judge import AgentAsJudgeEval
|
|
8
9
|
from agno.eval.performance import PerformanceEval
|
|
9
10
|
from agno.eval.reliability import ReliabilityEval
|
|
10
11
|
from agno.models.base import Model
|
|
11
12
|
from agno.os.routers.evals.schemas import EvalRunInput, EvalSchema
|
|
12
|
-
from agno.team
|
|
13
|
+
from agno.team import RemoteTeam, Team
|
|
13
14
|
|
|
14
15
|
|
|
15
16
|
async def run_accuracy_eval(
|
|
@@ -36,12 +37,13 @@ async def run_accuracy_eval(
|
|
|
36
37
|
model=default_model,
|
|
37
38
|
)
|
|
38
39
|
|
|
39
|
-
result = accuracy_eval.
|
|
40
|
+
result = await accuracy_eval.arun(print_results=False, print_summary=False)
|
|
40
41
|
if not result:
|
|
41
42
|
raise HTTPException(status_code=500, detail="Failed to run accuracy evaluation")
|
|
42
43
|
|
|
43
44
|
eval_run = EvalSchema.from_accuracy_eval(accuracy_eval=accuracy_eval, result=result)
|
|
44
45
|
|
|
46
|
+
# Restore original model after eval
|
|
45
47
|
if default_model is not None:
|
|
46
48
|
if agent is not None:
|
|
47
49
|
agent.model = default_model
|
|
@@ -51,6 +53,70 @@ async def run_accuracy_eval(
|
|
|
51
53
|
return eval_run
|
|
52
54
|
|
|
53
55
|
|
|
56
|
+
async def run_agent_as_judge_eval(
    eval_run_input: EvalRunInput,
    db: Union[BaseDb, AsyncBaseDb],
    agent: Optional[Union[Agent, RemoteAgent]] = None,
    team: Optional[Union[Team, RemoteTeam]] = None,
    default_model: Optional[Model] = None,
) -> EvalSchema:
    """Run an AgentAsJudge evaluation for the given agent or team.

    The agent (or, failing that, the team) is run once on ``eval_run_input.input``
    and its stringified content is scored by an ``AgentAsJudgeEval`` built from
    the request's criteria, scoring strategy, threshold and guidelines.

    Args:
        eval_run_input: The evaluation request; ``criteria`` is mandatory.
        db: Database handle passed to the evaluator.
        agent: Agent to evaluate. Takes precedence over ``team`` when both are given.
        team: Team to evaluate when no agent is given.
        default_model: Passed to the evaluator as its model and, when not None,
            assigned back onto a local ``Agent``/``Team`` once the evaluation ends.
            NOTE(review): presumably the component's original model captured by
            the caller before applying a per-request override -- confirm at the
            call site.

    Returns:
        The EvalSchema describing the completed evaluation run.

    Raises:
        HTTPException: 400 when ``criteria`` is missing or neither an agent nor a
            team is supplied; 500 when the evaluator returns no result.
    """
    try:
        if not eval_run_input.criteria:
            raise HTTPException(status_code=400, detail="criteria is required for agent-as-judge evaluation")

        # Run agent/team to get the output that will be judged
        if agent:
            agent_response = await agent.arun(eval_run_input.input, stream=False)
            output = str(agent_response.content) if agent_response.content else ""
            agent_id = agent.id
            team_id = None
        elif team:
            team_response = await team.arun(eval_run_input.input, stream=False)
            output = str(team_response.content) if team_response.content else ""
            agent_id = None
            team_id = team.id
        else:
            raise HTTPException(status_code=400, detail="Either agent_id or team_id must be provided")

        agent_as_judge_eval = AgentAsJudgeEval(
            db=db,
            criteria=eval_run_input.criteria,
            scoring_strategy=eval_run_input.scoring_strategy or "binary",
            threshold=eval_run_input.threshold or 7,
            additional_guidelines=eval_run_input.additional_guidelines,
            name=eval_run_input.name,
            model=default_model,
        )

        result = await agent_as_judge_eval.arun(
            input=eval_run_input.input, output=output, print_results=False, print_summary=False
        )
        if not result:
            raise HTTPException(status_code=500, detail="Failed to run agent as judge evaluation")

        # Record the evaluator's model, not the evaluated component's model
        judge_model = agent_as_judge_eval.model
        eval_model_id = judge_model.id if judge_model is not None else None
        eval_model_provider = judge_model.provider if judge_model is not None else None

        return EvalSchema.from_agent_as_judge_eval(
            agent_as_judge_eval=agent_as_judge_eval,
            result=result,
            agent_id=agent_id,
            team_id=team_id,
            model_id=eval_model_id,
            model_provider=eval_model_provider,
        )
    finally:
        # Restore original model after eval. Doing this in `finally` guarantees
        # the component is reset even when the run or the judge raises; the
        # previous success-only restore left it un-restored after any failure.
        # Remote agents/teams are skipped, as before.
        if default_model is not None:
            if isinstance(agent, Agent):
                agent.model = default_model
            elif isinstance(team, Team):
                team.model = default_model
|
|
118
|
+
|
|
119
|
+
|
|
54
120
|
async def run_performance_eval(
|
|
55
121
|
eval_run_input: EvalRunInput,
|
|
56
122
|
db: Union[BaseDb, AsyncBaseDb],
|
|
@@ -61,16 +127,16 @@ async def run_performance_eval(
|
|
|
61
127
|
"""Run a performance evaluation for the given agent or team"""
|
|
62
128
|
if agent:
|
|
63
129
|
|
|
64
|
-
def run_component(): # type: ignore
|
|
65
|
-
return agent.
|
|
130
|
+
async def run_component(): # type: ignore
|
|
131
|
+
return await agent.arun(eval_run_input.input, stream=False)
|
|
66
132
|
|
|
67
133
|
model_id = agent.model.id if agent and agent.model else None
|
|
68
134
|
model_provider = agent.model.provider if agent and agent.model else None
|
|
69
135
|
|
|
70
136
|
elif team:
|
|
71
137
|
|
|
72
|
-
def run_component():
|
|
73
|
-
return team.
|
|
138
|
+
async def run_component(): # type: ignore
|
|
139
|
+
return await team.arun(eval_run_input.input, stream=False)
|
|
74
140
|
|
|
75
141
|
model_id = team.model.id if team and team.model else None
|
|
76
142
|
model_provider = team.model.provider if team and team.model else None
|
|
@@ -86,7 +152,8 @@ async def run_performance_eval(
|
|
|
86
152
|
model_id=model_id,
|
|
87
153
|
model_provider=model_provider,
|
|
88
154
|
)
|
|
89
|
-
|
|
155
|
+
|
|
156
|
+
result = await performance_eval.arun(print_results=False, print_summary=False)
|
|
90
157
|
if not result:
|
|
91
158
|
raise HTTPException(status_code=500, detail="Failed to run performance evaluation")
|
|
92
159
|
|
|
@@ -99,6 +166,7 @@ async def run_performance_eval(
|
|
|
99
166
|
model_provider=model_provider,
|
|
100
167
|
)
|
|
101
168
|
|
|
169
|
+
# Restore original model after eval
|
|
102
170
|
if default_model is not None:
|
|
103
171
|
if agent is not None:
|
|
104
172
|
agent.model = default_model
|
|
@@ -120,7 +188,7 @@ async def run_reliability_eval(
|
|
|
120
188
|
raise HTTPException(status_code=400, detail="expected_tool_calls is required for reliability evaluations")
|
|
121
189
|
|
|
122
190
|
if agent:
|
|
123
|
-
agent_response = agent.
|
|
191
|
+
agent_response = await agent.arun(eval_run_input.input, stream=False)
|
|
124
192
|
reliability_eval = ReliabilityEval(
|
|
125
193
|
db=db,
|
|
126
194
|
name=eval_run_input.name,
|
|
@@ -131,7 +199,7 @@ async def run_reliability_eval(
|
|
|
131
199
|
model_provider = agent.model.provider if agent and agent.model else None
|
|
132
200
|
|
|
133
201
|
elif team:
|
|
134
|
-
team_response = team.
|
|
202
|
+
team_response = await team.arun(eval_run_input.input, stream=False)
|
|
135
203
|
reliability_eval = ReliabilityEval(
|
|
136
204
|
db=db,
|
|
137
205
|
name=eval_run_input.name,
|
|
@@ -141,7 +209,7 @@ async def run_reliability_eval(
|
|
|
141
209
|
model_id = team.model.id if team and team.model else None
|
|
142
210
|
model_provider = team.model.provider if team and team.model else None
|
|
143
211
|
|
|
144
|
-
result = reliability_eval.
|
|
212
|
+
result = await reliability_eval.arun(print_results=False)
|
|
145
213
|
if not result:
|
|
146
214
|
raise HTTPException(status_code=500, detail="Failed to run reliability evaluation")
|
|
147
215
|
|
|
@@ -153,6 +221,7 @@ async def run_reliability_eval(
|
|
|
153
221
|
model_provider=model_provider,
|
|
154
222
|
)
|
|
155
223
|
|
|
224
|
+
# Restore original model after eval
|
|
156
225
|
if default_model is not None:
|
|
157
226
|
if agent is not None:
|
|
158
227
|
agent.model = default_model
|
agno/os/routers/health.py
CHANGED
|
@@ -8,7 +8,7 @@ from agno.os.schema import HealthResponse
|
|
|
8
8
|
def get_health_router(health_endpoint: str = "/health") -> APIRouter:
|
|
9
9
|
router = APIRouter(tags=["Health"])
|
|
10
10
|
|
|
11
|
-
|
|
11
|
+
started_at = datetime.now(timezone.utc)
|
|
12
12
|
|
|
13
13
|
@router.get(
|
|
14
14
|
health_endpoint,
|
|
@@ -20,12 +20,12 @@ def get_health_router(health_endpoint: str = "/health") -> APIRouter:
|
|
|
20
20
|
200: {
|
|
21
21
|
"description": "API is healthy and operational",
|
|
22
22
|
"content": {
|
|
23
|
-
"application/json": {"example": {"status": "ok", "instantiated_at":
|
|
23
|
+
"application/json": {"example": {"status": "ok", "instantiated_at": "2025-06-10T12:00:00Z"}}
|
|
24
24
|
},
|
|
25
25
|
}
|
|
26
26
|
},
|
|
27
27
|
)
|
|
28
28
|
async def health_check() -> HealthResponse:
|
|
29
|
-
return HealthResponse(status="ok", instantiated_at=
|
|
29
|
+
return HealthResponse(status="ok", instantiated_at=started_at)
|
|
30
30
|
|
|
31
31
|
return router
|