PyPI - agno - Versions diffs - 2.2.13__py3-none-any.whl → 2.4.3__py3-none-any.whl - Mend

agno 2.2.13-py3-none-any.whl → 2.4.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (383)

agno/agent/__init__.py +6 -0
agno/agent/agent.py +5252 -3145
agno/agent/remote.py +525 -0
agno/api/api.py +2 -0
agno/client/__init__.py +3 -0
agno/client/a2a/__init__.py +10 -0
agno/client/a2a/client.py +554 -0
agno/client/a2a/schemas.py +112 -0
agno/client/a2a/utils.py +369 -0
agno/client/os.py +2669 -0
agno/compression/__init__.py +3 -0
agno/compression/manager.py +247 -0
agno/culture/manager.py +2 -2
agno/db/base.py +927 -6
agno/db/dynamo/dynamo.py +788 -2
agno/db/dynamo/schemas.py +128 -0
agno/db/dynamo/utils.py +26 -3
agno/db/firestore/firestore.py +674 -50
agno/db/firestore/schemas.py +41 -0
agno/db/firestore/utils.py +25 -10
agno/db/gcs_json/gcs_json_db.py +506 -3
agno/db/gcs_json/utils.py +14 -2
agno/db/in_memory/in_memory_db.py +203 -4
agno/db/in_memory/utils.py +14 -2
agno/db/json/json_db.py +498 -2
agno/db/json/utils.py +14 -2
agno/db/migrations/manager.py +199 -0
agno/db/migrations/utils.py +19 -0
agno/db/migrations/v1_to_v2.py +54 -16
agno/db/migrations/versions/__init__.py +0 -0
agno/db/migrations/versions/v2_3_0.py +977 -0
agno/db/mongo/async_mongo.py +1013 -39
agno/db/mongo/mongo.py +684 -4
agno/db/mongo/schemas.py +48 -0
agno/db/mongo/utils.py +17 -0
agno/db/mysql/__init__.py +2 -1
agno/db/mysql/async_mysql.py +2958 -0
agno/db/mysql/mysql.py +722 -53
agno/db/mysql/schemas.py +77 -11
agno/db/mysql/utils.py +151 -8
agno/db/postgres/async_postgres.py +1254 -137
agno/db/postgres/postgres.py +2316 -93
agno/db/postgres/schemas.py +153 -21
agno/db/postgres/utils.py +22 -7
agno/db/redis/redis.py +531 -3
agno/db/redis/schemas.py +36 -0
agno/db/redis/utils.py +31 -15
agno/db/schemas/evals.py +1 -0
agno/db/schemas/memory.py +20 -9
agno/db/singlestore/schemas.py +70 -1
agno/db/singlestore/singlestore.py +737 -74
agno/db/singlestore/utils.py +13 -3
agno/db/sqlite/async_sqlite.py +1069 -89
agno/db/sqlite/schemas.py +133 -1
agno/db/sqlite/sqlite.py +2203 -165
agno/db/sqlite/utils.py +21 -11
agno/db/surrealdb/models.py +25 -0
agno/db/surrealdb/surrealdb.py +603 -1
agno/db/utils.py +60 -0
agno/eval/__init__.py +26 -3
agno/eval/accuracy.py +25 -12
agno/eval/agent_as_judge.py +871 -0
agno/eval/base.py +29 -0
agno/eval/performance.py +10 -4
agno/eval/reliability.py +22 -13
agno/eval/utils.py +2 -1
agno/exceptions.py +42 -0
agno/hooks/__init__.py +3 -0
agno/hooks/decorator.py +164 -0
agno/integrations/discord/client.py +13 -2
agno/knowledge/__init__.py +4 -0
agno/knowledge/chunking/code.py +90 -0
agno/knowledge/chunking/document.py +65 -4
agno/knowledge/chunking/fixed.py +4 -1
agno/knowledge/chunking/markdown.py +102 -11
agno/knowledge/chunking/recursive.py +2 -2
agno/knowledge/chunking/semantic.py +130 -48
agno/knowledge/chunking/strategy.py +18 -0
agno/knowledge/embedder/azure_openai.py +0 -1
agno/knowledge/embedder/google.py +1 -1
agno/knowledge/embedder/mistral.py +1 -1
agno/knowledge/embedder/nebius.py +1 -1
agno/knowledge/embedder/openai.py +16 -12
agno/knowledge/filesystem.py +412 -0
agno/knowledge/knowledge.py +4261 -1199
agno/knowledge/protocol.py +134 -0
agno/knowledge/reader/arxiv_reader.py +3 -2
agno/knowledge/reader/base.py +9 -7
agno/knowledge/reader/csv_reader.py +91 -42
agno/knowledge/reader/docx_reader.py +9 -10
agno/knowledge/reader/excel_reader.py +225 -0
agno/knowledge/reader/field_labeled_csv_reader.py +38 -48
agno/knowledge/reader/firecrawl_reader.py +3 -2
agno/knowledge/reader/json_reader.py +16 -22
agno/knowledge/reader/markdown_reader.py +15 -14
agno/knowledge/reader/pdf_reader.py +33 -28
agno/knowledge/reader/pptx_reader.py +9 -10
agno/knowledge/reader/reader_factory.py +135 -1
agno/knowledge/reader/s3_reader.py +8 -16
agno/knowledge/reader/tavily_reader.py +3 -3
agno/knowledge/reader/text_reader.py +15 -14
agno/knowledge/reader/utils/__init__.py +17 -0
agno/knowledge/reader/utils/spreadsheet.py +114 -0
agno/knowledge/reader/web_search_reader.py +8 -65
agno/knowledge/reader/website_reader.py +16 -13
agno/knowledge/reader/wikipedia_reader.py +36 -3
agno/knowledge/reader/youtube_reader.py +3 -2
agno/knowledge/remote_content/__init__.py +33 -0
agno/knowledge/remote_content/config.py +266 -0
agno/knowledge/remote_content/remote_content.py +105 -17
agno/knowledge/utils.py +76 -22
agno/learn/__init__.py +71 -0
agno/learn/config.py +463 -0
agno/learn/curate.py +185 -0
agno/learn/machine.py +725 -0
agno/learn/schemas.py +1114 -0
agno/learn/stores/__init__.py +38 -0
agno/learn/stores/decision_log.py +1156 -0
agno/learn/stores/entity_memory.py +3275 -0
agno/learn/stores/learned_knowledge.py +1583 -0
agno/learn/stores/protocol.py +117 -0
agno/learn/stores/session_context.py +1217 -0
agno/learn/stores/user_memory.py +1495 -0
agno/learn/stores/user_profile.py +1220 -0
agno/learn/utils.py +209 -0
agno/media.py +22 -6
agno/memory/__init__.py +14 -1
agno/memory/manager.py +223 -8
agno/memory/strategies/__init__.py +15 -0
agno/memory/strategies/base.py +66 -0
agno/memory/strategies/summarize.py +196 -0
agno/memory/strategies/types.py +37 -0
agno/models/aimlapi/aimlapi.py +17 -0
agno/models/anthropic/claude.py +434 -59
agno/models/aws/bedrock.py +121 -20
agno/models/aws/claude.py +131 -274
agno/models/azure/ai_foundry.py +10 -6
agno/models/azure/openai_chat.py +33 -10
agno/models/base.py +1162 -561
agno/models/cerebras/cerebras.py +120 -24
agno/models/cerebras/cerebras_openai.py +21 -2
agno/models/cohere/chat.py +65 -6
agno/models/cometapi/cometapi.py +18 -1
agno/models/dashscope/dashscope.py +2 -3
agno/models/deepinfra/deepinfra.py +18 -1
agno/models/deepseek/deepseek.py +69 -3
agno/models/fireworks/fireworks.py +18 -1
agno/models/google/gemini.py +959 -89
agno/models/google/utils.py +22 -0
agno/models/groq/groq.py +48 -18
agno/models/huggingface/huggingface.py +17 -6
agno/models/ibm/watsonx.py +16 -6
agno/models/internlm/internlm.py +18 -1
agno/models/langdb/langdb.py +13 -1
agno/models/litellm/chat.py +88 -9
agno/models/litellm/litellm_openai.py +18 -1
agno/models/message.py +24 -5
agno/models/meta/llama.py +40 -13
agno/models/meta/llama_openai.py +22 -21
agno/models/metrics.py +12 -0
agno/models/mistral/mistral.py +8 -4
agno/models/n1n/__init__.py +3 -0
agno/models/n1n/n1n.py +57 -0
agno/models/nebius/nebius.py +6 -7
agno/models/nvidia/nvidia.py +20 -3
agno/models/ollama/__init__.py +2 -0
agno/models/ollama/chat.py +17 -6
agno/models/ollama/responses.py +100 -0
agno/models/openai/__init__.py +2 -0
agno/models/openai/chat.py +117 -26
agno/models/openai/open_responses.py +46 -0
agno/models/openai/responses.py +110 -32
agno/models/openrouter/__init__.py +2 -0
agno/models/openrouter/openrouter.py +67 -2
agno/models/openrouter/responses.py +146 -0
agno/models/perplexity/perplexity.py +19 -1
agno/models/portkey/portkey.py +7 -6
agno/models/requesty/requesty.py +19 -2
agno/models/response.py +20 -2
agno/models/sambanova/sambanova.py +20 -3
agno/models/siliconflow/siliconflow.py +19 -2
agno/models/together/together.py +20 -3
agno/models/vercel/v0.py +20 -3
agno/models/vertexai/claude.py +124 -4
agno/models/vllm/vllm.py +19 -14
agno/models/xai/xai.py +19 -2
agno/os/app.py +467 -137
agno/os/auth.py +253 -5
agno/os/config.py +22 -0
agno/os/interfaces/a2a/a2a.py +7 -6
agno/os/interfaces/a2a/router.py +635 -26
agno/os/interfaces/a2a/utils.py +32 -33
agno/os/interfaces/agui/agui.py +5 -3
agno/os/interfaces/agui/router.py +26 -16
agno/os/interfaces/agui/utils.py +97 -57
agno/os/interfaces/base.py +7 -7
agno/os/interfaces/slack/router.py +16 -7
agno/os/interfaces/slack/slack.py +7 -7
agno/os/interfaces/whatsapp/router.py +35 -7
agno/os/interfaces/whatsapp/security.py +3 -1
agno/os/interfaces/whatsapp/whatsapp.py +11 -8
agno/os/managers.py +326 -0
agno/os/mcp.py +652 -79
agno/os/middleware/__init__.py +4 -0
agno/os/middleware/jwt.py +718 -115
agno/os/middleware/trailing_slash.py +27 -0
agno/os/router.py +105 -1558
agno/os/routers/agents/__init__.py +3 -0
agno/os/routers/agents/router.py +655 -0
agno/os/routers/agents/schema.py +288 -0
agno/os/routers/components/__init__.py +3 -0
agno/os/routers/components/components.py +475 -0
agno/os/routers/database.py +155 -0
agno/os/routers/evals/evals.py +111 -18
agno/os/routers/evals/schemas.py +38 -5
agno/os/routers/evals/utils.py +80 -11
agno/os/routers/health.py +3 -3
agno/os/routers/knowledge/knowledge.py +284 -35
agno/os/routers/knowledge/schemas.py +14 -2
agno/os/routers/memory/memory.py +274 -11
agno/os/routers/memory/schemas.py +44 -3
agno/os/routers/metrics/metrics.py +30 -15
agno/os/routers/metrics/schemas.py +10 -6
agno/os/routers/registry/__init__.py +3 -0
agno/os/routers/registry/registry.py +337 -0
agno/os/routers/session/session.py +143 -14
agno/os/routers/teams/__init__.py +3 -0
agno/os/routers/teams/router.py +550 -0
agno/os/routers/teams/schema.py +280 -0
agno/os/routers/traces/__init__.py +3 -0
agno/os/routers/traces/schemas.py +414 -0
agno/os/routers/traces/traces.py +549 -0
agno/os/routers/workflows/__init__.py +3 -0
agno/os/routers/workflows/router.py +757 -0
agno/os/routers/workflows/schema.py +139 -0
agno/os/schema.py +157 -584
agno/os/scopes.py +469 -0
agno/os/settings.py +3 -0
agno/os/utils.py +574 -185
agno/reasoning/anthropic.py +85 -1
agno/reasoning/azure_ai_foundry.py +93 -1
agno/reasoning/deepseek.py +102 -2
agno/reasoning/default.py +6 -7
agno/reasoning/gemini.py +87 -3
agno/reasoning/groq.py +109 -2
agno/reasoning/helpers.py +6 -7
agno/reasoning/manager.py +1238 -0
agno/reasoning/ollama.py +93 -1
agno/reasoning/openai.py +115 -1
agno/reasoning/vertexai.py +85 -1
agno/registry/__init__.py +3 -0
agno/registry/registry.py +68 -0
agno/remote/__init__.py +3 -0
agno/remote/base.py +581 -0
agno/run/__init__.py +2 -4
agno/run/agent.py +134 -19
agno/run/base.py +49 -1
agno/run/cancel.py +65 -52
agno/run/cancellation_management/__init__.py +9 -0
agno/run/cancellation_management/base.py +78 -0
agno/run/cancellation_management/in_memory_cancellation_manager.py +100 -0
agno/run/cancellation_management/redis_cancellation_manager.py +236 -0
agno/run/requirement.py +181 -0
agno/run/team.py +111 -19
agno/run/workflow.py +2 -1
agno/session/agent.py +57 -92
agno/session/summary.py +1 -1
agno/session/team.py +62 -115
agno/session/workflow.py +353 -57
agno/skills/__init__.py +17 -0
agno/skills/agent_skills.py +377 -0
agno/skills/errors.py +32 -0
agno/skills/loaders/__init__.py +4 -0
agno/skills/loaders/base.py +27 -0
agno/skills/loaders/local.py +216 -0
agno/skills/skill.py +65 -0
agno/skills/utils.py +107 -0
agno/skills/validator.py +277 -0
agno/table.py +10 -0
agno/team/__init__.py +5 -1
agno/team/remote.py +447 -0
agno/team/team.py +3769 -2202
agno/tools/brandfetch.py +27 -18
agno/tools/browserbase.py +225 -16
agno/tools/crawl4ai.py +3 -0
agno/tools/duckduckgo.py +25 -71
agno/tools/exa.py +0 -21
agno/tools/file.py +14 -13
agno/tools/file_generation.py +12 -6
agno/tools/firecrawl.py +15 -7
agno/tools/function.py +94 -113
agno/tools/google_bigquery.py +11 -2
agno/tools/google_drive.py +4 -3
agno/tools/knowledge.py +9 -4
agno/tools/mcp/mcp.py +301 -18
agno/tools/mcp/multi_mcp.py +269 -14
agno/tools/mem0.py +11 -10
agno/tools/memory.py +47 -46
agno/tools/mlx_transcribe.py +10 -7
agno/tools/models/nebius.py +5 -5
agno/tools/models_labs.py +20 -10
agno/tools/nano_banana.py +151 -0
agno/tools/parallel.py +0 -7
agno/tools/postgres.py +76 -36
agno/tools/python.py +14 -6
agno/tools/reasoning.py +30 -23
agno/tools/redshift.py +406 -0
agno/tools/shopify.py +1519 -0
agno/tools/spotify.py +919 -0
agno/tools/tavily.py +4 -1
agno/tools/toolkit.py +253 -18
agno/tools/websearch.py +93 -0
agno/tools/website.py +1 -1
agno/tools/wikipedia.py +1 -1
agno/tools/workflow.py +56 -48
agno/tools/yfinance.py +12 -11
agno/tracing/__init__.py +12 -0
agno/tracing/exporter.py +161 -0
agno/tracing/schemas.py +276 -0
agno/tracing/setup.py +112 -0
agno/utils/agent.py +251 -10
agno/utils/cryptography.py +22 -0
agno/utils/dttm.py +33 -0
agno/utils/events.py +264 -7
agno/utils/hooks.py +111 -3
agno/utils/http.py +161 -2
agno/utils/mcp.py +49 -8
agno/utils/media.py +22 -1
agno/utils/models/ai_foundry.py +9 -2
agno/utils/models/claude.py +20 -5
agno/utils/models/cohere.py +9 -2
agno/utils/models/llama.py +9 -2
agno/utils/models/mistral.py +4 -2
agno/utils/os.py +0 -0
agno/utils/print_response/agent.py +99 -16
agno/utils/print_response/team.py +223 -24
agno/utils/print_response/workflow.py +0 -2
agno/utils/prompts.py +8 -6
agno/utils/remote.py +23 -0
agno/utils/response.py +1 -13
agno/utils/string.py +91 -2
agno/utils/team.py +62 -12
agno/utils/tokens.py +657 -0
agno/vectordb/base.py +15 -2
agno/vectordb/cassandra/cassandra.py +1 -1
agno/vectordb/chroma/__init__.py +2 -1
agno/vectordb/chroma/chromadb.py +468 -23
agno/vectordb/clickhouse/clickhousedb.py +1 -1
agno/vectordb/couchbase/couchbase.py +6 -2
agno/vectordb/lancedb/lance_db.py +7 -38
agno/vectordb/lightrag/lightrag.py +7 -6
agno/vectordb/milvus/milvus.py +118 -84
agno/vectordb/mongodb/__init__.py +2 -1
agno/vectordb/mongodb/mongodb.py +14 -31
agno/vectordb/pgvector/pgvector.py +120 -66
agno/vectordb/pineconedb/pineconedb.py +2 -19
agno/vectordb/qdrant/__init__.py +2 -1
agno/vectordb/qdrant/qdrant.py +33 -56
agno/vectordb/redis/__init__.py +2 -1
agno/vectordb/redis/redisdb.py +19 -31
agno/vectordb/singlestore/singlestore.py +17 -9
agno/vectordb/surrealdb/surrealdb.py +2 -38
agno/vectordb/weaviate/__init__.py +2 -1
agno/vectordb/weaviate/weaviate.py +7 -3
agno/workflow/__init__.py +5 -1
agno/workflow/agent.py +2 -2
agno/workflow/condition.py +12 -10
agno/workflow/loop.py +28 -9
agno/workflow/parallel.py +21 -13
agno/workflow/remote.py +362 -0
agno/workflow/router.py +12 -9
agno/workflow/step.py +261 -36
agno/workflow/steps.py +12 -8
agno/workflow/types.py +40 -77
agno/workflow/workflow.py +939 -213
{agno-2.2.13.dist-info → agno-2.4.3.dist-info}/METADATA +134 -181
agno-2.4.3.dist-info/RECORD +677 -0
{agno-2.2.13.dist-info → agno-2.4.3.dist-info}/WHEEL +1 -1
agno/tools/googlesearch.py +0 -98
agno/tools/memori.py +0 -339
agno-2.2.13.dist-info/RECORD +0 -575
{agno-2.2.13.dist-info → agno-2.4.3.dist-info}/licenses/LICENSE +0 -0
{agno-2.2.13.dist-info → agno-2.4.3.dist-info}/top_level.txt +0 -0

agno/os/routers/evals/evals.py CHANGED Viewed

@@ -2,20 +2,25 @@ import logging
 from copy import deepcopy
 from typing import List, Optional, Union, cast
-from fastapi import APIRouter, Depends, HTTPException, Query
+from fastapi import APIRouter, Depends, HTTPException, Query, Request
-from agno.agent.agent import Agent
+from agno.agent import Agent, RemoteAgent
 from agno.db.base import AsyncBaseDb, BaseDb
 from agno.db.schemas.evals import EvalFilterType, EvalType
 from agno.models.utils import get_model
-from agno.os.auth import get_authentication_dependency
+from agno.os.auth import get_auth_token_from_request, get_authentication_dependency
 from agno.os.routers.evals.schemas import (
     DeleteEvalRunsRequest,
     EvalRunInput,
     EvalSchema,
     UpdateEvalRunRequest,
 )
-from agno.os.routers.evals.utils import run_accuracy_eval, run_performance_eval, run_reliability_eval
+from agno.os.routers.evals.utils import (
+    run_accuracy_eval,
+    run_agent_as_judge_eval,
+    run_performance_eval,
+    run_reliability_eval,
+)
 from agno.os.schema import (
     BadRequestResponse,
     InternalServerErrorResponse,
@@ -28,15 +33,17 @@ from agno.os.schema import (
 )
 from agno.os.settings import AgnoAPISettings
 from agno.os.utils import get_agent_by_id, get_db, get_team_by_id
-from agno.team.team import Team
+from agno.remote.base import RemoteDb
+from agno.team import RemoteTeam, Team
+from agno.utils.log import log_warning
 logger = logging.getLogger(__name__)
 def get_eval_router(
-    dbs: dict[str, list[Union[BaseDb, AsyncBaseDb]]],
-    agents: Optional[List[Agent]] = None,
-    teams: Optional[List[Team]] = None,
+    dbs: dict[str, list[Union[BaseDb, AsyncBaseDb, RemoteDb]]],
+    agents: Optional[List[Union[Agent, RemoteAgent]]] = None,
+    teams: Optional[List[Union[Team, RemoteTeam]]] = None,
     settings: AgnoAPISettings = AgnoAPISettings(),
 ) -> APIRouter:
     """Create eval router with comprehensive OpenAPI documentation for agent/team evaluation endpoints."""
@@ -56,9 +63,9 @@ def get_eval_router(
 def attach_routes(
     router: APIRouter,
-    dbs: dict[str, list[Union[BaseDb, AsyncBaseDb]]],
-    agents: Optional[List[Agent]] = None,
-    teams: Optional[List[Team]] = None,
+    dbs: dict[str, list[Union[BaseDb, AsyncBaseDb, RemoteDb]]],
+    agents: Optional[List[Union[Agent, RemoteAgent]]] = None,
+    teams: Optional[List[Union[Team, RemoteTeam]]] = None,
 ) -> APIRouter:
     @router.get(
         "/eval-runs",
@@ -104,14 +111,15 @@ def attach_routes(
         },
     )
     async def get_eval_runs(
+        request: Request,
         agent_id: Optional[str] = Query(default=None, description="Agent ID"),
         team_id: Optional[str] = Query(default=None, description="Team ID"),
         workflow_id: Optional[str] = Query(default=None, description="Workflow ID"),
         model_id: Optional[str] = Query(default=None, description="Model ID"),
         filter_type: Optional[EvalFilterType] = Query(default=None, description="Filter type", alias="type"),
         eval_types: Optional[List[EvalType]] = Depends(parse_eval_types_filter),
-        limit: Optional[int] = Query(default=20, description="Number of eval runs to return"),
-        page: Optional[int] = Query(default=1, description="Page number"),
+        limit: Optional[int] = Query(default=20, description="Number of eval runs to return", ge=1),
+        page: Optional[int] = Query(default=1, description="Page number", ge=0),
         sort_by: Optional[str] = Query(default="created_at", description="Field to sort by"),
         sort_order: Optional[SortOrder] = Query(default="desc", description="Sort order (asc or desc)"),
         db_id: Optional[str] = Query(default=None, description="The ID of the database to use"),
@@ -119,6 +127,23 @@ def attach_routes(
     ) -> PaginatedResponse[EvalSchema]:
         db = await get_db(dbs, db_id, table)
+        if isinstance(db, RemoteDb):
+            auth_token = get_auth_token_from_request(request)
+            headers = {"Authorization": f"Bearer {auth_token}"} if auth_token else None
+            return await db.get_eval_runs(
+                limit=limit,
+                page=page,
+                sort_by=sort_by,
+                sort_order=sort_order.value if sort_order else None,
+                agent_id=agent_id,
+                team_id=team_id,
+                workflow_id=workflow_id,
+                model_id=model_id,
+                eval_types=eval_types,
+                filter_type=filter_type.value if filter_type else None,
+                headers=headers,
+            )
         if isinstance(db, AsyncBaseDb):
             db = cast(AsyncBaseDb, db)
             eval_runs, total_count = await db.get_eval_runs(
@@ -197,11 +222,17 @@ def attach_routes(
         },
     )
     async def get_eval_run(
+        request: Request,
         eval_run_id: str,
         db_id: Optional[str] = Query(default=None, description="The ID of the database to use"),
         table: Optional[str] = Query(default=None, description="Table to query eval run from"),
     ) -> EvalSchema:
         db = await get_db(dbs, db_id, table)
+        if isinstance(db, RemoteDb):
+            auth_token = get_auth_token_from_request(request)
+            headers = {"Authorization": f"Bearer {auth_token}"} if auth_token else None
+            return await db.get_eval_run(eval_run_id=eval_run_id, db_id=db_id, table=table, headers=headers)
         if isinstance(db, AsyncBaseDb):
             db = cast(AsyncBaseDb, db)
             eval_run = await db.get_eval_run(eval_run_id=eval_run_id, deserialize=False)
@@ -224,12 +255,20 @@ def attach_routes(
         },
     )
     async def delete_eval_runs(
+        http_request: Request,
         request: DeleteEvalRunsRequest,
         db_id: Optional[str] = Query(default=None, description="Database ID to use for deletion"),
         table: Optional[str] = Query(default=None, description="Table to use for deletion"),
     ) -> None:
         try:
             db = await get_db(dbs, db_id, table)
+            if isinstance(db, RemoteDb):
+                auth_token = get_auth_token_from_request(http_request)
+                headers = {"Authorization": f"Bearer {auth_token}"} if auth_token else None
+                return await db.delete_eval_runs(
+                    eval_run_ids=request.eval_run_ids, db_id=db_id, table=table, headers=headers
+                )
             if isinstance(db, AsyncBaseDb):
                 db = cast(AsyncBaseDb, db)
                 await db.delete_eval_runs(eval_run_ids=request.eval_run_ids)
@@ -277,6 +316,7 @@ def attach_routes(
         },
     )
     async def update_eval_run(
+        http_request: Request,
         eval_run_id: str,
         request: UpdateEvalRunRequest,
         db_id: Optional[str] = Query(default=None, description="The ID of the database to use"),
@@ -284,6 +324,13 @@ def attach_routes(
     ) -> EvalSchema:
         try:
             db = await get_db(dbs, db_id, table)
+            if isinstance(db, RemoteDb):
+                auth_token = get_auth_token_from_request(http_request)
+                headers = {"Authorization": f"Bearer {auth_token}"} if auth_token else None
+                return await db.update_eval_run(
+                    eval_run_id=eval_run_id, name=request.name, db_id=db_id, table=table, headers=headers
+                )
             if isinstance(db, AsyncBaseDb):
                 db = cast(AsyncBaseDb, db)
                 eval_run = await db.rename_eval_run(eval_run_id=eval_run_id, name=request.name, deserialize=False)
@@ -304,7 +351,7 @@ def attach_routes(
         operation_id="run_eval",
         summary="Execute Evaluation",
         description=(
-            "Run evaluation tests on agents or teams. Supports accuracy, performance, and reliability evaluations. "
+            "Run evaluation tests on agents or teams. Supports accuracy, agent-as-judge, performance, and reliability evaluations. "
             "Requires either agent_id or team_id, but not both."
         ),
         responses={
@@ -338,11 +385,29 @@ def attach_routes(
         },
     )
     async def run_eval(
+        request: Request,
         eval_run_input: EvalRunInput,
         db_id: Optional[str] = Query(default=None, description="Database ID to use for evaluation"),
         table: Optional[str] = Query(default=None, description="Table to use for evaluation"),
     ) -> Optional[EvalSchema]:
         db = await get_db(dbs, db_id, table)
+        if isinstance(db, RemoteDb):
+            auth_token = get_auth_token_from_request(request)
+            headers = {"Authorization": f"Bearer {auth_token}"} if auth_token else None
+            return await db.create_eval_run(
+                eval_type=eval_run_input.eval_type,
+                input_text=eval_run_input.input,
+                agent_id=eval_run_input.agent_id,
+                team_id=eval_run_input.team_id,
+                model_id=eval_run_input.model_id,
+                model_provider=eval_run_input.model_provider,
+                expected_output=eval_run_input.expected_output,
+                expected_tool_calls=eval_run_input.expected_tool_calls,
+                num_iterations=eval_run_input.num_iterations,
+                db_id=db_id,
+                table=table,
+                headers=headers,
+            )
         if eval_run_input.agent_id and eval_run_input.team_id:
             raise HTTPException(status_code=400, detail="Only one of agent_id or team_id must be provided")
@@ -351,6 +416,9 @@ def attach_routes(
             agent = get_agent_by_id(agent_id=eval_run_input.agent_id, agents=agents)
             if not agent:
                 raise HTTPException(status_code=404, detail=f"Agent with id '{eval_run_input.agent_id}' not found")
+            if isinstance(agent, RemoteAgent):
+                log_warning("Evaluation against remote agents are not supported yet")
+                return None
             default_model = None
             if (
@@ -373,7 +441,11 @@ def attach_routes(
             team = get_team_by_id(team_id=eval_run_input.team_id, teams=teams)
             if not team:
                 raise HTTPException(status_code=404, detail=f"Team with id '{eval_run_input.team_id}' not found")
+            if isinstance(team, RemoteTeam):
+                log_warning("Evaluation against remote teams are not supported yet")
+                return None
+            # If model_id/model_provider specified, override team's model temporarily
             default_model = None
             if (
                 hasattr(team, "model")
@@ -381,13 +453,13 @@ def attach_routes(
                 and eval_run_input.model_id is not None
                 and eval_run_input.model_provider is not None
             ):
-                default_model = deepcopy(team.model)
+                default_model = deepcopy(team.model)  # Save original
                 if eval_run_input.model_id != team.model.id or eval_run_input.model_provider != team.model.provider:
                     model_provider = eval_run_input.model_provider.lower()
                     model_id = eval_run_input.model_id.lower()
                     model_string = f"{model_provider}:{model_id}"
                     model = get_model(model_string)
-                    team.model = model
+                    team.model = model  # Override temporarily
             agent = None
@@ -396,16 +468,37 @@ def attach_routes(
         # Run the evaluation
         if eval_run_input.eval_type == EvalType.ACCURACY:
+            if isinstance(agent, RemoteAgent) or isinstance(team, RemoteTeam):
+                # TODO: Handle remote evaluation
+                log_warning("Evaluation against remote agents are not supported yet")
+                return None
             return await run_accuracy_eval(
                 eval_run_input=eval_run_input, db=db, agent=agent, team=team, default_model=default_model
             )
+        elif eval_run_input.eval_type == EvalType.AGENT_AS_JUDGE:
+            return await run_agent_as_judge_eval(
+                eval_run_input=eval_run_input,
+                db=db,
+                agent=agent,
+                team=team,
+                default_model=default_model,  # type: ignore
+            )
         elif eval_run_input.eval_type == EvalType.PERFORMANCE:
+            if isinstance(agent, RemoteAgent) or isinstance(team, RemoteTeam):
+                # TODO: Handle remote evaluation
+                log_warning("Evaluation against remote agents are not supported yet")
+                return None
             return await run_performance_eval(
                 eval_run_input=eval_run_input, db=db, agent=agent, team=team, default_model=default_model
             )
         else:
+            if isinstance(agent, RemoteAgent) or isinstance(team, RemoteTeam):
+                # TODO: Handle remote evaluation
+                log_warning("Evaluation against remote agents are not supported yet")
+                return None
             return await run_reliability_eval(
                 eval_run_input=eval_run_input, db=db, agent=agent, team=team, default_model=default_model
             )
@@ -416,8 +509,8 @@ def attach_routes(
 def parse_eval_types_filter(
     eval_types: Optional[str] = Query(
         default=None,
-        description="Comma-separated eval types (accuracy,performance,reliability)",
-        examples=["accuracy,performance"],
+        description="Comma-separated eval types (accuracy,agent_as_judge,performance,reliability)",
+        examples=["accuracy,agent_as_judge,performance,reliability"],
     ),
 ) -> Optional[List[EvalType]]:
     """Parse comma-separated eval types into EvalType enums for filtering evaluation runs."""

agno/os/routers/evals/schemas.py CHANGED Viewed

@@ -1,14 +1,16 @@
 from dataclasses import asdict
-from datetime import datetime, timezone
-from typing import Any, Dict, List, Optional
+from datetime import datetime
+from typing import Any, Dict, List, Literal, Optional
 from pydantic import BaseModel, Field
 from agno.db.schemas.evals import EvalType
-from agno.eval import AccuracyResult, PerformanceResult, ReliabilityResult
+from agno.eval import AccuracyResult, AgentAsJudgeResult, PerformanceResult, ReliabilityResult
 from agno.eval.accuracy import AccuracyEval
+from agno.eval.agent_as_judge import AgentAsJudgeEval
 from agno.eval.performance import PerformanceEval
 from agno.eval.reliability import ReliabilityEval
+from agno.os.utils import to_utc_datetime
 class EvalRunInput(BaseModel):
@@ -27,6 +29,15 @@ class EvalRunInput(BaseModel):
     # Accuracy eval specific fields
     expected_output: Optional[str] = Field(None, description="Expected output for accuracy evaluation")
+    # AgentAsJudge eval specific fields
+    criteria: Optional[str] = Field(None, description="Evaluation criteria for agent-as-judge evaluation")
+    scoring_strategy: Optional[Literal["numeric", "binary"]] = Field(
+        "binary", description="Scoring strategy: 'numeric' (1-10 with threshold) or 'binary' (PASS/FAIL)"
+    )
+    threshold: Optional[int] = Field(
+        7, description="Score threshold for pass/fail (1-10), only used with numeric scoring", ge=1, le=10
+    )
     # Performance eval specific fields
     warmup_runs: int = Field(0, description="Number of warmup runs before measuring performance", ge=0, le=10)
@@ -64,8 +75,8 @@ class EvalSchema(BaseModel):
             eval_type=eval_run["eval_type"],
             eval_data=eval_run["eval_data"],
             eval_input=eval_run.get("eval_input"),
-            created_at=datetime.fromtimestamp(eval_run["created_at"], tz=timezone.utc),
-            updated_at=datetime.fromtimestamp(eval_run["updated_at"], tz=timezone.utc),
+            created_at=to_utc_datetime(eval_run.get("created_at")),
+            updated_at=to_utc_datetime(eval_run.get("updated_at")),
         )
     @classmethod
@@ -89,6 +100,28 @@ class EvalSchema(BaseModel):
             eval_data=asdict(result),
         )
+    @classmethod
+    def from_agent_as_judge_eval(
+        cls,
+        agent_as_judge_eval: AgentAsJudgeEval,
+        result: AgentAsJudgeResult,
+        model_id: Optional[str] = None,
+        model_provider: Optional[str] = None,
+        agent_id: Optional[str] = None,
+        team_id: Optional[str] = None,
+    ) -> "EvalSchema":
+        return cls(
+            id=result.run_id,
+            name=agent_as_judge_eval.name,
+            agent_id=agent_id,
+            team_id=team_id,
+            workflow_id=None,
+            model_id=model_id,
+            model_provider=model_provider,
+            eval_type=EvalType.AGENT_AS_JUDGE,
+            eval_data=asdict(result),
+        )
     @classmethod
     def from_performance_eval(
         cls,

agno/os/routers/evals/utils.py CHANGED Viewed

@@ -2,14 +2,15 @@ from typing import Optional, Union
 from fastapi import HTTPException
-from agno.agent.agent import Agent
+from agno.agent import Agent, RemoteAgent
 from agno.db.base import AsyncBaseDb, BaseDb
 from agno.eval.accuracy import AccuracyEval
+from agno.eval.agent_as_judge import AgentAsJudgeEval
 from agno.eval.performance import PerformanceEval
 from agno.eval.reliability import ReliabilityEval
 from agno.models.base import Model
 from agno.os.routers.evals.schemas import EvalRunInput, EvalSchema
-from agno.team.team import Team
+from agno.team import RemoteTeam, Team
 async def run_accuracy_eval(
@@ -36,12 +37,13 @@ async def run_accuracy_eval(
         model=default_model,
     )
-    result = accuracy_eval.run(print_results=False, print_summary=False)
+    result = await accuracy_eval.arun(print_results=False, print_summary=False)
     if not result:
         raise HTTPException(status_code=500, detail="Failed to run accuracy evaluation")
     eval_run = EvalSchema.from_accuracy_eval(accuracy_eval=accuracy_eval, result=result)
+    # Restore original model after eval
     if default_model is not None:
         if agent is not None:
             agent.model = default_model
@@ -51,6 +53,70 @@ async def run_accuracy_eval(
     return eval_run
+async def run_agent_as_judge_eval(
+    eval_run_input: EvalRunInput,
+    db: Union[BaseDb, AsyncBaseDb],
+    agent: Optional[Union[Agent, RemoteAgent]] = None,
+    team: Optional[Union[Team, RemoteTeam]] = None,
+    default_model: Optional[Model] = None,
+) -> EvalSchema:
+    """Run an AgentAsJudge evaluation for the given agent or team"""
+    if not eval_run_input.criteria:
+        raise HTTPException(status_code=400, detail="criteria is required for agent-as-judge evaluation")
+    # Run agent/team to get output
+    if agent:
+        agent_response = await agent.arun(eval_run_input.input, stream=False)
+        output = str(agent_response.content) if agent_response.content else ""
+        agent_id = agent.id
+        team_id = None
+    elif team:
+        team_response = await team.arun(eval_run_input.input, stream=False)
+        output = str(team_response.content) if team_response.content else ""
+        agent_id = None
+        team_id = team.id
+    else:
+        raise HTTPException(status_code=400, detail="Either agent_id or team_id must be provided")
+    agent_as_judge_eval = AgentAsJudgeEval(
+        db=db,
+        criteria=eval_run_input.criteria,
+        scoring_strategy=eval_run_input.scoring_strategy or "binary",
+        threshold=eval_run_input.threshold or 7,
+        additional_guidelines=eval_run_input.additional_guidelines,
+        name=eval_run_input.name,
+        model=default_model,
+    )
+    result = await agent_as_judge_eval.arun(
+        input=eval_run_input.input, output=output, print_results=False, print_summary=False
+    )
+    if not result:
+        raise HTTPException(status_code=500, detail="Failed to run agent as judge evaluation")
+    # Use evaluator's model
+    eval_model_id = agent_as_judge_eval.model.id if agent_as_judge_eval.model is not None else None
+    eval_model_provider = agent_as_judge_eval.model.provider if agent_as_judge_eval.model is not None else None
+    eval_run = EvalSchema.from_agent_as_judge_eval(
+        agent_as_judge_eval=agent_as_judge_eval,
+        result=result,
+        agent_id=agent_id,
+        team_id=team_id,
+        model_id=eval_model_id,
+        model_provider=eval_model_provider,
+    )
+    # Restore original model after eval
+    if default_model is not None:
+        if agent is not None and isinstance(agent, Agent):
+            agent.model = default_model
+        elif team is not None and isinstance(team, Team):
+            team.model = default_model
+    return eval_run
 async def run_performance_eval(
     eval_run_input: EvalRunInput,
     db: Union[BaseDb, AsyncBaseDb],
@@ -61,16 +127,16 @@ async def run_performance_eval(
     """Run a performance evaluation for the given agent or team"""
     if agent:
-        def run_component():  # type: ignore
-            return agent.run(eval_run_input.input)
+        async def run_component():  # type: ignore
+            return await agent.arun(eval_run_input.input, stream=False)
         model_id = agent.model.id if agent and agent.model else None
         model_provider = agent.model.provider if agent and agent.model else None
     elif team:
-        def run_component():
-            return team.run(eval_run_input.input)
+        async def run_component():  # type: ignore
+            return await team.arun(eval_run_input.input, stream=False)
         model_id = team.model.id if team and team.model else None
         model_provider = team.model.provider if team and team.model else None
@@ -86,7 +152,8 @@ async def run_performance_eval(
         model_id=model_id,
         model_provider=model_provider,
     )
-    result = performance_eval.run(print_results=False, print_summary=False)
+    result = await performance_eval.arun(print_results=False, print_summary=False)
     if not result:
         raise HTTPException(status_code=500, detail="Failed to run performance evaluation")
@@ -99,6 +166,7 @@ async def run_performance_eval(
         model_provider=model_provider,
     )
+    # Restore original model after eval
     if default_model is not None:
         if agent is not None:
             agent.model = default_model
@@ -120,7 +188,7 @@ async def run_reliability_eval(
         raise HTTPException(status_code=400, detail="expected_tool_calls is required for reliability evaluations")
     if agent:
-        agent_response = agent.run(eval_run_input.input)
+        agent_response = await agent.arun(eval_run_input.input, stream=False)
         reliability_eval = ReliabilityEval(
             db=db,
             name=eval_run_input.name,
@@ -131,7 +199,7 @@ async def run_reliability_eval(
         model_provider = agent.model.provider if agent and agent.model else None
     elif team:
-        team_response = team.run(eval_run_input.input)
+        team_response = await team.arun(eval_run_input.input, stream=False)
         reliability_eval = ReliabilityEval(
             db=db,
             name=eval_run_input.name,
@@ -141,7 +209,7 @@ async def run_reliability_eval(
         model_id = team.model.id if team and team.model else None
         model_provider = team.model.provider if team and team.model else None
-    result = reliability_eval.run(print_results=False)
+    result = await reliability_eval.arun(print_results=False)
     if not result:
         raise HTTPException(status_code=500, detail="Failed to run reliability evaluation")
@@ -153,6 +221,7 @@ async def run_reliability_eval(
         model_provider=model_provider,
     )
+    # Restore original model after eval
     if default_model is not None:
         if agent is not None:
             agent.model = default_model

agno/os/routers/health.py CHANGED Viewed

@@ -8,7 +8,7 @@ from agno.os.schema import HealthResponse
 def get_health_router(health_endpoint: str = "/health") -> APIRouter:
     router = APIRouter(tags=["Health"])
-    started_time_stamp = datetime.now(timezone.utc).timestamp()
+    started_at = datetime.now(timezone.utc)
     @router.get(
         health_endpoint,
@@ -20,12 +20,12 @@ def get_health_router(health_endpoint: str = "/health") -> APIRouter:
             200: {
                 "description": "API is healthy and operational",
                 "content": {
-                    "application/json": {"example": {"status": "ok", "instantiated_at": str(started_time_stamp)}}
+                    "application/json": {"example": {"status": "ok", "instantiated_at": "2025-06-10T12:00:00Z"}}
                 },
             }
         },
     )
     async def health_check() -> HealthResponse:
-        return HealthResponse(status="ok", instantiated_at=str(started_time_stamp))
+        return HealthResponse(status="ok", instantiated_at=started_at)
     return router

agno 2.2.13__py3-none-any.whl → 2.4.3__py3-none-any.whl

agno 2.2.13py3-none-any.whl → 2.4.3py3-none-any.whl