agno 2.1.2__py3-none-any.whl → 2.3.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/agent/agent.py +5540 -2273
- agno/api/api.py +2 -0
- agno/api/os.py +1 -1
- agno/compression/__init__.py +3 -0
- agno/compression/manager.py +247 -0
- agno/culture/__init__.py +3 -0
- agno/culture/manager.py +956 -0
- agno/db/async_postgres/__init__.py +3 -0
- agno/db/base.py +689 -6
- agno/db/dynamo/dynamo.py +933 -37
- agno/db/dynamo/schemas.py +174 -10
- agno/db/dynamo/utils.py +63 -4
- agno/db/firestore/firestore.py +831 -9
- agno/db/firestore/schemas.py +51 -0
- agno/db/firestore/utils.py +102 -4
- agno/db/gcs_json/gcs_json_db.py +660 -12
- agno/db/gcs_json/utils.py +60 -26
- agno/db/in_memory/in_memory_db.py +287 -14
- agno/db/in_memory/utils.py +60 -2
- agno/db/json/json_db.py +590 -14
- agno/db/json/utils.py +60 -26
- agno/db/migrations/manager.py +199 -0
- agno/db/migrations/v1_to_v2.py +43 -13
- agno/db/migrations/versions/__init__.py +0 -0
- agno/db/migrations/versions/v2_3_0.py +938 -0
- agno/db/mongo/__init__.py +15 -1
- agno/db/mongo/async_mongo.py +2760 -0
- agno/db/mongo/mongo.py +879 -11
- agno/db/mongo/schemas.py +42 -0
- agno/db/mongo/utils.py +80 -8
- agno/db/mysql/__init__.py +2 -1
- agno/db/mysql/async_mysql.py +2912 -0
- agno/db/mysql/mysql.py +946 -68
- agno/db/mysql/schemas.py +72 -10
- agno/db/mysql/utils.py +198 -7
- agno/db/postgres/__init__.py +2 -1
- agno/db/postgres/async_postgres.py +2579 -0
- agno/db/postgres/postgres.py +942 -57
- agno/db/postgres/schemas.py +81 -18
- agno/db/postgres/utils.py +164 -2
- agno/db/redis/redis.py +671 -7
- agno/db/redis/schemas.py +50 -0
- agno/db/redis/utils.py +65 -7
- agno/db/schemas/__init__.py +2 -1
- agno/db/schemas/culture.py +120 -0
- agno/db/schemas/evals.py +1 -0
- agno/db/schemas/memory.py +17 -2
- agno/db/singlestore/schemas.py +63 -0
- agno/db/singlestore/singlestore.py +949 -83
- agno/db/singlestore/utils.py +60 -2
- agno/db/sqlite/__init__.py +2 -1
- agno/db/sqlite/async_sqlite.py +2911 -0
- agno/db/sqlite/schemas.py +62 -0
- agno/db/sqlite/sqlite.py +965 -46
- agno/db/sqlite/utils.py +169 -8
- agno/db/surrealdb/__init__.py +3 -0
- agno/db/surrealdb/metrics.py +292 -0
- agno/db/surrealdb/models.py +334 -0
- agno/db/surrealdb/queries.py +71 -0
- agno/db/surrealdb/surrealdb.py +1908 -0
- agno/db/surrealdb/utils.py +147 -0
- agno/db/utils.py +2 -0
- agno/eval/__init__.py +10 -0
- agno/eval/accuracy.py +75 -55
- agno/eval/agent_as_judge.py +861 -0
- agno/eval/base.py +29 -0
- agno/eval/performance.py +16 -7
- agno/eval/reliability.py +28 -16
- agno/eval/utils.py +35 -17
- agno/exceptions.py +27 -2
- agno/filters.py +354 -0
- agno/guardrails/prompt_injection.py +1 -0
- agno/hooks/__init__.py +3 -0
- agno/hooks/decorator.py +164 -0
- agno/integrations/discord/client.py +1 -1
- agno/knowledge/chunking/agentic.py +13 -10
- agno/knowledge/chunking/fixed.py +4 -1
- agno/knowledge/chunking/semantic.py +9 -4
- agno/knowledge/chunking/strategy.py +59 -15
- agno/knowledge/embedder/fastembed.py +1 -1
- agno/knowledge/embedder/nebius.py +1 -1
- agno/knowledge/embedder/ollama.py +8 -0
- agno/knowledge/embedder/openai.py +8 -8
- agno/knowledge/embedder/sentence_transformer.py +6 -2
- agno/knowledge/embedder/vllm.py +262 -0
- agno/knowledge/knowledge.py +1618 -318
- agno/knowledge/reader/base.py +6 -2
- agno/knowledge/reader/csv_reader.py +8 -10
- agno/knowledge/reader/docx_reader.py +5 -6
- agno/knowledge/reader/field_labeled_csv_reader.py +16 -20
- agno/knowledge/reader/json_reader.py +5 -4
- agno/knowledge/reader/markdown_reader.py +8 -8
- agno/knowledge/reader/pdf_reader.py +17 -19
- agno/knowledge/reader/pptx_reader.py +101 -0
- agno/knowledge/reader/reader_factory.py +32 -3
- agno/knowledge/reader/s3_reader.py +3 -3
- agno/knowledge/reader/tavily_reader.py +193 -0
- agno/knowledge/reader/text_reader.py +22 -10
- agno/knowledge/reader/web_search_reader.py +1 -48
- agno/knowledge/reader/website_reader.py +10 -10
- agno/knowledge/reader/wikipedia_reader.py +33 -1
- agno/knowledge/types.py +1 -0
- agno/knowledge/utils.py +72 -7
- agno/media.py +22 -6
- agno/memory/__init__.py +14 -1
- agno/memory/manager.py +544 -83
- agno/memory/strategies/__init__.py +15 -0
- agno/memory/strategies/base.py +66 -0
- agno/memory/strategies/summarize.py +196 -0
- agno/memory/strategies/types.py +37 -0
- agno/models/aimlapi/aimlapi.py +17 -0
- agno/models/anthropic/claude.py +515 -40
- agno/models/aws/bedrock.py +102 -21
- agno/models/aws/claude.py +131 -274
- agno/models/azure/ai_foundry.py +41 -19
- agno/models/azure/openai_chat.py +39 -8
- agno/models/base.py +1249 -525
- agno/models/cerebras/cerebras.py +91 -21
- agno/models/cerebras/cerebras_openai.py +21 -2
- agno/models/cohere/chat.py +40 -6
- agno/models/cometapi/cometapi.py +18 -1
- agno/models/dashscope/dashscope.py +2 -3
- agno/models/deepinfra/deepinfra.py +18 -1
- agno/models/deepseek/deepseek.py +69 -3
- agno/models/fireworks/fireworks.py +18 -1
- agno/models/google/gemini.py +877 -80
- agno/models/google/utils.py +22 -0
- agno/models/groq/groq.py +51 -18
- agno/models/huggingface/huggingface.py +17 -6
- agno/models/ibm/watsonx.py +16 -6
- agno/models/internlm/internlm.py +18 -1
- agno/models/langdb/langdb.py +13 -1
- agno/models/litellm/chat.py +44 -9
- agno/models/litellm/litellm_openai.py +18 -1
- agno/models/message.py +28 -5
- agno/models/meta/llama.py +47 -14
- agno/models/meta/llama_openai.py +22 -17
- agno/models/mistral/mistral.py +8 -4
- agno/models/nebius/nebius.py +6 -7
- agno/models/nvidia/nvidia.py +20 -3
- agno/models/ollama/chat.py +24 -8
- agno/models/openai/chat.py +104 -29
- agno/models/openai/responses.py +101 -81
- agno/models/openrouter/openrouter.py +60 -3
- agno/models/perplexity/perplexity.py +17 -1
- agno/models/portkey/portkey.py +7 -6
- agno/models/requesty/requesty.py +24 -4
- agno/models/response.py +73 -2
- agno/models/sambanova/sambanova.py +20 -3
- agno/models/siliconflow/siliconflow.py +19 -2
- agno/models/together/together.py +20 -3
- agno/models/utils.py +254 -8
- agno/models/vercel/v0.py +20 -3
- agno/models/vertexai/__init__.py +0 -0
- agno/models/vertexai/claude.py +190 -0
- agno/models/vllm/vllm.py +19 -14
- agno/models/xai/xai.py +19 -2
- agno/os/app.py +549 -152
- agno/os/auth.py +190 -3
- agno/os/config.py +23 -0
- agno/os/interfaces/a2a/router.py +8 -11
- agno/os/interfaces/a2a/utils.py +1 -1
- agno/os/interfaces/agui/router.py +18 -3
- agno/os/interfaces/agui/utils.py +152 -39
- agno/os/interfaces/slack/router.py +55 -37
- agno/os/interfaces/slack/slack.py +9 -1
- agno/os/interfaces/whatsapp/router.py +0 -1
- agno/os/interfaces/whatsapp/security.py +3 -1
- agno/os/mcp.py +110 -52
- agno/os/middleware/__init__.py +2 -0
- agno/os/middleware/jwt.py +676 -112
- agno/os/router.py +40 -1478
- agno/os/routers/agents/__init__.py +3 -0
- agno/os/routers/agents/router.py +599 -0
- agno/os/routers/agents/schema.py +261 -0
- agno/os/routers/evals/evals.py +96 -39
- agno/os/routers/evals/schemas.py +65 -33
- agno/os/routers/evals/utils.py +80 -10
- agno/os/routers/health.py +10 -4
- agno/os/routers/knowledge/knowledge.py +196 -38
- agno/os/routers/knowledge/schemas.py +82 -22
- agno/os/routers/memory/memory.py +279 -52
- agno/os/routers/memory/schemas.py +46 -17
- agno/os/routers/metrics/metrics.py +20 -8
- agno/os/routers/metrics/schemas.py +16 -16
- agno/os/routers/session/session.py +462 -34
- agno/os/routers/teams/__init__.py +3 -0
- agno/os/routers/teams/router.py +512 -0
- agno/os/routers/teams/schema.py +257 -0
- agno/os/routers/traces/__init__.py +3 -0
- agno/os/routers/traces/schemas.py +414 -0
- agno/os/routers/traces/traces.py +499 -0
- agno/os/routers/workflows/__init__.py +3 -0
- agno/os/routers/workflows/router.py +624 -0
- agno/os/routers/workflows/schema.py +75 -0
- agno/os/schema.py +256 -693
- agno/os/scopes.py +469 -0
- agno/os/utils.py +514 -36
- agno/reasoning/anthropic.py +80 -0
- agno/reasoning/gemini.py +73 -0
- agno/reasoning/openai.py +5 -0
- agno/reasoning/vertexai.py +76 -0
- agno/run/__init__.py +6 -0
- agno/run/agent.py +155 -32
- agno/run/base.py +55 -3
- agno/run/requirement.py +181 -0
- agno/run/team.py +125 -38
- agno/run/workflow.py +72 -18
- agno/session/agent.py +102 -89
- agno/session/summary.py +56 -15
- agno/session/team.py +164 -90
- agno/session/workflow.py +405 -40
- agno/table.py +10 -0
- agno/team/team.py +3974 -1903
- agno/tools/dalle.py +2 -4
- agno/tools/eleven_labs.py +23 -25
- agno/tools/exa.py +21 -16
- agno/tools/file.py +153 -23
- agno/tools/file_generation.py +16 -10
- agno/tools/firecrawl.py +15 -7
- agno/tools/function.py +193 -38
- agno/tools/gmail.py +238 -14
- agno/tools/google_drive.py +271 -0
- agno/tools/googlecalendar.py +36 -8
- agno/tools/googlesheets.py +20 -5
- agno/tools/jira.py +20 -0
- agno/tools/mcp/__init__.py +10 -0
- agno/tools/mcp/mcp.py +331 -0
- agno/tools/mcp/multi_mcp.py +347 -0
- agno/tools/mcp/params.py +24 -0
- agno/tools/mcp_toolbox.py +3 -3
- agno/tools/models/nebius.py +5 -5
- agno/tools/models_labs.py +20 -10
- agno/tools/nano_banana.py +151 -0
- agno/tools/notion.py +204 -0
- agno/tools/parallel.py +314 -0
- agno/tools/postgres.py +76 -36
- agno/tools/redshift.py +406 -0
- agno/tools/scrapegraph.py +1 -1
- agno/tools/shopify.py +1519 -0
- agno/tools/slack.py +18 -3
- agno/tools/spotify.py +919 -0
- agno/tools/tavily.py +146 -0
- agno/tools/toolkit.py +25 -0
- agno/tools/workflow.py +8 -1
- agno/tools/yfinance.py +12 -11
- agno/tracing/__init__.py +12 -0
- agno/tracing/exporter.py +157 -0
- agno/tracing/schemas.py +276 -0
- agno/tracing/setup.py +111 -0
- agno/utils/agent.py +938 -0
- agno/utils/cryptography.py +22 -0
- agno/utils/dttm.py +33 -0
- agno/utils/events.py +151 -3
- agno/utils/gemini.py +15 -5
- agno/utils/hooks.py +118 -4
- agno/utils/http.py +113 -2
- agno/utils/knowledge.py +12 -5
- agno/utils/log.py +1 -0
- agno/utils/mcp.py +92 -2
- agno/utils/media.py +187 -1
- agno/utils/merge_dict.py +3 -3
- agno/utils/message.py +60 -0
- agno/utils/models/ai_foundry.py +9 -2
- agno/utils/models/claude.py +49 -14
- agno/utils/models/cohere.py +9 -2
- agno/utils/models/llama.py +9 -2
- agno/utils/models/mistral.py +4 -2
- agno/utils/print_response/agent.py +109 -16
- agno/utils/print_response/team.py +223 -30
- agno/utils/print_response/workflow.py +251 -34
- agno/utils/streamlit.py +1 -1
- agno/utils/team.py +98 -9
- agno/utils/tokens.py +657 -0
- agno/vectordb/base.py +39 -7
- agno/vectordb/cassandra/cassandra.py +21 -5
- agno/vectordb/chroma/chromadb.py +43 -12
- agno/vectordb/clickhouse/clickhousedb.py +21 -5
- agno/vectordb/couchbase/couchbase.py +29 -5
- agno/vectordb/lancedb/lance_db.py +92 -181
- agno/vectordb/langchaindb/langchaindb.py +24 -4
- agno/vectordb/lightrag/lightrag.py +17 -3
- agno/vectordb/llamaindex/llamaindexdb.py +25 -5
- agno/vectordb/milvus/milvus.py +50 -37
- agno/vectordb/mongodb/__init__.py +7 -1
- agno/vectordb/mongodb/mongodb.py +36 -30
- agno/vectordb/pgvector/pgvector.py +201 -77
- agno/vectordb/pineconedb/pineconedb.py +41 -23
- agno/vectordb/qdrant/qdrant.py +67 -54
- agno/vectordb/redis/__init__.py +9 -0
- agno/vectordb/redis/redisdb.py +682 -0
- agno/vectordb/singlestore/singlestore.py +50 -29
- agno/vectordb/surrealdb/surrealdb.py +31 -41
- agno/vectordb/upstashdb/upstashdb.py +34 -6
- agno/vectordb/weaviate/weaviate.py +53 -14
- agno/workflow/__init__.py +2 -0
- agno/workflow/agent.py +299 -0
- agno/workflow/condition.py +120 -18
- agno/workflow/loop.py +77 -10
- agno/workflow/parallel.py +231 -143
- agno/workflow/router.py +118 -17
- agno/workflow/step.py +609 -170
- agno/workflow/steps.py +73 -6
- agno/workflow/types.py +96 -21
- agno/workflow/workflow.py +2039 -262
- {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/METADATA +201 -66
- agno-2.3.13.dist-info/RECORD +613 -0
- agno/tools/googlesearch.py +0 -98
- agno/tools/mcp.py +0 -679
- agno/tools/memori.py +0 -339
- agno-2.1.2.dist-info/RECORD +0 -543
- {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/WHEEL +0 -0
- {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/licenses/LICENSE +0 -0
- {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,261 @@
|
|
|
1
|
+
from typing import Any, Dict, Optional
|
|
2
|
+
from uuid import uuid4
|
|
3
|
+
|
|
4
|
+
from pydantic import BaseModel
|
|
5
|
+
|
|
6
|
+
from agno.agent import Agent
|
|
7
|
+
from agno.models.message import Message
|
|
8
|
+
from agno.os.schema import ModelResponse
|
|
9
|
+
from agno.os.utils import (
|
|
10
|
+
format_tools,
|
|
11
|
+
get_agent_input_schema_dict,
|
|
12
|
+
)
|
|
13
|
+
from agno.run import RunContext
|
|
14
|
+
from agno.run.agent import RunOutput
|
|
15
|
+
from agno.session import AgentSession
|
|
16
|
+
from agno.utils.agent import aexecute_instructions, aexecute_system_message
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class AgentResponse(BaseModel):
|
|
20
|
+
id: Optional[str] = None
|
|
21
|
+
name: Optional[str] = None
|
|
22
|
+
db_id: Optional[str] = None
|
|
23
|
+
model: Optional[ModelResponse] = None
|
|
24
|
+
tools: Optional[Dict[str, Any]] = None
|
|
25
|
+
sessions: Optional[Dict[str, Any]] = None
|
|
26
|
+
knowledge: Optional[Dict[str, Any]] = None
|
|
27
|
+
memory: Optional[Dict[str, Any]] = None
|
|
28
|
+
reasoning: Optional[Dict[str, Any]] = None
|
|
29
|
+
default_tools: Optional[Dict[str, Any]] = None
|
|
30
|
+
system_message: Optional[Dict[str, Any]] = None
|
|
31
|
+
extra_messages: Optional[Dict[str, Any]] = None
|
|
32
|
+
response_settings: Optional[Dict[str, Any]] = None
|
|
33
|
+
introduction: Optional[str] = None
|
|
34
|
+
streaming: Optional[Dict[str, Any]] = None
|
|
35
|
+
metadata: Optional[Dict[str, Any]] = None
|
|
36
|
+
input_schema: Optional[Dict[str, Any]] = None
|
|
37
|
+
|
|
38
|
+
@classmethod
|
|
39
|
+
async def from_agent(cls, agent: Agent) -> "AgentResponse":
|
|
40
|
+
def filter_meaningful_config(d: Dict[str, Any], defaults: Dict[str, Any]) -> Optional[Dict[str, Any]]:
|
|
41
|
+
"""Filter out fields that match their default values, keeping only meaningful user configurations"""
|
|
42
|
+
filtered = {}
|
|
43
|
+
for key, value in d.items():
|
|
44
|
+
if value is None:
|
|
45
|
+
continue
|
|
46
|
+
# Skip if value matches the default exactly
|
|
47
|
+
if key in defaults and value == defaults[key]:
|
|
48
|
+
continue
|
|
49
|
+
# Keep non-default values
|
|
50
|
+
filtered[key] = value
|
|
51
|
+
return filtered if filtered else None
|
|
52
|
+
|
|
53
|
+
# Define default values for filtering
|
|
54
|
+
agent_defaults = {
|
|
55
|
+
# Sessions defaults
|
|
56
|
+
"add_history_to_context": False,
|
|
57
|
+
"num_history_runs": 3,
|
|
58
|
+
"enable_session_summaries": False,
|
|
59
|
+
"search_session_history": False,
|
|
60
|
+
"cache_session": False,
|
|
61
|
+
# Knowledge defaults
|
|
62
|
+
"add_references": False,
|
|
63
|
+
"references_format": "json",
|
|
64
|
+
"enable_agentic_knowledge_filters": False,
|
|
65
|
+
# Memory defaults
|
|
66
|
+
"enable_agentic_memory": False,
|
|
67
|
+
"enable_user_memories": False,
|
|
68
|
+
# Reasoning defaults
|
|
69
|
+
"reasoning": False,
|
|
70
|
+
"reasoning_min_steps": 1,
|
|
71
|
+
"reasoning_max_steps": 10,
|
|
72
|
+
# Default tools defaults
|
|
73
|
+
"read_chat_history": False,
|
|
74
|
+
"search_knowledge": True,
|
|
75
|
+
"update_knowledge": False,
|
|
76
|
+
"read_tool_call_history": False,
|
|
77
|
+
# System message defaults
|
|
78
|
+
"system_message_role": "system",
|
|
79
|
+
"build_context": True,
|
|
80
|
+
"markdown": False,
|
|
81
|
+
"add_name_to_context": False,
|
|
82
|
+
"add_datetime_to_context": False,
|
|
83
|
+
"add_location_to_context": False,
|
|
84
|
+
"resolve_in_context": True,
|
|
85
|
+
# Extra messages defaults
|
|
86
|
+
"user_message_role": "user",
|
|
87
|
+
"build_user_context": True,
|
|
88
|
+
# Response settings defaults
|
|
89
|
+
"retries": 0,
|
|
90
|
+
"delay_between_retries": 1,
|
|
91
|
+
"exponential_backoff": False,
|
|
92
|
+
"parse_response": True,
|
|
93
|
+
"use_json_mode": False,
|
|
94
|
+
# Streaming defaults
|
|
95
|
+
"stream_events": False,
|
|
96
|
+
"stream_intermediate_steps": False,
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
session_id = str(uuid4())
|
|
100
|
+
run_id = str(uuid4())
|
|
101
|
+
agent_tools = await agent.aget_tools(
|
|
102
|
+
session=AgentSession(session_id=session_id, session_data={}),
|
|
103
|
+
run_response=RunOutput(run_id=run_id, session_id=session_id),
|
|
104
|
+
run_context=RunContext(run_id=run_id, session_id=session_id, user_id=agent.user_id),
|
|
105
|
+
check_mcp_tools=False,
|
|
106
|
+
)
|
|
107
|
+
formatted_tools = format_tools(agent_tools) if agent_tools else None
|
|
108
|
+
|
|
109
|
+
additional_input = agent.additional_input
|
|
110
|
+
if additional_input and isinstance(additional_input[0], Message):
|
|
111
|
+
additional_input = [message.to_dict() for message in additional_input] # type: ignore
|
|
112
|
+
|
|
113
|
+
# Build model only if it has at least one non-null field
|
|
114
|
+
model_name = agent.model.name if (agent.model and agent.model.name) else None
|
|
115
|
+
model_provider = agent.model.provider if (agent.model and agent.model.provider) else None
|
|
116
|
+
model_id = agent.model.id if (agent.model and agent.model.id) else None
|
|
117
|
+
_agent_model_data: Dict[str, Any] = {}
|
|
118
|
+
if model_name is not None:
|
|
119
|
+
_agent_model_data["name"] = model_name
|
|
120
|
+
if model_id is not None:
|
|
121
|
+
_agent_model_data["model"] = model_id
|
|
122
|
+
if model_provider is not None:
|
|
123
|
+
_agent_model_data["provider"] = model_provider
|
|
124
|
+
|
|
125
|
+
session_table = agent.db.session_table_name if agent.db else None
|
|
126
|
+
knowledge_table = agent.db.knowledge_table_name if agent.db and agent.knowledge else None
|
|
127
|
+
|
|
128
|
+
tools_info = {
|
|
129
|
+
"tools": formatted_tools,
|
|
130
|
+
"tool_call_limit": agent.tool_call_limit,
|
|
131
|
+
"tool_choice": agent.tool_choice,
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
sessions_info = {
|
|
135
|
+
"session_table": session_table,
|
|
136
|
+
"add_history_to_context": agent.add_history_to_context,
|
|
137
|
+
"enable_session_summaries": agent.enable_session_summaries,
|
|
138
|
+
"num_history_runs": agent.num_history_runs,
|
|
139
|
+
"search_session_history": agent.search_session_history,
|
|
140
|
+
"num_history_sessions": agent.num_history_sessions,
|
|
141
|
+
"cache_session": agent.cache_session,
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
knowledge_info = {
|
|
145
|
+
"knowledge_table": knowledge_table,
|
|
146
|
+
"enable_agentic_knowledge_filters": agent.enable_agentic_knowledge_filters,
|
|
147
|
+
"knowledge_filters": agent.knowledge_filters,
|
|
148
|
+
"references_format": agent.references_format,
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
memory_info: Optional[Dict[str, Any]] = None
|
|
152
|
+
if agent.memory_manager is not None:
|
|
153
|
+
memory_info = {
|
|
154
|
+
"enable_agentic_memory": agent.enable_agentic_memory,
|
|
155
|
+
"enable_user_memories": agent.enable_user_memories,
|
|
156
|
+
"metadata": agent.metadata,
|
|
157
|
+
"memory_table": agent.db.memory_table_name if agent.db and agent.enable_user_memories else None,
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
if agent.memory_manager.model is not None:
|
|
161
|
+
memory_info["model"] = ModelResponse(
|
|
162
|
+
name=agent.memory_manager.model.name,
|
|
163
|
+
model=agent.memory_manager.model.id,
|
|
164
|
+
provider=agent.memory_manager.model.provider,
|
|
165
|
+
).model_dump()
|
|
166
|
+
|
|
167
|
+
reasoning_info: Dict[str, Any] = {
|
|
168
|
+
"reasoning": agent.reasoning,
|
|
169
|
+
"reasoning_agent_id": agent.reasoning_agent.id if agent.reasoning_agent else None,
|
|
170
|
+
"reasoning_min_steps": agent.reasoning_min_steps,
|
|
171
|
+
"reasoning_max_steps": agent.reasoning_max_steps,
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
if agent.reasoning_model:
|
|
175
|
+
reasoning_info["reasoning_model"] = ModelResponse(
|
|
176
|
+
name=agent.reasoning_model.name,
|
|
177
|
+
model=agent.reasoning_model.id,
|
|
178
|
+
provider=agent.reasoning_model.provider,
|
|
179
|
+
).model_dump()
|
|
180
|
+
|
|
181
|
+
default_tools_info = {
|
|
182
|
+
"read_chat_history": agent.read_chat_history,
|
|
183
|
+
"search_knowledge": agent.search_knowledge,
|
|
184
|
+
"update_knowledge": agent.update_knowledge,
|
|
185
|
+
"read_tool_call_history": agent.read_tool_call_history,
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
instructions = agent.instructions if agent.instructions else None
|
|
189
|
+
if instructions and callable(instructions):
|
|
190
|
+
instructions = await aexecute_instructions(instructions=instructions, agent=agent)
|
|
191
|
+
|
|
192
|
+
system_message = agent.system_message if agent.system_message else None
|
|
193
|
+
if system_message and callable(system_message):
|
|
194
|
+
system_message = await aexecute_system_message(system_message=system_message, agent=agent)
|
|
195
|
+
|
|
196
|
+
system_message_info = {
|
|
197
|
+
"system_message": str(system_message) if system_message else None,
|
|
198
|
+
"system_message_role": agent.system_message_role,
|
|
199
|
+
"build_context": agent.build_context,
|
|
200
|
+
"description": agent.description,
|
|
201
|
+
"instructions": instructions,
|
|
202
|
+
"expected_output": agent.expected_output,
|
|
203
|
+
"additional_context": agent.additional_context,
|
|
204
|
+
"markdown": agent.markdown,
|
|
205
|
+
"add_name_to_context": agent.add_name_to_context,
|
|
206
|
+
"add_datetime_to_context": agent.add_datetime_to_context,
|
|
207
|
+
"add_location_to_context": agent.add_location_to_context,
|
|
208
|
+
"timezone_identifier": agent.timezone_identifier,
|
|
209
|
+
"resolve_in_context": agent.resolve_in_context,
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
extra_messages_info = {
|
|
213
|
+
"additional_input": additional_input, # type: ignore
|
|
214
|
+
"user_message_role": agent.user_message_role,
|
|
215
|
+
"build_user_context": agent.build_user_context,
|
|
216
|
+
}
|
|
217
|
+
|
|
218
|
+
response_settings_info: Dict[str, Any] = {
|
|
219
|
+
"retries": agent.retries,
|
|
220
|
+
"delay_between_retries": agent.delay_between_retries,
|
|
221
|
+
"exponential_backoff": agent.exponential_backoff,
|
|
222
|
+
"output_schema_name": agent.output_schema.__name__ if agent.output_schema else None,
|
|
223
|
+
"parser_model_prompt": agent.parser_model_prompt,
|
|
224
|
+
"parse_response": agent.parse_response,
|
|
225
|
+
"structured_outputs": agent.structured_outputs,
|
|
226
|
+
"use_json_mode": agent.use_json_mode,
|
|
227
|
+
"save_response_to_file": agent.save_response_to_file,
|
|
228
|
+
}
|
|
229
|
+
|
|
230
|
+
if agent.parser_model:
|
|
231
|
+
response_settings_info["parser_model"] = ModelResponse(
|
|
232
|
+
name=agent.parser_model.name,
|
|
233
|
+
model=agent.parser_model.id,
|
|
234
|
+
provider=agent.parser_model.provider,
|
|
235
|
+
).model_dump()
|
|
236
|
+
|
|
237
|
+
streaming_info = {
|
|
238
|
+
"stream": agent.stream,
|
|
239
|
+
"stream_events": agent.stream_events,
|
|
240
|
+
"stream_intermediate_steps": agent.stream_intermediate_steps,
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
return AgentResponse(
|
|
244
|
+
id=agent.id,
|
|
245
|
+
name=agent.name,
|
|
246
|
+
db_id=agent.db.id if agent.db else None,
|
|
247
|
+
model=ModelResponse(**_agent_model_data) if _agent_model_data else None,
|
|
248
|
+
tools=filter_meaningful_config(tools_info, {}),
|
|
249
|
+
sessions=filter_meaningful_config(sessions_info, agent_defaults),
|
|
250
|
+
knowledge=filter_meaningful_config(knowledge_info, agent_defaults),
|
|
251
|
+
memory=filter_meaningful_config(memory_info, agent_defaults) if memory_info else None,
|
|
252
|
+
reasoning=filter_meaningful_config(reasoning_info, agent_defaults),
|
|
253
|
+
default_tools=filter_meaningful_config(default_tools_info, agent_defaults),
|
|
254
|
+
system_message=filter_meaningful_config(system_message_info, agent_defaults),
|
|
255
|
+
extra_messages=filter_meaningful_config(extra_messages_info, agent_defaults),
|
|
256
|
+
response_settings=filter_meaningful_config(response_settings_info, agent_defaults),
|
|
257
|
+
streaming=filter_meaningful_config(streaming_info, agent_defaults),
|
|
258
|
+
introduction=agent.introduction,
|
|
259
|
+
metadata=agent.metadata,
|
|
260
|
+
input_schema=get_agent_input_schema_dict(agent),
|
|
261
|
+
)
|
agno/os/routers/evals/evals.py
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from copy import deepcopy
|
|
3
|
-
from typing import List, Optional
|
|
3
|
+
from typing import List, Optional, Union, cast
|
|
4
4
|
|
|
5
5
|
from fastapi import APIRouter, Depends, HTTPException, Query
|
|
6
6
|
|
|
7
7
|
from agno.agent.agent import Agent
|
|
8
|
-
from agno.db.base import BaseDb
|
|
8
|
+
from agno.db.base import AsyncBaseDb, BaseDb
|
|
9
9
|
from agno.db.schemas.evals import EvalFilterType, EvalType
|
|
10
10
|
from agno.models.utils import get_model
|
|
11
11
|
from agno.os.auth import get_authentication_dependency
|
|
@@ -15,7 +15,12 @@ from agno.os.routers.evals.schemas import (
|
|
|
15
15
|
EvalSchema,
|
|
16
16
|
UpdateEvalRunRequest,
|
|
17
17
|
)
|
|
18
|
-
from agno.os.routers.evals.utils import
|
|
18
|
+
from agno.os.routers.evals.utils import (
|
|
19
|
+
run_accuracy_eval,
|
|
20
|
+
run_agent_as_judge_eval,
|
|
21
|
+
run_performance_eval,
|
|
22
|
+
run_reliability_eval,
|
|
23
|
+
)
|
|
19
24
|
from agno.os.schema import (
|
|
20
25
|
BadRequestResponse,
|
|
21
26
|
InternalServerErrorResponse,
|
|
@@ -34,7 +39,7 @@ logger = logging.getLogger(__name__)
|
|
|
34
39
|
|
|
35
40
|
|
|
36
41
|
def get_eval_router(
|
|
37
|
-
dbs: dict[str, BaseDb],
|
|
42
|
+
dbs: dict[str, list[Union[BaseDb, AsyncBaseDb]]],
|
|
38
43
|
agents: Optional[List[Agent]] = None,
|
|
39
44
|
teams: Optional[List[Team]] = None,
|
|
40
45
|
settings: AgnoAPISettings = AgnoAPISettings(),
|
|
@@ -55,7 +60,10 @@ def get_eval_router(
|
|
|
55
60
|
|
|
56
61
|
|
|
57
62
|
def attach_routes(
|
|
58
|
-
router: APIRouter,
|
|
63
|
+
router: APIRouter,
|
|
64
|
+
dbs: dict[str, list[Union[BaseDb, AsyncBaseDb]]],
|
|
65
|
+
agents: Optional[List[Agent]] = None,
|
|
66
|
+
teams: Optional[List[Team]] = None,
|
|
59
67
|
) -> APIRouter:
|
|
60
68
|
@router.get(
|
|
61
69
|
"/eval-runs",
|
|
@@ -112,21 +120,48 @@ def attach_routes(
|
|
|
112
120
|
sort_by: Optional[str] = Query(default="created_at", description="Field to sort by"),
|
|
113
121
|
sort_order: Optional[SortOrder] = Query(default="desc", description="Sort order (asc or desc)"),
|
|
114
122
|
db_id: Optional[str] = Query(default=None, description="The ID of the database to use"),
|
|
123
|
+
table: Optional[str] = Query(default=None, description="The database table to use"),
|
|
115
124
|
) -> PaginatedResponse[EvalSchema]:
|
|
116
|
-
db = get_db(dbs, db_id)
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
125
|
+
db = await get_db(dbs, db_id, table)
|
|
126
|
+
|
|
127
|
+
# TODO: Delete me:
|
|
128
|
+
# Filtering out agent-as-judge by default for now,
|
|
129
|
+
# as they are not supported yet in the AgentOS UI.
|
|
130
|
+
eval_types = eval_types or [
|
|
131
|
+
EvalType.ACCURACY,
|
|
132
|
+
EvalType.PERFORMANCE,
|
|
133
|
+
EvalType.RELIABILITY,
|
|
134
|
+
]
|
|
135
|
+
|
|
136
|
+
if isinstance(db, AsyncBaseDb):
|
|
137
|
+
db = cast(AsyncBaseDb, db)
|
|
138
|
+
eval_runs, total_count = await db.get_eval_runs(
|
|
139
|
+
limit=limit,
|
|
140
|
+
page=page,
|
|
141
|
+
sort_by=sort_by,
|
|
142
|
+
sort_order=sort_order,
|
|
143
|
+
agent_id=agent_id,
|
|
144
|
+
team_id=team_id,
|
|
145
|
+
workflow_id=workflow_id,
|
|
146
|
+
model_id=model_id,
|
|
147
|
+
eval_type=eval_types,
|
|
148
|
+
filter_type=filter_type,
|
|
149
|
+
deserialize=False,
|
|
150
|
+
)
|
|
151
|
+
else:
|
|
152
|
+
eval_runs, total_count = db.get_eval_runs( # type: ignore
|
|
153
|
+
limit=limit,
|
|
154
|
+
page=page,
|
|
155
|
+
sort_by=sort_by,
|
|
156
|
+
sort_order=sort_order,
|
|
157
|
+
agent_id=agent_id,
|
|
158
|
+
team_id=team_id,
|
|
159
|
+
workflow_id=workflow_id,
|
|
160
|
+
model_id=model_id,
|
|
161
|
+
eval_type=eval_types,
|
|
162
|
+
filter_type=filter_type,
|
|
163
|
+
deserialize=False,
|
|
164
|
+
)
|
|
130
165
|
|
|
131
166
|
return PaginatedResponse(
|
|
132
167
|
data=[EvalSchema.from_dict(eval_run) for eval_run in eval_runs], # type: ignore
|
|
@@ -178,9 +213,14 @@ def attach_routes(
|
|
|
178
213
|
async def get_eval_run(
|
|
179
214
|
eval_run_id: str,
|
|
180
215
|
db_id: Optional[str] = Query(default=None, description="The ID of the database to use"),
|
|
216
|
+
table: Optional[str] = Query(default=None, description="Table to query eval run from"),
|
|
181
217
|
) -> EvalSchema:
|
|
182
|
-
db = get_db(dbs, db_id)
|
|
183
|
-
|
|
218
|
+
db = await get_db(dbs, db_id, table)
|
|
219
|
+
if isinstance(db, AsyncBaseDb):
|
|
220
|
+
db = cast(AsyncBaseDb, db)
|
|
221
|
+
eval_run = await db.get_eval_run(eval_run_id=eval_run_id, deserialize=False)
|
|
222
|
+
else:
|
|
223
|
+
eval_run = db.get_eval_run(eval_run_id=eval_run_id, deserialize=False)
|
|
184
224
|
if not eval_run:
|
|
185
225
|
raise HTTPException(status_code=404, detail=f"Eval run with id '{eval_run_id}' not found")
|
|
186
226
|
|
|
@@ -200,10 +240,15 @@ def attach_routes(
|
|
|
200
240
|
async def delete_eval_runs(
|
|
201
241
|
request: DeleteEvalRunsRequest,
|
|
202
242
|
db_id: Optional[str] = Query(default=None, description="Database ID to use for deletion"),
|
|
243
|
+
table: Optional[str] = Query(default=None, description="Table to use for deletion"),
|
|
203
244
|
) -> None:
|
|
204
245
|
try:
|
|
205
|
-
db = get_db(dbs, db_id)
|
|
206
|
-
db
|
|
246
|
+
db = await get_db(dbs, db_id, table)
|
|
247
|
+
if isinstance(db, AsyncBaseDb):
|
|
248
|
+
db = cast(AsyncBaseDb, db)
|
|
249
|
+
await db.delete_eval_runs(eval_run_ids=request.eval_run_ids)
|
|
250
|
+
else:
|
|
251
|
+
db.delete_eval_runs(eval_run_ids=request.eval_run_ids)
|
|
207
252
|
except Exception as e:
|
|
208
253
|
raise HTTPException(status_code=500, detail=f"Failed to delete eval runs: {e}")
|
|
209
254
|
|
|
@@ -249,10 +294,15 @@ def attach_routes(
|
|
|
249
294
|
eval_run_id: str,
|
|
250
295
|
request: UpdateEvalRunRequest,
|
|
251
296
|
db_id: Optional[str] = Query(default=None, description="The ID of the database to use"),
|
|
297
|
+
table: Optional[str] = Query(default=None, description="Table to use for rename operation"),
|
|
252
298
|
) -> EvalSchema:
|
|
253
299
|
try:
|
|
254
|
-
db = get_db(dbs, db_id)
|
|
255
|
-
|
|
300
|
+
db = await get_db(dbs, db_id, table)
|
|
301
|
+
if isinstance(db, AsyncBaseDb):
|
|
302
|
+
db = cast(AsyncBaseDb, db)
|
|
303
|
+
eval_run = await db.rename_eval_run(eval_run_id=eval_run_id, name=request.name, deserialize=False)
|
|
304
|
+
else:
|
|
305
|
+
eval_run = db.rename_eval_run(eval_run_id=eval_run_id, name=request.name, deserialize=False)
|
|
256
306
|
except Exception as e:
|
|
257
307
|
raise HTTPException(status_code=500, detail=f"Failed to rename eval run: {e}")
|
|
258
308
|
|
|
@@ -268,7 +318,7 @@ def attach_routes(
|
|
|
268
318
|
operation_id="run_eval",
|
|
269
319
|
summary="Execute Evaluation",
|
|
270
320
|
description=(
|
|
271
|
-
"Run evaluation tests on agents or teams. Supports accuracy, performance, and reliability evaluations. "
|
|
321
|
+
"Run evaluation tests on agents or teams. Supports accuracy, agent-as-judge, performance, and reliability evaluations. "
|
|
272
322
|
"Requires either agent_id or team_id, but not both."
|
|
273
323
|
),
|
|
274
324
|
responses={
|
|
@@ -304,8 +354,9 @@ def attach_routes(
|
|
|
304
354
|
async def run_eval(
|
|
305
355
|
eval_run_input: EvalRunInput,
|
|
306
356
|
db_id: Optional[str] = Query(default=None, description="Database ID to use for evaluation"),
|
|
357
|
+
table: Optional[str] = Query(default=None, description="Table to use for evaluation"),
|
|
307
358
|
) -> Optional[EvalSchema]:
|
|
308
|
-
db = get_db(dbs, db_id)
|
|
359
|
+
db = await get_db(dbs, db_id, table)
|
|
309
360
|
|
|
310
361
|
if eval_run_input.agent_id and eval_run_input.team_id:
|
|
311
362
|
raise HTTPException(status_code=400, detail="Only one of agent_id or team_id must be provided")
|
|
@@ -324,10 +375,10 @@ def attach_routes(
|
|
|
324
375
|
):
|
|
325
376
|
default_model = deepcopy(agent.model)
|
|
326
377
|
if eval_run_input.model_id != agent.model.id or eval_run_input.model_provider != agent.model.provider:
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
)
|
|
378
|
+
model_provider = eval_run_input.model_provider.lower()
|
|
379
|
+
model_id = eval_run_input.model_id.lower()
|
|
380
|
+
model_string = f"{model_provider}:{model_id}"
|
|
381
|
+
model = get_model(model_string)
|
|
331
382
|
agent.model = model
|
|
332
383
|
|
|
333
384
|
team = None
|
|
@@ -337,6 +388,7 @@ def attach_routes(
|
|
|
337
388
|
if not team:
|
|
338
389
|
raise HTTPException(status_code=404, detail=f"Team with id '{eval_run_input.team_id}' not found")
|
|
339
390
|
|
|
391
|
+
# If model_id/model_provider specified, override team's model temporarily
|
|
340
392
|
default_model = None
|
|
341
393
|
if (
|
|
342
394
|
hasattr(team, "model")
|
|
@@ -344,13 +396,13 @@ def attach_routes(
|
|
|
344
396
|
and eval_run_input.model_id is not None
|
|
345
397
|
and eval_run_input.model_provider is not None
|
|
346
398
|
):
|
|
347
|
-
default_model = deepcopy(team.model)
|
|
399
|
+
default_model = deepcopy(team.model) # Save original
|
|
348
400
|
if eval_run_input.model_id != team.model.id or eval_run_input.model_provider != team.model.provider:
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
)
|
|
353
|
-
team.model = model
|
|
401
|
+
model_provider = eval_run_input.model_provider.lower()
|
|
402
|
+
model_id = eval_run_input.model_id.lower()
|
|
403
|
+
model_string = f"{model_provider}:{model_id}"
|
|
404
|
+
model = get_model(model_string)
|
|
405
|
+
team.model = model # Override temporarily
|
|
354
406
|
|
|
355
407
|
agent = None
|
|
356
408
|
|
|
@@ -363,6 +415,11 @@ def attach_routes(
|
|
|
363
415
|
eval_run_input=eval_run_input, db=db, agent=agent, team=team, default_model=default_model
|
|
364
416
|
)
|
|
365
417
|
|
|
418
|
+
elif eval_run_input.eval_type == EvalType.AGENT_AS_JUDGE:
|
|
419
|
+
return await run_agent_as_judge_eval(
|
|
420
|
+
eval_run_input=eval_run_input, db=db, agent=agent, team=team, default_model=default_model
|
|
421
|
+
)
|
|
422
|
+
|
|
366
423
|
elif eval_run_input.eval_type == EvalType.PERFORMANCE:
|
|
367
424
|
return await run_performance_eval(
|
|
368
425
|
eval_run_input=eval_run_input, db=db, agent=agent, team=team, default_model=default_model
|
|
@@ -379,8 +436,8 @@ def attach_routes(
|
|
|
379
436
|
def parse_eval_types_filter(
|
|
380
437
|
eval_types: Optional[str] = Query(
|
|
381
438
|
default=None,
|
|
382
|
-
description="Comma-separated eval types (accuracy,performance,reliability)",
|
|
383
|
-
examples=["accuracy,performance"],
|
|
439
|
+
description="Comma-separated eval types (accuracy,agent_as_judge,performance,reliability)",
|
|
440
|
+
examples=["accuracy,agent_as_judge,performance,reliability"],
|
|
384
441
|
),
|
|
385
442
|
) -> Optional[List[EvalType]]:
|
|
386
443
|
"""Parse comma-separated eval types into EvalType enums for filtering evaluation runs."""
|
agno/os/routers/evals/schemas.py
CHANGED
|
@@ -1,54 +1,64 @@
|
|
|
1
1
|
from dataclasses import asdict
|
|
2
2
|
from datetime import datetime, timezone
|
|
3
|
-
from typing import Any, Dict, List, Optional
|
|
3
|
+
from typing import Any, Dict, List, Literal, Optional
|
|
4
4
|
|
|
5
|
-
from pydantic import BaseModel
|
|
5
|
+
from pydantic import BaseModel, Field
|
|
6
6
|
|
|
7
7
|
from agno.db.schemas.evals import EvalType
|
|
8
|
-
from agno.eval import AccuracyResult, PerformanceResult, ReliabilityResult
|
|
8
|
+
from agno.eval import AccuracyResult, AgentAsJudgeResult, PerformanceResult, ReliabilityResult
|
|
9
9
|
from agno.eval.accuracy import AccuracyEval
|
|
10
|
+
from agno.eval.agent_as_judge import AgentAsJudgeEval
|
|
10
11
|
from agno.eval.performance import PerformanceEval
|
|
11
12
|
from agno.eval.reliability import ReliabilityEval
|
|
12
13
|
|
|
13
14
|
|
|
14
15
|
class EvalRunInput(BaseModel):
|
|
15
|
-
agent_id: Optional[str] = None
|
|
16
|
-
team_id: Optional[str] = None
|
|
17
|
-
|
|
18
|
-
model_id: Optional[str] = None
|
|
19
|
-
model_provider: Optional[str] = None
|
|
20
|
-
eval_type: EvalType
|
|
21
|
-
input: str
|
|
22
|
-
additional_guidelines: Optional[str] = None
|
|
23
|
-
additional_context: Optional[str] = None
|
|
24
|
-
num_iterations:
|
|
25
|
-
name: Optional[str] = None
|
|
16
|
+
agent_id: Optional[str] = Field(None, description="Agent ID to evaluate")
|
|
17
|
+
team_id: Optional[str] = Field(None, description="Team ID to evaluate")
|
|
18
|
+
|
|
19
|
+
model_id: Optional[str] = Field(None, description="Model ID to use for evaluation")
|
|
20
|
+
model_provider: Optional[str] = Field(None, description="Model provider name")
|
|
21
|
+
eval_type: EvalType = Field(..., description="Type of evaluation to run (accuracy, performance, or reliability)")
|
|
22
|
+
input: str = Field(..., description="Input text/query for the evaluation", min_length=1)
|
|
23
|
+
additional_guidelines: Optional[str] = Field(None, description="Additional guidelines for the evaluation")
|
|
24
|
+
additional_context: Optional[str] = Field(None, description="Additional context for the evaluation")
|
|
25
|
+
num_iterations: int = Field(1, description="Number of times to run the evaluation", ge=1, le=100)
|
|
26
|
+
name: Optional[str] = Field(None, description="Name for this evaluation run")
|
|
26
27
|
|
|
27
28
|
# Accuracy eval specific fields
|
|
28
|
-
expected_output: Optional[str] = None
|
|
29
|
+
expected_output: Optional[str] = Field(None, description="Expected output for accuracy evaluation")
|
|
30
|
+
|
|
31
|
+
# AgentAsJudge eval specific fields
|
|
32
|
+
criteria: Optional[str] = Field(None, description="Evaluation criteria for agent-as-judge evaluation")
|
|
33
|
+
scoring_strategy: Optional[Literal["numeric", "binary"]] = Field(
|
|
34
|
+
"binary", description="Scoring strategy: 'numeric' (1-10 with threshold) or 'binary' (PASS/FAIL)"
|
|
35
|
+
)
|
|
36
|
+
threshold: Optional[int] = Field(
|
|
37
|
+
7, description="Score threshold for pass/fail (1-10), only used with numeric scoring", ge=1, le=10
|
|
38
|
+
)
|
|
29
39
|
|
|
30
40
|
# Performance eval specific fields
|
|
31
|
-
warmup_runs:
|
|
41
|
+
warmup_runs: int = Field(0, description="Number of warmup runs before measuring performance", ge=0, le=10)
|
|
32
42
|
|
|
33
43
|
# Reliability eval specific fields
|
|
34
|
-
expected_tool_calls: Optional[List[str]] = None
|
|
44
|
+
expected_tool_calls: Optional[List[str]] = Field(None, description="Expected tool calls for reliability evaluation")
|
|
35
45
|
|
|
36
46
|
|
|
37
47
|
class EvalSchema(BaseModel):
|
|
38
|
-
id: str
|
|
39
|
-
|
|
40
|
-
agent_id: Optional[str] = None
|
|
41
|
-
model_id: Optional[str] = None
|
|
42
|
-
model_provider: Optional[str] = None
|
|
43
|
-
team_id: Optional[str] = None
|
|
44
|
-
workflow_id: Optional[str] = None
|
|
45
|
-
name: Optional[str] = None
|
|
46
|
-
evaluated_component_name: Optional[str] = None
|
|
47
|
-
eval_type: EvalType
|
|
48
|
-
eval_data: Dict[str, Any]
|
|
49
|
-
eval_input: Optional[Dict[str, Any]] = None
|
|
50
|
-
created_at: Optional[datetime] = None
|
|
51
|
-
updated_at: Optional[datetime] = None
|
|
48
|
+
id: str = Field(..., description="Unique identifier for the evaluation run")
|
|
49
|
+
|
|
50
|
+
agent_id: Optional[str] = Field(None, description="Agent ID that was evaluated")
|
|
51
|
+
model_id: Optional[str] = Field(None, description="Model ID used in evaluation")
|
|
52
|
+
model_provider: Optional[str] = Field(None, description="Model provider name")
|
|
53
|
+
team_id: Optional[str] = Field(None, description="Team ID that was evaluated")
|
|
54
|
+
workflow_id: Optional[str] = Field(None, description="Workflow ID that was evaluated")
|
|
55
|
+
name: Optional[str] = Field(None, description="Name of the evaluation run")
|
|
56
|
+
evaluated_component_name: Optional[str] = Field(None, description="Name of the evaluated component")
|
|
57
|
+
eval_type: EvalType = Field(..., description="Type of evaluation (accuracy, performance, or reliability)")
|
|
58
|
+
eval_data: Dict[str, Any] = Field(..., description="Evaluation results and metrics")
|
|
59
|
+
eval_input: Optional[Dict[str, Any]] = Field(None, description="Input parameters used for the evaluation")
|
|
60
|
+
created_at: Optional[datetime] = Field(None, description="Timestamp when evaluation was created")
|
|
61
|
+
updated_at: Optional[datetime] = Field(None, description="Timestamp when evaluation was last updated")
|
|
52
62
|
|
|
53
63
|
@classmethod
|
|
54
64
|
def from_dict(cls, eval_run: Dict[str, Any]) -> "EvalSchema":
|
|
@@ -89,6 +99,28 @@ class EvalSchema(BaseModel):
|
|
|
89
99
|
eval_data=asdict(result),
|
|
90
100
|
)
|
|
91
101
|
|
|
102
|
+
@classmethod
|
|
103
|
+
def from_agent_as_judge_eval(
|
|
104
|
+
cls,
|
|
105
|
+
agent_as_judge_eval: AgentAsJudgeEval,
|
|
106
|
+
result: AgentAsJudgeResult,
|
|
107
|
+
model_id: Optional[str] = None,
|
|
108
|
+
model_provider: Optional[str] = None,
|
|
109
|
+
agent_id: Optional[str] = None,
|
|
110
|
+
team_id: Optional[str] = None,
|
|
111
|
+
) -> "EvalSchema":
|
|
112
|
+
return cls(
|
|
113
|
+
id=result.run_id,
|
|
114
|
+
name=agent_as_judge_eval.name,
|
|
115
|
+
agent_id=agent_id,
|
|
116
|
+
team_id=team_id,
|
|
117
|
+
workflow_id=None,
|
|
118
|
+
model_id=model_id,
|
|
119
|
+
model_provider=model_provider,
|
|
120
|
+
eval_type=EvalType.AGENT_AS_JUDGE,
|
|
121
|
+
eval_data=asdict(result),
|
|
122
|
+
)
|
|
123
|
+
|
|
92
124
|
@classmethod
|
|
93
125
|
def from_performance_eval(
|
|
94
126
|
cls,
|
|
@@ -135,8 +167,8 @@ class EvalSchema(BaseModel):
|
|
|
135
167
|
|
|
136
168
|
|
|
137
169
|
class DeleteEvalRunsRequest(BaseModel):
|
|
138
|
-
eval_run_ids: List[str]
|
|
170
|
+
eval_run_ids: List[str] = Field(..., description="List of evaluation run IDs to delete", min_length=1)
|
|
139
171
|
|
|
140
172
|
|
|
141
173
|
class UpdateEvalRunRequest(BaseModel):
|
|
142
|
-
name: str
|
|
174
|
+
name: str = Field(..., description="New name for the evaluation run", min_length=1, max_length=255)
|