agno 2.1.2__py3-none-any.whl → 2.3.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/agent/agent.py +5540 -2273
- agno/api/api.py +2 -0
- agno/api/os.py +1 -1
- agno/compression/__init__.py +3 -0
- agno/compression/manager.py +247 -0
- agno/culture/__init__.py +3 -0
- agno/culture/manager.py +956 -0
- agno/db/async_postgres/__init__.py +3 -0
- agno/db/base.py +689 -6
- agno/db/dynamo/dynamo.py +933 -37
- agno/db/dynamo/schemas.py +174 -10
- agno/db/dynamo/utils.py +63 -4
- agno/db/firestore/firestore.py +831 -9
- agno/db/firestore/schemas.py +51 -0
- agno/db/firestore/utils.py +102 -4
- agno/db/gcs_json/gcs_json_db.py +660 -12
- agno/db/gcs_json/utils.py +60 -26
- agno/db/in_memory/in_memory_db.py +287 -14
- agno/db/in_memory/utils.py +60 -2
- agno/db/json/json_db.py +590 -14
- agno/db/json/utils.py +60 -26
- agno/db/migrations/manager.py +199 -0
- agno/db/migrations/v1_to_v2.py +43 -13
- agno/db/migrations/versions/__init__.py +0 -0
- agno/db/migrations/versions/v2_3_0.py +938 -0
- agno/db/mongo/__init__.py +15 -1
- agno/db/mongo/async_mongo.py +2760 -0
- agno/db/mongo/mongo.py +879 -11
- agno/db/mongo/schemas.py +42 -0
- agno/db/mongo/utils.py +80 -8
- agno/db/mysql/__init__.py +2 -1
- agno/db/mysql/async_mysql.py +2912 -0
- agno/db/mysql/mysql.py +946 -68
- agno/db/mysql/schemas.py +72 -10
- agno/db/mysql/utils.py +198 -7
- agno/db/postgres/__init__.py +2 -1
- agno/db/postgres/async_postgres.py +2579 -0
- agno/db/postgres/postgres.py +942 -57
- agno/db/postgres/schemas.py +81 -18
- agno/db/postgres/utils.py +164 -2
- agno/db/redis/redis.py +671 -7
- agno/db/redis/schemas.py +50 -0
- agno/db/redis/utils.py +65 -7
- agno/db/schemas/__init__.py +2 -1
- agno/db/schemas/culture.py +120 -0
- agno/db/schemas/evals.py +1 -0
- agno/db/schemas/memory.py +17 -2
- agno/db/singlestore/schemas.py +63 -0
- agno/db/singlestore/singlestore.py +949 -83
- agno/db/singlestore/utils.py +60 -2
- agno/db/sqlite/__init__.py +2 -1
- agno/db/sqlite/async_sqlite.py +2911 -0
- agno/db/sqlite/schemas.py +62 -0
- agno/db/sqlite/sqlite.py +965 -46
- agno/db/sqlite/utils.py +169 -8
- agno/db/surrealdb/__init__.py +3 -0
- agno/db/surrealdb/metrics.py +292 -0
- agno/db/surrealdb/models.py +334 -0
- agno/db/surrealdb/queries.py +71 -0
- agno/db/surrealdb/surrealdb.py +1908 -0
- agno/db/surrealdb/utils.py +147 -0
- agno/db/utils.py +2 -0
- agno/eval/__init__.py +10 -0
- agno/eval/accuracy.py +75 -55
- agno/eval/agent_as_judge.py +861 -0
- agno/eval/base.py +29 -0
- agno/eval/performance.py +16 -7
- agno/eval/reliability.py +28 -16
- agno/eval/utils.py +35 -17
- agno/exceptions.py +27 -2
- agno/filters.py +354 -0
- agno/guardrails/prompt_injection.py +1 -0
- agno/hooks/__init__.py +3 -0
- agno/hooks/decorator.py +164 -0
- agno/integrations/discord/client.py +1 -1
- agno/knowledge/chunking/agentic.py +13 -10
- agno/knowledge/chunking/fixed.py +4 -1
- agno/knowledge/chunking/semantic.py +9 -4
- agno/knowledge/chunking/strategy.py +59 -15
- agno/knowledge/embedder/fastembed.py +1 -1
- agno/knowledge/embedder/nebius.py +1 -1
- agno/knowledge/embedder/ollama.py +8 -0
- agno/knowledge/embedder/openai.py +8 -8
- agno/knowledge/embedder/sentence_transformer.py +6 -2
- agno/knowledge/embedder/vllm.py +262 -0
- agno/knowledge/knowledge.py +1618 -318
- agno/knowledge/reader/base.py +6 -2
- agno/knowledge/reader/csv_reader.py +8 -10
- agno/knowledge/reader/docx_reader.py +5 -6
- agno/knowledge/reader/field_labeled_csv_reader.py +16 -20
- agno/knowledge/reader/json_reader.py +5 -4
- agno/knowledge/reader/markdown_reader.py +8 -8
- agno/knowledge/reader/pdf_reader.py +17 -19
- agno/knowledge/reader/pptx_reader.py +101 -0
- agno/knowledge/reader/reader_factory.py +32 -3
- agno/knowledge/reader/s3_reader.py +3 -3
- agno/knowledge/reader/tavily_reader.py +193 -0
- agno/knowledge/reader/text_reader.py +22 -10
- agno/knowledge/reader/web_search_reader.py +1 -48
- agno/knowledge/reader/website_reader.py +10 -10
- agno/knowledge/reader/wikipedia_reader.py +33 -1
- agno/knowledge/types.py +1 -0
- agno/knowledge/utils.py +72 -7
- agno/media.py +22 -6
- agno/memory/__init__.py +14 -1
- agno/memory/manager.py +544 -83
- agno/memory/strategies/__init__.py +15 -0
- agno/memory/strategies/base.py +66 -0
- agno/memory/strategies/summarize.py +196 -0
- agno/memory/strategies/types.py +37 -0
- agno/models/aimlapi/aimlapi.py +17 -0
- agno/models/anthropic/claude.py +515 -40
- agno/models/aws/bedrock.py +102 -21
- agno/models/aws/claude.py +131 -274
- agno/models/azure/ai_foundry.py +41 -19
- agno/models/azure/openai_chat.py +39 -8
- agno/models/base.py +1249 -525
- agno/models/cerebras/cerebras.py +91 -21
- agno/models/cerebras/cerebras_openai.py +21 -2
- agno/models/cohere/chat.py +40 -6
- agno/models/cometapi/cometapi.py +18 -1
- agno/models/dashscope/dashscope.py +2 -3
- agno/models/deepinfra/deepinfra.py +18 -1
- agno/models/deepseek/deepseek.py +69 -3
- agno/models/fireworks/fireworks.py +18 -1
- agno/models/google/gemini.py +877 -80
- agno/models/google/utils.py +22 -0
- agno/models/groq/groq.py +51 -18
- agno/models/huggingface/huggingface.py +17 -6
- agno/models/ibm/watsonx.py +16 -6
- agno/models/internlm/internlm.py +18 -1
- agno/models/langdb/langdb.py +13 -1
- agno/models/litellm/chat.py +44 -9
- agno/models/litellm/litellm_openai.py +18 -1
- agno/models/message.py +28 -5
- agno/models/meta/llama.py +47 -14
- agno/models/meta/llama_openai.py +22 -17
- agno/models/mistral/mistral.py +8 -4
- agno/models/nebius/nebius.py +6 -7
- agno/models/nvidia/nvidia.py +20 -3
- agno/models/ollama/chat.py +24 -8
- agno/models/openai/chat.py +104 -29
- agno/models/openai/responses.py +101 -81
- agno/models/openrouter/openrouter.py +60 -3
- agno/models/perplexity/perplexity.py +17 -1
- agno/models/portkey/portkey.py +7 -6
- agno/models/requesty/requesty.py +24 -4
- agno/models/response.py +73 -2
- agno/models/sambanova/sambanova.py +20 -3
- agno/models/siliconflow/siliconflow.py +19 -2
- agno/models/together/together.py +20 -3
- agno/models/utils.py +254 -8
- agno/models/vercel/v0.py +20 -3
- agno/models/vertexai/__init__.py +0 -0
- agno/models/vertexai/claude.py +190 -0
- agno/models/vllm/vllm.py +19 -14
- agno/models/xai/xai.py +19 -2
- agno/os/app.py +549 -152
- agno/os/auth.py +190 -3
- agno/os/config.py +23 -0
- agno/os/interfaces/a2a/router.py +8 -11
- agno/os/interfaces/a2a/utils.py +1 -1
- agno/os/interfaces/agui/router.py +18 -3
- agno/os/interfaces/agui/utils.py +152 -39
- agno/os/interfaces/slack/router.py +55 -37
- agno/os/interfaces/slack/slack.py +9 -1
- agno/os/interfaces/whatsapp/router.py +0 -1
- agno/os/interfaces/whatsapp/security.py +3 -1
- agno/os/mcp.py +110 -52
- agno/os/middleware/__init__.py +2 -0
- agno/os/middleware/jwt.py +676 -112
- agno/os/router.py +40 -1478
- agno/os/routers/agents/__init__.py +3 -0
- agno/os/routers/agents/router.py +599 -0
- agno/os/routers/agents/schema.py +261 -0
- agno/os/routers/evals/evals.py +96 -39
- agno/os/routers/evals/schemas.py +65 -33
- agno/os/routers/evals/utils.py +80 -10
- agno/os/routers/health.py +10 -4
- agno/os/routers/knowledge/knowledge.py +196 -38
- agno/os/routers/knowledge/schemas.py +82 -22
- agno/os/routers/memory/memory.py +279 -52
- agno/os/routers/memory/schemas.py +46 -17
- agno/os/routers/metrics/metrics.py +20 -8
- agno/os/routers/metrics/schemas.py +16 -16
- agno/os/routers/session/session.py +462 -34
- agno/os/routers/teams/__init__.py +3 -0
- agno/os/routers/teams/router.py +512 -0
- agno/os/routers/teams/schema.py +257 -0
- agno/os/routers/traces/__init__.py +3 -0
- agno/os/routers/traces/schemas.py +414 -0
- agno/os/routers/traces/traces.py +499 -0
- agno/os/routers/workflows/__init__.py +3 -0
- agno/os/routers/workflows/router.py +624 -0
- agno/os/routers/workflows/schema.py +75 -0
- agno/os/schema.py +256 -693
- agno/os/scopes.py +469 -0
- agno/os/utils.py +514 -36
- agno/reasoning/anthropic.py +80 -0
- agno/reasoning/gemini.py +73 -0
- agno/reasoning/openai.py +5 -0
- agno/reasoning/vertexai.py +76 -0
- agno/run/__init__.py +6 -0
- agno/run/agent.py +155 -32
- agno/run/base.py +55 -3
- agno/run/requirement.py +181 -0
- agno/run/team.py +125 -38
- agno/run/workflow.py +72 -18
- agno/session/agent.py +102 -89
- agno/session/summary.py +56 -15
- agno/session/team.py +164 -90
- agno/session/workflow.py +405 -40
- agno/table.py +10 -0
- agno/team/team.py +3974 -1903
- agno/tools/dalle.py +2 -4
- agno/tools/eleven_labs.py +23 -25
- agno/tools/exa.py +21 -16
- agno/tools/file.py +153 -23
- agno/tools/file_generation.py +16 -10
- agno/tools/firecrawl.py +15 -7
- agno/tools/function.py +193 -38
- agno/tools/gmail.py +238 -14
- agno/tools/google_drive.py +271 -0
- agno/tools/googlecalendar.py +36 -8
- agno/tools/googlesheets.py +20 -5
- agno/tools/jira.py +20 -0
- agno/tools/mcp/__init__.py +10 -0
- agno/tools/mcp/mcp.py +331 -0
- agno/tools/mcp/multi_mcp.py +347 -0
- agno/tools/mcp/params.py +24 -0
- agno/tools/mcp_toolbox.py +3 -3
- agno/tools/models/nebius.py +5 -5
- agno/tools/models_labs.py +20 -10
- agno/tools/nano_banana.py +151 -0
- agno/tools/notion.py +204 -0
- agno/tools/parallel.py +314 -0
- agno/tools/postgres.py +76 -36
- agno/tools/redshift.py +406 -0
- agno/tools/scrapegraph.py +1 -1
- agno/tools/shopify.py +1519 -0
- agno/tools/slack.py +18 -3
- agno/tools/spotify.py +919 -0
- agno/tools/tavily.py +146 -0
- agno/tools/toolkit.py +25 -0
- agno/tools/workflow.py +8 -1
- agno/tools/yfinance.py +12 -11
- agno/tracing/__init__.py +12 -0
- agno/tracing/exporter.py +157 -0
- agno/tracing/schemas.py +276 -0
- agno/tracing/setup.py +111 -0
- agno/utils/agent.py +938 -0
- agno/utils/cryptography.py +22 -0
- agno/utils/dttm.py +33 -0
- agno/utils/events.py +151 -3
- agno/utils/gemini.py +15 -5
- agno/utils/hooks.py +118 -4
- agno/utils/http.py +113 -2
- agno/utils/knowledge.py +12 -5
- agno/utils/log.py +1 -0
- agno/utils/mcp.py +92 -2
- agno/utils/media.py +187 -1
- agno/utils/merge_dict.py +3 -3
- agno/utils/message.py +60 -0
- agno/utils/models/ai_foundry.py +9 -2
- agno/utils/models/claude.py +49 -14
- agno/utils/models/cohere.py +9 -2
- agno/utils/models/llama.py +9 -2
- agno/utils/models/mistral.py +4 -2
- agno/utils/print_response/agent.py +109 -16
- agno/utils/print_response/team.py +223 -30
- agno/utils/print_response/workflow.py +251 -34
- agno/utils/streamlit.py +1 -1
- agno/utils/team.py +98 -9
- agno/utils/tokens.py +657 -0
- agno/vectordb/base.py +39 -7
- agno/vectordb/cassandra/cassandra.py +21 -5
- agno/vectordb/chroma/chromadb.py +43 -12
- agno/vectordb/clickhouse/clickhousedb.py +21 -5
- agno/vectordb/couchbase/couchbase.py +29 -5
- agno/vectordb/lancedb/lance_db.py +92 -181
- agno/vectordb/langchaindb/langchaindb.py +24 -4
- agno/vectordb/lightrag/lightrag.py +17 -3
- agno/vectordb/llamaindex/llamaindexdb.py +25 -5
- agno/vectordb/milvus/milvus.py +50 -37
- agno/vectordb/mongodb/__init__.py +7 -1
- agno/vectordb/mongodb/mongodb.py +36 -30
- agno/vectordb/pgvector/pgvector.py +201 -77
- agno/vectordb/pineconedb/pineconedb.py +41 -23
- agno/vectordb/qdrant/qdrant.py +67 -54
- agno/vectordb/redis/__init__.py +9 -0
- agno/vectordb/redis/redisdb.py +682 -0
- agno/vectordb/singlestore/singlestore.py +50 -29
- agno/vectordb/surrealdb/surrealdb.py +31 -41
- agno/vectordb/upstashdb/upstashdb.py +34 -6
- agno/vectordb/weaviate/weaviate.py +53 -14
- agno/workflow/__init__.py +2 -0
- agno/workflow/agent.py +299 -0
- agno/workflow/condition.py +120 -18
- agno/workflow/loop.py +77 -10
- agno/workflow/parallel.py +231 -143
- agno/workflow/router.py +118 -17
- agno/workflow/step.py +609 -170
- agno/workflow/steps.py +73 -6
- agno/workflow/types.py +96 -21
- agno/workflow/workflow.py +2039 -262
- {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/METADATA +201 -66
- agno-2.3.13.dist-info/RECORD +613 -0
- agno/tools/googlesearch.py +0 -98
- agno/tools/mcp.py +0 -679
- agno/tools/memori.py +0 -339
- agno-2.1.2.dist-info/RECORD +0 -543
- {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/WHEEL +0 -0
- {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/licenses/LICENSE +0 -0
- {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
import dataclasses
|
|
2
|
+
from typing import Any, Dict, Optional, Sequence, TypeVar, Union, cast
|
|
3
|
+
|
|
4
|
+
from surrealdb import BlockingHttpSurrealConnection, BlockingWsSurrealConnection, Surreal
|
|
5
|
+
|
|
6
|
+
from agno.db.schemas.culture import CulturalKnowledge
|
|
7
|
+
from agno.utils.log import logger
|
|
8
|
+
|
|
9
|
+
RecordType = TypeVar("RecordType")
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def build_client(
    url: str, creds: dict[str, str], ns: str, db: str
) -> Union[BlockingWsSurrealConnection, BlockingHttpSurrealConnection]:
    """Create a SurrealDB connection, sign in, and select a namespace/database.

    Args:
        url: Value passed to ``Surreal(url=...)``.
        creds: Credentials passed to ``signin``.
        ns: Namespace passed to ``use``.
        db: Database passed to ``use``.

    Returns:
        The connection object returned by ``Surreal``, after ``signin`` and
        ``use`` have been called on it.
    """
    connection = Surreal(url=url)
    # Sign in first, then select the namespace/database for later queries.
    connection.signin(creds)
    connection.use(namespace=ns, database=db)
    return connection
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _query_aux(
|
|
22
|
+
client: Union[BlockingWsSurrealConnection, BlockingHttpSurrealConnection],
|
|
23
|
+
query: str,
|
|
24
|
+
vars: dict[str, Any],
|
|
25
|
+
) -> Union[list, dict, str, int]:
|
|
26
|
+
try:
|
|
27
|
+
response = client.query(query, vars)
|
|
28
|
+
except Exception as e:
|
|
29
|
+
msg = f"!! Query execution error: {query} with {vars}, Error: {e}"
|
|
30
|
+
logger.error(msg)
|
|
31
|
+
raise RuntimeError(msg)
|
|
32
|
+
return response
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def query(
    client: Union[BlockingWsSurrealConnection, BlockingHttpSurrealConnection],
    query: str,
    vars: dict[str, Any],
    record_type: type[RecordType],
) -> Sequence[RecordType]:
    """Run a query and convert every returned item into ``record_type``.

    Conversion rules, applied per item of the list response:
      * if ``record_type`` is a dataclass exposing ``from_dict``, that
        constructor is used;
      * otherwise dict items are expanded as keyword arguments;
      * any other item is passed as a single positional argument.

    Args:
        client: Connection used to execute the query.
        query: Query text.
        vars: Variables for the query.
        record_type: Type each returned item is converted into.

    Returns:
        A list with one converted record per item of the response.

    Raises:
        ValueError: If the response is not a list.
        RuntimeError: Propagated from ``_query_aux`` when execution fails.
    """
    rows = _query_aux(client, query, vars)
    if not isinstance(rows, list):
        raise ValueError(f"Unexpected response type: {type(rows)}")

    if dataclasses.is_dataclass(record_type) and hasattr(record_type, "from_dict"):
        from_dict = getattr(record_type, "from_dict")
        return [from_dict.__call__(row) for row in rows]

    # ``record_type.__call__`` is kept exactly as the positional-construction
    # path for non-dict items.
    return [record_type(**row) if isinstance(row, dict) else record_type.__call__(row) for row in rows]
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def query_one(
    client: Union[BlockingWsSurrealConnection, BlockingHttpSurrealConnection],
    query: str,
    vars: dict[str, Any],
    record_type: type[RecordType],
) -> Optional[RecordType]:
    """Run a query expected to yield at most one record and convert it.

    Args:
        client: Connection used to execute the query.
        query: Query text.
        vars: Variables for the query.
        record_type: Type the returned record is converted into.

    Returns:
        ``None`` when the response is ``None`` or an empty list; otherwise the
        single record converted into ``record_type``. A dataclass exposing
        ``from_dict`` is built through it, a dict is expanded as keyword
        arguments (or returned as-is when ``record_type`` is ``dict`` and the
        response was a list), and a non-list, non-dict response is passed as
        one positional argument.

    Raises:
        ValueError: If a list response holds more than one item, or holds a
            single item that is not a dict.
        RuntimeError: Propagated from ``_query_aux`` when execution fails.
    """
    response = _query_aux(client, query, vars)
    if response is None:
        return None

    uses_from_dict = dataclasses.is_dataclass(record_type) and hasattr(record_type, "from_dict")

    if not isinstance(response, list):
        if uses_from_dict:
            return getattr(record_type, "from_dict").__call__(response)
        if isinstance(response, dict):
            return record_type(**response)
        return record_type.__call__(response)

    # Handle list responses - SurrealDB might return a list with a single element
    if not response:
        return None
    if len(response) > 1:
        raise ValueError(f"Expected single record, got {len(response)} records: {response}")

    result = response[0]
    if not isinstance(result, dict):
        # Previously reported as "Expected single record, got 1 records",
        # which contradicted itself; name the real problem instead.
        raise ValueError(f"Expected a dict record, got {type(result)}: {result}")
    if uses_from_dict:
        return getattr(record_type, "from_dict").__call__(result)
    if record_type is dict:
        return cast(RecordType, result)
    return record_type(**result)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
# -- Cultural Knowledge util methods --
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def serialize_cultural_knowledge_for_db(cultural_knowledge: CulturalKnowledge) -> Dict[str, Any]:
    """Pack the content-related fields of a CulturalKnowledge into one dict.

    The ``content``, ``categories`` and ``notes`` attributes are gathered, in
    that order, into the single dict stored in the database content field.
    Attributes whose value is ``None`` are left out.

    Args:
        cultural_knowledge (CulturalKnowledge): The object to serialize.

    Returns:
        Dict[str, Any]: The non-``None`` fields keyed by attribute name; an
        empty dict when all three are ``None``.
    """
    field_values = (
        (field_name, getattr(cultural_knowledge, field_name))
        for field_name in ("content", "categories", "notes")
    )
    return {field_name: value for field_name, value in field_values if value is not None}
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def deserialize_cultural_knowledge_from_db(db_row: Dict[str, Any]) -> CulturalKnowledge:
    """Build a CulturalKnowledge object from a database row.

    The row's ``content`` field holds a dict with the ``content``,
    ``categories`` and ``notes`` values; they are lifted back to top-level
    keys before the row is handed to ``CulturalKnowledge.from_dict``. A
    missing or falsy ``content`` field is treated as an empty dict, and any
    absent key becomes ``None``.

    Args:
        db_row (Dict[str, Any]): The database row as a dictionary.

    Returns:
        CulturalKnowledge: The object produced by ``CulturalKnowledge.from_dict``.
    """
    nested = db_row.get("content") or {}

    # Assemble the payload in the same key order the model fields are listed:
    # identity fields, the unpacked content fields, then the remaining columns.
    payload: Dict[str, Any] = {key: db_row.get(key) for key in ("id", "name", "summary")}
    payload.update((key, nested.get(key)) for key in ("content", "categories", "notes"))
    payload.update(
        (key, db_row.get(key))
        for key in ("metadata", "input", "created_at", "updated_at", "agent_id", "team_id")
    )
    return CulturalKnowledge.from_dict(payload)
|
agno/db/utils.py
CHANGED
agno/eval/__init__.py
CHANGED
|
@@ -1,4 +1,10 @@
|
|
|
1
1
|
from agno.eval.accuracy import AccuracyAgentResponse, AccuracyEval, AccuracyEvaluation, AccuracyResult
|
|
2
|
+
from agno.eval.agent_as_judge import (
|
|
3
|
+
AgentAsJudgeEval,
|
|
4
|
+
AgentAsJudgeEvaluation,
|
|
5
|
+
AgentAsJudgeResult,
|
|
6
|
+
)
|
|
7
|
+
from agno.eval.base import BaseEval
|
|
2
8
|
from agno.eval.performance import PerformanceEval, PerformanceResult
|
|
3
9
|
from agno.eval.reliability import ReliabilityEval, ReliabilityResult
|
|
4
10
|
|
|
@@ -7,6 +13,10 @@ __all__ = [
|
|
|
7
13
|
"AccuracyEvaluation",
|
|
8
14
|
"AccuracyResult",
|
|
9
15
|
"AccuracyEval",
|
|
16
|
+
"AgentAsJudgeEval",
|
|
17
|
+
"AgentAsJudgeEvaluation",
|
|
18
|
+
"AgentAsJudgeResult",
|
|
19
|
+
"BaseEval",
|
|
10
20
|
"PerformanceEval",
|
|
11
21
|
"PerformanceResult",
|
|
12
22
|
"ReliabilityEval",
|
agno/eval/accuracy.py
CHANGED
|
@@ -7,13 +7,13 @@ from uuid import uuid4
|
|
|
7
7
|
from pydantic import BaseModel, Field
|
|
8
8
|
|
|
9
9
|
from agno.agent import Agent
|
|
10
|
-
from agno.db.base import BaseDb
|
|
10
|
+
from agno.db.base import AsyncBaseDb, BaseDb
|
|
11
11
|
from agno.db.schemas.evals import EvalType
|
|
12
12
|
from agno.eval.utils import async_log_eval, log_eval_run, store_result_in_file
|
|
13
13
|
from agno.exceptions import EvalError
|
|
14
14
|
from agno.models.base import Model
|
|
15
15
|
from agno.team.team import Team
|
|
16
|
-
from agno.utils.log import logger, set_log_level_to_debug, set_log_level_to_info
|
|
16
|
+
from agno.utils.log import log_error, logger, set_log_level_to_debug, set_log_level_to_info
|
|
17
17
|
|
|
18
18
|
if TYPE_CHECKING:
|
|
19
19
|
from rich.console import Console
|
|
@@ -176,7 +176,7 @@ class AccuracyEval:
|
|
|
176
176
|
# Enable debug logs
|
|
177
177
|
debug_mode: bool = getenv("AGNO_DEBUG", "false").lower() == "true"
|
|
178
178
|
# The database to store Evaluation results
|
|
179
|
-
db: Optional[BaseDb] = None
|
|
179
|
+
db: Optional[Union[BaseDb, AsyncBaseDb]] = None
|
|
180
180
|
|
|
181
181
|
# Telemetry settings
|
|
182
182
|
# telemetry=True logs minimal telemetry for analytics
|
|
@@ -282,7 +282,8 @@ Remember: You must only compare the agent_output to the expected_output. The exp
|
|
|
282
282
|
) -> Optional[AccuracyEvaluation]:
|
|
283
283
|
"""Orchestrate the evaluation process."""
|
|
284
284
|
try:
|
|
285
|
-
|
|
285
|
+
response = evaluator_agent.run(evaluation_input, stream=False)
|
|
286
|
+
accuracy_agent_response = response.content
|
|
286
287
|
if accuracy_agent_response is None or not isinstance(accuracy_agent_response, AccuracyAgentResponse):
|
|
287
288
|
raise EvalError(f"Evaluator Agent returned an invalid response: {accuracy_agent_response}")
|
|
288
289
|
return AccuracyEvaluation(
|
|
@@ -306,7 +307,7 @@ Remember: You must only compare the agent_output to the expected_output. The exp
|
|
|
306
307
|
) -> Optional[AccuracyEvaluation]:
|
|
307
308
|
"""Orchestrate the evaluation process asynchronously."""
|
|
308
309
|
try:
|
|
309
|
-
response = await evaluator_agent.arun(evaluation_input)
|
|
310
|
+
response = await evaluator_agent.arun(evaluation_input, stream=False)
|
|
310
311
|
accuracy_agent_response = response.content
|
|
311
312
|
if accuracy_agent_response is None or not isinstance(accuracy_agent_response, AccuracyAgentResponse):
|
|
312
313
|
raise EvalError(f"Evaluator Agent returned an invalid response: {accuracy_agent_response}")
|
|
@@ -327,6 +328,9 @@ Remember: You must only compare the agent_output to the expected_output. The exp
|
|
|
327
328
|
print_summary: bool = True,
|
|
328
329
|
print_results: bool = True,
|
|
329
330
|
) -> Optional[AccuracyResult]:
|
|
331
|
+
if isinstance(self.db, AsyncBaseDb):
|
|
332
|
+
raise ValueError("run() is not supported with an async DB. Please use arun() instead.")
|
|
333
|
+
|
|
330
334
|
if self.agent is None and self.team is None:
|
|
331
335
|
logger.error("You need to provide one of 'agent' or 'team' to run the evaluation.")
|
|
332
336
|
return None
|
|
@@ -356,10 +360,14 @@ Remember: You must only compare the agent_output to the expected_output. The exp
|
|
|
356
360
|
status = Status(f"Running evaluation {i + 1}...", spinner="dots", speed=1.0, refresh_per_second=10)
|
|
357
361
|
live_log.update(status)
|
|
358
362
|
|
|
363
|
+
agent_session_id = f"eval_{self.eval_id}_{i + 1}"
|
|
364
|
+
|
|
359
365
|
if self.agent is not None:
|
|
360
|
-
|
|
366
|
+
agent_response = self.agent.run(input=eval_input, session_id=agent_session_id, stream=False)
|
|
367
|
+
output = agent_response.content
|
|
361
368
|
elif self.team is not None:
|
|
362
|
-
|
|
369
|
+
team_response = self.team.run(input=eval_input, session_id=agent_session_id, stream=False)
|
|
370
|
+
output = team_response.content
|
|
363
371
|
|
|
364
372
|
if not output:
|
|
365
373
|
logger.error(f"Failed to generate a valid answer on iteration {i + 1}: {output}")
|
|
@@ -497,12 +505,14 @@ Remember: You must only compare the agent_output to the expected_output. The exp
|
|
|
497
505
|
status = Status(f"Running evaluation {i + 1}...", spinner="dots", speed=1.0, refresh_per_second=10)
|
|
498
506
|
live_log.update(status)
|
|
499
507
|
|
|
508
|
+
agent_session_id = f"eval_{self.eval_id}_{i + 1}"
|
|
509
|
+
|
|
500
510
|
if self.agent is not None:
|
|
501
|
-
|
|
502
|
-
output =
|
|
511
|
+
agent_response = await self.agent.arun(input=eval_input, session_id=agent_session_id, stream=False)
|
|
512
|
+
output = agent_response.content
|
|
503
513
|
elif self.team is not None:
|
|
504
|
-
|
|
505
|
-
output =
|
|
514
|
+
team_response = await self.team.arun(input=eval_input, session_id=agent_session_id, stream=False)
|
|
515
|
+
output = team_response.content
|
|
506
516
|
|
|
507
517
|
if not output:
|
|
508
518
|
logger.error(f"Failed to generate a valid answer on iteration {i + 1}: {output}")
|
|
@@ -609,11 +619,14 @@ Remember: You must only compare the agent_output to the expected_output. The exp
|
|
|
609
619
|
print_results: bool = True,
|
|
610
620
|
) -> Optional[AccuracyResult]:
|
|
611
621
|
"""Run the evaluation logic against the given answer, instead of generating an answer with the Agent"""
|
|
622
|
+
# Generate unique run_id for this execution (don't modify self.eval_id due to concurrency)
|
|
623
|
+
run_id = str(uuid4())
|
|
624
|
+
|
|
612
625
|
set_log_level_to_debug() if self.debug_mode else set_log_level_to_info()
|
|
613
626
|
|
|
614
627
|
self.result = AccuracyResult()
|
|
615
628
|
|
|
616
|
-
logger.debug(f"************ Evaluation Start: {
|
|
629
|
+
logger.debug(f"************ Evaluation Start: {run_id} ************")
|
|
617
630
|
|
|
618
631
|
evaluator_agent = self.get_evaluator_agent()
|
|
619
632
|
eval_input = self.get_eval_input()
|
|
@@ -661,47 +674,51 @@ Remember: You must only compare the agent_output to the expected_output. The exp
|
|
|
661
674
|
)
|
|
662
675
|
# Log results to the Agno DB if requested
|
|
663
676
|
if self.db:
|
|
664
|
-
if self.
|
|
665
|
-
|
|
666
|
-
team_id = None
|
|
667
|
-
model_id = self.agent.model.id if self.agent.model is not None else None
|
|
668
|
-
model_provider = self.agent.model.provider if self.agent.model is not None else None
|
|
669
|
-
evaluated_component_name = self.agent.name
|
|
670
|
-
elif self.team is not None:
|
|
671
|
-
agent_id = None
|
|
672
|
-
team_id = self.team.id
|
|
673
|
-
model_id = self.team.model.id if self.team.model is not None else None
|
|
674
|
-
model_provider = self.team.model.provider if self.team.model is not None else None
|
|
675
|
-
evaluated_component_name = self.team.name
|
|
676
|
-
else:
|
|
677
|
-
agent_id = None
|
|
678
|
-
team_id = None
|
|
679
|
-
model_id = None
|
|
680
|
-
model_provider = None
|
|
681
|
-
evaluated_component_name = None
|
|
682
|
-
|
|
683
|
-
log_eval_input = {
|
|
684
|
-
"additional_guidelines": self.additional_guidelines,
|
|
685
|
-
"additional_context": self.additional_context,
|
|
686
|
-
"num_iterations": self.num_iterations,
|
|
687
|
-
"expected_output": self.expected_output,
|
|
688
|
-
"input": self.input,
|
|
689
|
-
}
|
|
677
|
+
if isinstance(self.db, AsyncBaseDb):
|
|
678
|
+
log_error("You are using an async DB in a non-async method. The evaluation won't be stored in the DB.")
|
|
690
679
|
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
680
|
+
else:
|
|
681
|
+
if self.agent is not None:
|
|
682
|
+
agent_id = self.agent.id
|
|
683
|
+
team_id = None
|
|
684
|
+
model_id = self.agent.model.id if self.agent.model is not None else None
|
|
685
|
+
model_provider = self.agent.model.provider if self.agent.model is not None else None
|
|
686
|
+
evaluated_component_name = self.agent.name
|
|
687
|
+
elif self.team is not None:
|
|
688
|
+
agent_id = None
|
|
689
|
+
team_id = self.team.id
|
|
690
|
+
model_id = self.team.model.id if self.team.model is not None else None
|
|
691
|
+
model_provider = self.team.model.provider if self.team.model is not None else None
|
|
692
|
+
evaluated_component_name = self.team.name
|
|
693
|
+
else:
|
|
694
|
+
agent_id = None
|
|
695
|
+
team_id = None
|
|
696
|
+
model_id = None
|
|
697
|
+
model_provider = None
|
|
698
|
+
evaluated_component_name = None
|
|
699
|
+
|
|
700
|
+
log_eval_input = {
|
|
701
|
+
"additional_guidelines": self.additional_guidelines,
|
|
702
|
+
"additional_context": self.additional_context,
|
|
703
|
+
"num_iterations": self.num_iterations,
|
|
704
|
+
"expected_output": self.expected_output,
|
|
705
|
+
"input": self.input,
|
|
706
|
+
}
|
|
707
|
+
|
|
708
|
+
log_eval_run(
|
|
709
|
+
db=self.db,
|
|
710
|
+
run_id=self.eval_id, # type: ignore
|
|
711
|
+
run_data=asdict(self.result),
|
|
712
|
+
eval_type=EvalType.ACCURACY,
|
|
713
|
+
name=self.name if self.name is not None else None,
|
|
714
|
+
agent_id=agent_id,
|
|
715
|
+
team_id=team_id,
|
|
716
|
+
model_id=model_id,
|
|
717
|
+
model_provider=model_provider,
|
|
718
|
+
evaluated_component_name=evaluated_component_name,
|
|
719
|
+
workflow_id=None,
|
|
720
|
+
eval_input=log_eval_input,
|
|
721
|
+
)
|
|
705
722
|
|
|
706
723
|
if self.telemetry:
|
|
707
724
|
from agno.api.evals import EvalRunCreate, create_eval_run_telemetry
|
|
@@ -714,7 +731,7 @@ Remember: You must only compare the agent_output to the expected_output. The exp
|
|
|
714
731
|
),
|
|
715
732
|
)
|
|
716
733
|
|
|
717
|
-
logger.debug(f"*********** Evaluation End: {
|
|
734
|
+
logger.debug(f"*********** Evaluation End: {run_id} ***********")
|
|
718
735
|
return self.result
|
|
719
736
|
|
|
720
737
|
async def arun_with_output(
|
|
@@ -725,11 +742,14 @@ Remember: You must only compare the agent_output to the expected_output. The exp
|
|
|
725
742
|
print_results: bool = True,
|
|
726
743
|
) -> Optional[AccuracyResult]:
|
|
727
744
|
"""Run the evaluation logic against the given answer, instead of generating an answer with the Agent"""
|
|
745
|
+
# Generate unique run_id for this execution (don't modify self.eval_id due to concurrency)
|
|
746
|
+
run_id = str(uuid4())
|
|
747
|
+
|
|
728
748
|
set_log_level_to_debug() if self.debug_mode else set_log_level_to_info()
|
|
729
749
|
|
|
730
750
|
self.result = AccuracyResult()
|
|
731
751
|
|
|
732
|
-
logger.debug(f"************ Evaluation Start: {
|
|
752
|
+
logger.debug(f"************ Evaluation Start: {run_id} ************")
|
|
733
753
|
|
|
734
754
|
evaluator_agent = self.get_evaluator_agent()
|
|
735
755
|
eval_input = self.get_eval_input()
|
|
@@ -813,7 +833,7 @@ Remember: You must only compare the agent_output to the expected_output. The exp
|
|
|
813
833
|
eval_input=log_eval_input,
|
|
814
834
|
)
|
|
815
835
|
|
|
816
|
-
logger.debug(f"*********** Evaluation End: {
|
|
836
|
+
logger.debug(f"*********** Evaluation End: {run_id} ***********")
|
|
817
837
|
return self.result
|
|
818
838
|
|
|
819
839
|
def _get_telemetry_data(self) -> Dict[str, Any]:
|