agno 2.1.2__py3-none-any.whl → 2.3.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/agent/agent.py +5540 -2273
- agno/api/api.py +2 -0
- agno/api/os.py +1 -1
- agno/compression/__init__.py +3 -0
- agno/compression/manager.py +247 -0
- agno/culture/__init__.py +3 -0
- agno/culture/manager.py +956 -0
- agno/db/async_postgres/__init__.py +3 -0
- agno/db/base.py +689 -6
- agno/db/dynamo/dynamo.py +933 -37
- agno/db/dynamo/schemas.py +174 -10
- agno/db/dynamo/utils.py +63 -4
- agno/db/firestore/firestore.py +831 -9
- agno/db/firestore/schemas.py +51 -0
- agno/db/firestore/utils.py +102 -4
- agno/db/gcs_json/gcs_json_db.py +660 -12
- agno/db/gcs_json/utils.py +60 -26
- agno/db/in_memory/in_memory_db.py +287 -14
- agno/db/in_memory/utils.py +60 -2
- agno/db/json/json_db.py +590 -14
- agno/db/json/utils.py +60 -26
- agno/db/migrations/manager.py +199 -0
- agno/db/migrations/v1_to_v2.py +43 -13
- agno/db/migrations/versions/__init__.py +0 -0
- agno/db/migrations/versions/v2_3_0.py +938 -0
- agno/db/mongo/__init__.py +15 -1
- agno/db/mongo/async_mongo.py +2760 -0
- agno/db/mongo/mongo.py +879 -11
- agno/db/mongo/schemas.py +42 -0
- agno/db/mongo/utils.py +80 -8
- agno/db/mysql/__init__.py +2 -1
- agno/db/mysql/async_mysql.py +2912 -0
- agno/db/mysql/mysql.py +946 -68
- agno/db/mysql/schemas.py +72 -10
- agno/db/mysql/utils.py +198 -7
- agno/db/postgres/__init__.py +2 -1
- agno/db/postgres/async_postgres.py +2579 -0
- agno/db/postgres/postgres.py +942 -57
- agno/db/postgres/schemas.py +81 -18
- agno/db/postgres/utils.py +164 -2
- agno/db/redis/redis.py +671 -7
- agno/db/redis/schemas.py +50 -0
- agno/db/redis/utils.py +65 -7
- agno/db/schemas/__init__.py +2 -1
- agno/db/schemas/culture.py +120 -0
- agno/db/schemas/evals.py +1 -0
- agno/db/schemas/memory.py +17 -2
- agno/db/singlestore/schemas.py +63 -0
- agno/db/singlestore/singlestore.py +949 -83
- agno/db/singlestore/utils.py +60 -2
- agno/db/sqlite/__init__.py +2 -1
- agno/db/sqlite/async_sqlite.py +2911 -0
- agno/db/sqlite/schemas.py +62 -0
- agno/db/sqlite/sqlite.py +965 -46
- agno/db/sqlite/utils.py +169 -8
- agno/db/surrealdb/__init__.py +3 -0
- agno/db/surrealdb/metrics.py +292 -0
- agno/db/surrealdb/models.py +334 -0
- agno/db/surrealdb/queries.py +71 -0
- agno/db/surrealdb/surrealdb.py +1908 -0
- agno/db/surrealdb/utils.py +147 -0
- agno/db/utils.py +2 -0
- agno/eval/__init__.py +10 -0
- agno/eval/accuracy.py +75 -55
- agno/eval/agent_as_judge.py +861 -0
- agno/eval/base.py +29 -0
- agno/eval/performance.py +16 -7
- agno/eval/reliability.py +28 -16
- agno/eval/utils.py +35 -17
- agno/exceptions.py +27 -2
- agno/filters.py +354 -0
- agno/guardrails/prompt_injection.py +1 -0
- agno/hooks/__init__.py +3 -0
- agno/hooks/decorator.py +164 -0
- agno/integrations/discord/client.py +1 -1
- agno/knowledge/chunking/agentic.py +13 -10
- agno/knowledge/chunking/fixed.py +4 -1
- agno/knowledge/chunking/semantic.py +9 -4
- agno/knowledge/chunking/strategy.py +59 -15
- agno/knowledge/embedder/fastembed.py +1 -1
- agno/knowledge/embedder/nebius.py +1 -1
- agno/knowledge/embedder/ollama.py +8 -0
- agno/knowledge/embedder/openai.py +8 -8
- agno/knowledge/embedder/sentence_transformer.py +6 -2
- agno/knowledge/embedder/vllm.py +262 -0
- agno/knowledge/knowledge.py +1618 -318
- agno/knowledge/reader/base.py +6 -2
- agno/knowledge/reader/csv_reader.py +8 -10
- agno/knowledge/reader/docx_reader.py +5 -6
- agno/knowledge/reader/field_labeled_csv_reader.py +16 -20
- agno/knowledge/reader/json_reader.py +5 -4
- agno/knowledge/reader/markdown_reader.py +8 -8
- agno/knowledge/reader/pdf_reader.py +17 -19
- agno/knowledge/reader/pptx_reader.py +101 -0
- agno/knowledge/reader/reader_factory.py +32 -3
- agno/knowledge/reader/s3_reader.py +3 -3
- agno/knowledge/reader/tavily_reader.py +193 -0
- agno/knowledge/reader/text_reader.py +22 -10
- agno/knowledge/reader/web_search_reader.py +1 -48
- agno/knowledge/reader/website_reader.py +10 -10
- agno/knowledge/reader/wikipedia_reader.py +33 -1
- agno/knowledge/types.py +1 -0
- agno/knowledge/utils.py +72 -7
- agno/media.py +22 -6
- agno/memory/__init__.py +14 -1
- agno/memory/manager.py +544 -83
- agno/memory/strategies/__init__.py +15 -0
- agno/memory/strategies/base.py +66 -0
- agno/memory/strategies/summarize.py +196 -0
- agno/memory/strategies/types.py +37 -0
- agno/models/aimlapi/aimlapi.py +17 -0
- agno/models/anthropic/claude.py +515 -40
- agno/models/aws/bedrock.py +102 -21
- agno/models/aws/claude.py +131 -274
- agno/models/azure/ai_foundry.py +41 -19
- agno/models/azure/openai_chat.py +39 -8
- agno/models/base.py +1249 -525
- agno/models/cerebras/cerebras.py +91 -21
- agno/models/cerebras/cerebras_openai.py +21 -2
- agno/models/cohere/chat.py +40 -6
- agno/models/cometapi/cometapi.py +18 -1
- agno/models/dashscope/dashscope.py +2 -3
- agno/models/deepinfra/deepinfra.py +18 -1
- agno/models/deepseek/deepseek.py +69 -3
- agno/models/fireworks/fireworks.py +18 -1
- agno/models/google/gemini.py +877 -80
- agno/models/google/utils.py +22 -0
- agno/models/groq/groq.py +51 -18
- agno/models/huggingface/huggingface.py +17 -6
- agno/models/ibm/watsonx.py +16 -6
- agno/models/internlm/internlm.py +18 -1
- agno/models/langdb/langdb.py +13 -1
- agno/models/litellm/chat.py +44 -9
- agno/models/litellm/litellm_openai.py +18 -1
- agno/models/message.py +28 -5
- agno/models/meta/llama.py +47 -14
- agno/models/meta/llama_openai.py +22 -17
- agno/models/mistral/mistral.py +8 -4
- agno/models/nebius/nebius.py +6 -7
- agno/models/nvidia/nvidia.py +20 -3
- agno/models/ollama/chat.py +24 -8
- agno/models/openai/chat.py +104 -29
- agno/models/openai/responses.py +101 -81
- agno/models/openrouter/openrouter.py +60 -3
- agno/models/perplexity/perplexity.py +17 -1
- agno/models/portkey/portkey.py +7 -6
- agno/models/requesty/requesty.py +24 -4
- agno/models/response.py +73 -2
- agno/models/sambanova/sambanova.py +20 -3
- agno/models/siliconflow/siliconflow.py +19 -2
- agno/models/together/together.py +20 -3
- agno/models/utils.py +254 -8
- agno/models/vercel/v0.py +20 -3
- agno/models/vertexai/__init__.py +0 -0
- agno/models/vertexai/claude.py +190 -0
- agno/models/vllm/vllm.py +19 -14
- agno/models/xai/xai.py +19 -2
- agno/os/app.py +549 -152
- agno/os/auth.py +190 -3
- agno/os/config.py +23 -0
- agno/os/interfaces/a2a/router.py +8 -11
- agno/os/interfaces/a2a/utils.py +1 -1
- agno/os/interfaces/agui/router.py +18 -3
- agno/os/interfaces/agui/utils.py +152 -39
- agno/os/interfaces/slack/router.py +55 -37
- agno/os/interfaces/slack/slack.py +9 -1
- agno/os/interfaces/whatsapp/router.py +0 -1
- agno/os/interfaces/whatsapp/security.py +3 -1
- agno/os/mcp.py +110 -52
- agno/os/middleware/__init__.py +2 -0
- agno/os/middleware/jwt.py +676 -112
- agno/os/router.py +40 -1478
- agno/os/routers/agents/__init__.py +3 -0
- agno/os/routers/agents/router.py +599 -0
- agno/os/routers/agents/schema.py +261 -0
- agno/os/routers/evals/evals.py +96 -39
- agno/os/routers/evals/schemas.py +65 -33
- agno/os/routers/evals/utils.py +80 -10
- agno/os/routers/health.py +10 -4
- agno/os/routers/knowledge/knowledge.py +196 -38
- agno/os/routers/knowledge/schemas.py +82 -22
- agno/os/routers/memory/memory.py +279 -52
- agno/os/routers/memory/schemas.py +46 -17
- agno/os/routers/metrics/metrics.py +20 -8
- agno/os/routers/metrics/schemas.py +16 -16
- agno/os/routers/session/session.py +462 -34
- agno/os/routers/teams/__init__.py +3 -0
- agno/os/routers/teams/router.py +512 -0
- agno/os/routers/teams/schema.py +257 -0
- agno/os/routers/traces/__init__.py +3 -0
- agno/os/routers/traces/schemas.py +414 -0
- agno/os/routers/traces/traces.py +499 -0
- agno/os/routers/workflows/__init__.py +3 -0
- agno/os/routers/workflows/router.py +624 -0
- agno/os/routers/workflows/schema.py +75 -0
- agno/os/schema.py +256 -693
- agno/os/scopes.py +469 -0
- agno/os/utils.py +514 -36
- agno/reasoning/anthropic.py +80 -0
- agno/reasoning/gemini.py +73 -0
- agno/reasoning/openai.py +5 -0
- agno/reasoning/vertexai.py +76 -0
- agno/run/__init__.py +6 -0
- agno/run/agent.py +155 -32
- agno/run/base.py +55 -3
- agno/run/requirement.py +181 -0
- agno/run/team.py +125 -38
- agno/run/workflow.py +72 -18
- agno/session/agent.py +102 -89
- agno/session/summary.py +56 -15
- agno/session/team.py +164 -90
- agno/session/workflow.py +405 -40
- agno/table.py +10 -0
- agno/team/team.py +3974 -1903
- agno/tools/dalle.py +2 -4
- agno/tools/eleven_labs.py +23 -25
- agno/tools/exa.py +21 -16
- agno/tools/file.py +153 -23
- agno/tools/file_generation.py +16 -10
- agno/tools/firecrawl.py +15 -7
- agno/tools/function.py +193 -38
- agno/tools/gmail.py +238 -14
- agno/tools/google_drive.py +271 -0
- agno/tools/googlecalendar.py +36 -8
- agno/tools/googlesheets.py +20 -5
- agno/tools/jira.py +20 -0
- agno/tools/mcp/__init__.py +10 -0
- agno/tools/mcp/mcp.py +331 -0
- agno/tools/mcp/multi_mcp.py +347 -0
- agno/tools/mcp/params.py +24 -0
- agno/tools/mcp_toolbox.py +3 -3
- agno/tools/models/nebius.py +5 -5
- agno/tools/models_labs.py +20 -10
- agno/tools/nano_banana.py +151 -0
- agno/tools/notion.py +204 -0
- agno/tools/parallel.py +314 -0
- agno/tools/postgres.py +76 -36
- agno/tools/redshift.py +406 -0
- agno/tools/scrapegraph.py +1 -1
- agno/tools/shopify.py +1519 -0
- agno/tools/slack.py +18 -3
- agno/tools/spotify.py +919 -0
- agno/tools/tavily.py +146 -0
- agno/tools/toolkit.py +25 -0
- agno/tools/workflow.py +8 -1
- agno/tools/yfinance.py +12 -11
- agno/tracing/__init__.py +12 -0
- agno/tracing/exporter.py +157 -0
- agno/tracing/schemas.py +276 -0
- agno/tracing/setup.py +111 -0
- agno/utils/agent.py +938 -0
- agno/utils/cryptography.py +22 -0
- agno/utils/dttm.py +33 -0
- agno/utils/events.py +151 -3
- agno/utils/gemini.py +15 -5
- agno/utils/hooks.py +118 -4
- agno/utils/http.py +113 -2
- agno/utils/knowledge.py +12 -5
- agno/utils/log.py +1 -0
- agno/utils/mcp.py +92 -2
- agno/utils/media.py +187 -1
- agno/utils/merge_dict.py +3 -3
- agno/utils/message.py +60 -0
- agno/utils/models/ai_foundry.py +9 -2
- agno/utils/models/claude.py +49 -14
- agno/utils/models/cohere.py +9 -2
- agno/utils/models/llama.py +9 -2
- agno/utils/models/mistral.py +4 -2
- agno/utils/print_response/agent.py +109 -16
- agno/utils/print_response/team.py +223 -30
- agno/utils/print_response/workflow.py +251 -34
- agno/utils/streamlit.py +1 -1
- agno/utils/team.py +98 -9
- agno/utils/tokens.py +657 -0
- agno/vectordb/base.py +39 -7
- agno/vectordb/cassandra/cassandra.py +21 -5
- agno/vectordb/chroma/chromadb.py +43 -12
- agno/vectordb/clickhouse/clickhousedb.py +21 -5
- agno/vectordb/couchbase/couchbase.py +29 -5
- agno/vectordb/lancedb/lance_db.py +92 -181
- agno/vectordb/langchaindb/langchaindb.py +24 -4
- agno/vectordb/lightrag/lightrag.py +17 -3
- agno/vectordb/llamaindex/llamaindexdb.py +25 -5
- agno/vectordb/milvus/milvus.py +50 -37
- agno/vectordb/mongodb/__init__.py +7 -1
- agno/vectordb/mongodb/mongodb.py +36 -30
- agno/vectordb/pgvector/pgvector.py +201 -77
- agno/vectordb/pineconedb/pineconedb.py +41 -23
- agno/vectordb/qdrant/qdrant.py +67 -54
- agno/vectordb/redis/__init__.py +9 -0
- agno/vectordb/redis/redisdb.py +682 -0
- agno/vectordb/singlestore/singlestore.py +50 -29
- agno/vectordb/surrealdb/surrealdb.py +31 -41
- agno/vectordb/upstashdb/upstashdb.py +34 -6
- agno/vectordb/weaviate/weaviate.py +53 -14
- agno/workflow/__init__.py +2 -0
- agno/workflow/agent.py +299 -0
- agno/workflow/condition.py +120 -18
- agno/workflow/loop.py +77 -10
- agno/workflow/parallel.py +231 -143
- agno/workflow/router.py +118 -17
- agno/workflow/step.py +609 -170
- agno/workflow/steps.py +73 -6
- agno/workflow/types.py +96 -21
- agno/workflow/workflow.py +2039 -262
- {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/METADATA +201 -66
- agno-2.3.13.dist-info/RECORD +613 -0
- agno/tools/googlesearch.py +0 -98
- agno/tools/mcp.py +0 -679
- agno/tools/memori.py +0 -339
- agno-2.1.2.dist-info/RECORD +0 -543
- {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/WHEEL +0 -0
- {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/licenses/LICENSE +0 -0
- {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/top_level.txt +0 -0
agno/eval/base.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
from typing import Union
|
|
3
|
+
|
|
4
|
+
from agno.run.agent import RunInput, RunOutput
|
|
5
|
+
from agno.run.team import TeamRunInput, TeamRunOutput
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class BaseEval(ABC):
|
|
9
|
+
"""Abstract base class for all evaluations."""
|
|
10
|
+
|
|
11
|
+
@abstractmethod
|
|
12
|
+
def pre_check(self, run_input: Union[RunInput, TeamRunInput]) -> None:
|
|
13
|
+
"""Perform sync pre-evals."""
|
|
14
|
+
pass
|
|
15
|
+
|
|
16
|
+
@abstractmethod
|
|
17
|
+
async def async_pre_check(self, run_input: Union[RunInput, TeamRunInput]) -> None:
|
|
18
|
+
"""Perform async pre-evals."""
|
|
19
|
+
pass
|
|
20
|
+
|
|
21
|
+
@abstractmethod
|
|
22
|
+
def post_check(self, run_output: Union[RunOutput, TeamRunOutput]) -> None:
|
|
23
|
+
"""Perform sync post-evals."""
|
|
24
|
+
pass
|
|
25
|
+
|
|
26
|
+
@abstractmethod
|
|
27
|
+
async def async_post_check(self, run_output: Union[RunOutput, TeamRunOutput]) -> None:
|
|
28
|
+
"""Perform async post-evals."""
|
|
29
|
+
pass
|
agno/eval/performance.py
CHANGED
|
@@ -3,10 +3,10 @@ import gc
|
|
|
3
3
|
import tracemalloc
|
|
4
4
|
from dataclasses import dataclass, field
|
|
5
5
|
from os import getenv
|
|
6
|
-
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional
|
|
6
|
+
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Union
|
|
7
7
|
from uuid import uuid4
|
|
8
8
|
|
|
9
|
-
from agno.db.base import BaseDb
|
|
9
|
+
from agno.db.base import AsyncBaseDb, BaseDb
|
|
10
10
|
from agno.db.schemas.evals import EvalType
|
|
11
11
|
from agno.eval.utils import async_log_eval, log_eval_run, store_result_in_file
|
|
12
12
|
from agno.utils.log import log_debug, set_log_level_to_debug, set_log_level_to_info
|
|
@@ -222,7 +222,7 @@ class PerformanceEval:
|
|
|
222
222
|
# Enable debug logs
|
|
223
223
|
debug_mode: bool = getenv("AGNO_DEBUG", "false").lower() == "true"
|
|
224
224
|
# The database to store Evaluation results
|
|
225
|
-
db: Optional[BaseDb] = None
|
|
225
|
+
db: Optional[Union[BaseDb, AsyncBaseDb]] = None
|
|
226
226
|
|
|
227
227
|
# Telemetry settings
|
|
228
228
|
# telemetry=True logs minimal telemetry for analytics
|
|
@@ -491,17 +491,23 @@ class PerformanceEval:
|
|
|
491
491
|
6. Print results as requested
|
|
492
492
|
7. Log results to the Agno platform if requested
|
|
493
493
|
"""
|
|
494
|
+
if isinstance(self.db, AsyncBaseDb):
|
|
495
|
+
raise ValueError("run() is not supported with an async DB. Please use arun() instead.")
|
|
496
|
+
|
|
494
497
|
from rich.console import Console
|
|
495
498
|
from rich.live import Live
|
|
496
499
|
from rich.status import Status
|
|
497
500
|
|
|
501
|
+
# Generate unique run_id for this execution (don't modify self.eval_id due to concurrency)
|
|
502
|
+
run_id = str(uuid4())
|
|
503
|
+
|
|
498
504
|
run_times = []
|
|
499
505
|
memory_usages = []
|
|
500
506
|
previous_snapshot = None
|
|
501
507
|
|
|
502
508
|
self._set_log_level()
|
|
503
509
|
|
|
504
|
-
log_debug(f"************ Evaluation Start: {
|
|
510
|
+
log_debug(f"************ Evaluation Start: {run_id} ************")
|
|
505
511
|
|
|
506
512
|
# Add a spinner while running the evaluations
|
|
507
513
|
console = Console()
|
|
@@ -612,7 +618,7 @@ class PerformanceEval:
|
|
|
612
618
|
),
|
|
613
619
|
)
|
|
614
620
|
|
|
615
|
-
log_debug(f"*********** Evaluation End: {
|
|
621
|
+
log_debug(f"*********** Evaluation End: {run_id} ***********")
|
|
616
622
|
return self.result
|
|
617
623
|
|
|
618
624
|
async def arun(
|
|
@@ -638,13 +644,16 @@ class PerformanceEval:
|
|
|
638
644
|
from rich.live import Live
|
|
639
645
|
from rich.status import Status
|
|
640
646
|
|
|
647
|
+
# Generate unique run_id for this execution (don't modify self.eval_id due to concurrency)
|
|
648
|
+
run_id = str(uuid4())
|
|
649
|
+
|
|
641
650
|
run_times = []
|
|
642
651
|
memory_usages = []
|
|
643
652
|
previous_snapshot = None
|
|
644
653
|
|
|
645
654
|
self._set_log_level()
|
|
646
655
|
|
|
647
|
-
log_debug(f"************ Evaluation Start: {
|
|
656
|
+
log_debug(f"************ Evaluation Start: {run_id} ************")
|
|
648
657
|
|
|
649
658
|
# Add a spinner while running the evaluations
|
|
650
659
|
console = Console()
|
|
@@ -755,7 +764,7 @@ class PerformanceEval:
|
|
|
755
764
|
),
|
|
756
765
|
)
|
|
757
766
|
|
|
758
|
-
log_debug(f"*********** Evaluation End: {
|
|
767
|
+
log_debug(f"*********** Evaluation End: {run_id} ***********")
|
|
759
768
|
return self.result
|
|
760
769
|
|
|
761
770
|
def _get_telemetry_data(self) -> Dict[str, Any]:
|
agno/eval/reliability.py
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
from dataclasses import asdict, dataclass, field
|
|
2
2
|
from os import getenv
|
|
3
|
-
from typing import TYPE_CHECKING, Any, Dict, List, Optional
|
|
3
|
+
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
|
|
4
4
|
from uuid import uuid4
|
|
5
5
|
|
|
6
|
-
from agno.db.base import BaseDb
|
|
6
|
+
from agno.db.base import AsyncBaseDb, BaseDb
|
|
7
|
+
from agno.run.team import TeamRunOutput
|
|
7
8
|
|
|
8
9
|
if TYPE_CHECKING:
|
|
9
10
|
from rich.console import Console
|
|
@@ -11,7 +12,6 @@ if TYPE_CHECKING:
|
|
|
11
12
|
from agno.agent import RunOutput
|
|
12
13
|
from agno.db.schemas.evals import EvalType
|
|
13
14
|
from agno.eval.utils import async_log_eval, log_eval_run, store_result_in_file
|
|
14
|
-
from agno.run.team import TeamRunOutput
|
|
15
15
|
from agno.utils.log import logger
|
|
16
16
|
|
|
17
17
|
|
|
@@ -63,7 +63,7 @@ class ReliabilityEval:
|
|
|
63
63
|
# Enable debug logs
|
|
64
64
|
debug_mode: bool = getenv("AGNO_DEBUG", "false").lower() == "true"
|
|
65
65
|
# The database to store Evaluation results
|
|
66
|
-
db: Optional[BaseDb] = None
|
|
66
|
+
db: Optional[Union[BaseDb, AsyncBaseDb]] = None
|
|
67
67
|
|
|
68
68
|
# Telemetry settings
|
|
69
69
|
# telemetry=True logs minimal telemetry for analytics
|
|
@@ -71,6 +71,9 @@ class ReliabilityEval:
|
|
|
71
71
|
telemetry: bool = True
|
|
72
72
|
|
|
73
73
|
def run(self, *, print_results: bool = False) -> Optional[ReliabilityResult]:
|
|
74
|
+
if isinstance(self.db, AsyncBaseDb):
|
|
75
|
+
raise ValueError("run() is not supported with an async DB. Please use arun() instead.")
|
|
76
|
+
|
|
74
77
|
if self.agent_response is None and self.team_response is None:
|
|
75
78
|
raise ValueError("You need to provide 'agent_response' or 'team_response' to run the evaluation.")
|
|
76
79
|
|
|
@@ -83,6 +86,9 @@ class ReliabilityEval:
|
|
|
83
86
|
from rich.live import Live
|
|
84
87
|
from rich.status import Status
|
|
85
88
|
|
|
89
|
+
# Generate unique run_id for this execution (don't modify self.eval_id due to concurrency)
|
|
90
|
+
run_id = str(uuid4())
|
|
91
|
+
|
|
86
92
|
# Add a spinner while running the evaluations
|
|
87
93
|
console = Console()
|
|
88
94
|
with Live(console=console, transient=True) as live_log:
|
|
@@ -115,7 +121,7 @@ class ReliabilityEval:
|
|
|
115
121
|
if not tool_name:
|
|
116
122
|
continue
|
|
117
123
|
else:
|
|
118
|
-
if tool_name not in self.expected_tool_calls:
|
|
124
|
+
if self.expected_tool_calls is not None and tool_name not in self.expected_tool_calls:
|
|
119
125
|
failed_tool_calls.append(tool_call.get("function", {}).get("name"))
|
|
120
126
|
else:
|
|
121
127
|
passed_tool_calls.append(tool_call.get("function", {}).get("name"))
|
|
@@ -180,7 +186,7 @@ class ReliabilityEval:
|
|
|
180
186
|
),
|
|
181
187
|
)
|
|
182
188
|
|
|
183
|
-
logger.debug(f"*********** Evaluation End: {
|
|
189
|
+
logger.debug(f"*********** Evaluation End: {run_id} ***********")
|
|
184
190
|
return self.result
|
|
185
191
|
|
|
186
192
|
async def arun(self, *, print_results: bool = False) -> Optional[ReliabilityResult]:
|
|
@@ -196,6 +202,9 @@ class ReliabilityEval:
|
|
|
196
202
|
from rich.live import Live
|
|
197
203
|
from rich.status import Status
|
|
198
204
|
|
|
205
|
+
# Generate unique run_id for this execution (don't modify self.eval_id due to concurrency)
|
|
206
|
+
run_id = str(uuid4())
|
|
207
|
+
|
|
199
208
|
# Add a spinner while running the evaluations
|
|
200
209
|
console = Console()
|
|
201
210
|
with Live(console=console, transient=True) as live_log:
|
|
@@ -220,15 +229,18 @@ class ReliabilityEval:
|
|
|
220
229
|
|
|
221
230
|
failed_tool_calls = []
|
|
222
231
|
passed_tool_calls = []
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
if
|
|
229
|
-
|
|
232
|
+
if not actual_tool_calls:
|
|
233
|
+
failed_tool_calls = self.expected_tool_calls or []
|
|
234
|
+
else:
|
|
235
|
+
for tool_call in actual_tool_calls: # type: ignore
|
|
236
|
+
tool_name = tool_call.get("function", {}).get("name")
|
|
237
|
+
if not tool_name:
|
|
238
|
+
continue
|
|
230
239
|
else:
|
|
231
|
-
|
|
240
|
+
if self.expected_tool_calls is not None and tool_name not in self.expected_tool_calls:
|
|
241
|
+
failed_tool_calls.append(tool_call.get("function", {}).get("name"))
|
|
242
|
+
else:
|
|
243
|
+
passed_tool_calls.append(tool_call.get("function", {}).get("name"))
|
|
232
244
|
|
|
233
245
|
self.result = ReliabilityResult(
|
|
234
246
|
eval_status="PASSED" if len(failed_tool_calls) == 0 else "FAILED",
|
|
@@ -241,7 +253,7 @@ class ReliabilityEval:
|
|
|
241
253
|
store_result_in_file(
|
|
242
254
|
file_path=self.file_path_to_save_results,
|
|
243
255
|
name=self.name,
|
|
244
|
-
eval_id=
|
|
256
|
+
eval_id=run_id,
|
|
245
257
|
result=self.result,
|
|
246
258
|
)
|
|
247
259
|
|
|
@@ -290,7 +302,7 @@ class ReliabilityEval:
|
|
|
290
302
|
),
|
|
291
303
|
)
|
|
292
304
|
|
|
293
|
-
logger.debug(f"*********** Evaluation End: {
|
|
305
|
+
logger.debug(f"*********** Evaluation End: {run_id} ***********")
|
|
294
306
|
return self.result
|
|
295
307
|
|
|
296
308
|
def _get_telemetry_data(self) -> Dict[str, Any]:
|
agno/eval/utils.py
CHANGED
|
@@ -2,12 +2,13 @@ from dataclasses import asdict
|
|
|
2
2
|
from pathlib import Path
|
|
3
3
|
from typing import TYPE_CHECKING, Optional, Union
|
|
4
4
|
|
|
5
|
-
from agno.db.base import BaseDb
|
|
5
|
+
from agno.db.base import AsyncBaseDb, BaseDb
|
|
6
6
|
from agno.db.schemas.evals import EvalRunRecord, EvalType
|
|
7
7
|
from agno.utils.log import log_debug, logger
|
|
8
8
|
|
|
9
9
|
if TYPE_CHECKING:
|
|
10
10
|
from agno.eval.accuracy import AccuracyResult
|
|
11
|
+
from agno.eval.agent_as_judge import AgentAsJudgeResult
|
|
11
12
|
from agno.eval.performance import PerformanceResult
|
|
12
13
|
from agno.eval.reliability import ReliabilityResult
|
|
13
14
|
|
|
@@ -49,7 +50,7 @@ def log_eval_run(
|
|
|
49
50
|
|
|
50
51
|
|
|
51
52
|
async def async_log_eval(
|
|
52
|
-
db: BaseDb,
|
|
53
|
+
db: Union[BaseDb, AsyncBaseDb],
|
|
53
54
|
run_id: str,
|
|
54
55
|
run_data: dict,
|
|
55
56
|
eval_type: EvalType,
|
|
@@ -65,28 +66,45 @@ async def async_log_eval(
|
|
|
65
66
|
"""Call the API to create an evaluation run."""
|
|
66
67
|
|
|
67
68
|
try:
|
|
68
|
-
db
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
69
|
+
if isinstance(db, AsyncBaseDb):
|
|
70
|
+
await db.create_eval_run(
|
|
71
|
+
EvalRunRecord(
|
|
72
|
+
run_id=run_id,
|
|
73
|
+
eval_type=eval_type,
|
|
74
|
+
eval_data=run_data,
|
|
75
|
+
eval_input=eval_input,
|
|
76
|
+
agent_id=agent_id,
|
|
77
|
+
model_id=model_id,
|
|
78
|
+
model_provider=model_provider,
|
|
79
|
+
name=name,
|
|
80
|
+
evaluated_component_name=evaluated_component_name,
|
|
81
|
+
team_id=team_id,
|
|
82
|
+
workflow_id=workflow_id,
|
|
83
|
+
)
|
|
84
|
+
)
|
|
85
|
+
else:
|
|
86
|
+
db.create_eval_run(
|
|
87
|
+
EvalRunRecord(
|
|
88
|
+
run_id=run_id,
|
|
89
|
+
eval_type=eval_type,
|
|
90
|
+
eval_data=run_data,
|
|
91
|
+
eval_input=eval_input,
|
|
92
|
+
agent_id=agent_id,
|
|
93
|
+
model_id=model_id,
|
|
94
|
+
model_provider=model_provider,
|
|
95
|
+
name=name,
|
|
96
|
+
evaluated_component_name=evaluated_component_name,
|
|
97
|
+
team_id=team_id,
|
|
98
|
+
workflow_id=workflow_id,
|
|
99
|
+
)
|
|
81
100
|
)
|
|
82
|
-
)
|
|
83
101
|
except Exception as e:
|
|
84
102
|
log_debug(f"Could not create agent event: {e}")
|
|
85
103
|
|
|
86
104
|
|
|
87
105
|
def store_result_in_file(
|
|
88
106
|
file_path: str,
|
|
89
|
-
result: Union["AccuracyResult", "PerformanceResult", "ReliabilityResult"],
|
|
107
|
+
result: Union["AccuracyResult", "AgentAsJudgeResult", "PerformanceResult", "ReliabilityResult"],
|
|
90
108
|
eval_id: Optional[str] = None,
|
|
91
109
|
name: Optional[str] = None,
|
|
92
110
|
):
|
agno/exceptions.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
1
2
|
from enum import Enum
|
|
2
3
|
from typing import Any, Dict, List, Optional, Union
|
|
3
4
|
|
|
@@ -77,6 +78,17 @@ class AgnoError(Exception):
|
|
|
77
78
|
return str(self.message)
|
|
78
79
|
|
|
79
80
|
|
|
81
|
+
class ModelAuthenticationError(AgnoError):
|
|
82
|
+
"""Raised when model authentication fails."""
|
|
83
|
+
|
|
84
|
+
def __init__(self, message: str, status_code: int = 401, model_name: Optional[str] = None):
|
|
85
|
+
super().__init__(message, status_code)
|
|
86
|
+
self.model_name = model_name
|
|
87
|
+
|
|
88
|
+
self.type = "model_authentication_error"
|
|
89
|
+
self.error_id = "model_authentication_error"
|
|
90
|
+
|
|
91
|
+
|
|
80
92
|
class ModelProviderError(AgnoError):
|
|
81
93
|
"""Exception raised when a model provider returns an error."""
|
|
82
94
|
|
|
@@ -130,7 +142,10 @@ class InputCheckError(Exception):
|
|
|
130
142
|
):
|
|
131
143
|
super().__init__(message)
|
|
132
144
|
self.type = "input_check_error"
|
|
133
|
-
|
|
145
|
+
if isinstance(check_trigger, CheckTrigger):
|
|
146
|
+
self.error_id = check_trigger.value
|
|
147
|
+
else:
|
|
148
|
+
self.error_id = str(check_trigger)
|
|
134
149
|
|
|
135
150
|
self.message = message
|
|
136
151
|
self.check_trigger = check_trigger
|
|
@@ -148,8 +163,18 @@ class OutputCheckError(Exception):
|
|
|
148
163
|
):
|
|
149
164
|
super().__init__(message)
|
|
150
165
|
self.type = "output_check_error"
|
|
151
|
-
|
|
166
|
+
if isinstance(check_trigger, CheckTrigger):
|
|
167
|
+
self.error_id = check_trigger.value
|
|
168
|
+
else:
|
|
169
|
+
self.error_id = str(check_trigger)
|
|
152
170
|
|
|
153
171
|
self.message = message
|
|
154
172
|
self.check_trigger = check_trigger
|
|
155
173
|
self.additional_data = additional_data
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
@dataclass
|
|
177
|
+
class RetryableModelProviderError(Exception):
|
|
178
|
+
original_error: Optional[str] = None
|
|
179
|
+
# Guidance message to retry a model invocation after an error
|
|
180
|
+
retry_guidance_message: Optional[str] = None
|