agno 2.1.4__py3-none-any.whl → 2.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/agent/agent.py +1767 -535
- agno/db/async_postgres/__init__.py +3 -0
- agno/db/async_postgres/async_postgres.py +1668 -0
- agno/db/async_postgres/schemas.py +124 -0
- agno/db/async_postgres/utils.py +289 -0
- agno/db/base.py +237 -2
- agno/db/dynamo/dynamo.py +2 -2
- agno/db/firestore/firestore.py +2 -2
- agno/db/firestore/utils.py +4 -2
- agno/db/gcs_json/gcs_json_db.py +2 -2
- agno/db/in_memory/in_memory_db.py +2 -2
- agno/db/json/json_db.py +2 -2
- agno/db/migrations/v1_to_v2.py +30 -13
- agno/db/mongo/mongo.py +18 -6
- agno/db/mysql/mysql.py +35 -13
- agno/db/postgres/postgres.py +29 -6
- agno/db/redis/redis.py +2 -2
- agno/db/singlestore/singlestore.py +2 -2
- agno/db/sqlite/sqlite.py +34 -12
- agno/db/sqlite/utils.py +8 -3
- agno/eval/accuracy.py +50 -43
- agno/eval/performance.py +6 -3
- agno/eval/reliability.py +6 -3
- agno/eval/utils.py +33 -16
- agno/exceptions.py +8 -2
- agno/knowledge/knowledge.py +260 -46
- agno/knowledge/reader/pdf_reader.py +4 -6
- agno/knowledge/reader/reader_factory.py +2 -3
- agno/memory/manager.py +241 -33
- agno/models/anthropic/claude.py +37 -0
- agno/os/app.py +8 -7
- agno/os/interfaces/a2a/router.py +3 -5
- agno/os/interfaces/agui/router.py +4 -1
- agno/os/interfaces/agui/utils.py +27 -6
- agno/os/interfaces/slack/router.py +2 -4
- agno/os/mcp.py +98 -41
- agno/os/router.py +23 -0
- agno/os/routers/evals/evals.py +52 -20
- agno/os/routers/evals/utils.py +14 -14
- agno/os/routers/knowledge/knowledge.py +130 -9
- agno/os/routers/knowledge/schemas.py +57 -0
- agno/os/routers/memory/memory.py +116 -44
- agno/os/routers/metrics/metrics.py +16 -6
- agno/os/routers/session/session.py +65 -22
- agno/os/schema.py +36 -0
- agno/os/utils.py +67 -12
- agno/reasoning/anthropic.py +80 -0
- agno/reasoning/gemini.py +73 -0
- agno/reasoning/openai.py +5 -0
- agno/reasoning/vertexai.py +76 -0
- agno/session/workflow.py +3 -3
- agno/team/team.py +918 -175
- agno/tools/googlesheets.py +20 -5
- agno/tools/mcp_toolbox.py +3 -3
- agno/tools/scrapegraph.py +1 -1
- agno/utils/models/claude.py +3 -1
- agno/utils/streamlit.py +1 -1
- agno/vectordb/base.py +22 -1
- agno/vectordb/cassandra/cassandra.py +9 -0
- agno/vectordb/chroma/chromadb.py +26 -6
- agno/vectordb/clickhouse/clickhousedb.py +9 -1
- agno/vectordb/couchbase/couchbase.py +11 -0
- agno/vectordb/lancedb/lance_db.py +20 -0
- agno/vectordb/langchaindb/langchaindb.py +11 -0
- agno/vectordb/lightrag/lightrag.py +9 -0
- agno/vectordb/llamaindex/llamaindexdb.py +15 -1
- agno/vectordb/milvus/milvus.py +23 -0
- agno/vectordb/mongodb/mongodb.py +22 -0
- agno/vectordb/pgvector/pgvector.py +19 -0
- agno/vectordb/pineconedb/pineconedb.py +35 -4
- agno/vectordb/qdrant/qdrant.py +24 -0
- agno/vectordb/singlestore/singlestore.py +25 -17
- agno/vectordb/surrealdb/surrealdb.py +18 -1
- agno/vectordb/upstashdb/upstashdb.py +26 -1
- agno/vectordb/weaviate/weaviate.py +18 -0
- agno/workflow/condition.py +4 -0
- agno/workflow/loop.py +4 -0
- agno/workflow/parallel.py +4 -0
- agno/workflow/router.py +4 -0
- agno/workflow/step.py +22 -14
- agno/workflow/steps.py +4 -0
- agno/workflow/types.py +2 -2
- agno/workflow/workflow.py +328 -61
- {agno-2.1.4.dist-info → agno-2.1.5.dist-info}/METADATA +100 -41
- {agno-2.1.4.dist-info → agno-2.1.5.dist-info}/RECORD +88 -81
- {agno-2.1.4.dist-info → agno-2.1.5.dist-info}/WHEEL +0 -0
- {agno-2.1.4.dist-info → agno-2.1.5.dist-info}/licenses/LICENSE +0 -0
- {agno-2.1.4.dist-info → agno-2.1.5.dist-info}/top_level.txt +0 -0
agno/db/sqlite/sqlite.py
CHANGED
@@ -36,9 +36,9 @@ except ImportError:
 class SqliteDb(BaseDb):
     def __init__(
         self,
+        db_file: Optional[str] = None,
         db_engine: Optional[Engine] = None,
         db_url: Optional[str] = None,
-        db_file: Optional[str] = None,
         session_table: Optional[str] = None,
         memory_table: Optional[str] = None,
         metrics_table: Optional[str] = None,
@@ -56,9 +56,9 @@ class SqliteDb(BaseDb):
         4. Create a new database in the current directory

         Args:
+            db_file (Optional[str]): The database file to connect to.
             db_engine (Optional[Engine]): The SQLAlchemy database engine to use.
             db_url (Optional[str]): The database URL to connect to.
-            db_file (Optional[str]): The database file to connect to.
             session_table (Optional[str]): Name of the table to store Agent, Team and Workflow sessions.
             memory_table (Optional[str]): Name of the table to store user memories.
             metrics_table (Optional[str]): Name of the table to store metrics.
@@ -664,7 +664,7 @@ class SqliteDb(BaseDb):
             raise e

     def upsert_sessions(
-        self, sessions: List[Session], deserialize: Optional[bool] = True
+        self, sessions: List[Session], deserialize: Optional[bool] = True, preserve_updated_at: bool = False
     ) -> List[Union[Session, Dict[str, Any]]]:
         """
         Bulk upsert multiple sessions for improved performance on large datasets.
@@ -672,6 +672,7 @@ class SqliteDb(BaseDb):
         Args:
             sessions (List[Session]): List of sessions to upsert.
             deserialize (Optional[bool]): Whether to deserialize the sessions. Defaults to True.
+            preserve_updated_at (bool): If True, preserve the updated_at from the session object.

         Returns:
             List[Union[Session, Dict[str, Any]]]: List of upserted sessions.
@@ -715,6 +716,12 @@ class SqliteDb(BaseDb):
                 agent_data = []
                 for session in agent_sessions:
                     serialized_session = serialize_session_json_fields(session.to_dict())
+                    # Use preserved updated_at if flag is set and value exists, otherwise use current time
+                    updated_at = (
+                        serialized_session.get("updated_at")
+                        if preserve_updated_at and serialized_session.get("updated_at")
+                        else int(time.time())
+                    )
                     agent_data.append(
                         {
                             "session_id": serialized_session.get("session_id"),
@@ -727,7 +734,7 @@ class SqliteDb(BaseDb):
                             "runs": serialized_session.get("runs"),
                             "summary": serialized_session.get("summary"),
                             "created_at": serialized_session.get("created_at"),
-                            "updated_at":
+                            "updated_at": updated_at,
                         }
                     )

@@ -743,7 +750,7 @@ class SqliteDb(BaseDb):
                         metadata=stmt.excluded.metadata,
                         runs=stmt.excluded.runs,
                         summary=stmt.excluded.summary,
-                        updated_at=
+                        updated_at=stmt.excluded.updated_at,
                     ),
                 )
                 sess.execute(stmt, agent_data)
@@ -768,6 +775,12 @@ class SqliteDb(BaseDb):
                 team_data = []
                 for session in team_sessions:
                     serialized_session = serialize_session_json_fields(session.to_dict())
+                    # Use preserved updated_at if flag is set and value exists, otherwise use current time
+                    updated_at = (
+                        serialized_session.get("updated_at")
+                        if preserve_updated_at and serialized_session.get("updated_at")
+                        else int(time.time())
+                    )
                     team_data.append(
                         {
                             "session_id": serialized_session.get("session_id"),
@@ -777,7 +790,7 @@ class SqliteDb(BaseDb):
                             "runs": serialized_session.get("runs"),
                             "summary": serialized_session.get("summary"),
                             "created_at": serialized_session.get("created_at"),
-                            "updated_at":
+                            "updated_at": updated_at,
                             "team_data": serialized_session.get("team_data"),
                             "session_data": serialized_session.get("session_data"),
                             "metadata": serialized_session.get("metadata"),
@@ -796,7 +809,7 @@ class SqliteDb(BaseDb):
                         metadata=stmt.excluded.metadata,
                         runs=stmt.excluded.runs,
                         summary=stmt.excluded.summary,
-                        updated_at=
+                        updated_at=stmt.excluded.updated_at,
                     ),
                 )
                 sess.execute(stmt, team_data)
@@ -821,6 +834,12 @@ class SqliteDb(BaseDb):
                 workflow_data = []
                 for session in workflow_sessions:
                     serialized_session = serialize_session_json_fields(session.to_dict())
+                    # Use preserved updated_at if flag is set and value exists, otherwise use current time
+                    updated_at = (
+                        serialized_session.get("updated_at")
+                        if preserve_updated_at and serialized_session.get("updated_at")
+                        else int(time.time())
+                    )
                     workflow_data.append(
                         {
                             "session_id": serialized_session.get("session_id"),
@@ -830,7 +849,7 @@ class SqliteDb(BaseDb):
                             "runs": serialized_session.get("runs"),
                             "summary": serialized_session.get("summary"),
                             "created_at": serialized_session.get("created_at"),
-                            "updated_at":
+                            "updated_at": updated_at,
                             "workflow_data": serialized_session.get("workflow_data"),
                             "session_data": serialized_session.get("session_data"),
                             "metadata": serialized_session.get("metadata"),
@@ -849,7 +868,7 @@ class SqliteDb(BaseDb):
                         metadata=stmt.excluded.metadata,
                         runs=stmt.excluded.runs,
                         summary=stmt.excluded.summary,
-                        updated_at=
+                        updated_at=stmt.excluded.updated_at,
                     ),
                 )
                 sess.execute(stmt, workflow_data)
@@ -1224,7 +1243,7 @@ class SqliteDb(BaseDb):
             raise e

     def upsert_memories(
-        self, memories: List[UserMemory], deserialize: Optional[bool] = True
+        self, memories: List[UserMemory], deserialize: Optional[bool] = True, preserve_updated_at: bool = False
     ) -> List[Union[UserMemory, Dict[str, Any]]]:
         """
         Bulk upsert multiple user memories for improved performance on large datasets.
@@ -1255,10 +1274,13 @@ class SqliteDb(BaseDb):
            ]
            # Prepare bulk data
            bulk_data = []
+           current_time = int(time.time())
            for memory in memories:
                if memory.memory_id is None:
                    memory.memory_id = str(uuid4())

+               # Use preserved updated_at if flag is set and value exists, otherwise use current time
+               updated_at = memory.updated_at if preserve_updated_at and memory.updated_at else current_time
                bulk_data.append(
                    {
                        "user_id": memory.user_id,
@@ -1267,7 +1289,7 @@ class SqliteDb(BaseDb):
                        "memory_id": memory.memory_id,
                        "memory": memory.memory,
                        "topics": memory.topics,
-                       "updated_at":
+                       "updated_at": updated_at,
                    }
                )

@@ -1284,7 +1306,7 @@ class SqliteDb(BaseDb):
                        input=stmt.excluded.input,
                        agent_id=stmt.excluded.agent_id,
                        team_id=stmt.excluded.team_id,
-                       updated_at=
+                       updated_at=stmt.excluded.updated_at,
                    ),
                )
                sess.execute(stmt, bulk_data)
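
The practical effect of these changes is a new preserve_updated_at flag on the bulk upsert methods: when set, rows keep the updated_at carried by the Session or UserMemory object instead of being stamped with the current time, which matters when migrating or restoring data. A minimal usage sketch, assuming the import path below and pre-loaded object lists (the restored_* variables are placeholders, not part of the library):

    from agno.db.sqlite import SqliteDb  # import path assumed from the package layout above

    db = SqliteDb(db_file="tmp/agno.db")

    # Restoring data exported from another environment: keep the original timestamps
    db.upsert_sessions(restored_sessions, preserve_updated_at=True)   # placeholder list of Session objects
    db.upsert_memories(restored_memories, preserve_updated_at=True)   # placeholder list of UserMemory objects

    # Default behaviour is unchanged: updated_at is stamped with int(time.time()) on write
    db.upsert_sessions(restored_sessions)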
agno/db/sqlite/utils.py
CHANGED
@@ -179,9 +179,11 @@ def calculate_date_metrics(date_to_process: date, sessions_data: dict) -> dict:
         for session in sessions:
             if session.get("user_id"):
                 all_user_ids.add(session["user_id"])
-
+
+            # Parse runs from JSON string
             if runs := session.get("runs", []):
-                runs = json.loads(runs)
+                runs = json.loads(runs) if isinstance(runs, str) else runs
+                metrics[runs_count_key] += len(runs)
                 for run in runs:
                     if model_id := run.get("model"):
                         model_provider = run.get("model_provider", "")
@@ -189,7 +191,10 @@ def calculate_date_metrics(date_to_process: date, sessions_data: dict) -> dict:
                             model_counts.get(f"{model_id}:{model_provider}", 0) + 1
                         )

-
+            # Parse session_data from JSON string
+            session_data = session.get("session_data", {})
+            if isinstance(session_data, str):
+                session_data = json.loads(session_data)
             session_metrics = session_data.get("session_metrics", {})
             for field in token_metrics:
                 token_metrics[field] += session_metrics.get(field, 0)
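
The metrics calculation now tolerates both representations of the JSON columns: rows read straight from SQLite carry runs and session_data as JSON text, while rows that were already deserialized upstream arrive as Python objects. A small standalone illustration of the guard (not Agno code, just the pattern):

    import json

    def ensure_parsed(value):
        # Accept either a JSON string (raw DB row) or an already-parsed object
        return json.loads(value) if isinstance(value, str) else value

    assert ensure_parsed('[{"model": "gpt-4o"}]') == [{"model": "gpt-4o"}]
    assert ensure_parsed([{"model": "gpt-4o"}]) == [{"model": "gpt-4o"}]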
agno/eval/accuracy.py
CHANGED
@@ -7,13 +7,13 @@ from uuid import uuid4
 from pydantic import BaseModel, Field

 from agno.agent import Agent
-from agno.db.base import BaseDb
+from agno.db.base import AsyncBaseDb, BaseDb
 from agno.db.schemas.evals import EvalType
 from agno.eval.utils import async_log_eval, log_eval_run, store_result_in_file
 from agno.exceptions import EvalError
 from agno.models.base import Model
 from agno.team.team import Team
-from agno.utils.log import logger, set_log_level_to_debug, set_log_level_to_info
+from agno.utils.log import log_error, logger, set_log_level_to_debug, set_log_level_to_info

 if TYPE_CHECKING:
     from rich.console import Console
@@ -176,7 +176,7 @@ class AccuracyEval:
     # Enable debug logs
     debug_mode: bool = getenv("AGNO_DEBUG", "false").lower() == "true"
     # The database to store Evaluation results
-    db: Optional[BaseDb] = None
+    db: Optional[Union[BaseDb, AsyncBaseDb]] = None

     # Telemetry settings
     # telemetry=True logs minimal telemetry for analytics
@@ -327,6 +327,9 @@ Remember: You must only compare the agent_output to the expected_output. The exp
         print_summary: bool = True,
         print_results: bool = True,
     ) -> Optional[AccuracyResult]:
+        if isinstance(self.db, AsyncBaseDb):
+            raise ValueError("run() is not supported with an async DB. Please use arun() instead.")
+
         if self.agent is None and self.team is None:
             logger.error("You need to provide one of 'agent' or 'team' to run the evaluation.")
             return None
@@ -661,47 +664,51 @@ Remember: You must only compare the agent_output to the expected_output. The exp
         )
         # Log results to the Agno DB if requested
         if self.db:
-            if self.agent is not None:
-                agent_id = self.agent.id
-                team_id = None
-                model_id = self.agent.model.id if self.agent.model is not None else None
-                model_provider = self.agent.model.provider if self.agent.model is not None else None
-                evaluated_component_name = self.agent.name
-            elif self.team is not None:
-                agent_id = None
-                team_id = self.team.id
-                model_id = self.team.model.id if self.team.model is not None else None
-                model_provider = self.team.model.provider if self.team.model is not None else None
-                evaluated_component_name = self.team.name
-            else:
-                agent_id = None
-                team_id = None
-                model_id = None
-                model_provider = None
-                evaluated_component_name = None
+            if isinstance(self.db, AsyncBaseDb):
+                log_error("You are using an async DB in a non-async method. The evaluation won't be stored in the DB.")

-            log_eval_input = {
-                "additional_guidelines": self.additional_guidelines,
-                "additional_context": self.additional_context,
-                "num_iterations": self.num_iterations,
-                "expected_output": self.expected_output,
-                "input": self.input,
-            }
-
-            log_eval_run(
-                db=self.db,
-                run_id=self.eval_id,  # type: ignore
-                run_data=asdict(self.result),
-                eval_type=EvalType.ACCURACY,
-                name=self.name if self.name is not None else None,
-                agent_id=agent_id,
-                team_id=team_id,
-                model_id=model_id,
-                model_provider=model_provider,
-                evaluated_component_name=evaluated_component_name,
-                workflow_id=None,
-                eval_input=log_eval_input,
-            )
+            else:
+                if self.agent is not None:
+                    agent_id = self.agent.id
+                    team_id = None
+                    model_id = self.agent.model.id if self.agent.model is not None else None
+                    model_provider = self.agent.model.provider if self.agent.model is not None else None
+                    evaluated_component_name = self.agent.name
+                elif self.team is not None:
+                    agent_id = None
+                    team_id = self.team.id
+                    model_id = self.team.model.id if self.team.model is not None else None
+                    model_provider = self.team.model.provider if self.team.model is not None else None
+                    evaluated_component_name = self.team.name
+                else:
+                    agent_id = None
+                    team_id = None
+                    model_id = None
+                    model_provider = None
+                    evaluated_component_name = None
+
+                log_eval_input = {
+                    "additional_guidelines": self.additional_guidelines,
+                    "additional_context": self.additional_context,
+                    "num_iterations": self.num_iterations,
+                    "expected_output": self.expected_output,
+                    "input": self.input,
+                }
+
+                log_eval_run(
+                    db=self.db,
+                    run_id=self.eval_id,  # type: ignore
+                    run_data=asdict(self.result),
+                    eval_type=EvalType.ACCURACY,
+                    name=self.name if self.name is not None else None,
+                    agent_id=agent_id,
+                    team_id=team_id,
+                    model_id=model_id,
+                    model_provider=model_provider,
+                    evaluated_component_name=evaluated_component_name,
+                    workflow_id=None,
+                    eval_input=log_eval_input,
+                )

         if self.telemetry:
             from agno.api.evals import EvalRunCreate, create_eval_run_telemetry
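
With these changes AccuracyEval.db accepts either a sync BaseDb or the new AsyncBaseDb; run() rejects an async DB and points to arun(), and the same guard appears in PerformanceEval.run() and ReliabilityEval.run() below. A rough usage sketch, assuming the async Postgres backend added in this release is exported as AsyncPostgresDb and that arun() mirrors the run() signature (connection string and model are placeholders):

    import asyncio

    from agno.agent import Agent
    from agno.db.async_postgres import AsyncPostgresDb  # class name assumed
    from agno.eval.accuracy import AccuracyEval
    from agno.models.openai import OpenAIChat

    evaluation = AccuracyEval(
        db=AsyncPostgresDb(db_url="postgresql+asyncpg://user:pass@localhost:5432/agno"),  # placeholder URL
        agent=Agent(model=OpenAIChat(id="gpt-4o")),
        input="What is 2 + 2?",
        expected_output="4",
    )

    # evaluation.run() would raise ValueError here because the DB is async
    result = asyncio.run(evaluation.arun(print_results=True))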
agno/eval/performance.py
CHANGED
@@ -3,10 +3,10 @@ import gc
 import tracemalloc
 from dataclasses import dataclass, field
 from os import getenv
-from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional
+from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Union
 from uuid import uuid4

-from agno.db.base import BaseDb
+from agno.db.base import AsyncBaseDb, BaseDb
 from agno.db.schemas.evals import EvalType
 from agno.eval.utils import async_log_eval, log_eval_run, store_result_in_file
 from agno.utils.log import log_debug, set_log_level_to_debug, set_log_level_to_info
@@ -222,7 +222,7 @@ class PerformanceEval:
     # Enable debug logs
     debug_mode: bool = getenv("AGNO_DEBUG", "false").lower() == "true"
     # The database to store Evaluation results
-    db: Optional[BaseDb] = None
+    db: Optional[Union[BaseDb, AsyncBaseDb]] = None

     # Telemetry settings
     # telemetry=True logs minimal telemetry for analytics
@@ -491,6 +491,9 @@ class PerformanceEval:
         6. Print results as requested
         7. Log results to the Agno platform if requested
         """
+        if isinstance(self.db, AsyncBaseDb):
+            raise ValueError("run() is not supported with an async DB. Please use arun() instead.")
+
         from rich.console import Console
         from rich.live import Live
         from rich.status import Status
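
PerformanceEval gets the same guard. A brief sketch along the same lines, assuming func and num_iterations keep their existing meaning and arun() mirrors run() (the measured callable and the connection string are placeholders):

    import asyncio

    from agno.db.async_postgres import AsyncPostgresDb  # class name assumed
    from agno.eval.performance import PerformanceEval

    def build_payload() -> dict:
        # Placeholder callable whose runtime and memory footprint get measured
        return {"n": sum(range(10_000))}

    perf = PerformanceEval(
        func=build_payload,
        num_iterations=5,
        db=AsyncPostgresDb(db_url="postgresql+asyncpg://user:pass@localhost:5432/agno"),  # placeholder URL
    )

    # perf.run() would raise ValueError because the DB is async; use arun() instead
    asyncio.run(perf.arun(print_results=True))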
agno/eval/reliability.py
CHANGED
@@ -1,9 +1,9 @@
 from dataclasses import asdict, dataclass, field
 from os import getenv
-from typing import TYPE_CHECKING, Any, Dict, List, Optional
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
 from uuid import uuid4

-from agno.db.base import BaseDb
+from agno.db.base import AsyncBaseDb, BaseDb

 if TYPE_CHECKING:
     from rich.console import Console
@@ -63,7 +63,7 @@ class ReliabilityEval:
     # Enable debug logs
     debug_mode: bool = getenv("AGNO_DEBUG", "false").lower() == "true"
     # The database to store Evaluation results
-    db: Optional[BaseDb] = None
+    db: Optional[Union[BaseDb, AsyncBaseDb]] = None

     # Telemetry settings
     # telemetry=True logs minimal telemetry for analytics
@@ -71,6 +71,9 @@ class ReliabilityEval:
     telemetry: bool = True

     def run(self, *, print_results: bool = False) -> Optional[ReliabilityResult]:
+        if isinstance(self.db, AsyncBaseDb):
+            raise ValueError("run() is not supported with an async DB. Please use arun() instead.")
+
         if self.agent_response is None and self.team_response is None:
             raise ValueError("You need to provide 'agent_response' or 'team_response' to run the evaluation.")

agno/eval/utils.py
CHANGED
@@ -2,7 +2,7 @@ from dataclasses import asdict
 from pathlib import Path
 from typing import TYPE_CHECKING, Optional, Union

-from agno.db.base import BaseDb
+from agno.db.base import AsyncBaseDb, BaseDb
 from agno.db.schemas.evals import EvalRunRecord, EvalType
 from agno.utils.log import log_debug, logger

@@ -49,7 +49,7 @@ def log_eval_run(


 async def async_log_eval(
-    db: BaseDb,
+    db: Union[BaseDb, AsyncBaseDb],
     run_id: str,
     run_data: dict,
     eval_type: EvalType,
@@ -65,21 +65,38 @@ async def async_log_eval(
     """Call the API to create an evaluation run."""

     try:
-        db.create_eval_run(
-            EvalRunRecord(
-                run_id=run_id,
-                eval_type=eval_type,
-                eval_data=run_data,
-                eval_input=eval_input,
-                agent_id=agent_id,
-                model_id=model_id,
-                model_provider=model_provider,
-                name=name,
-                evaluated_component_name=evaluated_component_name,
-                team_id=team_id,
-                workflow_id=workflow_id,
+        if isinstance(db, AsyncBaseDb):
+            await db.create_eval_run(
+                EvalRunRecord(
+                    run_id=run_id,
+                    eval_type=eval_type,
+                    eval_data=run_data,
+                    eval_input=eval_input,
+                    agent_id=agent_id,
+                    model_id=model_id,
+                    model_provider=model_provider,
+                    name=name,
+                    evaluated_component_name=evaluated_component_name,
+                    team_id=team_id,
+                    workflow_id=workflow_id,
+                )
+            )
+        else:
+            db.create_eval_run(
+                EvalRunRecord(
+                    run_id=run_id,
+                    eval_type=eval_type,
+                    eval_data=run_data,
+                    eval_input=eval_input,
+                    agent_id=agent_id,
+                    model_id=model_id,
+                    model_provider=model_provider,
+                    name=name,
+                    evaluated_component_name=evaluated_component_name,
+                    team_id=team_id,
+                    workflow_id=workflow_id,
+                )
             )
-        )
     except Exception as e:
         log_debug(f"Could not create agent event: {e}")

agno/exceptions.py
CHANGED
@@ -130,7 +130,10 @@ class InputCheckError(Exception):
     ):
         super().__init__(message)
         self.type = "input_check_error"
-
+        if isinstance(check_trigger, CheckTrigger):
+            self.error_id = check_trigger.value
+        else:
+            self.error_id = str(check_trigger)

         self.message = message
         self.check_trigger = check_trigger
@@ -148,7 +151,10 @@ class OutputCheckError(Exception):
     ):
         super().__init__(message)
         self.type = "output_check_error"
-
+        if isinstance(check_trigger, CheckTrigger):
+            self.error_id = check_trigger.value
+        else:
+            self.error_id = str(check_trigger)

         self.message = message
         self.check_trigger = check_trigger
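
Both guardrail exceptions now derive error_id safely whether check_trigger is a CheckTrigger member or a plain string. A small sketch of the resulting behaviour, assuming check_trigger can be passed as a keyword argument (the enum member name is illustrative, not taken from the diff):

    from agno.exceptions import CheckTrigger, InputCheckError

    # Enum trigger: error_id becomes the enum's value
    err = InputCheckError("Prompt rejected", check_trigger=CheckTrigger.INPUT_NOT_ALLOWED)  # member name illustrative
    print(err.error_id)   # the enum's string value

    # Plain-string trigger: error_id falls back to str(check_trigger)
    err = InputCheckError("Prompt rejected", check_trigger="custom_pii_check")
    print(err.error_id)   # "custom_pii_check"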