agno-0.1.2-py3-none-any.whl → agno-2.3.13-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/__init__.py +8 -0
- agno/agent/__init__.py +44 -5
- agno/agent/agent.py +10531 -2975
- agno/api/agent.py +14 -53
- agno/api/api.py +7 -46
- agno/api/evals.py +22 -0
- agno/api/os.py +17 -0
- agno/api/routes.py +6 -25
- agno/api/schemas/__init__.py +9 -0
- agno/api/schemas/agent.py +6 -9
- agno/api/schemas/evals.py +16 -0
- agno/api/schemas/os.py +14 -0
- agno/api/schemas/team.py +10 -10
- agno/api/schemas/utils.py +21 -0
- agno/api/schemas/workflows.py +16 -0
- agno/api/settings.py +53 -0
- agno/api/team.py +22 -26
- agno/api/workflow.py +28 -0
- agno/cloud/aws/base.py +214 -0
- agno/cloud/aws/s3/__init__.py +2 -0
- agno/cloud/aws/s3/api_client.py +43 -0
- agno/cloud/aws/s3/bucket.py +195 -0
- agno/cloud/aws/s3/object.py +57 -0
- agno/compression/__init__.py +3 -0
- agno/compression/manager.py +247 -0
- agno/culture/__init__.py +3 -0
- agno/culture/manager.py +956 -0
- agno/db/__init__.py +24 -0
- agno/db/async_postgres/__init__.py +3 -0
- agno/db/base.py +946 -0
- agno/db/dynamo/__init__.py +3 -0
- agno/db/dynamo/dynamo.py +2781 -0
- agno/db/dynamo/schemas.py +442 -0
- agno/db/dynamo/utils.py +743 -0
- agno/db/firestore/__init__.py +3 -0
- agno/db/firestore/firestore.py +2379 -0
- agno/db/firestore/schemas.py +181 -0
- agno/db/firestore/utils.py +376 -0
- agno/db/gcs_json/__init__.py +3 -0
- agno/db/gcs_json/gcs_json_db.py +1791 -0
- agno/db/gcs_json/utils.py +228 -0
- agno/db/in_memory/__init__.py +3 -0
- agno/db/in_memory/in_memory_db.py +1312 -0
- agno/db/in_memory/utils.py +230 -0
- agno/db/json/__init__.py +3 -0
- agno/db/json/json_db.py +1777 -0
- agno/db/json/utils.py +230 -0
- agno/db/migrations/manager.py +199 -0
- agno/db/migrations/v1_to_v2.py +635 -0
- agno/db/migrations/versions/v2_3_0.py +938 -0
- agno/db/mongo/__init__.py +17 -0
- agno/db/mongo/async_mongo.py +2760 -0
- agno/db/mongo/mongo.py +2597 -0
- agno/db/mongo/schemas.py +119 -0
- agno/db/mongo/utils.py +276 -0
- agno/db/mysql/__init__.py +4 -0
- agno/db/mysql/async_mysql.py +2912 -0
- agno/db/mysql/mysql.py +2923 -0
- agno/db/mysql/schemas.py +186 -0
- agno/db/mysql/utils.py +488 -0
- agno/db/postgres/__init__.py +4 -0
- agno/db/postgres/async_postgres.py +2579 -0
- agno/db/postgres/postgres.py +2870 -0
- agno/db/postgres/schemas.py +187 -0
- agno/db/postgres/utils.py +442 -0
- agno/db/redis/__init__.py +3 -0
- agno/db/redis/redis.py +2141 -0
- agno/db/redis/schemas.py +159 -0
- agno/db/redis/utils.py +346 -0
- agno/db/schemas/__init__.py +4 -0
- agno/db/schemas/culture.py +120 -0
- agno/db/schemas/evals.py +34 -0
- agno/db/schemas/knowledge.py +40 -0
- agno/db/schemas/memory.py +61 -0
- agno/db/singlestore/__init__.py +3 -0
- agno/db/singlestore/schemas.py +179 -0
- agno/db/singlestore/singlestore.py +2877 -0
- agno/db/singlestore/utils.py +384 -0
- agno/db/sqlite/__init__.py +4 -0
- agno/db/sqlite/async_sqlite.py +2911 -0
- agno/db/sqlite/schemas.py +181 -0
- agno/db/sqlite/sqlite.py +2908 -0
- agno/db/sqlite/utils.py +429 -0
- agno/db/surrealdb/__init__.py +3 -0
- agno/db/surrealdb/metrics.py +292 -0
- agno/db/surrealdb/models.py +334 -0
- agno/db/surrealdb/queries.py +71 -0
- agno/db/surrealdb/surrealdb.py +1908 -0
- agno/db/surrealdb/utils.py +147 -0
- agno/db/utils.py +118 -0
- agno/eval/__init__.py +24 -0
- agno/eval/accuracy.py +666 -276
- agno/eval/agent_as_judge.py +861 -0
- agno/eval/base.py +29 -0
- agno/eval/performance.py +779 -0
- agno/eval/reliability.py +241 -62
- agno/eval/utils.py +120 -0
- agno/exceptions.py +143 -1
- agno/filters.py +354 -0
- agno/guardrails/__init__.py +6 -0
- agno/guardrails/base.py +19 -0
- agno/guardrails/openai.py +144 -0
- agno/guardrails/pii.py +94 -0
- agno/guardrails/prompt_injection.py +52 -0
- agno/hooks/__init__.py +3 -0
- agno/hooks/decorator.py +164 -0
- agno/integrations/discord/__init__.py +3 -0
- agno/integrations/discord/client.py +203 -0
- agno/knowledge/__init__.py +5 -1
- agno/{document → knowledge}/chunking/agentic.py +22 -14
- agno/{document → knowledge}/chunking/document.py +2 -2
- agno/{document → knowledge}/chunking/fixed.py +7 -6
- agno/knowledge/chunking/markdown.py +151 -0
- agno/{document → knowledge}/chunking/recursive.py +15 -3
- agno/knowledge/chunking/row.py +39 -0
- agno/knowledge/chunking/semantic.py +91 -0
- agno/knowledge/chunking/strategy.py +165 -0
- agno/knowledge/content.py +74 -0
- agno/knowledge/document/__init__.py +5 -0
- agno/{document → knowledge/document}/base.py +12 -2
- agno/knowledge/embedder/__init__.py +5 -0
- agno/knowledge/embedder/aws_bedrock.py +343 -0
- agno/knowledge/embedder/azure_openai.py +210 -0
- agno/{embedder → knowledge/embedder}/base.py +8 -0
- agno/knowledge/embedder/cohere.py +323 -0
- agno/knowledge/embedder/fastembed.py +62 -0
- agno/{embedder → knowledge/embedder}/fireworks.py +1 -1
- agno/knowledge/embedder/google.py +258 -0
- agno/knowledge/embedder/huggingface.py +94 -0
- agno/knowledge/embedder/jina.py +182 -0
- agno/knowledge/embedder/langdb.py +22 -0
- agno/knowledge/embedder/mistral.py +206 -0
- agno/knowledge/embedder/nebius.py +13 -0
- agno/knowledge/embedder/ollama.py +154 -0
- agno/knowledge/embedder/openai.py +195 -0
- agno/knowledge/embedder/sentence_transformer.py +63 -0
- agno/{embedder → knowledge/embedder}/together.py +1 -1
- agno/knowledge/embedder/vllm.py +262 -0
- agno/knowledge/embedder/voyageai.py +165 -0
- agno/knowledge/knowledge.py +3006 -0
- agno/knowledge/reader/__init__.py +7 -0
- agno/knowledge/reader/arxiv_reader.py +81 -0
- agno/knowledge/reader/base.py +95 -0
- agno/knowledge/reader/csv_reader.py +164 -0
- agno/knowledge/reader/docx_reader.py +82 -0
- agno/knowledge/reader/field_labeled_csv_reader.py +290 -0
- agno/knowledge/reader/firecrawl_reader.py +201 -0
- agno/knowledge/reader/json_reader.py +88 -0
- agno/knowledge/reader/markdown_reader.py +137 -0
- agno/knowledge/reader/pdf_reader.py +431 -0
- agno/knowledge/reader/pptx_reader.py +101 -0
- agno/knowledge/reader/reader_factory.py +313 -0
- agno/knowledge/reader/s3_reader.py +89 -0
- agno/knowledge/reader/tavily_reader.py +193 -0
- agno/knowledge/reader/text_reader.py +127 -0
- agno/knowledge/reader/web_search_reader.py +325 -0
- agno/knowledge/reader/website_reader.py +455 -0
- agno/knowledge/reader/wikipedia_reader.py +91 -0
- agno/knowledge/reader/youtube_reader.py +78 -0
- agno/knowledge/remote_content/remote_content.py +88 -0
- agno/knowledge/reranker/__init__.py +3 -0
- agno/{reranker → knowledge/reranker}/base.py +1 -1
- agno/{reranker → knowledge/reranker}/cohere.py +2 -2
- agno/knowledge/reranker/infinity.py +195 -0
- agno/knowledge/reranker/sentence_transformer.py +54 -0
- agno/knowledge/types.py +39 -0
- agno/knowledge/utils.py +234 -0
- agno/media.py +439 -95
- agno/memory/__init__.py +16 -3
- agno/memory/manager.py +1474 -123
- agno/memory/strategies/__init__.py +15 -0
- agno/memory/strategies/base.py +66 -0
- agno/memory/strategies/summarize.py +196 -0
- agno/memory/strategies/types.py +37 -0
- agno/models/aimlapi/__init__.py +5 -0
- agno/models/aimlapi/aimlapi.py +62 -0
- agno/models/anthropic/__init__.py +4 -0
- agno/models/anthropic/claude.py +960 -496
- agno/models/aws/__init__.py +15 -0
- agno/models/aws/bedrock.py +686 -451
- agno/models/aws/claude.py +190 -183
- agno/models/azure/__init__.py +18 -1
- agno/models/azure/ai_foundry.py +489 -0
- agno/models/azure/openai_chat.py +89 -40
- agno/models/base.py +2477 -550
- agno/models/cerebras/__init__.py +12 -0
- agno/models/cerebras/cerebras.py +565 -0
- agno/models/cerebras/cerebras_openai.py +131 -0
- agno/models/cohere/__init__.py +4 -0
- agno/models/cohere/chat.py +306 -492
- agno/models/cometapi/__init__.py +5 -0
- agno/models/cometapi/cometapi.py +74 -0
- agno/models/dashscope/__init__.py +5 -0
- agno/models/dashscope/dashscope.py +90 -0
- agno/models/deepinfra/__init__.py +5 -0
- agno/models/deepinfra/deepinfra.py +45 -0
- agno/models/deepseek/__init__.py +4 -0
- agno/models/deepseek/deepseek.py +110 -9
- agno/models/fireworks/__init__.py +4 -0
- agno/models/fireworks/fireworks.py +19 -22
- agno/models/google/__init__.py +3 -7
- agno/models/google/gemini.py +1717 -662
- agno/models/google/utils.py +22 -0
- agno/models/groq/__init__.py +4 -0
- agno/models/groq/groq.py +391 -666
- agno/models/huggingface/__init__.py +4 -0
- agno/models/huggingface/huggingface.py +266 -538
- agno/models/ibm/__init__.py +5 -0
- agno/models/ibm/watsonx.py +432 -0
- agno/models/internlm/__init__.py +3 -0
- agno/models/internlm/internlm.py +20 -3
- agno/models/langdb/__init__.py +1 -0
- agno/models/langdb/langdb.py +60 -0
- agno/models/litellm/__init__.py +14 -0
- agno/models/litellm/chat.py +503 -0
- agno/models/litellm/litellm_openai.py +42 -0
- agno/models/llama_cpp/__init__.py +5 -0
- agno/models/llama_cpp/llama_cpp.py +22 -0
- agno/models/lmstudio/__init__.py +5 -0
- agno/models/lmstudio/lmstudio.py +25 -0
- agno/models/message.py +361 -39
- agno/models/meta/__init__.py +12 -0
- agno/models/meta/llama.py +502 -0
- agno/models/meta/llama_openai.py +79 -0
- agno/models/metrics.py +120 -0
- agno/models/mistral/__init__.py +4 -0
- agno/models/mistral/mistral.py +293 -393
- agno/models/nebius/__init__.py +3 -0
- agno/models/nebius/nebius.py +53 -0
- agno/models/nexus/__init__.py +3 -0
- agno/models/nexus/nexus.py +22 -0
- agno/models/nvidia/__init__.py +4 -0
- agno/models/nvidia/nvidia.py +22 -3
- agno/models/ollama/__init__.py +4 -2
- agno/models/ollama/chat.py +257 -492
- agno/models/openai/__init__.py +7 -0
- agno/models/openai/chat.py +725 -770
- agno/models/openai/like.py +16 -2
- agno/models/openai/responses.py +1121 -0
- agno/models/openrouter/__init__.py +4 -0
- agno/models/openrouter/openrouter.py +62 -5
- agno/models/perplexity/__init__.py +5 -0
- agno/models/perplexity/perplexity.py +203 -0
- agno/models/portkey/__init__.py +3 -0
- agno/models/portkey/portkey.py +82 -0
- agno/models/requesty/__init__.py +5 -0
- agno/models/requesty/requesty.py +69 -0
- agno/models/response.py +177 -7
- agno/models/sambanova/__init__.py +4 -0
- agno/models/sambanova/sambanova.py +23 -4
- agno/models/siliconflow/__init__.py +5 -0
- agno/models/siliconflow/siliconflow.py +42 -0
- agno/models/together/__init__.py +4 -0
- agno/models/together/together.py +21 -164
- agno/models/utils.py +266 -0
- agno/models/vercel/__init__.py +3 -0
- agno/models/vercel/v0.py +43 -0
- agno/models/vertexai/__init__.py +0 -1
- agno/models/vertexai/claude.py +190 -0
- agno/models/vllm/__init__.py +3 -0
- agno/models/vllm/vllm.py +83 -0
- agno/models/xai/__init__.py +2 -0
- agno/models/xai/xai.py +111 -7
- agno/os/__init__.py +3 -0
- agno/os/app.py +1027 -0
- agno/os/auth.py +244 -0
- agno/os/config.py +126 -0
- agno/os/interfaces/__init__.py +1 -0
- agno/os/interfaces/a2a/__init__.py +3 -0
- agno/os/interfaces/a2a/a2a.py +42 -0
- agno/os/interfaces/a2a/router.py +249 -0
- agno/os/interfaces/a2a/utils.py +924 -0
- agno/os/interfaces/agui/__init__.py +3 -0
- agno/os/interfaces/agui/agui.py +47 -0
- agno/os/interfaces/agui/router.py +147 -0
- agno/os/interfaces/agui/utils.py +574 -0
- agno/os/interfaces/base.py +25 -0
- agno/os/interfaces/slack/__init__.py +3 -0
- agno/os/interfaces/slack/router.py +148 -0
- agno/os/interfaces/slack/security.py +30 -0
- agno/os/interfaces/slack/slack.py +47 -0
- agno/os/interfaces/whatsapp/__init__.py +3 -0
- agno/os/interfaces/whatsapp/router.py +210 -0
- agno/os/interfaces/whatsapp/security.py +55 -0
- agno/os/interfaces/whatsapp/whatsapp.py +36 -0
- agno/os/mcp.py +293 -0
- agno/os/middleware/__init__.py +9 -0
- agno/os/middleware/jwt.py +797 -0
- agno/os/router.py +258 -0
- agno/os/routers/__init__.py +3 -0
- agno/os/routers/agents/__init__.py +3 -0
- agno/os/routers/agents/router.py +599 -0
- agno/os/routers/agents/schema.py +261 -0
- agno/os/routers/evals/__init__.py +3 -0
- agno/os/routers/evals/evals.py +450 -0
- agno/os/routers/evals/schemas.py +174 -0
- agno/os/routers/evals/utils.py +231 -0
- agno/os/routers/health.py +31 -0
- agno/os/routers/home.py +52 -0
- agno/os/routers/knowledge/__init__.py +3 -0
- agno/os/routers/knowledge/knowledge.py +1008 -0
- agno/os/routers/knowledge/schemas.py +178 -0
- agno/os/routers/memory/__init__.py +3 -0
- agno/os/routers/memory/memory.py +661 -0
- agno/os/routers/memory/schemas.py +88 -0
- agno/os/routers/metrics/__init__.py +3 -0
- agno/os/routers/metrics/metrics.py +190 -0
- agno/os/routers/metrics/schemas.py +47 -0
- agno/os/routers/session/__init__.py +3 -0
- agno/os/routers/session/session.py +997 -0
- agno/os/routers/teams/__init__.py +3 -0
- agno/os/routers/teams/router.py +512 -0
- agno/os/routers/teams/schema.py +257 -0
- agno/os/routers/traces/__init__.py +3 -0
- agno/os/routers/traces/schemas.py +414 -0
- agno/os/routers/traces/traces.py +499 -0
- agno/os/routers/workflows/__init__.py +3 -0
- agno/os/routers/workflows/router.py +624 -0
- agno/os/routers/workflows/schema.py +75 -0
- agno/os/schema.py +534 -0
- agno/os/scopes.py +469 -0
- agno/{playground → os}/settings.py +7 -15
- agno/os/utils.py +973 -0
- agno/reasoning/anthropic.py +80 -0
- agno/reasoning/azure_ai_foundry.py +67 -0
- agno/reasoning/deepseek.py +63 -0
- agno/reasoning/default.py +97 -0
- agno/reasoning/gemini.py +73 -0
- agno/reasoning/groq.py +71 -0
- agno/reasoning/helpers.py +24 -1
- agno/reasoning/ollama.py +67 -0
- agno/reasoning/openai.py +86 -0
- agno/reasoning/step.py +2 -1
- agno/reasoning/vertexai.py +76 -0
- agno/run/__init__.py +6 -0
- agno/run/agent.py +822 -0
- agno/run/base.py +247 -0
- agno/run/cancel.py +81 -0
- agno/run/requirement.py +181 -0
- agno/run/team.py +767 -0
- agno/run/workflow.py +708 -0
- agno/session/__init__.py +10 -0
- agno/session/agent.py +260 -0
- agno/session/summary.py +265 -0
- agno/session/team.py +342 -0
- agno/session/workflow.py +501 -0
- agno/table.py +10 -0
- agno/team/__init__.py +37 -0
- agno/team/team.py +9536 -0
- agno/tools/__init__.py +7 -0
- agno/tools/agentql.py +120 -0
- agno/tools/airflow.py +22 -12
- agno/tools/api.py +122 -0
- agno/tools/apify.py +276 -83
- agno/tools/{arxiv_toolkit.py → arxiv.py} +20 -12
- agno/tools/aws_lambda.py +28 -7
- agno/tools/aws_ses.py +66 -0
- agno/tools/baidusearch.py +11 -4
- agno/tools/bitbucket.py +292 -0
- agno/tools/brandfetch.py +213 -0
- agno/tools/bravesearch.py +106 -0
- agno/tools/brightdata.py +367 -0
- agno/tools/browserbase.py +209 -0
- agno/tools/calcom.py +32 -23
- agno/tools/calculator.py +24 -37
- agno/tools/cartesia.py +187 -0
- agno/tools/{clickup_tool.py → clickup.py} +17 -28
- agno/tools/confluence.py +91 -26
- agno/tools/crawl4ai.py +139 -43
- agno/tools/csv_toolkit.py +28 -22
- agno/tools/dalle.py +36 -22
- agno/tools/daytona.py +475 -0
- agno/tools/decorator.py +169 -14
- agno/tools/desi_vocal.py +23 -11
- agno/tools/discord.py +32 -29
- agno/tools/docker.py +716 -0
- agno/tools/duckdb.py +76 -81
- agno/tools/duckduckgo.py +43 -40
- agno/tools/e2b.py +703 -0
- agno/tools/eleven_labs.py +65 -54
- agno/tools/email.py +13 -5
- agno/tools/evm.py +129 -0
- agno/tools/exa.py +324 -42
- agno/tools/fal.py +39 -35
- agno/tools/file.py +196 -30
- agno/tools/file_generation.py +356 -0
- agno/tools/financial_datasets.py +288 -0
- agno/tools/firecrawl.py +108 -33
- agno/tools/function.py +960 -122
- agno/tools/giphy.py +34 -12
- agno/tools/github.py +1294 -97
- agno/tools/gmail.py +922 -0
- agno/tools/google_bigquery.py +117 -0
- agno/tools/google_drive.py +271 -0
- agno/tools/google_maps.py +253 -0
- agno/tools/googlecalendar.py +607 -107
- agno/tools/googlesheets.py +377 -0
- agno/tools/hackernews.py +20 -12
- agno/tools/jina.py +24 -14
- agno/tools/jira.py +48 -19
- agno/tools/knowledge.py +218 -0
- agno/tools/linear.py +82 -43
- agno/tools/linkup.py +58 -0
- agno/tools/local_file_system.py +15 -7
- agno/tools/lumalab.py +41 -26
- agno/tools/mcp/__init__.py +10 -0
- agno/tools/mcp/mcp.py +331 -0
- agno/tools/mcp/multi_mcp.py +347 -0
- agno/tools/mcp/params.py +24 -0
- agno/tools/mcp_toolbox.py +284 -0
- agno/tools/mem0.py +193 -0
- agno/tools/memory.py +419 -0
- agno/tools/mlx_transcribe.py +11 -9
- agno/tools/models/azure_openai.py +190 -0
- agno/tools/models/gemini.py +203 -0
- agno/tools/models/groq.py +158 -0
- agno/tools/models/morph.py +186 -0
- agno/tools/models/nebius.py +124 -0
- agno/tools/models_labs.py +163 -82
- agno/tools/moviepy_video.py +18 -13
- agno/tools/nano_banana.py +151 -0
- agno/tools/neo4j.py +134 -0
- agno/tools/newspaper.py +15 -4
- agno/tools/newspaper4k.py +19 -6
- agno/tools/notion.py +204 -0
- agno/tools/openai.py +181 -17
- agno/tools/openbb.py +27 -20
- agno/tools/opencv.py +321 -0
- agno/tools/openweather.py +233 -0
- agno/tools/oxylabs.py +385 -0
- agno/tools/pandas.py +25 -15
- agno/tools/parallel.py +314 -0
- agno/tools/postgres.py +238 -185
- agno/tools/pubmed.py +125 -13
- agno/tools/python.py +48 -35
- agno/tools/reasoning.py +283 -0
- agno/tools/reddit.py +207 -29
- agno/tools/redshift.py +406 -0
- agno/tools/replicate.py +69 -26
- agno/tools/resend.py +11 -6
- agno/tools/scrapegraph.py +179 -19
- agno/tools/searxng.py +23 -31
- agno/tools/serpapi.py +15 -10
- agno/tools/serper.py +255 -0
- agno/tools/shell.py +23 -12
- agno/tools/shopify.py +1519 -0
- agno/tools/slack.py +56 -14
- agno/tools/sleep.py +8 -6
- agno/tools/spider.py +35 -11
- agno/tools/spotify.py +919 -0
- agno/tools/sql.py +34 -19
- agno/tools/tavily.py +158 -8
- agno/tools/telegram.py +18 -8
- agno/tools/todoist.py +218 -0
- agno/tools/toolkit.py +134 -9
- agno/tools/trafilatura.py +388 -0
- agno/tools/trello.py +25 -28
- agno/tools/twilio.py +18 -9
- agno/tools/user_control_flow.py +78 -0
- agno/tools/valyu.py +228 -0
- agno/tools/visualization.py +467 -0
- agno/tools/webbrowser.py +28 -0
- agno/tools/webex.py +76 -0
- agno/tools/website.py +23 -19
- agno/tools/webtools.py +45 -0
- agno/tools/whatsapp.py +286 -0
- agno/tools/wikipedia.py +28 -19
- agno/tools/workflow.py +285 -0
- agno/tools/{twitter.py → x.py} +142 -46
- agno/tools/yfinance.py +41 -39
- agno/tools/youtube.py +34 -17
- agno/tools/zendesk.py +15 -5
- agno/tools/zep.py +454 -0
- agno/tools/zoom.py +86 -37
- agno/tracing/__init__.py +12 -0
- agno/tracing/exporter.py +157 -0
- agno/tracing/schemas.py +276 -0
- agno/tracing/setup.py +111 -0
- agno/utils/agent.py +938 -0
- agno/utils/audio.py +37 -1
- agno/utils/certs.py +27 -0
- agno/utils/code_execution.py +11 -0
- agno/utils/common.py +103 -20
- agno/utils/cryptography.py +22 -0
- agno/utils/dttm.py +33 -0
- agno/utils/events.py +700 -0
- agno/utils/functions.py +107 -37
- agno/utils/gemini.py +426 -0
- agno/utils/hooks.py +171 -0
- agno/utils/http.py +185 -0
- agno/utils/json_schema.py +159 -37
- agno/utils/knowledge.py +36 -0
- agno/utils/location.py +19 -0
- agno/utils/log.py +221 -8
- agno/utils/mcp.py +214 -0
- agno/utils/media.py +335 -14
- agno/utils/merge_dict.py +22 -1
- agno/utils/message.py +77 -2
- agno/utils/models/ai_foundry.py +50 -0
- agno/utils/models/claude.py +373 -0
- agno/utils/models/cohere.py +94 -0
- agno/utils/models/llama.py +85 -0
- agno/utils/models/mistral.py +100 -0
- agno/utils/models/openai_responses.py +140 -0
- agno/utils/models/schema_utils.py +153 -0
- agno/utils/models/watsonx.py +41 -0
- agno/utils/openai.py +257 -0
- agno/utils/pickle.py +1 -1
- agno/utils/pprint.py +124 -8
- agno/utils/print_response/agent.py +930 -0
- agno/utils/print_response/team.py +1914 -0
- agno/utils/print_response/workflow.py +1668 -0
- agno/utils/prompts.py +111 -0
- agno/utils/reasoning.py +108 -0
- agno/utils/response.py +163 -0
- agno/utils/serialize.py +32 -0
- agno/utils/shell.py +4 -4
- agno/utils/streamlit.py +487 -0
- agno/utils/string.py +204 -51
- agno/utils/team.py +139 -0
- agno/utils/timer.py +9 -2
- agno/utils/tokens.py +657 -0
- agno/utils/tools.py +19 -1
- agno/utils/whatsapp.py +305 -0
- agno/utils/yaml_io.py +3 -3
- agno/vectordb/__init__.py +2 -0
- agno/vectordb/base.py +87 -9
- agno/vectordb/cassandra/__init__.py +5 -1
- agno/vectordb/cassandra/cassandra.py +383 -27
- agno/vectordb/chroma/__init__.py +4 -0
- agno/vectordb/chroma/chromadb.py +748 -83
- agno/vectordb/clickhouse/__init__.py +7 -1
- agno/vectordb/clickhouse/clickhousedb.py +554 -53
- agno/vectordb/couchbase/__init__.py +3 -0
- agno/vectordb/couchbase/couchbase.py +1446 -0
- agno/vectordb/lancedb/__init__.py +5 -0
- agno/vectordb/lancedb/lance_db.py +730 -98
- agno/vectordb/langchaindb/__init__.py +5 -0
- agno/vectordb/langchaindb/langchaindb.py +163 -0
- agno/vectordb/lightrag/__init__.py +5 -0
- agno/vectordb/lightrag/lightrag.py +388 -0
- agno/vectordb/llamaindex/__init__.py +3 -0
- agno/vectordb/llamaindex/llamaindexdb.py +166 -0
- agno/vectordb/milvus/__init__.py +3 -0
- agno/vectordb/milvus/milvus.py +966 -78
- agno/vectordb/mongodb/__init__.py +9 -1
- agno/vectordb/mongodb/mongodb.py +1175 -172
- agno/vectordb/pgvector/__init__.py +8 -0
- agno/vectordb/pgvector/pgvector.py +599 -115
- agno/vectordb/pineconedb/__init__.py +5 -1
- agno/vectordb/pineconedb/pineconedb.py +406 -43
- agno/vectordb/qdrant/__init__.py +4 -0
- agno/vectordb/qdrant/qdrant.py +914 -61
- agno/vectordb/redis/__init__.py +9 -0
- agno/vectordb/redis/redisdb.py +682 -0
- agno/vectordb/singlestore/__init__.py +8 -1
- agno/vectordb/singlestore/singlestore.py +771 -0
- agno/vectordb/surrealdb/__init__.py +3 -0
- agno/vectordb/surrealdb/surrealdb.py +663 -0
- agno/vectordb/upstashdb/__init__.py +5 -0
- agno/vectordb/upstashdb/upstashdb.py +718 -0
- agno/vectordb/weaviate/__init__.py +8 -0
- agno/vectordb/weaviate/index.py +15 -0
- agno/vectordb/weaviate/weaviate.py +1009 -0
- agno/workflow/__init__.py +23 -1
- agno/workflow/agent.py +299 -0
- agno/workflow/condition.py +759 -0
- agno/workflow/loop.py +756 -0
- agno/workflow/parallel.py +853 -0
- agno/workflow/router.py +723 -0
- agno/workflow/step.py +1564 -0
- agno/workflow/steps.py +613 -0
- agno/workflow/types.py +556 -0
- agno/workflow/workflow.py +4327 -514
- agno-2.3.13.dist-info/METADATA +639 -0
- agno-2.3.13.dist-info/RECORD +613 -0
- {agno-0.1.2.dist-info → agno-2.3.13.dist-info}/WHEEL +1 -1
- agno-2.3.13.dist-info/licenses/LICENSE +201 -0
- agno/api/playground.py +0 -91
- agno/api/schemas/playground.py +0 -22
- agno/api/schemas/user.py +0 -22
- agno/api/schemas/workspace.py +0 -46
- agno/api/user.py +0 -160
- agno/api/workspace.py +0 -151
- agno/cli/auth_server.py +0 -118
- agno/cli/config.py +0 -275
- agno/cli/console.py +0 -88
- agno/cli/credentials.py +0 -23
- agno/cli/entrypoint.py +0 -571
- agno/cli/operator.py +0 -355
- agno/cli/settings.py +0 -85
- agno/cli/ws/ws_cli.py +0 -817
- agno/constants.py +0 -13
- agno/document/__init__.py +0 -1
- agno/document/chunking/semantic.py +0 -47
- agno/document/chunking/strategy.py +0 -31
- agno/document/reader/__init__.py +0 -1
- agno/document/reader/arxiv_reader.py +0 -41
- agno/document/reader/base.py +0 -22
- agno/document/reader/csv_reader.py +0 -84
- agno/document/reader/docx_reader.py +0 -46
- agno/document/reader/firecrawl_reader.py +0 -99
- agno/document/reader/json_reader.py +0 -43
- agno/document/reader/pdf_reader.py +0 -219
- agno/document/reader/s3/pdf_reader.py +0 -46
- agno/document/reader/s3/text_reader.py +0 -51
- agno/document/reader/text_reader.py +0 -41
- agno/document/reader/website_reader.py +0 -175
- agno/document/reader/youtube_reader.py +0 -50
- agno/embedder/__init__.py +0 -1
- agno/embedder/azure_openai.py +0 -86
- agno/embedder/cohere.py +0 -72
- agno/embedder/fastembed.py +0 -37
- agno/embedder/google.py +0 -73
- agno/embedder/huggingface.py +0 -54
- agno/embedder/mistral.py +0 -80
- agno/embedder/ollama.py +0 -57
- agno/embedder/openai.py +0 -74
- agno/embedder/sentence_transformer.py +0 -38
- agno/embedder/voyageai.py +0 -64
- agno/eval/perf.py +0 -201
- agno/file/__init__.py +0 -1
- agno/file/file.py +0 -16
- agno/file/local/csv.py +0 -32
- agno/file/local/txt.py +0 -19
- agno/infra/app.py +0 -240
- agno/infra/base.py +0 -144
- agno/infra/context.py +0 -20
- agno/infra/db_app.py +0 -52
- agno/infra/resource.py +0 -205
- agno/infra/resources.py +0 -55
- agno/knowledge/agent.py +0 -230
- agno/knowledge/arxiv.py +0 -22
- agno/knowledge/combined.py +0 -22
- agno/knowledge/csv.py +0 -28
- agno/knowledge/csv_url.py +0 -19
- agno/knowledge/document.py +0 -20
- agno/knowledge/docx.py +0 -30
- agno/knowledge/json.py +0 -28
- agno/knowledge/langchain.py +0 -71
- agno/knowledge/llamaindex.py +0 -66
- agno/knowledge/pdf.py +0 -28
- agno/knowledge/pdf_url.py +0 -26
- agno/knowledge/s3/base.py +0 -60
- agno/knowledge/s3/pdf.py +0 -21
- agno/knowledge/s3/text.py +0 -23
- agno/knowledge/text.py +0 -30
- agno/knowledge/website.py +0 -88
- agno/knowledge/wikipedia.py +0 -31
- agno/knowledge/youtube.py +0 -22
- agno/memory/agent.py +0 -392
- agno/memory/classifier.py +0 -104
- agno/memory/db/__init__.py +0 -1
- agno/memory/db/base.py +0 -42
- agno/memory/db/mongodb.py +0 -189
- agno/memory/db/postgres.py +0 -203
- agno/memory/db/sqlite.py +0 -193
- agno/memory/memory.py +0 -15
- agno/memory/row.py +0 -36
- agno/memory/summarizer.py +0 -192
- agno/memory/summary.py +0 -19
- agno/memory/workflow.py +0 -38
- agno/models/google/gemini_openai.py +0 -26
- agno/models/ollama/hermes.py +0 -221
- agno/models/ollama/tools.py +0 -362
- agno/models/vertexai/gemini.py +0 -595
- agno/playground/__init__.py +0 -3
- agno/playground/async_router.py +0 -421
- agno/playground/deploy.py +0 -249
- agno/playground/operator.py +0 -92
- agno/playground/playground.py +0 -91
- agno/playground/schemas.py +0 -76
- agno/playground/serve.py +0 -55
- agno/playground/sync_router.py +0 -405
- agno/reasoning/agent.py +0 -68
- agno/run/response.py +0 -112
- agno/storage/agent/__init__.py +0 -0
- agno/storage/agent/base.py +0 -38
- agno/storage/agent/dynamodb.py +0 -350
- agno/storage/agent/json.py +0 -92
- agno/storage/agent/mongodb.py +0 -228
- agno/storage/agent/postgres.py +0 -367
- agno/storage/agent/session.py +0 -79
- agno/storage/agent/singlestore.py +0 -303
- agno/storage/agent/sqlite.py +0 -357
- agno/storage/agent/yaml.py +0 -93
- agno/storage/workflow/__init__.py +0 -0
- agno/storage/workflow/base.py +0 -40
- agno/storage/workflow/mongodb.py +0 -233
- agno/storage/workflow/postgres.py +0 -366
- agno/storage/workflow/session.py +0 -60
- agno/storage/workflow/sqlite.py +0 -359
- agno/tools/googlesearch.py +0 -88
- agno/utils/defaults.py +0 -57
- agno/utils/filesystem.py +0 -39
- agno/utils/git.py +0 -52
- agno/utils/json_io.py +0 -30
- agno/utils/load_env.py +0 -19
- agno/utils/py_io.py +0 -19
- agno/utils/pyproject.py +0 -18
- agno/utils/resource_filter.py +0 -31
- agno/vectordb/singlestore/s2vectordb.py +0 -390
- agno/vectordb/singlestore/s2vectordb2.py +0 -355
- agno/workspace/__init__.py +0 -0
- agno/workspace/config.py +0 -325
- agno/workspace/enums.py +0 -6
- agno/workspace/helpers.py +0 -48
- agno/workspace/operator.py +0 -758
- agno/workspace/settings.py +0 -63
- agno-0.1.2.dist-info/LICENSE +0 -375
- agno-0.1.2.dist-info/METADATA +0 -502
- agno-0.1.2.dist-info/RECORD +0 -352
- agno-0.1.2.dist-info/entry_points.txt +0 -3
- /agno/{cli → db/migrations}/__init__.py +0 -0
- /agno/{cli/ws → db/migrations/versions}/__init__.py +0 -0
- /agno/{document/chunking/__init__.py → db/schemas/metrics.py} +0 -0
- /agno/{document/reader/s3 → integrations}/__init__.py +0 -0
- /agno/{file/local → knowledge/chunking}/__init__.py +0 -0
- /agno/{infra → knowledge/remote_content}/__init__.py +0 -0
- /agno/{knowledge/s3 → tools/models}/__init__.py +0 -0
- /agno/{reranker → utils/models}/__init__.py +0 -0
- /agno/{storage → utils/print_response}/__init__.py +0 -0
- {agno-0.1.2.dist-info → agno-2.3.13.dist-info}/top_level.txt +0 -0
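The rename entries above imply import-path migrations for downstream code. As a hedged sketch (the module paths come from the rename lines above; the class names are illustrative assumptions, since the listing only shows file paths):

    # agno 0.1.2 module layout (removed in 2.3.13):
    #   from agno.embedder.openai import OpenAIEmbedder          # class name assumed
    #   from agno.document.chunking.fixed import FixedSizeChunking  # class name assumed

    # agno 2.3.13 module layout, per the {embedder -> knowledge/embedder}
    # and {document -> knowledge}/chunking rename entries above:
    from agno.knowledge.embedder.openai import OpenAIEmbedder
    from agno.knowledge.chunking.fixed import FixedSizeChunking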
agno/eval/accuracy.py
CHANGED
|
@@ -1,14 +1,19 @@
|
|
|
1
1
|
from dataclasses import asdict, dataclass, field
|
|
2
2
|
from os import getenv
|
|
3
|
-
from
|
|
4
|
-
from typing import TYPE_CHECKING, Callable, List, Optional, Union
|
|
3
|
+
from textwrap import dedent
|
|
4
|
+
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Union
|
|
5
5
|
from uuid import uuid4
|
|
6
6
|
|
|
7
7
|
from pydantic import BaseModel, Field
|
|
8
8
|
|
|
9
|
-
from agno.agent import Agent
|
|
9
|
+
from agno.agent import Agent
|
|
10
|
+
from agno.db.base import AsyncBaseDb, BaseDb
|
|
11
|
+
from agno.db.schemas.evals import EvalType
|
|
12
|
+
from agno.eval.utils import async_log_eval, log_eval_run, store_result_in_file
|
|
13
|
+
from agno.exceptions import EvalError
|
|
10
14
|
from agno.models.base import Model
|
|
11
|
-
from agno.
|
|
15
|
+
from agno.team.team import Team
|
|
16
|
+
from agno.utils.log import log_error, logger, set_log_level_to_debug, set_log_level_to_info
|
|
12
17
|
|
|
13
18
|
if TYPE_CHECKING:
|
|
14
19
|
from rich.console import Console
|
|
@@ -21,9 +26,9 @@ class AccuracyAgentResponse(BaseModel):
|
|
|
21
26
|
|
|
22
27
|
@dataclass
|
|
23
28
|
class AccuracyEvaluation:
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
29
|
+
input: str
|
|
30
|
+
output: str
|
|
31
|
+
expected_output: str
|
|
27
32
|
score: int
|
|
28
33
|
reason: str
|
|
29
34
|
|
|
@@ -44,9 +49,9 @@ class AccuracyEvaluation:
|
|
|
44
49
|
title_style="bold sky_blue1",
|
|
45
50
|
title_justify="center",
|
|
46
51
|
)
|
|
47
|
-
results_table.add_row("
|
|
48
|
-
results_table.add_row("
|
|
49
|
-
results_table.add_row("Expected
|
|
52
|
+
results_table.add_row("Input", self.input)
|
|
53
|
+
results_table.add_row("Output", self.output)
|
|
54
|
+
results_table.add_row("Expected Output", self.expected_output)
|
|
50
55
|
results_table.add_row("Accuracy Score", f"{str(self.score)}/10")
|
|
51
56
|
results_table.add_row("Accuracy Reason", Markdown(self.reason))
|
|
52
57
|
console.print(results_table)
|
|
@@ -92,11 +97,18 @@ class AccuracyResult:
|
|
|
92
97
|
title_justify="center",
|
|
93
98
|
)
|
|
94
99
|
summary_table.add_row("Number of Runs", f"{len(self.results)}")
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
+
|
|
101
|
+
if self.avg_score is not None:
|
|
102
|
+
summary_table.add_row("Average Score", f"{self.avg_score:.2f}")
|
|
103
|
+
if self.mean_score is not None:
|
|
104
|
+
summary_table.add_row("Mean Score", f"{self.mean_score:.2f}")
|
|
105
|
+
if self.min_score is not None:
|
|
106
|
+
summary_table.add_row("Minimum Score", f"{self.min_score:.2f}")
|
|
107
|
+
if self.max_score is not None:
|
|
108
|
+
summary_table.add_row("Maximum Score", f"{self.max_score:.2f}")
|
|
109
|
+
if self.std_dev_score is not None:
|
|
110
|
+
summary_table.add_row("Standard Deviation", f"{self.std_dev_score:.2f}")
|
|
111
|
+
|
|
100
112
|
console.print(summary_table)
|
|
101
113
|
|
|
102
114
|
def print_results(self, console: Optional["Console"] = None):
|
|
@@ -116,9 +128,9 @@ class AccuracyResult:
|
|
|
116
128
|
title_justify="center",
|
|
117
129
|
)
|
|
118
130
|
for result in self.results:
|
|
119
|
-
results_table.add_row("
|
|
120
|
-
results_table.add_row("
|
|
121
|
-
results_table.add_row("Expected
|
|
131
|
+
results_table.add_row("Input", result.input)
|
|
132
|
+
results_table.add_row("Output", result.output)
|
|
133
|
+
results_table.add_row("Expected Output", result.expected_output)
|
|
122
134
|
results_table.add_row("Accuracy Score", f"{str(result.score)}/10")
|
|
123
135
|
results_table.add_row("Accuracy Reason", result.reason)
|
|
124
136
|
console.print(results_table)
|
|
@@ -126,61 +138,53 @@ class AccuracyResult:
|
|
|
126
138
|
|
|
127
139
|
@dataclass
|
|
128
140
|
class AccuracyEval:
|
|
129
|
-
"""
|
|
141
|
+
"""Interface to evaluate the accuracy of an Agent or Team, given a prompt and expected answer"""
|
|
142
|
+
|
|
143
|
+
# Input to evaluate
|
|
144
|
+
input: Union[str, Callable]
|
|
145
|
+
# Expected answer to the input
|
|
146
|
+
expected_output: Union[str, Callable]
|
|
147
|
+
# Agent to evaluate
|
|
148
|
+
agent: Optional[Agent] = None
|
|
149
|
+
# Team to evaluate
|
|
150
|
+
team: Optional[Team] = None
|
|
130
151
|
|
|
131
152
|
# Evaluation name
|
|
132
153
|
name: Optional[str] = None
|
|
133
|
-
# Evaluation UUID
|
|
134
|
-
eval_id:
|
|
154
|
+
# Evaluation UUID
|
|
155
|
+
eval_id: str = field(default_factory=lambda: str(uuid4()))
|
|
156
|
+
# Number of iterations to run
|
|
157
|
+
num_iterations: int = 1
|
|
158
|
+
# Result of the evaluation
|
|
159
|
+
result: Optional[AccuracyResult] = None
|
|
135
160
|
|
|
136
|
-
# Model
|
|
161
|
+
# Model for the evaluator agent
|
|
137
162
|
model: Optional[Model] = None
|
|
138
|
-
|
|
139
|
-
# Evaluate an Agent
|
|
140
|
-
agent: Optional[Agent] = None
|
|
141
|
-
# Question to evaluate (can also be provided with the run method)
|
|
142
|
-
question: Optional[Union[str, Callable]] = None
|
|
143
|
-
# Answer to evaluate (can also be provided with the run method)
|
|
144
|
-
answer: Optional[Union[str, Callable]] = None
|
|
145
|
-
# Expected Answer for the question (can also be provided with the run method)
|
|
146
|
-
expected_answer: Optional[Union[str, Callable]] = None
|
|
147
|
-
|
|
163
|
+
# Agent used to evaluate the answer
|
|
148
164
|
evaluator_agent: Optional[Agent] = None
|
|
149
165
|
# Guidelines for the evaluator agent
|
|
150
|
-
|
|
166
|
+
additional_guidelines: Optional[Union[str, List[str]]] = None
|
|
151
167
|
# Additional context to the evaluator agent
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
# Number of iterations to run
|
|
155
|
-
num_iterations: int = 3
|
|
156
|
-
# Result of the evaluation
|
|
157
|
-
result: Optional[AccuracyResult] = None
|
|
168
|
+
additional_context: Optional[str] = None
|
|
158
169
|
|
|
159
170
|
# Print summary of results
|
|
160
171
|
print_summary: bool = False
|
|
161
172
|
# Print detailed results
|
|
162
173
|
print_results: bool = False
|
|
163
|
-
#
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
def
|
|
176
|
-
|
|
177
|
-
self.debug_mode = True
|
|
178
|
-
set_log_level_to_debug()
|
|
179
|
-
logger.debug("Debug logs enabled")
|
|
180
|
-
else:
|
|
181
|
-
set_log_level_to_info()
|
|
182
|
-
|
|
183
|
-
def get_evaluator_agent(self, question: str, expected_answer: str) -> Agent:
|
|
174
|
+
# If set, results will be saved in the given file path
|
|
175
|
+
file_path_to_save_results: Optional[str] = None
|
|
176
|
+
# Enable debug logs
|
|
177
|
+
debug_mode: bool = getenv("AGNO_DEBUG", "false").lower() == "true"
|
|
178
|
+
# The database to store Evaluation results
|
|
179
|
+
db: Optional[Union[BaseDb, AsyncBaseDb]] = None
|
|
180
|
+
|
|
181
|
+
# Telemetry settings
|
|
182
|
+
# telemetry=True logs minimal telemetry for analytics
|
|
183
|
+
# This helps us improve our Evals and provide better support
|
|
184
|
+
telemetry: bool = True
|
|
185
|
+
|
|
186
|
+
def get_evaluator_agent(self) -> Agent:
|
|
187
|
+
"""Return the evaluator agent. If not provided, build it based on the evaluator fields and default instructions."""
|
|
184
188
|
if self.evaluator_agent is not None:
|
|
185
189
|
return self.evaluator_agent
|
|
186
190
|
|
|
@@ -189,269 +193,655 @@ class AccuracyEval:
|
|
|
189
193
|
try:
|
|
190
194
|
from agno.models.openai import OpenAIChat
|
|
191
195
|
|
|
192
|
-
model = OpenAIChat(id="
|
|
196
|
+
model = OpenAIChat(id="o4-mini")
|
|
193
197
|
except (ModuleNotFoundError, ImportError) as e:
|
|
194
198
|
logger.exception(e)
|
|
195
|
-
|
|
199
|
+
raise EvalError(
|
|
196
200
|
"Agno uses `openai` as the default model provider. Please run `pip install openai` to use the default evaluator."
|
|
197
201
|
)
|
|
198
|
-
exit(1)
|
|
199
202
|
|
|
200
|
-
|
|
201
|
-
if self.
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
203
|
+
additional_guidelines = ""
|
|
204
|
+
if self.additional_guidelines is not None:
|
|
205
|
+
additional_guidelines = "\n## Additional Guidelines\n"
|
|
206
|
+
if isinstance(self.additional_guidelines, str):
|
|
207
|
+
additional_guidelines += self.additional_guidelines
|
|
208
|
+
else:
|
|
209
|
+
additional_guidelines += "\n- ".join(self.additional_guidelines)
|
|
210
|
+
additional_guidelines += "\n"
|
|
211
|
+
|
|
212
|
+
additional_context = ""
|
|
213
|
+
if self.additional_context is not None and len(self.additional_context) > 0:
|
|
214
|
+
additional_context = "\n## Additional Context\n"
|
|
215
|
+
additional_context += self.additional_context
|
|
216
|
+
additional_context += "\n"
|
|
211
217
|
|
|
212
218
|
return Agent(
|
|
213
|
-
model=
|
|
219
|
+
model=model,
|
|
214
220
|
description=f"""\
|
|
215
|
-
You are an
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
4
|
|
239
|
-
|
|
240
|
-
3-4: Major inaccuracies or missing crucial information
|
|
241
|
-
5-6: Partially correct, but with significant omissions or errors
|
|
221
|
+
You are an expert judge tasked with comparing the quality of an AI Agent’s output to a user-provided expected output. You must assume the expected_output is correct - even if you personally disagree.
|
|
222
|
+
|
|
223
|
+
## Evaluation Inputs
|
|
224
|
+
- agent_input: The original task or query given to the Agent.
|
|
225
|
+
- expected_output: The correct response to the task (provided by the user).
|
|
226
|
+
- NOTE: You must assume the expected_output is correct - even if you personally disagree.
|
|
227
|
+
- agent_output: The response generated by the Agent.
|
|
228
|
+
|
|
229
|
+
## Evaluation Criteria
|
|
230
|
+
- Accuracy: How closely does the agent_output match the expected_output?
|
|
231
|
+
- Completeness: Does the agent_output include all the key elements of the expected_output?
|
|
232
|
+
|
|
233
|
+
## Instructions
|
|
234
|
+
1. Compare the agent_output only to the expected_output, not what you think the expected_output should be.
|
|
235
|
+
2. Do not judge the correctness of the expected_output itself. Your role is only to compare the two outputs, the user provided expected_output is correct.
|
|
236
|
+
3. Follow the additional guidelines if provided.
|
|
237
|
+
4. Provide a detailed analysis including:
|
|
238
|
+
- Specific similarities and differences
|
|
239
|
+
- Important points included or omitted
|
|
240
|
+
- Any inaccuracies, paraphrasing errors, or structural differences
|
|
241
|
+
5. Reference the criteria explicitly in your reasoning.
|
|
242
|
+
6. Assign a score from 1 to 10 (whole numbers only):
|
|
243
|
+
1-2: Completely incorrect or irrelevant.
|
|
244
|
+
3-4: Major inaccuracies or missing key information.
|
|
245
|
+
5-6: Partially correct, but with significant issues.
|
|
242
246
|
7-8: Mostly accurate and complete, with minor issues
|
|
243
|
-
9-10: Highly accurate and complete, matching the expected answer closely
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
+
9-10: Highly accurate and complete, matching the expected answer and given guidelines closely.
|
|
248
|
+
{additional_guidelines}{additional_context}
|
|
249
|
+
Remember: You must only compare the agent_output to the expected_output. The expected_output is correct as it was provided by the user.
|
|
250
|
+
""",
|
|
251
|
+
output_schema=AccuracyAgentResponse,
|
|
247
252
|
structured_outputs=True,
|
|
248
253
|
)
|
|
249
254
|
|
|
250
|
-
def
|
|
251
|
-
"""
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
if
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
logger.error(f"Failed to get question to evaluate: {e}")
|
|
280
|
-
return None
|
|
281
|
-
|
|
282
|
-
def get_answer_to_evaluate(
|
|
283
|
-
self, question: str, answer: Optional[Union[str, Callable]] = None
|
|
284
|
-
) -> Optional[RunResponse]:
|
|
285
|
-
"""Get the answer to evaluate.
|
|
286
|
-
|
|
287
|
-
Priority:
|
|
288
|
-
1. Answer provided with the run method
|
|
289
|
-
2. Answer provided with the eval
|
|
290
|
-
3. Answer from the agent
|
|
291
|
-
"""
|
|
255
|
+
def get_eval_expected_output(self) -> str:
|
|
256
|
+
"""Return the eval expected answer. If it is a callable, call it and return the resulting string"""
|
|
257
|
+
if callable(self.expected_output):
|
|
258
|
+
_output = self.expected_output()
|
|
259
|
+
if isinstance(_output, str):
|
|
260
|
+
return _output
|
|
261
|
+
else:
|
|
262
|
+
raise EvalError(f"The expected output needs to be or return a string, but it returned: {type(_output)}")
|
|
263
|
+
return self.expected_output
|
|
264
|
+
|
|
265
|
+
def get_eval_input(self) -> str:
|
|
266
|
+
"""Return the evaluation input. If it is a callable, call it and return the resulting string"""
|
|
267
|
+
if callable(self.input):
|
|
268
|
+
_input = self.input()
|
|
269
|
+
if isinstance(_input, str):
|
|
270
|
+
return _input
|
|
271
|
+
else:
|
|
272
|
+
raise EvalError(f"The eval input needs to be or return a string, but it returned: {type(_input)}")
|
|
273
|
+
return self.input
|
|
274
|
+
|
|
275
|
+
def evaluate_answer(
|
|
276
|
+
self,
|
|
277
|
+
input: str,
|
|
278
|
+
evaluator_agent: Agent,
|
|
279
|
+
evaluation_input: str,
|
|
280
|
+
evaluator_expected_output: str,
|
|
281
|
+
agent_output: str,
|
|
282
|
+
) -> Optional[AccuracyEvaluation]:
|
|
283
|
+
"""Orchestrate the evaluation process."""
|
|
292
284
|
try:
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
logger.error("Answer is not a string or callable")
|
|
305
|
-
|
|
306
|
-
# Get answer from the eval
|
|
307
|
-
if self.answer is not None:
|
|
308
|
-
if isinstance(self.answer, str):
|
|
309
|
-
return RunResponse(content=self.answer)
|
|
310
|
-
elif callable(self.answer):
|
|
311
|
-
_answer = self.answer()
|
|
312
|
-
if isinstance(_answer, str):
|
|
313
|
-
return RunResponse(content=_answer)
|
|
314
|
-
else:
|
|
315
|
-
logger.error("Answer is not a string")
|
|
316
|
-
else:
|
|
317
|
-
logger.error("Answer is not a string or callable")
|
|
318
|
-
|
|
319
|
-
# Get answer from the agent
|
|
320
|
-
if self.agent is not None and question is not None:
|
|
321
|
-
logger.debug("Getting answer from agent")
|
|
322
|
-
return self.agent.run(question)
|
|
285
|
+
response = evaluator_agent.run(evaluation_input, stream=False)
|
|
286
|
+
accuracy_agent_response = response.content
|
|
287
|
+
if accuracy_agent_response is None or not isinstance(accuracy_agent_response, AccuracyAgentResponse):
|
|
288
|
+
raise EvalError(f"Evaluator Agent returned an invalid response: {accuracy_agent_response}")
|
|
289
|
+
return AccuracyEvaluation(
|
|
290
|
+
input=input,
|
|
291
|
+
output=agent_output,
|
|
292
|
+
expected_output=evaluator_expected_output,
|
|
293
|
+
score=accuracy_agent_response.accuracy_score,
|
|
294
|
+
reason=accuracy_agent_response.accuracy_reason,
|
|
295
|
+
)
|
|
323
296
|
except Exception as e:
|
|
324
|
-
logger.
|
|
325
|
-
|
|
297
|
+
logger.exception(f"Failed to evaluate accuracy: {e}")
|
|
298
|
+
return None
|
|
326
299
|
|
|
327
|
-
def
|
|
328
|
-
|
|
300
|
+
async def aevaluate_answer(
|
|
301
|
+
self,
|
|
302
|
+
input: str,
|
|
303
|
+
evaluator_agent: Agent,
|
|
304
|
+
evaluation_input: str,
|
|
305
|
+
evaluator_expected_output: str,
|
|
306
|
+
agent_output: str,
|
|
307
|
+
) -> Optional[AccuracyEvaluation]:
|
|
308
|
+
"""Orchestrate the evaluation process asynchronously."""
|
|
329
309
|
try:
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
logger.error("Expected Answer is not a string or callable")
|
|
342
|
-
|
|
343
|
-
# Get the expected_answer from the eval
|
|
344
|
-
if self.expected_answer is not None:
|
|
345
|
-
if isinstance(self.expected_answer, str):
|
|
346
|
-
return self.expected_answer
|
|
347
|
-
elif callable(self.expected_answer):
|
|
348
|
-
_expected_answer = self.expected_answer()
|
|
349
|
-
if isinstance(_expected_answer, str):
|
|
350
|
-
return _expected_answer
|
|
351
|
-
else:
|
|
352
|
-
logger.error("Expected Answer is not a string")
|
|
353
|
-
else:
|
|
354
|
-
logger.error("Expected Answer is not a string or callable")
|
|
310
|
+
response = await evaluator_agent.arun(evaluation_input, stream=False)
|
|
311
|
+
accuracy_agent_response = response.content
|
|
312
|
+
if accuracy_agent_response is None or not isinstance(accuracy_agent_response, AccuracyAgentResponse):
|
|
313
|
+
raise EvalError(f"Evaluator Agent returned an invalid response: {accuracy_agent_response}")
|
|
314
|
+
return AccuracyEvaluation(
|
|
315
|
+
input=input,
|
|
316
|
+
output=agent_output,
|
|
317
|
+
expected_output=evaluator_expected_output,
|
|
318
|
+
score=accuracy_agent_response.accuracy_score,
|
|
319
|
+
reason=accuracy_agent_response.accuracy_reason,
|
|
320
|
+
)
|
|
355
321
|
except Exception as e:
|
|
356
|
-
logger.
|
|
357
|
-
|
|
322
|
+
logger.exception(f"Failed to evaluate accuracy asynchronously: {e}")
|
|
323
|
+
return None
|
|
358
324
|
|
|
359
325
|
def run(
|
|
360
326
|
self,
|
|
361
327
|
*,
|
|
362
|
-
question: Optional[Union[str, Callable]] = None,
|
|
363
|
-
expected_answer: Optional[Union[str, Callable]] = None,
|
|
364
|
-
answer: Optional[Union[str, Callable]] = None,
|
|
365
328
|
print_summary: bool = True,
|
|
366
329
|
print_results: bool = True,
|
|
367
330
|
) -> Optional[AccuracyResult]:
|
|
331
|
+
if isinstance(self.db, AsyncBaseDb):
|
|
332
|
+
raise ValueError("run() is not supported with an async DB. Please use arun() instead.")
|
|
333
|
+
|
|
334
|
+
if self.agent is None and self.team is None:
|
|
335
|
+
logger.error("You need to provide one of 'agent' or 'team' to run the evaluation.")
|
|
336
|
+
return None
|
|
337
|
+
|
|
338
|
+
if self.agent is not None and self.team is not None:
|
|
339
|
+
logger.error("Provide only one of 'agent' or 'team' to run the evaluation.")
|
|
340
|
+
return None
|
|
341
|
+
|
|
368
342
|
from rich.console import Console
|
|
369
343
|
from rich.live import Live
|
|
370
344
|
from rich.status import Status
|
|
371
345
|
|
|
372
|
-
self.
|
|
373
|
-
|
|
346
|
+
set_log_level_to_debug() if self.debug_mode else set_log_level_to_info()
|
|
347
|
+
|
|
374
348
|
self.result = AccuracyResult()
|
|
375
|
-
self.print_results = print_results
|
|
376
|
-
self.print_summary = print_summary
|
|
377
349
|
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
350
|
+
logger.debug(f"************ Evaluation Start: {self.eval_id} ************")
|
|
351
|
+
|
|
352
|
+
# Add a spinner while running the evaluations
|
|
353
|
+
console = Console()
|
|
354
|
+
with Live(console=console, transient=True) as live_log:
|
|
355
|
+
evaluator_agent = self.get_evaluator_agent()
|
|
356
|
+
eval_input = self.get_eval_input()
|
|
357
|
+
eval_expected_output = self.get_eval_expected_output()
|
|
358
|
+
|
|
359
|
+
for i in range(self.num_iterations):
|
|
360
|
+
status = Status(f"Running evaluation {i + 1}...", spinner="dots", speed=1.0, refresh_per_second=10)
|
|
361
|
+
live_log.update(status)
|
|
362
|
+
|
|
363
|
+
agent_session_id = f"eval_{self.eval_id}_{i + 1}"
|
|
364
|
+
|
|
365
|
+
if self.agent is not None:
|
|
366
|
+
agent_response = self.agent.run(input=eval_input, session_id=agent_session_id, stream=False)
|
|
367
|
+
output = agent_response.content
|
|
368
|
+
elif self.team is not None:
|
|
369
|
+
team_response = self.team.run(input=eval_input, session_id=agent_session_id, stream=False)
|
|
370
|
+
output = team_response.content
|
|
371
|
+
|
|
372
|
+
if not output:
|
|
373
|
+
logger.error(f"Failed to generate a valid answer on iteration {i + 1}: {output}")
|
|
374
|
+
continue
|
|
375
|
+
|
|
376
|
+
evaluation_input = dedent(f"""\
|
|
377
|
+
<agent_input>
|
|
378
|
+
{eval_input}
|
|
379
|
+
</agent_input>
|
|
380
|
+
|
|
381
|
+
<expected_output>
|
|
382
|
+
{eval_expected_output}
|
|
383
|
+
</expected_output>
|
|
384
|
+
|
|
385
|
+
<agent_output>
|
|
386
|
+
{output}
|
|
387
|
+
</agent_output>\
|
|
388
|
+
""")
|
|
389
|
+
logger.debug(f"Agent output #{i + 1}: {output}")
|
|
390
|
+
result = self.evaluate_answer(
|
|
391
|
+
input=eval_input,
|
|
392
|
+
evaluator_agent=evaluator_agent,
|
|
393
|
+
evaluation_input=evaluation_input,
|
|
394
|
+
evaluator_expected_output=eval_expected_output,
|
|
395
|
+
agent_output=output,
|
|
396
|
+
)
|
|
397
|
+
if result is None:
|
|
398
|
+
logger.error(f"Failed to evaluate accuracy on iteration {i + 1}")
|
|
399
|
+
continue
|
|
400
|
+
|
|
401
|
+
self.result.results.append(result)
|
|
402
|
+
self.result.compute_stats()
|
|
403
|
+
status.update(f"Eval iteration {i + 1} finished")
|
|
404
|
+
|
|
405
|
+
status.stop()
|
|
406
|
+
|
|
407
|
+
# Save result to file if requested
|
|
408
|
+
if self.file_path_to_save_results is not None and self.result is not None:
|
|
409
|
+
store_result_in_file(
|
|
410
|
+
file_path=self.file_path_to_save_results,
|
|
411
|
+
name=self.name,
|
|
412
|
+
eval_id=self.eval_id,
|
|
413
|
+
result=self.result,
|
|
414
|
+
)
|
|
415
|
+
|
|
416
|
+
# Print results if requested
|
|
417
|
+
if self.print_results or print_results:
|
|
418
|
+
self.result.print_results(console)
|
|
419
|
+
if self.print_summary or print_summary:
|
|
420
|
+
self.result.print_summary(console)
|
|
421
|
+
|
|
422
|
+
# Log results to the Agno DB if requested
|
|
423
|
+
if self.agent is not None:
|
|
424
|
+
agent_id = self.agent.id
|
|
425
|
+
team_id = None
|
|
426
|
+
model_id = self.agent.model.id if self.agent.model is not None else None
|
|
427
|
+
model_provider = self.agent.model.provider if self.agent.model is not None else None
|
|
428
|
+
evaluated_component_name = self.agent.name
|
|
429
|
+
elif self.team is not None:
|
|
430
|
+
agent_id = None
|
|
431
|
+
team_id = self.team.id
|
|
432
|
+
model_id = self.team.model.id if self.team.model is not None else None
|
|
433
|
+
model_provider = self.team.model.provider if self.team.model is not None else None
|
|
434
|
+
evaluated_component_name = self.team.name
|
|
435
|
+
|
|
436
|
+
if self.db:
|
|
437
|
+
log_eval_input = {
|
|
438
|
+
"additional_guidelines": self.additional_guidelines,
|
|
439
|
+
"additional_context": self.additional_context,
|
|
440
|
+
"num_iterations": self.num_iterations,
|
|
441
|
+
"expected_output": self.expected_output,
|
|
442
|
+
"input": self.input,
|
|
443
|
+
}
|
|
444
|
+
|
|
445
|
+
log_eval_run(
|
|
446
|
+
db=self.db,
|
|
447
|
+
run_id=self.eval_id, # type: ignore
|
|
448
|
+
run_data=asdict(self.result),
|
|
449
|
+
eval_type=EvalType.ACCURACY,
|
|
450
|
+
agent_id=agent_id,
|
|
451
|
+
team_id=team_id,
|
|
452
|
+
model_id=model_id,
|
|
453
|
+
model_provider=model_provider,
|
|
454
|
+
name=self.name if self.name is not None else None,
|
|
455
|
+
evaluated_component_name=evaluated_component_name,
|
|
456
|
+
eval_input=log_eval_input,
|
|
457
|
+
)
|
|
458
|
+
|
|
459
|
+
if self.telemetry:
|
|
460
|
+
from agno.api.evals import EvalRunCreate, create_eval_run_telemetry
|
|
461
|
+
|
|
462
|
+
create_eval_run_telemetry(
|
|
463
|
+
eval_run=EvalRunCreate(
|
|
464
|
+
run_id=self.eval_id,
|
|
465
|
+
eval_type=EvalType.ACCURACY,
|
|
466
|
+
data=self._get_telemetry_data(),
|
|
467
|
+
),
|
|
468
|
+
)
|
|
469
|
+
|
|
470
|
+
logger.debug(f"*********** Evaluation {self.eval_id} Finished ***********")
|
|
471
|
+
return self.result
|
|
472
|
+
|
|
473
|
+
async def arun(
|
|
474
|
+
self,
|
|
475
|
+
*,
|
|
476
|
+
print_summary: bool = True,
|
|
477
|
+
print_results: bool = True,
|
|
478
|
+
) -> Optional[AccuracyResult]:
|
|
479
|
+
if self.agent is None and self.team is None:
|
|
480
|
+
logger.error("You need to provide one of 'agent' or 'team' to run the evaluation.")
|
|
381
481
|
return None
|
|
382
482
|
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
)
|
|
386
|
-
if expected_answer_to_evaluate is None:
|
|
387
|
-
logger.error("No Expected Answer to evaluate.")
|
|
483
|
+
if self.agent is not None and self.team is not None:
|
|
484
|
+
logger.error("Provide only one of 'agent' or 'team' to run the evaluation.")
|
|
388
485
|
return None
|
|
389
486
|
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
logger.debug("***********************************************************")
|
|
487
|
+
from rich.console import Console
|
|
488
|
+
from rich.live import Live
|
|
489
|
+
from rich.status import Status
|
|
394
490
|
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
)
|
|
491
|
+
set_log_level_to_debug() if self.debug_mode else set_log_level_to_info()
|
|
492
|
+
|
|
493
|
+
self.result = AccuracyResult()
|
|
494
|
+
|
|
495
|
+
logger.debug(f"************ Evaluation Start: {self.eval_id} ************")
|
|
398
496
|
|
|
399
497
|
# Add a spinner while running the evaluations
|
|
400
498
|
console = Console()
|
|
401
499
|
with Live(console=console, transient=True) as live_log:
|
|
500
|
+
evaluator_agent = self.get_evaluator_agent()
|
|
501
|
+
eval_input = self.get_eval_input()
|
|
502
|
+
eval_expected_output = self.get_eval_expected_output()
|
|
503
|
+
|
|
402
504
|
             for i in range(self.num_iterations):
                 status = Status(f"Running evaluation {i + 1}...", spinner="dots", speed=1.0, refresh_per_second=10)
                 live_log.update(status)
 
-
-
+                agent_session_id = f"eval_{self.eval_id}_{i + 1}"
+
+                if self.agent is not None:
+                    agent_response = await self.agent.arun(input=eval_input, session_id=agent_session_id, stream=False)
+                    output = agent_response.content
+                elif self.team is not None:
+                    team_response = await self.team.arun(input=eval_input, session_id=agent_session_id, stream=False)
+                    output = team_response.content
+
+                if not output:
+                    logger.error(f"Failed to generate a valid answer on iteration {i + 1}: {output}")
+                    continue
+
+                evaluation_input = dedent(f"""\
+                    <agent_input>
+                    {eval_input}
+                    </agent_input>
+
+                    <expected_output>
+                    {eval_expected_output}
+                    </expected_output>
+
+                    <agent_output>
+                    {output}
+                    </agent_output>\
+                    """)
+                logger.debug(f"Agent output #{i + 1}: {output}")
+                result = await self.aevaluate_answer(
+                    input=eval_input,
+                    evaluator_agent=evaluator_agent,
+                    evaluation_input=evaluation_input,
+                    evaluator_expected_output=eval_expected_output,
+                    agent_output=output,
                 )
-                if
-                    logger.error("
+                if result is None:
+                    logger.error(f"Failed to evaluate accuracy on iteration {i + 1}")
                     continue
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                    status.update(f"Running evaluation {i + 1}... Done")
-            except Exception as e:
-                logger.exception(f"Failed to evaluate accuracy, run #{i + 1}: {e}")
-                return None
-
-            status.stop()
-
-        # -*- Save result to file if save_result_to_file is set
-        if self.save_result_to_file is not None and self.result is not None:
-            try:
-                import json
+                self.result.results.append(result)
+                self.result.compute_stats()
+                status.update(f"Eval iteration {i + 1} finished")
+
+            status.stop()
+
+        # Save result to file if requested
+        if self.file_path_to_save_results is not None and self.result is not None:
+            store_result_in_file(
+                file_path=self.file_path_to_save_results,
+                name=self.name,
+                eval_id=self.eval_id,
+                result=self.result,
+            )
+
+        # Print results if requested
+        if self.print_results or print_results:
+            self.result.print_results(console)
+        if self.print_summary or print_summary:
+            self.result.print_summary(console)
 
-
-
-
-
-
-
+        if self.agent is not None:
+            agent_id = self.agent.id
+            team_id = None
+            model_id = self.agent.model.id if self.agent.model is not None else None
+            model_provider = self.agent.model.provider if self.agent.model is not None else None
+            evaluated_component_name = self.agent.name
+        elif self.team is not None:
+            agent_id = None
+            team_id = self.team.id
+            model_id = self.team.model.id if self.team.model is not None else None
+            model_provider = self.team.model.provider if self.team.model is not None else None
+            evaluated_component_name = self.team.name
+
+        # Log results to the Agno DB if requested
+        if self.db:
+            log_eval_input = {
+                "additional_guidelines": self.additional_guidelines,
+                "additional_context": self.additional_context,
+                "num_iterations": self.num_iterations,
+                "expected_output": self.expected_output,
+                "input": self.input,
+            }
+            await async_log_eval(
+                db=self.db,
+                run_id=self.eval_id,  # type: ignore
+                run_data=asdict(self.result),
+                eval_type=EvalType.ACCURACY,
+                agent_id=agent_id,
+                model_id=model_id,
+                model_provider=model_provider,
+                name=self.name if self.name is not None else None,
+                evaluated_component_name=evaluated_component_name,
+                team_id=team_id,
+                workflow_id=None,
+                eval_input=log_eval_input,
+            )
+
+        if self.telemetry:
+            from agno.api.evals import EvalRunCreate, async_create_eval_run_telemetry
+
+            await async_create_eval_run_telemetry(
+                eval_run=EvalRunCreate(run_id=self.eval_id, eval_type=EvalType.ACCURACY),
+            )
+
+        logger.debug(f"*********** Evaluation {self.eval_id} Finished ***********")
+        return self.result
 
-
-
-
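For orientation, a minimal driver for the async path added above. This is a hedged sketch, not part of the diff: it assumes the enclosing class is AccuracyEval from agno.eval.accuracy and that an OpenAI model is available; the model id, sample question, and the avg_score field are illustrative assumptions.

    import asyncio

    from agno.agent import Agent
    from agno.eval.accuracy import AccuracyEval
    from agno.models.openai import OpenAIChat

    # Hypothetical setup: any Agent works; the loop above calls agent.arun() once per iteration.
    agent = Agent(model=OpenAIChat(id="gpt-4o"))

    evaluation = AccuracyEval(
        model=OpenAIChat(id="gpt-4o"),  # assumed: model used by the evaluator agent
        agent=agent,
        input="What is 10*5 then to the power of 2?",
        expected_output="2500",
        num_iterations=3,  # each iteration gets its own session_id: eval_<eval_id>_<i+1>
    )

    result = asyncio.run(evaluation.arun(print_results=True))
    if result is not None:
        print(result.avg_score)  # assumed field on AccuracyResult; illustrative

Passing a db (for example one of the backends under agno/db shown earlier in this diff) would additionally persist the run through async_log_eval, as the hunk above shows.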
+    def run_with_output(
+        self,
+        *,
+        output: str,
+        print_summary: bool = True,
+        print_results: bool = True,
+    ) -> Optional[AccuracyResult]:
+        """Run the evaluation logic against the given answer, instead of generating an answer with the Agent"""
+        # Generate unique run_id for this execution (don't modify self.eval_id due to concurrency)
+        run_id = str(uuid4())
+
+        set_log_level_to_debug() if self.debug_mode else set_log_level_to_info()
+
+        self.result = AccuracyResult()
+
+        logger.debug(f"************ Evaluation Start: {run_id} ************")
+
+        evaluator_agent = self.get_evaluator_agent()
+        eval_input = self.get_eval_input()
+        eval_expected_output = self.get_eval_expected_output()
+
+        evaluation_input = dedent(f"""\
+            <agent_input>
+            {eval_input}
+            </agent_input>
 
-
+            <expected_output>
+            {eval_expected_output}
+            </expected_output>
+
+            <agent_output>
+            {output}
+            </agent_output>\
+            """)
+
+        result = self.evaluate_answer(
+            input=eval_input,
+            evaluator_agent=evaluator_agent,
+            evaluation_input=evaluation_input,
+            evaluator_expected_output=eval_expected_output,
+            agent_output=output,
+        )
+
+        if result is not None:
+            self.result.results.append(result)
+            self.result.compute_stats()
+
+        # Print results if requested
+        if self.print_results or print_results:
+            self.result.print_results()
+        if self.print_summary or print_summary:
+            self.result.print_summary()
+
+        # Save result to file if requested
+        if self.file_path_to_save_results is not None:
+            store_result_in_file(
+                file_path=self.file_path_to_save_results,
+                name=self.name,
+                eval_id=self.eval_id,
+                result=self.result,
+            )
+        # Log results to the Agno DB if requested
+        if self.db:
+            if isinstance(self.db, AsyncBaseDb):
+                log_error("You are using an async DB in a non-async method. The evaluation won't be stored in the DB.")
+
+            else:
+                if self.agent is not None:
+                    agent_id = self.agent.id
+                    team_id = None
+                    model_id = self.agent.model.id if self.agent.model is not None else None
+                    model_provider = self.agent.model.provider if self.agent.model is not None else None
+                    evaluated_component_name = self.agent.name
+                elif self.team is not None:
+                    agent_id = None
+                    team_id = self.team.id
+                    model_id = self.team.model.id if self.team.model is not None else None
+                    model_provider = self.team.model.provider if self.team.model is not None else None
+                    evaluated_component_name = self.team.name
+                else:
+                    agent_id = None
+                    team_id = None
+                    model_id = None
+                    model_provider = None
+                    evaluated_component_name = None
+
+                log_eval_input = {
+                    "additional_guidelines": self.additional_guidelines,
+                    "additional_context": self.additional_context,
+                    "num_iterations": self.num_iterations,
+                    "expected_output": self.expected_output,
+                    "input": self.input,
+                }
+
+                log_eval_run(
+                    db=self.db,
+                    run_id=self.eval_id,  # type: ignore
+                    run_data=asdict(self.result),
+                    eval_type=EvalType.ACCURACY,
+                    name=self.name if self.name is not None else None,
+                    agent_id=agent_id,
+                    team_id=team_id,
+                    model_id=model_id,
+                    model_provider=model_provider,
+                    evaluated_component_name=evaluated_component_name,
+                    workflow_id=None,
+                    eval_input=log_eval_input,
+                )
+
+        if self.telemetry:
+            from agno.api.evals import EvalRunCreate, create_eval_run_telemetry
+
+            create_eval_run_telemetry(
+                eval_run=EvalRunCreate(
+                    run_id=self.eval_id,
+                    eval_type=EvalType.ACCURACY,
+                    data=self._get_telemetry_data(),
+                ),
+            )
+
+        logger.debug(f"*********** Evaluation End: {run_id} ***********")
+        return self.result
+
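Since run_with_output skips generation entirely, it can score answers produced elsewhere (another framework, a cached transcript, a human draft). A short hedged sketch, with the same caveats as above about names outside this diff being assumptions:

    from agno.eval.accuracy import AccuracyEval
    from agno.models.openai import OpenAIChat

    evaluation = AccuracyEval(
        model=OpenAIChat(id="gpt-4o"),  # assumed: model for the evaluator agent
        input="What is 10*5 then to the power of 2?",
        expected_output="2500",
    )

    # No Agent/Team call happens here; the given output is scored directly by the evaluator.
    result = evaluation.run_with_output(output="The result is 2500.", print_results=True)

Note that, per the hunk above, the synchronous variant refuses to persist results when given an AsyncBaseDb; the async variant that follows handles that case.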
+    async def arun_with_output(
+        self,
+        *,
+        output: str,
+        print_summary: bool = True,
+        print_results: bool = True,
+    ) -> Optional[AccuracyResult]:
+        """Run the evaluation logic against the given answer, instead of generating an answer with the Agent"""
+        # Generate unique run_id for this execution (don't modify self.eval_id due to concurrency)
+        run_id = str(uuid4())
+
+        set_log_level_to_debug() if self.debug_mode else set_log_level_to_info()
+
+        self.result = AccuracyResult()
+
+        logger.debug(f"************ Evaluation Start: {run_id} ************")
+
+        evaluator_agent = self.get_evaluator_agent()
+        eval_input = self.get_eval_input()
+        eval_expected_output = self.get_eval_expected_output()
+
+        evaluation_input = dedent(f"""\
+            <agent_input>
+            {eval_input}
+            </agent_input>
+
+            <expected_output>
+            {eval_expected_output}
+            </expected_output>
+
+            <agent_output>
+            {output}
+            </agent_output>\
+            """)
+
+        result = await self.aevaluate_answer(
+            input=eval_input,
+            evaluator_agent=evaluator_agent,
+            evaluation_input=evaluation_input,
+            evaluator_expected_output=eval_expected_output,
+            agent_output=output,
+        )
+
+        if result is not None:
+            self.result.results.append(result)
+            self.result.compute_stats()
+
+        # Print results if requested
+        if self.print_results or print_results:
+            self.result.print_results()
+        if self.print_summary or print_summary:
+            self.result.print_summary()
+
+        # Save result to file if requested
+        if self.file_path_to_save_results is not None:
+            store_result_in_file(
+                file_path=self.file_path_to_save_results,
+                name=self.name,
+                eval_id=self.eval_id,
+                result=self.result,
+            )
+        # Log results to the Agno DB if requested
+        if self.db:
+            if self.agent is not None:
+                agent_id = self.agent.id
+                team_id = None
+                model_id = self.agent.model.id if self.agent.model is not None else None
+                model_provider = self.agent.model.provider if self.agent.model is not None else None
+                evaluated_component_name = self.agent.name
+            elif self.team is not None:
+                agent_id = None
+                team_id = self.team.id
+                model_id = self.team.model.id if self.team.model is not None else None
+                model_provider = self.team.model.provider if self.team.model is not None else None
+                evaluated_component_name = self.team.name
+
+            log_eval_input = {
+                "additional_guidelines": self.additional_guidelines,
+                "additional_context": self.additional_context,
+                "num_iterations": self.num_iterations,
+                "expected_output": self.expected_output,
+                "input": self.input,
+            }
+
+            await async_log_eval(
+                db=self.db,
+                run_id=self.eval_id,  # type: ignore
+                run_data=asdict(self.result),
+                eval_type=EvalType.ACCURACY,
+                name=self.name if self.name is not None else None,
+                agent_id=agent_id,
+                team_id=team_id,
+                model_id=model_id,
+                model_provider=model_provider,
+                evaluated_component_name=evaluated_component_name,
+                workflow_id=None,
+                eval_input=log_eval_input,
+            )
+
+        logger.debug(f"*********** Evaluation End: {run_id} ***********")
         return self.result
+
+    def _get_telemetry_data(self) -> Dict[str, Any]:
+        """Get the telemetry data for the evaluation"""
+        return {
+            "agent_id": self.agent.id if self.agent else None,
+            "team_id": self.team.id if self.team else None,
+            "model_id": self.agent.model.id if self.agent and self.agent.model else None,
+            "model_provider": self.agent.model.provider if self.agent and self.agent.model else None,
+            "num_iterations": self.num_iterations,
+        }
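_get_telemetry_data resolves model_id and model_provider from self.agent only, so a team-backed eval reports None for both even when the team has a model. For an agent-backed eval the payload has the following shape (a sketch; all values illustrative):

    # Illustrative return value of _get_telemetry_data() for an agent-backed eval:
    {
        "agent_id": "demo-agent",    # self.agent.id
        "team_id": None,             # no team was set
        "model_id": "gpt-4o",        # self.agent.model.id
        "model_provider": "OpenAI",  # self.agent.model.provider
        "num_iterations": 3,
    }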