agno-2.2.13-py3-none-any.whl
This diff represents the contents of a publicly available package version released to one of the supported registries. It is provided for informational purposes only and reflects the package as it appears in its public registry.
- agno/__init__.py +8 -0
- agno/agent/__init__.py +51 -0
- agno/agent/agent.py +10405 -0
- agno/api/__init__.py +0 -0
- agno/api/agent.py +28 -0
- agno/api/api.py +40 -0
- agno/api/evals.py +22 -0
- agno/api/os.py +17 -0
- agno/api/routes.py +13 -0
- agno/api/schemas/__init__.py +9 -0
- agno/api/schemas/agent.py +16 -0
- agno/api/schemas/evals.py +16 -0
- agno/api/schemas/os.py +14 -0
- agno/api/schemas/response.py +6 -0
- agno/api/schemas/team.py +16 -0
- agno/api/schemas/utils.py +21 -0
- agno/api/schemas/workflows.py +16 -0
- agno/api/settings.py +53 -0
- agno/api/team.py +30 -0
- agno/api/workflow.py +28 -0
- agno/cloud/aws/base.py +214 -0
- agno/cloud/aws/s3/__init__.py +2 -0
- agno/cloud/aws/s3/api_client.py +43 -0
- agno/cloud/aws/s3/bucket.py +195 -0
- agno/cloud/aws/s3/object.py +57 -0
- agno/culture/__init__.py +3 -0
- agno/culture/manager.py +956 -0
- agno/db/__init__.py +24 -0
- agno/db/async_postgres/__init__.py +3 -0
- agno/db/base.py +598 -0
- agno/db/dynamo/__init__.py +3 -0
- agno/db/dynamo/dynamo.py +2042 -0
- agno/db/dynamo/schemas.py +314 -0
- agno/db/dynamo/utils.py +743 -0
- agno/db/firestore/__init__.py +3 -0
- agno/db/firestore/firestore.py +1795 -0
- agno/db/firestore/schemas.py +140 -0
- agno/db/firestore/utils.py +376 -0
- agno/db/gcs_json/__init__.py +3 -0
- agno/db/gcs_json/gcs_json_db.py +1335 -0
- agno/db/gcs_json/utils.py +228 -0
- agno/db/in_memory/__init__.py +3 -0
- agno/db/in_memory/in_memory_db.py +1160 -0
- agno/db/in_memory/utils.py +230 -0
- agno/db/json/__init__.py +3 -0
- agno/db/json/json_db.py +1328 -0
- agno/db/json/utils.py +230 -0
- agno/db/migrations/__init__.py +0 -0
- agno/db/migrations/v1_to_v2.py +635 -0
- agno/db/mongo/__init__.py +17 -0
- agno/db/mongo/async_mongo.py +2026 -0
- agno/db/mongo/mongo.py +1982 -0
- agno/db/mongo/schemas.py +87 -0
- agno/db/mongo/utils.py +259 -0
- agno/db/mysql/__init__.py +3 -0
- agno/db/mysql/mysql.py +2308 -0
- agno/db/mysql/schemas.py +138 -0
- agno/db/mysql/utils.py +355 -0
- agno/db/postgres/__init__.py +4 -0
- agno/db/postgres/async_postgres.py +1927 -0
- agno/db/postgres/postgres.py +2260 -0
- agno/db/postgres/schemas.py +139 -0
- agno/db/postgres/utils.py +442 -0
- agno/db/redis/__init__.py +3 -0
- agno/db/redis/redis.py +1660 -0
- agno/db/redis/schemas.py +123 -0
- agno/db/redis/utils.py +346 -0
- agno/db/schemas/__init__.py +4 -0
- agno/db/schemas/culture.py +120 -0
- agno/db/schemas/evals.py +33 -0
- agno/db/schemas/knowledge.py +40 -0
- agno/db/schemas/memory.py +46 -0
- agno/db/schemas/metrics.py +0 -0
- agno/db/singlestore/__init__.py +3 -0
- agno/db/singlestore/schemas.py +130 -0
- agno/db/singlestore/singlestore.py +2272 -0
- agno/db/singlestore/utils.py +384 -0
- agno/db/sqlite/__init__.py +4 -0
- agno/db/sqlite/async_sqlite.py +2293 -0
- agno/db/sqlite/schemas.py +133 -0
- agno/db/sqlite/sqlite.py +2288 -0
- agno/db/sqlite/utils.py +431 -0
- agno/db/surrealdb/__init__.py +3 -0
- agno/db/surrealdb/metrics.py +292 -0
- agno/db/surrealdb/models.py +309 -0
- agno/db/surrealdb/queries.py +71 -0
- agno/db/surrealdb/surrealdb.py +1353 -0
- agno/db/surrealdb/utils.py +147 -0
- agno/db/utils.py +116 -0
- agno/debug.py +18 -0
- agno/eval/__init__.py +14 -0
- agno/eval/accuracy.py +834 -0
- agno/eval/performance.py +773 -0
- agno/eval/reliability.py +306 -0
- agno/eval/utils.py +119 -0
- agno/exceptions.py +161 -0
- agno/filters.py +354 -0
- agno/guardrails/__init__.py +6 -0
- agno/guardrails/base.py +19 -0
- agno/guardrails/openai.py +144 -0
- agno/guardrails/pii.py +94 -0
- agno/guardrails/prompt_injection.py +52 -0
- agno/integrations/__init__.py +0 -0
- agno/integrations/discord/__init__.py +3 -0
- agno/integrations/discord/client.py +203 -0
- agno/knowledge/__init__.py +5 -0
- agno/knowledge/chunking/__init__.py +0 -0
- agno/knowledge/chunking/agentic.py +79 -0
- agno/knowledge/chunking/document.py +91 -0
- agno/knowledge/chunking/fixed.py +57 -0
- agno/knowledge/chunking/markdown.py +151 -0
- agno/knowledge/chunking/recursive.py +63 -0
- agno/knowledge/chunking/row.py +39 -0
- agno/knowledge/chunking/semantic.py +86 -0
- agno/knowledge/chunking/strategy.py +165 -0
- agno/knowledge/content.py +74 -0
- agno/knowledge/document/__init__.py +5 -0
- agno/knowledge/document/base.py +58 -0
- agno/knowledge/embedder/__init__.py +5 -0
- agno/knowledge/embedder/aws_bedrock.py +343 -0
- agno/knowledge/embedder/azure_openai.py +210 -0
- agno/knowledge/embedder/base.py +23 -0
- agno/knowledge/embedder/cohere.py +323 -0
- agno/knowledge/embedder/fastembed.py +62 -0
- agno/knowledge/embedder/fireworks.py +13 -0
- agno/knowledge/embedder/google.py +258 -0
- agno/knowledge/embedder/huggingface.py +94 -0
- agno/knowledge/embedder/jina.py +182 -0
- agno/knowledge/embedder/langdb.py +22 -0
- agno/knowledge/embedder/mistral.py +206 -0
- agno/knowledge/embedder/nebius.py +13 -0
- agno/knowledge/embedder/ollama.py +154 -0
- agno/knowledge/embedder/openai.py +195 -0
- agno/knowledge/embedder/sentence_transformer.py +63 -0
- agno/knowledge/embedder/together.py +13 -0
- agno/knowledge/embedder/vllm.py +262 -0
- agno/knowledge/embedder/voyageai.py +165 -0
- agno/knowledge/knowledge.py +1988 -0
- agno/knowledge/reader/__init__.py +7 -0
- agno/knowledge/reader/arxiv_reader.py +81 -0
- agno/knowledge/reader/base.py +95 -0
- agno/knowledge/reader/csv_reader.py +166 -0
- agno/knowledge/reader/docx_reader.py +82 -0
- agno/knowledge/reader/field_labeled_csv_reader.py +292 -0
- agno/knowledge/reader/firecrawl_reader.py +201 -0
- agno/knowledge/reader/json_reader.py +87 -0
- agno/knowledge/reader/markdown_reader.py +137 -0
- agno/knowledge/reader/pdf_reader.py +431 -0
- agno/knowledge/reader/pptx_reader.py +101 -0
- agno/knowledge/reader/reader_factory.py +313 -0
- agno/knowledge/reader/s3_reader.py +89 -0
- agno/knowledge/reader/tavily_reader.py +194 -0
- agno/knowledge/reader/text_reader.py +115 -0
- agno/knowledge/reader/web_search_reader.py +372 -0
- agno/knowledge/reader/website_reader.py +455 -0
- agno/knowledge/reader/wikipedia_reader.py +59 -0
- agno/knowledge/reader/youtube_reader.py +78 -0
- agno/knowledge/remote_content/__init__.py +0 -0
- agno/knowledge/remote_content/remote_content.py +88 -0
- agno/knowledge/reranker/__init__.py +3 -0
- agno/knowledge/reranker/base.py +14 -0
- agno/knowledge/reranker/cohere.py +64 -0
- agno/knowledge/reranker/infinity.py +195 -0
- agno/knowledge/reranker/sentence_transformer.py +54 -0
- agno/knowledge/types.py +39 -0
- agno/knowledge/utils.py +189 -0
- agno/media.py +462 -0
- agno/memory/__init__.py +3 -0
- agno/memory/manager.py +1327 -0
- agno/models/__init__.py +0 -0
- agno/models/aimlapi/__init__.py +5 -0
- agno/models/aimlapi/aimlapi.py +45 -0
- agno/models/anthropic/__init__.py +5 -0
- agno/models/anthropic/claude.py +757 -0
- agno/models/aws/__init__.py +15 -0
- agno/models/aws/bedrock.py +701 -0
- agno/models/aws/claude.py +378 -0
- agno/models/azure/__init__.py +18 -0
- agno/models/azure/ai_foundry.py +485 -0
- agno/models/azure/openai_chat.py +131 -0
- agno/models/base.py +2175 -0
- agno/models/cerebras/__init__.py +12 -0
- agno/models/cerebras/cerebras.py +501 -0
- agno/models/cerebras/cerebras_openai.py +112 -0
- agno/models/cohere/__init__.py +5 -0
- agno/models/cohere/chat.py +389 -0
- agno/models/cometapi/__init__.py +5 -0
- agno/models/cometapi/cometapi.py +57 -0
- agno/models/dashscope/__init__.py +5 -0
- agno/models/dashscope/dashscope.py +91 -0
- agno/models/deepinfra/__init__.py +5 -0
- agno/models/deepinfra/deepinfra.py +28 -0
- agno/models/deepseek/__init__.py +5 -0
- agno/models/deepseek/deepseek.py +61 -0
- agno/models/defaults.py +1 -0
- agno/models/fireworks/__init__.py +5 -0
- agno/models/fireworks/fireworks.py +26 -0
- agno/models/google/__init__.py +5 -0
- agno/models/google/gemini.py +1085 -0
- agno/models/groq/__init__.py +5 -0
- agno/models/groq/groq.py +556 -0
- agno/models/huggingface/__init__.py +5 -0
- agno/models/huggingface/huggingface.py +491 -0
- agno/models/ibm/__init__.py +5 -0
- agno/models/ibm/watsonx.py +422 -0
- agno/models/internlm/__init__.py +3 -0
- agno/models/internlm/internlm.py +26 -0
- agno/models/langdb/__init__.py +1 -0
- agno/models/langdb/langdb.py +48 -0
- agno/models/litellm/__init__.py +14 -0
- agno/models/litellm/chat.py +468 -0
- agno/models/litellm/litellm_openai.py +25 -0
- agno/models/llama_cpp/__init__.py +5 -0
- agno/models/llama_cpp/llama_cpp.py +22 -0
- agno/models/lmstudio/__init__.py +5 -0
- agno/models/lmstudio/lmstudio.py +25 -0
- agno/models/message.py +434 -0
- agno/models/meta/__init__.py +12 -0
- agno/models/meta/llama.py +475 -0
- agno/models/meta/llama_openai.py +78 -0
- agno/models/metrics.py +120 -0
- agno/models/mistral/__init__.py +5 -0
- agno/models/mistral/mistral.py +432 -0
- agno/models/nebius/__init__.py +3 -0
- agno/models/nebius/nebius.py +54 -0
- agno/models/nexus/__init__.py +3 -0
- agno/models/nexus/nexus.py +22 -0
- agno/models/nvidia/__init__.py +5 -0
- agno/models/nvidia/nvidia.py +28 -0
- agno/models/ollama/__init__.py +5 -0
- agno/models/ollama/chat.py +441 -0
- agno/models/openai/__init__.py +9 -0
- agno/models/openai/chat.py +883 -0
- agno/models/openai/like.py +27 -0
- agno/models/openai/responses.py +1050 -0
- agno/models/openrouter/__init__.py +5 -0
- agno/models/openrouter/openrouter.py +66 -0
- agno/models/perplexity/__init__.py +5 -0
- agno/models/perplexity/perplexity.py +187 -0
- agno/models/portkey/__init__.py +3 -0
- agno/models/portkey/portkey.py +81 -0
- agno/models/requesty/__init__.py +5 -0
- agno/models/requesty/requesty.py +52 -0
- agno/models/response.py +199 -0
- agno/models/sambanova/__init__.py +5 -0
- agno/models/sambanova/sambanova.py +28 -0
- agno/models/siliconflow/__init__.py +5 -0
- agno/models/siliconflow/siliconflow.py +25 -0
- agno/models/together/__init__.py +5 -0
- agno/models/together/together.py +25 -0
- agno/models/utils.py +266 -0
- agno/models/vercel/__init__.py +3 -0
- agno/models/vercel/v0.py +26 -0
- agno/models/vertexai/__init__.py +0 -0
- agno/models/vertexai/claude.py +70 -0
- agno/models/vllm/__init__.py +3 -0
- agno/models/vllm/vllm.py +78 -0
- agno/models/xai/__init__.py +3 -0
- agno/models/xai/xai.py +113 -0
- agno/os/__init__.py +3 -0
- agno/os/app.py +876 -0
- agno/os/auth.py +57 -0
- agno/os/config.py +104 -0
- agno/os/interfaces/__init__.py +1 -0
- agno/os/interfaces/a2a/__init__.py +3 -0
- agno/os/interfaces/a2a/a2a.py +42 -0
- agno/os/interfaces/a2a/router.py +250 -0
- agno/os/interfaces/a2a/utils.py +924 -0
- agno/os/interfaces/agui/__init__.py +3 -0
- agno/os/interfaces/agui/agui.py +47 -0
- agno/os/interfaces/agui/router.py +144 -0
- agno/os/interfaces/agui/utils.py +534 -0
- agno/os/interfaces/base.py +25 -0
- agno/os/interfaces/slack/__init__.py +3 -0
- agno/os/interfaces/slack/router.py +148 -0
- agno/os/interfaces/slack/security.py +30 -0
- agno/os/interfaces/slack/slack.py +47 -0
- agno/os/interfaces/whatsapp/__init__.py +3 -0
- agno/os/interfaces/whatsapp/router.py +211 -0
- agno/os/interfaces/whatsapp/security.py +53 -0
- agno/os/interfaces/whatsapp/whatsapp.py +36 -0
- agno/os/mcp.py +292 -0
- agno/os/middleware/__init__.py +7 -0
- agno/os/middleware/jwt.py +233 -0
- agno/os/router.py +1763 -0
- agno/os/routers/__init__.py +3 -0
- agno/os/routers/evals/__init__.py +3 -0
- agno/os/routers/evals/evals.py +430 -0
- agno/os/routers/evals/schemas.py +142 -0
- agno/os/routers/evals/utils.py +162 -0
- agno/os/routers/health.py +31 -0
- agno/os/routers/home.py +52 -0
- agno/os/routers/knowledge/__init__.py +3 -0
- agno/os/routers/knowledge/knowledge.py +997 -0
- agno/os/routers/knowledge/schemas.py +178 -0
- agno/os/routers/memory/__init__.py +3 -0
- agno/os/routers/memory/memory.py +515 -0
- agno/os/routers/memory/schemas.py +62 -0
- agno/os/routers/metrics/__init__.py +3 -0
- agno/os/routers/metrics/metrics.py +190 -0
- agno/os/routers/metrics/schemas.py +47 -0
- agno/os/routers/session/__init__.py +3 -0
- agno/os/routers/session/session.py +997 -0
- agno/os/schema.py +1055 -0
- agno/os/settings.py +43 -0
- agno/os/utils.py +630 -0
- agno/py.typed +0 -0
- agno/reasoning/__init__.py +0 -0
- agno/reasoning/anthropic.py +80 -0
- agno/reasoning/azure_ai_foundry.py +67 -0
- agno/reasoning/deepseek.py +63 -0
- agno/reasoning/default.py +97 -0
- agno/reasoning/gemini.py +73 -0
- agno/reasoning/groq.py +71 -0
- agno/reasoning/helpers.py +63 -0
- agno/reasoning/ollama.py +67 -0
- agno/reasoning/openai.py +86 -0
- agno/reasoning/step.py +31 -0
- agno/reasoning/vertexai.py +76 -0
- agno/run/__init__.py +6 -0
- agno/run/agent.py +787 -0
- agno/run/base.py +229 -0
- agno/run/cancel.py +81 -0
- agno/run/messages.py +32 -0
- agno/run/team.py +753 -0
- agno/run/workflow.py +708 -0
- agno/session/__init__.py +10 -0
- agno/session/agent.py +295 -0
- agno/session/summary.py +265 -0
- agno/session/team.py +392 -0
- agno/session/workflow.py +205 -0
- agno/team/__init__.py +37 -0
- agno/team/team.py +8793 -0
- agno/tools/__init__.py +10 -0
- agno/tools/agentql.py +120 -0
- agno/tools/airflow.py +69 -0
- agno/tools/api.py +122 -0
- agno/tools/apify.py +314 -0
- agno/tools/arxiv.py +127 -0
- agno/tools/aws_lambda.py +53 -0
- agno/tools/aws_ses.py +66 -0
- agno/tools/baidusearch.py +89 -0
- agno/tools/bitbucket.py +292 -0
- agno/tools/brandfetch.py +213 -0
- agno/tools/bravesearch.py +106 -0
- agno/tools/brightdata.py +367 -0
- agno/tools/browserbase.py +209 -0
- agno/tools/calcom.py +255 -0
- agno/tools/calculator.py +151 -0
- agno/tools/cartesia.py +187 -0
- agno/tools/clickup.py +244 -0
- agno/tools/confluence.py +240 -0
- agno/tools/crawl4ai.py +158 -0
- agno/tools/csv_toolkit.py +185 -0
- agno/tools/dalle.py +110 -0
- agno/tools/daytona.py +475 -0
- agno/tools/decorator.py +262 -0
- agno/tools/desi_vocal.py +108 -0
- agno/tools/discord.py +161 -0
- agno/tools/docker.py +716 -0
- agno/tools/duckdb.py +379 -0
- agno/tools/duckduckgo.py +91 -0
- agno/tools/e2b.py +703 -0
- agno/tools/eleven_labs.py +196 -0
- agno/tools/email.py +67 -0
- agno/tools/evm.py +129 -0
- agno/tools/exa.py +396 -0
- agno/tools/fal.py +127 -0
- agno/tools/file.py +240 -0
- agno/tools/file_generation.py +350 -0
- agno/tools/financial_datasets.py +288 -0
- agno/tools/firecrawl.py +143 -0
- agno/tools/function.py +1187 -0
- agno/tools/giphy.py +93 -0
- agno/tools/github.py +1760 -0
- agno/tools/gmail.py +922 -0
- agno/tools/google_bigquery.py +117 -0
- agno/tools/google_drive.py +270 -0
- agno/tools/google_maps.py +253 -0
- agno/tools/googlecalendar.py +674 -0
- agno/tools/googlesearch.py +98 -0
- agno/tools/googlesheets.py +377 -0
- agno/tools/hackernews.py +77 -0
- agno/tools/jina.py +101 -0
- agno/tools/jira.py +170 -0
- agno/tools/knowledge.py +218 -0
- agno/tools/linear.py +426 -0
- agno/tools/linkup.py +58 -0
- agno/tools/local_file_system.py +90 -0
- agno/tools/lumalab.py +183 -0
- agno/tools/mcp/__init__.py +10 -0
- agno/tools/mcp/mcp.py +331 -0
- agno/tools/mcp/multi_mcp.py +347 -0
- agno/tools/mcp/params.py +24 -0
- agno/tools/mcp_toolbox.py +284 -0
- agno/tools/mem0.py +193 -0
- agno/tools/memori.py +339 -0
- agno/tools/memory.py +419 -0
- agno/tools/mlx_transcribe.py +139 -0
- agno/tools/models/__init__.py +0 -0
- agno/tools/models/azure_openai.py +190 -0
- agno/tools/models/gemini.py +203 -0
- agno/tools/models/groq.py +158 -0
- agno/tools/models/morph.py +186 -0
- agno/tools/models/nebius.py +124 -0
- agno/tools/models_labs.py +195 -0
- agno/tools/moviepy_video.py +349 -0
- agno/tools/neo4j.py +134 -0
- agno/tools/newspaper.py +46 -0
- agno/tools/newspaper4k.py +93 -0
- agno/tools/notion.py +204 -0
- agno/tools/openai.py +202 -0
- agno/tools/openbb.py +160 -0
- agno/tools/opencv.py +321 -0
- agno/tools/openweather.py +233 -0
- agno/tools/oxylabs.py +385 -0
- agno/tools/pandas.py +102 -0
- agno/tools/parallel.py +314 -0
- agno/tools/postgres.py +257 -0
- agno/tools/pubmed.py +188 -0
- agno/tools/python.py +205 -0
- agno/tools/reasoning.py +283 -0
- agno/tools/reddit.py +467 -0
- agno/tools/replicate.py +117 -0
- agno/tools/resend.py +62 -0
- agno/tools/scrapegraph.py +222 -0
- agno/tools/searxng.py +152 -0
- agno/tools/serpapi.py +116 -0
- agno/tools/serper.py +255 -0
- agno/tools/shell.py +53 -0
- agno/tools/slack.py +136 -0
- agno/tools/sleep.py +20 -0
- agno/tools/spider.py +116 -0
- agno/tools/sql.py +154 -0
- agno/tools/streamlit/__init__.py +0 -0
- agno/tools/streamlit/components.py +113 -0
- agno/tools/tavily.py +254 -0
- agno/tools/telegram.py +48 -0
- agno/tools/todoist.py +218 -0
- agno/tools/tool_registry.py +1 -0
- agno/tools/toolkit.py +146 -0
- agno/tools/trafilatura.py +388 -0
- agno/tools/trello.py +274 -0
- agno/tools/twilio.py +186 -0
- agno/tools/user_control_flow.py +78 -0
- agno/tools/valyu.py +228 -0
- agno/tools/visualization.py +467 -0
- agno/tools/webbrowser.py +28 -0
- agno/tools/webex.py +76 -0
- agno/tools/website.py +54 -0
- agno/tools/webtools.py +45 -0
- agno/tools/whatsapp.py +286 -0
- agno/tools/wikipedia.py +63 -0
- agno/tools/workflow.py +278 -0
- agno/tools/x.py +335 -0
- agno/tools/yfinance.py +257 -0
- agno/tools/youtube.py +184 -0
- agno/tools/zendesk.py +82 -0
- agno/tools/zep.py +454 -0
- agno/tools/zoom.py +382 -0
- agno/utils/__init__.py +0 -0
- agno/utils/agent.py +820 -0
- agno/utils/audio.py +49 -0
- agno/utils/certs.py +27 -0
- agno/utils/code_execution.py +11 -0
- agno/utils/common.py +132 -0
- agno/utils/dttm.py +13 -0
- agno/utils/enum.py +22 -0
- agno/utils/env.py +11 -0
- agno/utils/events.py +696 -0
- agno/utils/format_str.py +16 -0
- agno/utils/functions.py +166 -0
- agno/utils/gemini.py +426 -0
- agno/utils/hooks.py +57 -0
- agno/utils/http.py +74 -0
- agno/utils/json_schema.py +234 -0
- agno/utils/knowledge.py +36 -0
- agno/utils/location.py +19 -0
- agno/utils/log.py +255 -0
- agno/utils/mcp.py +214 -0
- agno/utils/media.py +352 -0
- agno/utils/merge_dict.py +41 -0
- agno/utils/message.py +118 -0
- agno/utils/models/__init__.py +0 -0
- agno/utils/models/ai_foundry.py +43 -0
- agno/utils/models/claude.py +358 -0
- agno/utils/models/cohere.py +87 -0
- agno/utils/models/llama.py +78 -0
- agno/utils/models/mistral.py +98 -0
- agno/utils/models/openai_responses.py +140 -0
- agno/utils/models/schema_utils.py +153 -0
- agno/utils/models/watsonx.py +41 -0
- agno/utils/openai.py +257 -0
- agno/utils/pickle.py +32 -0
- agno/utils/pprint.py +178 -0
- agno/utils/print_response/__init__.py +0 -0
- agno/utils/print_response/agent.py +842 -0
- agno/utils/print_response/team.py +1724 -0
- agno/utils/print_response/workflow.py +1668 -0
- agno/utils/prompts.py +111 -0
- agno/utils/reasoning.py +108 -0
- agno/utils/response.py +163 -0
- agno/utils/response_iterator.py +17 -0
- agno/utils/safe_formatter.py +24 -0
- agno/utils/serialize.py +32 -0
- agno/utils/shell.py +22 -0
- agno/utils/streamlit.py +487 -0
- agno/utils/string.py +231 -0
- agno/utils/team.py +139 -0
- agno/utils/timer.py +41 -0
- agno/utils/tools.py +102 -0
- agno/utils/web.py +23 -0
- agno/utils/whatsapp.py +305 -0
- agno/utils/yaml_io.py +25 -0
- agno/vectordb/__init__.py +3 -0
- agno/vectordb/base.py +127 -0
- agno/vectordb/cassandra/__init__.py +5 -0
- agno/vectordb/cassandra/cassandra.py +501 -0
- agno/vectordb/cassandra/extra_param_mixin.py +11 -0
- agno/vectordb/cassandra/index.py +13 -0
- agno/vectordb/chroma/__init__.py +5 -0
- agno/vectordb/chroma/chromadb.py +929 -0
- agno/vectordb/clickhouse/__init__.py +9 -0
- agno/vectordb/clickhouse/clickhousedb.py +835 -0
- agno/vectordb/clickhouse/index.py +9 -0
- agno/vectordb/couchbase/__init__.py +3 -0
- agno/vectordb/couchbase/couchbase.py +1442 -0
- agno/vectordb/distance.py +7 -0
- agno/vectordb/lancedb/__init__.py +6 -0
- agno/vectordb/lancedb/lance_db.py +995 -0
- agno/vectordb/langchaindb/__init__.py +5 -0
- agno/vectordb/langchaindb/langchaindb.py +163 -0
- agno/vectordb/lightrag/__init__.py +5 -0
- agno/vectordb/lightrag/lightrag.py +388 -0
- agno/vectordb/llamaindex/__init__.py +3 -0
- agno/vectordb/llamaindex/llamaindexdb.py +166 -0
- agno/vectordb/milvus/__init__.py +4 -0
- agno/vectordb/milvus/milvus.py +1182 -0
- agno/vectordb/mongodb/__init__.py +9 -0
- agno/vectordb/mongodb/mongodb.py +1417 -0
- agno/vectordb/pgvector/__init__.py +12 -0
- agno/vectordb/pgvector/index.py +23 -0
- agno/vectordb/pgvector/pgvector.py +1462 -0
- agno/vectordb/pineconedb/__init__.py +5 -0
- agno/vectordb/pineconedb/pineconedb.py +747 -0
- agno/vectordb/qdrant/__init__.py +5 -0
- agno/vectordb/qdrant/qdrant.py +1134 -0
- agno/vectordb/redis/__init__.py +9 -0
- agno/vectordb/redis/redisdb.py +694 -0
- agno/vectordb/search.py +7 -0
- agno/vectordb/singlestore/__init__.py +10 -0
- agno/vectordb/singlestore/index.py +41 -0
- agno/vectordb/singlestore/singlestore.py +763 -0
- agno/vectordb/surrealdb/__init__.py +3 -0
- agno/vectordb/surrealdb/surrealdb.py +699 -0
- agno/vectordb/upstashdb/__init__.py +5 -0
- agno/vectordb/upstashdb/upstashdb.py +718 -0
- agno/vectordb/weaviate/__init__.py +8 -0
- agno/vectordb/weaviate/index.py +15 -0
- agno/vectordb/weaviate/weaviate.py +1005 -0
- agno/workflow/__init__.py +23 -0
- agno/workflow/agent.py +299 -0
- agno/workflow/condition.py +738 -0
- agno/workflow/loop.py +735 -0
- agno/workflow/parallel.py +824 -0
- agno/workflow/router.py +702 -0
- agno/workflow/step.py +1432 -0
- agno/workflow/steps.py +592 -0
- agno/workflow/types.py +520 -0
- agno/workflow/workflow.py +4321 -0
- agno-2.2.13.dist-info/METADATA +614 -0
- agno-2.2.13.dist-info/RECORD +575 -0
- agno-2.2.13.dist-info/WHEEL +5 -0
- agno-2.2.13.dist-info/licenses/LICENSE +201 -0
- agno-2.2.13.dist-info/top_level.txt +1 -0
agno/eval/accuracy.py
ADDED
@@ -0,0 +1,834 @@
from dataclasses import asdict, dataclass, field
from os import getenv
from textwrap import dedent
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Union
from uuid import uuid4

from pydantic import BaseModel, Field

from agno.agent import Agent
from agno.db.base import AsyncBaseDb, BaseDb
from agno.db.schemas.evals import EvalType
from agno.eval.utils import async_log_eval, log_eval_run, store_result_in_file
from agno.exceptions import EvalError
from agno.models.base import Model
from agno.team.team import Team
from agno.utils.log import log_error, logger, set_log_level_to_debug, set_log_level_to_info

if TYPE_CHECKING:
    from rich.console import Console


class AccuracyAgentResponse(BaseModel):
    accuracy_score: int = Field(..., description="Accuracy Score between 1 and 10 assigned to the Agent's answer.")
    accuracy_reason: str = Field(..., description="Detailed reasoning for the accuracy score.")


@dataclass
class AccuracyEvaluation:
    input: str
    output: str
    expected_output: str
    score: int
    reason: str

    def print_eval(self, console: Optional["Console"] = None):
        from rich.box import ROUNDED
        from rich.console import Console
        from rich.markdown import Markdown
        from rich.table import Table

        if console is None:
            console = Console()

        results_table = Table(
            box=ROUNDED,
            border_style="blue",
            show_header=False,
            title="[ Evaluation Result ]",
            title_style="bold sky_blue1",
            title_justify="center",
        )
        results_table.add_row("Input", self.input)
        results_table.add_row("Output", self.output)
        results_table.add_row("Expected Output", self.expected_output)
        results_table.add_row("Accuracy Score", f"{str(self.score)}/10")
        results_table.add_row("Accuracy Reason", Markdown(self.reason))
        console.print(results_table)


@dataclass
class AccuracyResult:
    results: List[AccuracyEvaluation] = field(default_factory=list)
    avg_score: float = field(init=False)
    mean_score: float = field(init=False)
    min_score: float = field(init=False)
    max_score: float = field(init=False)
    std_dev_score: float = field(init=False)

    def __post_init__(self):
        self.compute_stats()

    def compute_stats(self):
        import statistics

        if self.results and len(self.results) > 0:
            _results = [r.score for r in self.results]
            self.avg_score = statistics.mean(_results)
            self.mean_score = statistics.mean(_results)
            self.min_score = min(_results)
            self.max_score = max(_results)
            self.std_dev_score = statistics.stdev(_results) if len(_results) > 1 else 0

    def print_summary(self, console: Optional["Console"] = None):
        from rich.box import ROUNDED
        from rich.console import Console
        from rich.table import Table

        if console is None:
            console = Console()

        summary_table = Table(
            box=ROUNDED,
            border_style="blue",
            show_header=False,
            title="[ Evaluation Summary ]",
            title_style="bold sky_blue1",
            title_justify="center",
        )
        summary_table.add_row("Number of Runs", f"{len(self.results)}")

        if self.avg_score is not None:
            summary_table.add_row("Average Score", f"{self.avg_score:.2f}")
        if self.mean_score is not None:
            summary_table.add_row("Mean Score", f"{self.mean_score:.2f}")
        if self.min_score is not None:
            summary_table.add_row("Minimum Score", f"{self.min_score:.2f}")
        if self.max_score is not None:
            summary_table.add_row("Maximum Score", f"{self.max_score:.2f}")
        if self.std_dev_score is not None:
            summary_table.add_row("Standard Deviation", f"{self.std_dev_score:.2f}")

        console.print(summary_table)

    def print_results(self, console: Optional["Console"] = None):
        from rich.box import ROUNDED
        from rich.console import Console
        from rich.table import Table

        if console is None:
            console = Console()

        results_table = Table(
            box=ROUNDED,
            border_style="blue",
            show_header=False,
            title="[ Evaluation Result ]",
            title_style="bold sky_blue1",
            title_justify="center",
        )
        for result in self.results:
            results_table.add_row("Input", result.input)
            results_table.add_row("Output", result.output)
            results_table.add_row("Expected Output", result.expected_output)
            results_table.add_row("Accuracy Score", f"{str(result.score)}/10")
            results_table.add_row("Accuracy Reason", result.reason)
        console.print(results_table)


@dataclass
class AccuracyEval:
    """Interface to evaluate the accuracy of an Agent or Team, given a prompt and expected answer"""

    # Input to evaluate
    input: Union[str, Callable]
    # Expected answer to the input
    expected_output: Union[str, Callable]
    # Agent to evaluate
    agent: Optional[Agent] = None
    # Team to evaluate
    team: Optional[Team] = None

    # Evaluation name
    name: Optional[str] = None
    # Evaluation UUID
    eval_id: str = field(default_factory=lambda: str(uuid4()))
    # Number of iterations to run
    num_iterations: int = 1
    # Result of the evaluation
    result: Optional[AccuracyResult] = None

    # Model for the evaluator agent
    model: Optional[Model] = None
    # Agent used to evaluate the answer
    evaluator_agent: Optional[Agent] = None
    # Guidelines for the evaluator agent
    additional_guidelines: Optional[Union[str, List[str]]] = None
    # Additional context to the evaluator agent
    additional_context: Optional[str] = None

    # Print summary of results
    print_summary: bool = False
    # Print detailed results
    print_results: bool = False
    # If set, results will be saved in the given file path
    file_path_to_save_results: Optional[str] = None
    # Enable debug logs
    debug_mode: bool = getenv("AGNO_DEBUG", "false").lower() == "true"
    # The database to store Evaluation results
    db: Optional[Union[BaseDb, AsyncBaseDb]] = None

    # Telemetry settings
    # telemetry=True logs minimal telemetry for analytics
    # This helps us improve our Evals and provide better support
    telemetry: bool = True

    def get_evaluator_agent(self) -> Agent:
        """Return the evaluator agent. If not provided, build it based on the evaluator fields and default instructions."""
        if self.evaluator_agent is not None:
            return self.evaluator_agent

        model = self.model
        if model is None:
            try:
                from agno.models.openai import OpenAIChat

                model = OpenAIChat(id="o4-mini")
            except (ModuleNotFoundError, ImportError) as e:
                logger.exception(e)
                raise EvalError(
                    "Agno uses `openai` as the default model provider. Please run `pip install openai` to use the default evaluator."
                )

        additional_guidelines = ""
        if self.additional_guidelines is not None:
            additional_guidelines = "\n## Additional Guidelines\n"
            if isinstance(self.additional_guidelines, str):
                additional_guidelines += self.additional_guidelines
            else:
                additional_guidelines += "\n- ".join(self.additional_guidelines)
            additional_guidelines += "\n"

        additional_context = ""
        if self.additional_context is not None and len(self.additional_context) > 0:
            additional_context = "\n## Additional Context\n"
            additional_context += self.additional_context
            additional_context += "\n"

        return Agent(
            model=model,
            description=f"""\
You are an expert judge tasked with comparing the quality of an AI Agent’s output to a user-provided expected output. You must assume the expected_output is correct - even if you personally disagree.

## Evaluation Inputs
- agent_input: The original task or query given to the Agent.
- expected_output: The correct response to the task (provided by the user).
    - NOTE: You must assume the expected_output is correct - even if you personally disagree.
- agent_output: The response generated by the Agent.

## Evaluation Criteria
- Accuracy: How closely does the agent_output match the expected_output?
- Completeness: Does the agent_output include all the key elements of the expected_output?

## Instructions
1. Compare the agent_output only to the expected_output, not what you think the expected_output should be.
2. Do not judge the correctness of the expected_output itself. Your role is only to compare the two outputs, the user provided expected_output is correct.
3. Follow the additional guidelines if provided.
4. Provide a detailed analysis including:
    - Specific similarities and differences
    - Important points included or omitted
    - Any inaccuracies, paraphrasing errors, or structural differences
5. Reference the criteria explicitly in your reasoning.
6. Assign a score from 1 to 10 (whole numbers only):
    1-2: Completely incorrect or irrelevant.
    3-4: Major inaccuracies or missing key information.
    5-6: Partially correct, but with significant issues.
    7-8: Mostly accurate and complete, with minor issues
    9-10: Highly accurate and complete, matching the expected answer and given guidelines closely.
{additional_guidelines}{additional_context}
Remember: You must only compare the agent_output to the expected_output. The expected_output is correct as it was provided by the user.
""",
            output_schema=AccuracyAgentResponse,
            structured_outputs=True,
        )

    def get_eval_expected_output(self) -> str:
        """Return the eval expected answer. If it is a callable, call it and return the resulting string"""
        if callable(self.expected_output):
            _output = self.expected_output()
            if isinstance(_output, str):
                return _output
            else:
                raise EvalError(f"The expected output needs to be or return a string, but it returned: {type(_output)}")
        return self.expected_output

    def get_eval_input(self) -> str:
        """Return the evaluation input. If it is a callable, call it and return the resulting string"""
        if callable(self.input):
            _input = self.input()
            if isinstance(_input, str):
                return _input
            else:
                raise EvalError(f"The eval input needs to be or return a string, but it returned: {type(_input)}")
        return self.input

    def evaluate_answer(
        self,
        input: str,
        evaluator_agent: Agent,
        evaluation_input: str,
        evaluator_expected_output: str,
        agent_output: str,
    ) -> Optional[AccuracyEvaluation]:
        """Orchestrate the evaluation process."""
        try:
            accuracy_agent_response = evaluator_agent.run(evaluation_input).content
            if accuracy_agent_response is None or not isinstance(accuracy_agent_response, AccuracyAgentResponse):
                raise EvalError(f"Evaluator Agent returned an invalid response: {accuracy_agent_response}")
            return AccuracyEvaluation(
                input=input,
                output=agent_output,
                expected_output=evaluator_expected_output,
                score=accuracy_agent_response.accuracy_score,
                reason=accuracy_agent_response.accuracy_reason,
            )
        except Exception as e:
            logger.exception(f"Failed to evaluate accuracy: {e}")
            return None

    async def aevaluate_answer(
        self,
        input: str,
        evaluator_agent: Agent,
        evaluation_input: str,
        evaluator_expected_output: str,
        agent_output: str,
    ) -> Optional[AccuracyEvaluation]:
        """Orchestrate the evaluation process asynchronously."""
        try:
            response = await evaluator_agent.arun(evaluation_input)
            accuracy_agent_response = response.content
            if accuracy_agent_response is None or not isinstance(accuracy_agent_response, AccuracyAgentResponse):
                raise EvalError(f"Evaluator Agent returned an invalid response: {accuracy_agent_response}")
            return AccuracyEvaluation(
                input=input,
                output=agent_output,
                expected_output=evaluator_expected_output,
                score=accuracy_agent_response.accuracy_score,
                reason=accuracy_agent_response.accuracy_reason,
            )
        except Exception as e:
            logger.exception(f"Failed to evaluate accuracy asynchronously: {e}")
            return None

    def run(
        self,
        *,
        print_summary: bool = True,
        print_results: bool = True,
    ) -> Optional[AccuracyResult]:
        if isinstance(self.db, AsyncBaseDb):
            raise ValueError("run() is not supported with an async DB. Please use arun() instead.")

        if self.agent is None and self.team is None:
            logger.error("You need to provide one of 'agent' or 'team' to run the evaluation.")
            return None

        if self.agent is not None and self.team is not None:
            logger.error("Provide only one of 'agent' or 'team' to run the evaluation.")
            return None

        from rich.console import Console
        from rich.live import Live
        from rich.status import Status

        set_log_level_to_debug() if self.debug_mode else set_log_level_to_info()

        self.result = AccuracyResult()

        logger.debug(f"************ Evaluation Start: {self.eval_id} ************")

        # Add a spinner while running the evaluations
        console = Console()
        with Live(console=console, transient=True) as live_log:
            evaluator_agent = self.get_evaluator_agent()
            eval_input = self.get_eval_input()
            eval_expected_output = self.get_eval_expected_output()

            for i in range(self.num_iterations):
                status = Status(f"Running evaluation {i + 1}...", spinner="dots", speed=1.0, refresh_per_second=10)
                live_log.update(status)

                if self.agent is not None:
                    output = self.agent.run(input=eval_input).content
                elif self.team is not None:
                    output = self.team.run(input=eval_input).content

                if not output:
                    logger.error(f"Failed to generate a valid answer on iteration {i + 1}: {output}")
                    continue

                evaluation_input = dedent(f"""\
                    <agent_input>
                    {eval_input}
                    </agent_input>

                    <expected_output>
                    {eval_expected_output}
                    </expected_output>

                    <agent_output>
                    {output}
                    </agent_output>\
                    """)
                logger.debug(f"Agent output #{i + 1}: {output}")
                result = self.evaluate_answer(
                    input=eval_input,
                    evaluator_agent=evaluator_agent,
                    evaluation_input=evaluation_input,
                    evaluator_expected_output=eval_expected_output,
                    agent_output=output,
                )
                if result is None:
                    logger.error(f"Failed to evaluate accuracy on iteration {i + 1}")
                    continue

                self.result.results.append(result)
                self.result.compute_stats()
                status.update(f"Eval iteration {i + 1} finished")

            status.stop()

        # Save result to file if requested
        if self.file_path_to_save_results is not None and self.result is not None:
            store_result_in_file(
                file_path=self.file_path_to_save_results,
                name=self.name,
                eval_id=self.eval_id,
                result=self.result,
            )

        # Print results if requested
        if self.print_results or print_results:
            self.result.print_results(console)
        if self.print_summary or print_summary:
            self.result.print_summary(console)

        # Log results to the Agno DB if requested
        if self.agent is not None:
            agent_id = self.agent.id
            team_id = None
            model_id = self.agent.model.id if self.agent.model is not None else None
            model_provider = self.agent.model.provider if self.agent.model is not None else None
            evaluated_component_name = self.agent.name
        elif self.team is not None:
            agent_id = None
            team_id = self.team.id
            model_id = self.team.model.id if self.team.model is not None else None
            model_provider = self.team.model.provider if self.team.model is not None else None
            evaluated_component_name = self.team.name

        if self.db:
            log_eval_input = {
                "additional_guidelines": self.additional_guidelines,
                "additional_context": self.additional_context,
                "num_iterations": self.num_iterations,
                "expected_output": self.expected_output,
                "input": self.input,
            }

            log_eval_run(
                db=self.db,
                run_id=self.eval_id,  # type: ignore
                run_data=asdict(self.result),
                eval_type=EvalType.ACCURACY,
                agent_id=agent_id,
                team_id=team_id,
                model_id=model_id,
                model_provider=model_provider,
                name=self.name if self.name is not None else None,
                evaluated_component_name=evaluated_component_name,
                eval_input=log_eval_input,
            )

        if self.telemetry:
            from agno.api.evals import EvalRunCreate, create_eval_run_telemetry

            create_eval_run_telemetry(
                eval_run=EvalRunCreate(
                    run_id=self.eval_id,
                    eval_type=EvalType.ACCURACY,
                    data=self._get_telemetry_data(),
                ),
            )

        logger.debug(f"*********** Evaluation {self.eval_id} Finished ***********")
        return self.result

    async def arun(
        self,
        *,
        print_summary: bool = True,
        print_results: bool = True,
    ) -> Optional[AccuracyResult]:
        if self.agent is None and self.team is None:
            logger.error("You need to provide one of 'agent' or 'team' to run the evaluation.")
            return None

        if self.agent is not None and self.team is not None:
            logger.error("Provide only one of 'agent' or 'team' to run the evaluation.")
            return None

        from rich.console import Console
        from rich.live import Live
        from rich.status import Status

        set_log_level_to_debug() if self.debug_mode else set_log_level_to_info()

        self.result = AccuracyResult()

        logger.debug(f"************ Evaluation Start: {self.eval_id} ************")

        # Add a spinner while running the evaluations
        console = Console()
        with Live(console=console, transient=True) as live_log:
            evaluator_agent = self.get_evaluator_agent()
            eval_input = self.get_eval_input()
            eval_expected_output = self.get_eval_expected_output()

            for i in range(self.num_iterations):
                status = Status(f"Running evaluation {i + 1}...", spinner="dots", speed=1.0, refresh_per_second=10)
                live_log.update(status)

                if self.agent is not None:
                    response = await self.agent.arun(input=eval_input)
                    output = response.content
                elif self.team is not None:
                    response = await self.team.arun(input=eval_input)  # type: ignore
                    output = response.content

                if not output:
                    logger.error(f"Failed to generate a valid answer on iteration {i + 1}: {output}")
                    continue

                evaluation_input = dedent(f"""\
                    <agent_input>
                    {eval_input}
                    </agent_input>

                    <expected_output>
                    {eval_expected_output}
                    </expected_output>

                    <agent_output>
                    {output}
                    </agent_output>\
                    """)
                logger.debug(f"Agent output #{i + 1}: {output}")
                result = await self.aevaluate_answer(
                    input=eval_input,
                    evaluator_agent=evaluator_agent,
                    evaluation_input=evaluation_input,
                    evaluator_expected_output=eval_expected_output,
                    agent_output=output,
                )
                if result is None:
                    logger.error(f"Failed to evaluate accuracy on iteration {i + 1}")
                    continue

                self.result.results.append(result)
                self.result.compute_stats()
                status.update(f"Eval iteration {i + 1} finished")

            status.stop()

        # Save result to file if requested
        if self.file_path_to_save_results is not None and self.result is not None:
            store_result_in_file(
                file_path=self.file_path_to_save_results,
                name=self.name,
                eval_id=self.eval_id,
                result=self.result,
            )

        # Print results if requested
        if self.print_results or print_results:
            self.result.print_results(console)
        if self.print_summary or print_summary:
            self.result.print_summary(console)

        if self.agent is not None:
            agent_id = self.agent.id
            team_id = None
            model_id = self.agent.model.id if self.agent.model is not None else None
            model_provider = self.agent.model.provider if self.agent.model is not None else None
            evaluated_component_name = self.agent.name
        elif self.team is not None:
            agent_id = None
            team_id = self.team.id
            model_id = self.team.model.id if self.team.model is not None else None
            model_provider = self.team.model.provider if self.team.model is not None else None
            evaluated_component_name = self.team.name

        # Log results to the Agno DB if requested
        if self.db:
            log_eval_input = {
                "additional_guidelines": self.additional_guidelines,
                "additional_context": self.additional_context,
                "num_iterations": self.num_iterations,
                "expected_output": self.expected_output,
                "input": self.input,
            }
            await async_log_eval(
                db=self.db,
                run_id=self.eval_id,  # type: ignore
                run_data=asdict(self.result),
                eval_type=EvalType.ACCURACY,
                agent_id=agent_id,
                model_id=model_id,
                model_provider=model_provider,
                name=self.name if self.name is not None else None,
                evaluated_component_name=evaluated_component_name,
                team_id=team_id,
                workflow_id=None,
                eval_input=log_eval_input,
            )

        if self.telemetry:
            from agno.api.evals import EvalRunCreate, async_create_eval_run_telemetry

            await async_create_eval_run_telemetry(
                eval_run=EvalRunCreate(run_id=self.eval_id, eval_type=EvalType.ACCURACY),
            )

        logger.debug(f"*********** Evaluation {self.eval_id} Finished ***********")
        return self.result

    def run_with_output(
        self,
        *,
        output: str,
        print_summary: bool = True,
        print_results: bool = True,
    ) -> Optional[AccuracyResult]:
        """Run the evaluation logic against the given answer, instead of generating an answer with the Agent"""
        set_log_level_to_debug() if self.debug_mode else set_log_level_to_info()

        self.result = AccuracyResult()

        logger.debug(f"************ Evaluation Start: {self.eval_id} ************")

        evaluator_agent = self.get_evaluator_agent()
        eval_input = self.get_eval_input()
        eval_expected_output = self.get_eval_expected_output()

        evaluation_input = dedent(f"""\
            <agent_input>
            {eval_input}
            </agent_input>

            <expected_output>
            {eval_expected_output}
            </expected_output>

            <agent_output>
            {output}
            </agent_output>\
            """)

        result = self.evaluate_answer(
            input=eval_input,
            evaluator_agent=evaluator_agent,
            evaluation_input=evaluation_input,
            evaluator_expected_output=eval_expected_output,
            agent_output=output,
        )

        if result is not None:
            self.result.results.append(result)
            self.result.compute_stats()

        # Print results if requested
        if self.print_results or print_results:
            self.result.print_results()
        if self.print_summary or print_summary:
            self.result.print_summary()

        # Save result to file if requested
        if self.file_path_to_save_results is not None:
            store_result_in_file(
                file_path=self.file_path_to_save_results,
                name=self.name,
                eval_id=self.eval_id,
                result=self.result,
            )
        # Log results to the Agno DB if requested
        if self.db:
            if isinstance(self.db, AsyncBaseDb):
                log_error("You are using an async DB in a non-async method. The evaluation won't be stored in the DB.")

            else:
                if self.agent is not None:
                    agent_id = self.agent.id
                    team_id = None
                    model_id = self.agent.model.id if self.agent.model is not None else None
                    model_provider = self.agent.model.provider if self.agent.model is not None else None
                    evaluated_component_name = self.agent.name
                elif self.team is not None:
                    agent_id = None
                    team_id = self.team.id
                    model_id = self.team.model.id if self.team.model is not None else None
                    model_provider = self.team.model.provider if self.team.model is not None else None
                    evaluated_component_name = self.team.name
                else:
                    agent_id = None
                    team_id = None
                    model_id = None
                    model_provider = None
                    evaluated_component_name = None

                log_eval_input = {
                    "additional_guidelines": self.additional_guidelines,
                    "additional_context": self.additional_context,
                    "num_iterations": self.num_iterations,
                    "expected_output": self.expected_output,
                    "input": self.input,
                }

                log_eval_run(
                    db=self.db,
                    run_id=self.eval_id,  # type: ignore
                    run_data=asdict(self.result),
                    eval_type=EvalType.ACCURACY,
                    name=self.name if self.name is not None else None,
                    agent_id=agent_id,
                    team_id=team_id,
                    model_id=model_id,
                    model_provider=model_provider,
                    evaluated_component_name=evaluated_component_name,
                    workflow_id=None,
                    eval_input=log_eval_input,
                )

        if self.telemetry:
            from agno.api.evals import EvalRunCreate, create_eval_run_telemetry

            create_eval_run_telemetry(
                eval_run=EvalRunCreate(
                    run_id=self.eval_id,
                    eval_type=EvalType.ACCURACY,
                    data=self._get_telemetry_data(),
                ),
            )

        logger.debug(f"*********** Evaluation End: {self.eval_id} ***********")
        return self.result

    async def arun_with_output(
        self,
        *,
        output: str,
        print_summary: bool = True,
        print_results: bool = True,
    ) -> Optional[AccuracyResult]:
        """Run the evaluation logic against the given answer, instead of generating an answer with the Agent"""
        set_log_level_to_debug() if self.debug_mode else set_log_level_to_info()

        self.result = AccuracyResult()

        logger.debug(f"************ Evaluation Start: {self.eval_id} ************")

        evaluator_agent = self.get_evaluator_agent()
        eval_input = self.get_eval_input()
        eval_expected_output = self.get_eval_expected_output()

        evaluation_input = dedent(f"""\
            <agent_input>
            {eval_input}
            </agent_input>

            <expected_output>
            {eval_expected_output}
            </expected_output>

            <agent_output>
            {output}
            </agent_output>\
            """)

        result = await self.aevaluate_answer(
            input=eval_input,
            evaluator_agent=evaluator_agent,
            evaluation_input=evaluation_input,
            evaluator_expected_output=eval_expected_output,
            agent_output=output,
        )

        if result is not None:
            self.result.results.append(result)
            self.result.compute_stats()

        # Print results if requested
        if self.print_results or print_results:
            self.result.print_results()
        if self.print_summary or print_summary:
            self.result.print_summary()

        # Save result to file if requested
        if self.file_path_to_save_results is not None:
            store_result_in_file(
                file_path=self.file_path_to_save_results,
                name=self.name,
                eval_id=self.eval_id,
                result=self.result,
            )
        # Log results to the Agno DB if requested
        if self.db:
            if self.agent is not None:
                agent_id = self.agent.id
                team_id = None
                model_id = self.agent.model.id if self.agent.model is not None else None
                model_provider = self.agent.model.provider if self.agent.model is not None else None
                evaluated_component_name = self.agent.name
            elif self.team is not None:
                agent_id = None
                team_id = self.team.id
                model_id = self.team.model.id if self.team.model is not None else None
                model_provider = self.team.model.provider if self.team.model is not None else None
                evaluated_component_name = self.team.name

            log_eval_input = {
                "additional_guidelines": self.additional_guidelines,
                "additional_context": self.additional_context,
                "num_iterations": self.num_iterations,
                "expected_output": self.expected_output,
                "input": self.input,
            }

            await async_log_eval(
                db=self.db,
                run_id=self.eval_id,  # type: ignore
                run_data=asdict(self.result),
                eval_type=EvalType.ACCURACY,
                name=self.name if self.name is not None else None,
                agent_id=agent_id,
                team_id=team_id,
                model_id=model_id,
                model_provider=model_provider,
                evaluated_component_name=evaluated_component_name,
                workflow_id=None,
                eval_input=log_eval_input,
            )

        logger.debug(f"*********** Evaluation End: {self.eval_id} ***********")
        return self.result

    def _get_telemetry_data(self) -> Dict[str, Any]:
        """Get the telemetry data for the evaluation"""
        return {
            "agent_id": self.agent.id if self.agent else None,
            "team_id": self.team.id if self.team else None,
            "model_id": self.agent.model.id if self.agent and self.agent.model else None,
            "model_provider": self.agent.model.provider if self.agent and self.agent.model else None,
            "num_iterations": self.num_iterations,
        }
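
Note: `AccuracyEval` is the public entry point of this module. Below is a minimal usage sketch, assuming `agno` and `openai` are installed and `OPENAI_API_KEY` is set; the model id, agent name, and toy prompt are illustrative and not part of the package.

from agno.agent import Agent
from agno.eval.accuracy import AccuracyEval
from agno.models.openai import OpenAIChat

# Agent under evaluation (model id and name are illustrative)
agent = Agent(model=OpenAIChat(id="gpt-4o"), name="calculator-agent")

evaluation = AccuracyEval(
    input="What is (10 * 5) squared? Answer with only the number.",
    expected_output="2500",
    agent=agent,
    num_iterations=3,  # stats are computed over 3 agent runs
)

# Generate answers with the agent, then score each one against expected_output
result = evaluation.run(print_results=True, print_summary=True)
if result is not None and result.results:
    print(f"avg={result.avg_score:.2f} min={result.min_score} max={result.max_score}")

# Or score a pre-generated answer without running the agent
evaluation.run_with_output(output="2500", print_results=True)

`run()` generates an answer with the agent (or team) and scores it with the evaluator agent; `run_with_output()` scores an answer you supply. The async variants `arun()` and `arun_with_output()` are required when `db` is an `AsyncBaseDb`, since `run()` raises a `ValueError` in that case.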