agno-0.1.2-py3-none-any.whl → agno-2.3.13-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/__init__.py +8 -0
- agno/agent/__init__.py +44 -5
- agno/agent/agent.py +10531 -2975
- agno/api/agent.py +14 -53
- agno/api/api.py +7 -46
- agno/api/evals.py +22 -0
- agno/api/os.py +17 -0
- agno/api/routes.py +6 -25
- agno/api/schemas/__init__.py +9 -0
- agno/api/schemas/agent.py +6 -9
- agno/api/schemas/evals.py +16 -0
- agno/api/schemas/os.py +14 -0
- agno/api/schemas/team.py +10 -10
- agno/api/schemas/utils.py +21 -0
- agno/api/schemas/workflows.py +16 -0
- agno/api/settings.py +53 -0
- agno/api/team.py +22 -26
- agno/api/workflow.py +28 -0
- agno/cloud/aws/base.py +214 -0
- agno/cloud/aws/s3/__init__.py +2 -0
- agno/cloud/aws/s3/api_client.py +43 -0
- agno/cloud/aws/s3/bucket.py +195 -0
- agno/cloud/aws/s3/object.py +57 -0
- agno/compression/__init__.py +3 -0
- agno/compression/manager.py +247 -0
- agno/culture/__init__.py +3 -0
- agno/culture/manager.py +956 -0
- agno/db/__init__.py +24 -0
- agno/db/async_postgres/__init__.py +3 -0
- agno/db/base.py +946 -0
- agno/db/dynamo/__init__.py +3 -0
- agno/db/dynamo/dynamo.py +2781 -0
- agno/db/dynamo/schemas.py +442 -0
- agno/db/dynamo/utils.py +743 -0
- agno/db/firestore/__init__.py +3 -0
- agno/db/firestore/firestore.py +2379 -0
- agno/db/firestore/schemas.py +181 -0
- agno/db/firestore/utils.py +376 -0
- agno/db/gcs_json/__init__.py +3 -0
- agno/db/gcs_json/gcs_json_db.py +1791 -0
- agno/db/gcs_json/utils.py +228 -0
- agno/db/in_memory/__init__.py +3 -0
- agno/db/in_memory/in_memory_db.py +1312 -0
- agno/db/in_memory/utils.py +230 -0
- agno/db/json/__init__.py +3 -0
- agno/db/json/json_db.py +1777 -0
- agno/db/json/utils.py +230 -0
- agno/db/migrations/manager.py +199 -0
- agno/db/migrations/v1_to_v2.py +635 -0
- agno/db/migrations/versions/v2_3_0.py +938 -0
- agno/db/mongo/__init__.py +17 -0
- agno/db/mongo/async_mongo.py +2760 -0
- agno/db/mongo/mongo.py +2597 -0
- agno/db/mongo/schemas.py +119 -0
- agno/db/mongo/utils.py +276 -0
- agno/db/mysql/__init__.py +4 -0
- agno/db/mysql/async_mysql.py +2912 -0
- agno/db/mysql/mysql.py +2923 -0
- agno/db/mysql/schemas.py +186 -0
- agno/db/mysql/utils.py +488 -0
- agno/db/postgres/__init__.py +4 -0
- agno/db/postgres/async_postgres.py +2579 -0
- agno/db/postgres/postgres.py +2870 -0
- agno/db/postgres/schemas.py +187 -0
- agno/db/postgres/utils.py +442 -0
- agno/db/redis/__init__.py +3 -0
- agno/db/redis/redis.py +2141 -0
- agno/db/redis/schemas.py +159 -0
- agno/db/redis/utils.py +346 -0
- agno/db/schemas/__init__.py +4 -0
- agno/db/schemas/culture.py +120 -0
- agno/db/schemas/evals.py +34 -0
- agno/db/schemas/knowledge.py +40 -0
- agno/db/schemas/memory.py +61 -0
- agno/db/singlestore/__init__.py +3 -0
- agno/db/singlestore/schemas.py +179 -0
- agno/db/singlestore/singlestore.py +2877 -0
- agno/db/singlestore/utils.py +384 -0
- agno/db/sqlite/__init__.py +4 -0
- agno/db/sqlite/async_sqlite.py +2911 -0
- agno/db/sqlite/schemas.py +181 -0
- agno/db/sqlite/sqlite.py +2908 -0
- agno/db/sqlite/utils.py +429 -0
- agno/db/surrealdb/__init__.py +3 -0
- agno/db/surrealdb/metrics.py +292 -0
- agno/db/surrealdb/models.py +334 -0
- agno/db/surrealdb/queries.py +71 -0
- agno/db/surrealdb/surrealdb.py +1908 -0
- agno/db/surrealdb/utils.py +147 -0
- agno/db/utils.py +118 -0
- agno/eval/__init__.py +24 -0
- agno/eval/accuracy.py +666 -276
- agno/eval/agent_as_judge.py +861 -0
- agno/eval/base.py +29 -0
- agno/eval/performance.py +779 -0
- agno/eval/reliability.py +241 -62
- agno/eval/utils.py +120 -0
- agno/exceptions.py +143 -1
- agno/filters.py +354 -0
- agno/guardrails/__init__.py +6 -0
- agno/guardrails/base.py +19 -0
- agno/guardrails/openai.py +144 -0
- agno/guardrails/pii.py +94 -0
- agno/guardrails/prompt_injection.py +52 -0
- agno/hooks/__init__.py +3 -0
- agno/hooks/decorator.py +164 -0
- agno/integrations/discord/__init__.py +3 -0
- agno/integrations/discord/client.py +203 -0
- agno/knowledge/__init__.py +5 -1
- agno/{document → knowledge}/chunking/agentic.py +22 -14
- agno/{document → knowledge}/chunking/document.py +2 -2
- agno/{document → knowledge}/chunking/fixed.py +7 -6
- agno/knowledge/chunking/markdown.py +151 -0
- agno/{document → knowledge}/chunking/recursive.py +15 -3
- agno/knowledge/chunking/row.py +39 -0
- agno/knowledge/chunking/semantic.py +91 -0
- agno/knowledge/chunking/strategy.py +165 -0
- agno/knowledge/content.py +74 -0
- agno/knowledge/document/__init__.py +5 -0
- agno/{document → knowledge/document}/base.py +12 -2
- agno/knowledge/embedder/__init__.py +5 -0
- agno/knowledge/embedder/aws_bedrock.py +343 -0
- agno/knowledge/embedder/azure_openai.py +210 -0
- agno/{embedder → knowledge/embedder}/base.py +8 -0
- agno/knowledge/embedder/cohere.py +323 -0
- agno/knowledge/embedder/fastembed.py +62 -0
- agno/{embedder → knowledge/embedder}/fireworks.py +1 -1
- agno/knowledge/embedder/google.py +258 -0
- agno/knowledge/embedder/huggingface.py +94 -0
- agno/knowledge/embedder/jina.py +182 -0
- agno/knowledge/embedder/langdb.py +22 -0
- agno/knowledge/embedder/mistral.py +206 -0
- agno/knowledge/embedder/nebius.py +13 -0
- agno/knowledge/embedder/ollama.py +154 -0
- agno/knowledge/embedder/openai.py +195 -0
- agno/knowledge/embedder/sentence_transformer.py +63 -0
- agno/{embedder → knowledge/embedder}/together.py +1 -1
- agno/knowledge/embedder/vllm.py +262 -0
- agno/knowledge/embedder/voyageai.py +165 -0
- agno/knowledge/knowledge.py +3006 -0
- agno/knowledge/reader/__init__.py +7 -0
- agno/knowledge/reader/arxiv_reader.py +81 -0
- agno/knowledge/reader/base.py +95 -0
- agno/knowledge/reader/csv_reader.py +164 -0
- agno/knowledge/reader/docx_reader.py +82 -0
- agno/knowledge/reader/field_labeled_csv_reader.py +290 -0
- agno/knowledge/reader/firecrawl_reader.py +201 -0
- agno/knowledge/reader/json_reader.py +88 -0
- agno/knowledge/reader/markdown_reader.py +137 -0
- agno/knowledge/reader/pdf_reader.py +431 -0
- agno/knowledge/reader/pptx_reader.py +101 -0
- agno/knowledge/reader/reader_factory.py +313 -0
- agno/knowledge/reader/s3_reader.py +89 -0
- agno/knowledge/reader/tavily_reader.py +193 -0
- agno/knowledge/reader/text_reader.py +127 -0
- agno/knowledge/reader/web_search_reader.py +325 -0
- agno/knowledge/reader/website_reader.py +455 -0
- agno/knowledge/reader/wikipedia_reader.py +91 -0
- agno/knowledge/reader/youtube_reader.py +78 -0
- agno/knowledge/remote_content/remote_content.py +88 -0
- agno/knowledge/reranker/__init__.py +3 -0
- agno/{reranker → knowledge/reranker}/base.py +1 -1
- agno/{reranker → knowledge/reranker}/cohere.py +2 -2
- agno/knowledge/reranker/infinity.py +195 -0
- agno/knowledge/reranker/sentence_transformer.py +54 -0
- agno/knowledge/types.py +39 -0
- agno/knowledge/utils.py +234 -0
- agno/media.py +439 -95
- agno/memory/__init__.py +16 -3
- agno/memory/manager.py +1474 -123
- agno/memory/strategies/__init__.py +15 -0
- agno/memory/strategies/base.py +66 -0
- agno/memory/strategies/summarize.py +196 -0
- agno/memory/strategies/types.py +37 -0
- agno/models/aimlapi/__init__.py +5 -0
- agno/models/aimlapi/aimlapi.py +62 -0
- agno/models/anthropic/__init__.py +4 -0
- agno/models/anthropic/claude.py +960 -496
- agno/models/aws/__init__.py +15 -0
- agno/models/aws/bedrock.py +686 -451
- agno/models/aws/claude.py +190 -183
- agno/models/azure/__init__.py +18 -1
- agno/models/azure/ai_foundry.py +489 -0
- agno/models/azure/openai_chat.py +89 -40
- agno/models/base.py +2477 -550
- agno/models/cerebras/__init__.py +12 -0
- agno/models/cerebras/cerebras.py +565 -0
- agno/models/cerebras/cerebras_openai.py +131 -0
- agno/models/cohere/__init__.py +4 -0
- agno/models/cohere/chat.py +306 -492
- agno/models/cometapi/__init__.py +5 -0
- agno/models/cometapi/cometapi.py +74 -0
- agno/models/dashscope/__init__.py +5 -0
- agno/models/dashscope/dashscope.py +90 -0
- agno/models/deepinfra/__init__.py +5 -0
- agno/models/deepinfra/deepinfra.py +45 -0
- agno/models/deepseek/__init__.py +4 -0
- agno/models/deepseek/deepseek.py +110 -9
- agno/models/fireworks/__init__.py +4 -0
- agno/models/fireworks/fireworks.py +19 -22
- agno/models/google/__init__.py +3 -7
- agno/models/google/gemini.py +1717 -662
- agno/models/google/utils.py +22 -0
- agno/models/groq/__init__.py +4 -0
- agno/models/groq/groq.py +391 -666
- agno/models/huggingface/__init__.py +4 -0
- agno/models/huggingface/huggingface.py +266 -538
- agno/models/ibm/__init__.py +5 -0
- agno/models/ibm/watsonx.py +432 -0
- agno/models/internlm/__init__.py +3 -0
- agno/models/internlm/internlm.py +20 -3
- agno/models/langdb/__init__.py +1 -0
- agno/models/langdb/langdb.py +60 -0
- agno/models/litellm/__init__.py +14 -0
- agno/models/litellm/chat.py +503 -0
- agno/models/litellm/litellm_openai.py +42 -0
- agno/models/llama_cpp/__init__.py +5 -0
- agno/models/llama_cpp/llama_cpp.py +22 -0
- agno/models/lmstudio/__init__.py +5 -0
- agno/models/lmstudio/lmstudio.py +25 -0
- agno/models/message.py +361 -39
- agno/models/meta/__init__.py +12 -0
- agno/models/meta/llama.py +502 -0
- agno/models/meta/llama_openai.py +79 -0
- agno/models/metrics.py +120 -0
- agno/models/mistral/__init__.py +4 -0
- agno/models/mistral/mistral.py +293 -393
- agno/models/nebius/__init__.py +3 -0
- agno/models/nebius/nebius.py +53 -0
- agno/models/nexus/__init__.py +3 -0
- agno/models/nexus/nexus.py +22 -0
- agno/models/nvidia/__init__.py +4 -0
- agno/models/nvidia/nvidia.py +22 -3
- agno/models/ollama/__init__.py +4 -2
- agno/models/ollama/chat.py +257 -492
- agno/models/openai/__init__.py +7 -0
- agno/models/openai/chat.py +725 -770
- agno/models/openai/like.py +16 -2
- agno/models/openai/responses.py +1121 -0
- agno/models/openrouter/__init__.py +4 -0
- agno/models/openrouter/openrouter.py +62 -5
- agno/models/perplexity/__init__.py +5 -0
- agno/models/perplexity/perplexity.py +203 -0
- agno/models/portkey/__init__.py +3 -0
- agno/models/portkey/portkey.py +82 -0
- agno/models/requesty/__init__.py +5 -0
- agno/models/requesty/requesty.py +69 -0
- agno/models/response.py +177 -7
- agno/models/sambanova/__init__.py +4 -0
- agno/models/sambanova/sambanova.py +23 -4
- agno/models/siliconflow/__init__.py +5 -0
- agno/models/siliconflow/siliconflow.py +42 -0
- agno/models/together/__init__.py +4 -0
- agno/models/together/together.py +21 -164
- agno/models/utils.py +266 -0
- agno/models/vercel/__init__.py +3 -0
- agno/models/vercel/v0.py +43 -0
- agno/models/vertexai/__init__.py +0 -1
- agno/models/vertexai/claude.py +190 -0
- agno/models/vllm/__init__.py +3 -0
- agno/models/vllm/vllm.py +83 -0
- agno/models/xai/__init__.py +2 -0
- agno/models/xai/xai.py +111 -7
- agno/os/__init__.py +3 -0
- agno/os/app.py +1027 -0
- agno/os/auth.py +244 -0
- agno/os/config.py +126 -0
- agno/os/interfaces/__init__.py +1 -0
- agno/os/interfaces/a2a/__init__.py +3 -0
- agno/os/interfaces/a2a/a2a.py +42 -0
- agno/os/interfaces/a2a/router.py +249 -0
- agno/os/interfaces/a2a/utils.py +924 -0
- agno/os/interfaces/agui/__init__.py +3 -0
- agno/os/interfaces/agui/agui.py +47 -0
- agno/os/interfaces/agui/router.py +147 -0
- agno/os/interfaces/agui/utils.py +574 -0
- agno/os/interfaces/base.py +25 -0
- agno/os/interfaces/slack/__init__.py +3 -0
- agno/os/interfaces/slack/router.py +148 -0
- agno/os/interfaces/slack/security.py +30 -0
- agno/os/interfaces/slack/slack.py +47 -0
- agno/os/interfaces/whatsapp/__init__.py +3 -0
- agno/os/interfaces/whatsapp/router.py +210 -0
- agno/os/interfaces/whatsapp/security.py +55 -0
- agno/os/interfaces/whatsapp/whatsapp.py +36 -0
- agno/os/mcp.py +293 -0
- agno/os/middleware/__init__.py +9 -0
- agno/os/middleware/jwt.py +797 -0
- agno/os/router.py +258 -0
- agno/os/routers/__init__.py +3 -0
- agno/os/routers/agents/__init__.py +3 -0
- agno/os/routers/agents/router.py +599 -0
- agno/os/routers/agents/schema.py +261 -0
- agno/os/routers/evals/__init__.py +3 -0
- agno/os/routers/evals/evals.py +450 -0
- agno/os/routers/evals/schemas.py +174 -0
- agno/os/routers/evals/utils.py +231 -0
- agno/os/routers/health.py +31 -0
- agno/os/routers/home.py +52 -0
- agno/os/routers/knowledge/__init__.py +3 -0
- agno/os/routers/knowledge/knowledge.py +1008 -0
- agno/os/routers/knowledge/schemas.py +178 -0
- agno/os/routers/memory/__init__.py +3 -0
- agno/os/routers/memory/memory.py +661 -0
- agno/os/routers/memory/schemas.py +88 -0
- agno/os/routers/metrics/__init__.py +3 -0
- agno/os/routers/metrics/metrics.py +190 -0
- agno/os/routers/metrics/schemas.py +47 -0
- agno/os/routers/session/__init__.py +3 -0
- agno/os/routers/session/session.py +997 -0
- agno/os/routers/teams/__init__.py +3 -0
- agno/os/routers/teams/router.py +512 -0
- agno/os/routers/teams/schema.py +257 -0
- agno/os/routers/traces/__init__.py +3 -0
- agno/os/routers/traces/schemas.py +414 -0
- agno/os/routers/traces/traces.py +499 -0
- agno/os/routers/workflows/__init__.py +3 -0
- agno/os/routers/workflows/router.py +624 -0
- agno/os/routers/workflows/schema.py +75 -0
- agno/os/schema.py +534 -0
- agno/os/scopes.py +469 -0
- agno/{playground → os}/settings.py +7 -15
- agno/os/utils.py +973 -0
- agno/reasoning/anthropic.py +80 -0
- agno/reasoning/azure_ai_foundry.py +67 -0
- agno/reasoning/deepseek.py +63 -0
- agno/reasoning/default.py +97 -0
- agno/reasoning/gemini.py +73 -0
- agno/reasoning/groq.py +71 -0
- agno/reasoning/helpers.py +24 -1
- agno/reasoning/ollama.py +67 -0
- agno/reasoning/openai.py +86 -0
- agno/reasoning/step.py +2 -1
- agno/reasoning/vertexai.py +76 -0
- agno/run/__init__.py +6 -0
- agno/run/agent.py +822 -0
- agno/run/base.py +247 -0
- agno/run/cancel.py +81 -0
- agno/run/requirement.py +181 -0
- agno/run/team.py +767 -0
- agno/run/workflow.py +708 -0
- agno/session/__init__.py +10 -0
- agno/session/agent.py +260 -0
- agno/session/summary.py +265 -0
- agno/session/team.py +342 -0
- agno/session/workflow.py +501 -0
- agno/table.py +10 -0
- agno/team/__init__.py +37 -0
- agno/team/team.py +9536 -0
- agno/tools/__init__.py +7 -0
- agno/tools/agentql.py +120 -0
- agno/tools/airflow.py +22 -12
- agno/tools/api.py +122 -0
- agno/tools/apify.py +276 -83
- agno/tools/{arxiv_toolkit.py → arxiv.py} +20 -12
- agno/tools/aws_lambda.py +28 -7
- agno/tools/aws_ses.py +66 -0
- agno/tools/baidusearch.py +11 -4
- agno/tools/bitbucket.py +292 -0
- agno/tools/brandfetch.py +213 -0
- agno/tools/bravesearch.py +106 -0
- agno/tools/brightdata.py +367 -0
- agno/tools/browserbase.py +209 -0
- agno/tools/calcom.py +32 -23
- agno/tools/calculator.py +24 -37
- agno/tools/cartesia.py +187 -0
- agno/tools/{clickup_tool.py → clickup.py} +17 -28
- agno/tools/confluence.py +91 -26
- agno/tools/crawl4ai.py +139 -43
- agno/tools/csv_toolkit.py +28 -22
- agno/tools/dalle.py +36 -22
- agno/tools/daytona.py +475 -0
- agno/tools/decorator.py +169 -14
- agno/tools/desi_vocal.py +23 -11
- agno/tools/discord.py +32 -29
- agno/tools/docker.py +716 -0
- agno/tools/duckdb.py +76 -81
- agno/tools/duckduckgo.py +43 -40
- agno/tools/e2b.py +703 -0
- agno/tools/eleven_labs.py +65 -54
- agno/tools/email.py +13 -5
- agno/tools/evm.py +129 -0
- agno/tools/exa.py +324 -42
- agno/tools/fal.py +39 -35
- agno/tools/file.py +196 -30
- agno/tools/file_generation.py +356 -0
- agno/tools/financial_datasets.py +288 -0
- agno/tools/firecrawl.py +108 -33
- agno/tools/function.py +960 -122
- agno/tools/giphy.py +34 -12
- agno/tools/github.py +1294 -97
- agno/tools/gmail.py +922 -0
- agno/tools/google_bigquery.py +117 -0
- agno/tools/google_drive.py +271 -0
- agno/tools/google_maps.py +253 -0
- agno/tools/googlecalendar.py +607 -107
- agno/tools/googlesheets.py +377 -0
- agno/tools/hackernews.py +20 -12
- agno/tools/jina.py +24 -14
- agno/tools/jira.py +48 -19
- agno/tools/knowledge.py +218 -0
- agno/tools/linear.py +82 -43
- agno/tools/linkup.py +58 -0
- agno/tools/local_file_system.py +15 -7
- agno/tools/lumalab.py +41 -26
- agno/tools/mcp/__init__.py +10 -0
- agno/tools/mcp/mcp.py +331 -0
- agno/tools/mcp/multi_mcp.py +347 -0
- agno/tools/mcp/params.py +24 -0
- agno/tools/mcp_toolbox.py +284 -0
- agno/tools/mem0.py +193 -0
- agno/tools/memory.py +419 -0
- agno/tools/mlx_transcribe.py +11 -9
- agno/tools/models/azure_openai.py +190 -0
- agno/tools/models/gemini.py +203 -0
- agno/tools/models/groq.py +158 -0
- agno/tools/models/morph.py +186 -0
- agno/tools/models/nebius.py +124 -0
- agno/tools/models_labs.py +163 -82
- agno/tools/moviepy_video.py +18 -13
- agno/tools/nano_banana.py +151 -0
- agno/tools/neo4j.py +134 -0
- agno/tools/newspaper.py +15 -4
- agno/tools/newspaper4k.py +19 -6
- agno/tools/notion.py +204 -0
- agno/tools/openai.py +181 -17
- agno/tools/openbb.py +27 -20
- agno/tools/opencv.py +321 -0
- agno/tools/openweather.py +233 -0
- agno/tools/oxylabs.py +385 -0
- agno/tools/pandas.py +25 -15
- agno/tools/parallel.py +314 -0
- agno/tools/postgres.py +238 -185
- agno/tools/pubmed.py +125 -13
- agno/tools/python.py +48 -35
- agno/tools/reasoning.py +283 -0
- agno/tools/reddit.py +207 -29
- agno/tools/redshift.py +406 -0
- agno/tools/replicate.py +69 -26
- agno/tools/resend.py +11 -6
- agno/tools/scrapegraph.py +179 -19
- agno/tools/searxng.py +23 -31
- agno/tools/serpapi.py +15 -10
- agno/tools/serper.py +255 -0
- agno/tools/shell.py +23 -12
- agno/tools/shopify.py +1519 -0
- agno/tools/slack.py +56 -14
- agno/tools/sleep.py +8 -6
- agno/tools/spider.py +35 -11
- agno/tools/spotify.py +919 -0
- agno/tools/sql.py +34 -19
- agno/tools/tavily.py +158 -8
- agno/tools/telegram.py +18 -8
- agno/tools/todoist.py +218 -0
- agno/tools/toolkit.py +134 -9
- agno/tools/trafilatura.py +388 -0
- agno/tools/trello.py +25 -28
- agno/tools/twilio.py +18 -9
- agno/tools/user_control_flow.py +78 -0
- agno/tools/valyu.py +228 -0
- agno/tools/visualization.py +467 -0
- agno/tools/webbrowser.py +28 -0
- agno/tools/webex.py +76 -0
- agno/tools/website.py +23 -19
- agno/tools/webtools.py +45 -0
- agno/tools/whatsapp.py +286 -0
- agno/tools/wikipedia.py +28 -19
- agno/tools/workflow.py +285 -0
- agno/tools/{twitter.py → x.py} +142 -46
- agno/tools/yfinance.py +41 -39
- agno/tools/youtube.py +34 -17
- agno/tools/zendesk.py +15 -5
- agno/tools/zep.py +454 -0
- agno/tools/zoom.py +86 -37
- agno/tracing/__init__.py +12 -0
- agno/tracing/exporter.py +157 -0
- agno/tracing/schemas.py +276 -0
- agno/tracing/setup.py +111 -0
- agno/utils/agent.py +938 -0
- agno/utils/audio.py +37 -1
- agno/utils/certs.py +27 -0
- agno/utils/code_execution.py +11 -0
- agno/utils/common.py +103 -20
- agno/utils/cryptography.py +22 -0
- agno/utils/dttm.py +33 -0
- agno/utils/events.py +700 -0
- agno/utils/functions.py +107 -37
- agno/utils/gemini.py +426 -0
- agno/utils/hooks.py +171 -0
- agno/utils/http.py +185 -0
- agno/utils/json_schema.py +159 -37
- agno/utils/knowledge.py +36 -0
- agno/utils/location.py +19 -0
- agno/utils/log.py +221 -8
- agno/utils/mcp.py +214 -0
- agno/utils/media.py +335 -14
- agno/utils/merge_dict.py +22 -1
- agno/utils/message.py +77 -2
- agno/utils/models/ai_foundry.py +50 -0
- agno/utils/models/claude.py +373 -0
- agno/utils/models/cohere.py +94 -0
- agno/utils/models/llama.py +85 -0
- agno/utils/models/mistral.py +100 -0
- agno/utils/models/openai_responses.py +140 -0
- agno/utils/models/schema_utils.py +153 -0
- agno/utils/models/watsonx.py +41 -0
- agno/utils/openai.py +257 -0
- agno/utils/pickle.py +1 -1
- agno/utils/pprint.py +124 -8
- agno/utils/print_response/agent.py +930 -0
- agno/utils/print_response/team.py +1914 -0
- agno/utils/print_response/workflow.py +1668 -0
- agno/utils/prompts.py +111 -0
- agno/utils/reasoning.py +108 -0
- agno/utils/response.py +163 -0
- agno/utils/serialize.py +32 -0
- agno/utils/shell.py +4 -4
- agno/utils/streamlit.py +487 -0
- agno/utils/string.py +204 -51
- agno/utils/team.py +139 -0
- agno/utils/timer.py +9 -2
- agno/utils/tokens.py +657 -0
- agno/utils/tools.py +19 -1
- agno/utils/whatsapp.py +305 -0
- agno/utils/yaml_io.py +3 -3
- agno/vectordb/__init__.py +2 -0
- agno/vectordb/base.py +87 -9
- agno/vectordb/cassandra/__init__.py +5 -1
- agno/vectordb/cassandra/cassandra.py +383 -27
- agno/vectordb/chroma/__init__.py +4 -0
- agno/vectordb/chroma/chromadb.py +748 -83
- agno/vectordb/clickhouse/__init__.py +7 -1
- agno/vectordb/clickhouse/clickhousedb.py +554 -53
- agno/vectordb/couchbase/__init__.py +3 -0
- agno/vectordb/couchbase/couchbase.py +1446 -0
- agno/vectordb/lancedb/__init__.py +5 -0
- agno/vectordb/lancedb/lance_db.py +730 -98
- agno/vectordb/langchaindb/__init__.py +5 -0
- agno/vectordb/langchaindb/langchaindb.py +163 -0
- agno/vectordb/lightrag/__init__.py +5 -0
- agno/vectordb/lightrag/lightrag.py +388 -0
- agno/vectordb/llamaindex/__init__.py +3 -0
- agno/vectordb/llamaindex/llamaindexdb.py +166 -0
- agno/vectordb/milvus/__init__.py +3 -0
- agno/vectordb/milvus/milvus.py +966 -78
- agno/vectordb/mongodb/__init__.py +9 -1
- agno/vectordb/mongodb/mongodb.py +1175 -172
- agno/vectordb/pgvector/__init__.py +8 -0
- agno/vectordb/pgvector/pgvector.py +599 -115
- agno/vectordb/pineconedb/__init__.py +5 -1
- agno/vectordb/pineconedb/pineconedb.py +406 -43
- agno/vectordb/qdrant/__init__.py +4 -0
- agno/vectordb/qdrant/qdrant.py +914 -61
- agno/vectordb/redis/__init__.py +9 -0
- agno/vectordb/redis/redisdb.py +682 -0
- agno/vectordb/singlestore/__init__.py +8 -1
- agno/vectordb/singlestore/singlestore.py +771 -0
- agno/vectordb/surrealdb/__init__.py +3 -0
- agno/vectordb/surrealdb/surrealdb.py +663 -0
- agno/vectordb/upstashdb/__init__.py +5 -0
- agno/vectordb/upstashdb/upstashdb.py +718 -0
- agno/vectordb/weaviate/__init__.py +8 -0
- agno/vectordb/weaviate/index.py +15 -0
- agno/vectordb/weaviate/weaviate.py +1009 -0
- agno/workflow/__init__.py +23 -1
- agno/workflow/agent.py +299 -0
- agno/workflow/condition.py +759 -0
- agno/workflow/loop.py +756 -0
- agno/workflow/parallel.py +853 -0
- agno/workflow/router.py +723 -0
- agno/workflow/step.py +1564 -0
- agno/workflow/steps.py +613 -0
- agno/workflow/types.py +556 -0
- agno/workflow/workflow.py +4327 -514
- agno-2.3.13.dist-info/METADATA +639 -0
- agno-2.3.13.dist-info/RECORD +613 -0
- {agno-0.1.2.dist-info → agno-2.3.13.dist-info}/WHEEL +1 -1
- agno-2.3.13.dist-info/licenses/LICENSE +201 -0
- agno/api/playground.py +0 -91
- agno/api/schemas/playground.py +0 -22
- agno/api/schemas/user.py +0 -22
- agno/api/schemas/workspace.py +0 -46
- agno/api/user.py +0 -160
- agno/api/workspace.py +0 -151
- agno/cli/auth_server.py +0 -118
- agno/cli/config.py +0 -275
- agno/cli/console.py +0 -88
- agno/cli/credentials.py +0 -23
- agno/cli/entrypoint.py +0 -571
- agno/cli/operator.py +0 -355
- agno/cli/settings.py +0 -85
- agno/cli/ws/ws_cli.py +0 -817
- agno/constants.py +0 -13
- agno/document/__init__.py +0 -1
- agno/document/chunking/semantic.py +0 -47
- agno/document/chunking/strategy.py +0 -31
- agno/document/reader/__init__.py +0 -1
- agno/document/reader/arxiv_reader.py +0 -41
- agno/document/reader/base.py +0 -22
- agno/document/reader/csv_reader.py +0 -84
- agno/document/reader/docx_reader.py +0 -46
- agno/document/reader/firecrawl_reader.py +0 -99
- agno/document/reader/json_reader.py +0 -43
- agno/document/reader/pdf_reader.py +0 -219
- agno/document/reader/s3/pdf_reader.py +0 -46
- agno/document/reader/s3/text_reader.py +0 -51
- agno/document/reader/text_reader.py +0 -41
- agno/document/reader/website_reader.py +0 -175
- agno/document/reader/youtube_reader.py +0 -50
- agno/embedder/__init__.py +0 -1
- agno/embedder/azure_openai.py +0 -86
- agno/embedder/cohere.py +0 -72
- agno/embedder/fastembed.py +0 -37
- agno/embedder/google.py +0 -73
- agno/embedder/huggingface.py +0 -54
- agno/embedder/mistral.py +0 -80
- agno/embedder/ollama.py +0 -57
- agno/embedder/openai.py +0 -74
- agno/embedder/sentence_transformer.py +0 -38
- agno/embedder/voyageai.py +0 -64
- agno/eval/perf.py +0 -201
- agno/file/__init__.py +0 -1
- agno/file/file.py +0 -16
- agno/file/local/csv.py +0 -32
- agno/file/local/txt.py +0 -19
- agno/infra/app.py +0 -240
- agno/infra/base.py +0 -144
- agno/infra/context.py +0 -20
- agno/infra/db_app.py +0 -52
- agno/infra/resource.py +0 -205
- agno/infra/resources.py +0 -55
- agno/knowledge/agent.py +0 -230
- agno/knowledge/arxiv.py +0 -22
- agno/knowledge/combined.py +0 -22
- agno/knowledge/csv.py +0 -28
- agno/knowledge/csv_url.py +0 -19
- agno/knowledge/document.py +0 -20
- agno/knowledge/docx.py +0 -30
- agno/knowledge/json.py +0 -28
- agno/knowledge/langchain.py +0 -71
- agno/knowledge/llamaindex.py +0 -66
- agno/knowledge/pdf.py +0 -28
- agno/knowledge/pdf_url.py +0 -26
- agno/knowledge/s3/base.py +0 -60
- agno/knowledge/s3/pdf.py +0 -21
- agno/knowledge/s3/text.py +0 -23
- agno/knowledge/text.py +0 -30
- agno/knowledge/website.py +0 -88
- agno/knowledge/wikipedia.py +0 -31
- agno/knowledge/youtube.py +0 -22
- agno/memory/agent.py +0 -392
- agno/memory/classifier.py +0 -104
- agno/memory/db/__init__.py +0 -1
- agno/memory/db/base.py +0 -42
- agno/memory/db/mongodb.py +0 -189
- agno/memory/db/postgres.py +0 -203
- agno/memory/db/sqlite.py +0 -193
- agno/memory/memory.py +0 -15
- agno/memory/row.py +0 -36
- agno/memory/summarizer.py +0 -192
- agno/memory/summary.py +0 -19
- agno/memory/workflow.py +0 -38
- agno/models/google/gemini_openai.py +0 -26
- agno/models/ollama/hermes.py +0 -221
- agno/models/ollama/tools.py +0 -362
- agno/models/vertexai/gemini.py +0 -595
- agno/playground/__init__.py +0 -3
- agno/playground/async_router.py +0 -421
- agno/playground/deploy.py +0 -249
- agno/playground/operator.py +0 -92
- agno/playground/playground.py +0 -91
- agno/playground/schemas.py +0 -76
- agno/playground/serve.py +0 -55
- agno/playground/sync_router.py +0 -405
- agno/reasoning/agent.py +0 -68
- agno/run/response.py +0 -112
- agno/storage/agent/__init__.py +0 -0
- agno/storage/agent/base.py +0 -38
- agno/storage/agent/dynamodb.py +0 -350
- agno/storage/agent/json.py +0 -92
- agno/storage/agent/mongodb.py +0 -228
- agno/storage/agent/postgres.py +0 -367
- agno/storage/agent/session.py +0 -79
- agno/storage/agent/singlestore.py +0 -303
- agno/storage/agent/sqlite.py +0 -357
- agno/storage/agent/yaml.py +0 -93
- agno/storage/workflow/__init__.py +0 -0
- agno/storage/workflow/base.py +0 -40
- agno/storage/workflow/mongodb.py +0 -233
- agno/storage/workflow/postgres.py +0 -366
- agno/storage/workflow/session.py +0 -60
- agno/storage/workflow/sqlite.py +0 -359
- agno/tools/googlesearch.py +0 -88
- agno/utils/defaults.py +0 -57
- agno/utils/filesystem.py +0 -39
- agno/utils/git.py +0 -52
- agno/utils/json_io.py +0 -30
- agno/utils/load_env.py +0 -19
- agno/utils/py_io.py +0 -19
- agno/utils/pyproject.py +0 -18
- agno/utils/resource_filter.py +0 -31
- agno/vectordb/singlestore/s2vectordb.py +0 -390
- agno/vectordb/singlestore/s2vectordb2.py +0 -355
- agno/workspace/__init__.py +0 -0
- agno/workspace/config.py +0 -325
- agno/workspace/enums.py +0 -6
- agno/workspace/helpers.py +0 -48
- agno/workspace/operator.py +0 -758
- agno/workspace/settings.py +0 -63
- agno-0.1.2.dist-info/LICENSE +0 -375
- agno-0.1.2.dist-info/METADATA +0 -502
- agno-0.1.2.dist-info/RECORD +0 -352
- agno-0.1.2.dist-info/entry_points.txt +0 -3
- /agno/{cli → db/migrations}/__init__.py +0 -0
- /agno/{cli/ws → db/migrations/versions}/__init__.py +0 -0
- /agno/{document/chunking/__init__.py → db/schemas/metrics.py} +0 -0
- /agno/{document/reader/s3 → integrations}/__init__.py +0 -0
- /agno/{file/local → knowledge/chunking}/__init__.py +0 -0
- /agno/{infra → knowledge/remote_content}/__init__.py +0 -0
- /agno/{knowledge/s3 → tools/models}/__init__.py +0 -0
- /agno/{reranker → utils/models}/__init__.py +0 -0
- /agno/{storage → utils/print_response}/__init__.py +0 -0
- {agno-0.1.2.dist-info → agno-2.3.13.dist-info}/top_level.txt +0 -0
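The rename entries above imply import-path migrations for downstream code. As a hedged sketch (the module paths come from the rename lines above; the class names are illustrative assumptions, since the listing only shows file paths):

    # agno 0.1.2 module layout (removed in 2.3.13):
    #   from agno.embedder.openai import OpenAIEmbedder          # class name assumed
    #   from agno.document.chunking.fixed import FixedSizeChunking  # class name assumed

    # agno 2.3.13 module layout, per the {embedder -> knowledge/embedder}
    # and {document -> knowledge}/chunking rename entries above:
    from agno.knowledge.embedder.openai import OpenAIEmbedder
    from agno.knowledge.chunking.fixed import FixedSizeChunking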
agno/eval/accuracy.py
CHANGED
|
@@ -1,14 +1,19 @@
|
|
|
1
1
|
from dataclasses import asdict, dataclass, field
|
|
2
2
|
from os import getenv
|
|
3
|
-
from
|
|
4
|
-
from typing import TYPE_CHECKING, Callable, List, Optional, Union
|
|
3
|
+
from textwrap import dedent
|
|
4
|
+
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Union
|
|
5
5
|
from uuid import uuid4
|
|
6
6
|
|
|
7
7
|
from pydantic import BaseModel, Field
|
|
8
8
|
|
|
9
|
-
from agno.agent import Agent
|
|
9
|
+
from agno.agent import Agent
|
|
10
|
+
from agno.db.base import AsyncBaseDb, BaseDb
|
|
11
|
+
from agno.db.schemas.evals import EvalType
|
|
12
|
+
from agno.eval.utils import async_log_eval, log_eval_run, store_result_in_file
|
|
13
|
+
from agno.exceptions import EvalError
|
|
10
14
|
from agno.models.base import Model
|
|
11
|
-
from agno.
|
|
15
|
+
from agno.team.team import Team
|
|
16
|
+
from agno.utils.log import log_error, logger, set_log_level_to_debug, set_log_level_to_info
|
|
12
17
|
|
|
13
18
|
if TYPE_CHECKING:
|
|
14
19
|
from rich.console import Console
|
|
@@ -21,9 +26,9 @@ class AccuracyAgentResponse(BaseModel):
|
|
|
21
26
|
|
|
22
27
|
@dataclass
|
|
23
28
|
class AccuracyEvaluation:
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
29
|
+
input: str
|
|
30
|
+
output: str
|
|
31
|
+
expected_output: str
|
|
27
32
|
score: int
|
|
28
33
|
reason: str
|
|
29
34
|
|
|
@@ -44,9 +49,9 @@ class AccuracyEvaluation:
|
|
|
44
49
|
title_style="bold sky_blue1",
|
|
45
50
|
title_justify="center",
|
|
46
51
|
)
|
|
47
|
-
results_table.add_row("
|
|
48
|
-
results_table.add_row("
|
|
49
|
-
results_table.add_row("Expected
|
|
52
|
+
results_table.add_row("Input", self.input)
|
|
53
|
+
results_table.add_row("Output", self.output)
|
|
54
|
+
results_table.add_row("Expected Output", self.expected_output)
|
|
50
55
|
results_table.add_row("Accuracy Score", f"{str(self.score)}/10")
|
|
51
56
|
results_table.add_row("Accuracy Reason", Markdown(self.reason))
|
|
52
57
|
console.print(results_table)
|
|
@@ -92,11 +97,18 @@ class AccuracyResult:
|
|
|
92
97
|
title_justify="center",
|
|
93
98
|
)
|
|
94
99
|
summary_table.add_row("Number of Runs", f"{len(self.results)}")
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
+
|
|
101
|
+
if self.avg_score is not None:
|
|
102
|
+
summary_table.add_row("Average Score", f"{self.avg_score:.2f}")
|
|
103
|
+
if self.mean_score is not None:
|
|
104
|
+
summary_table.add_row("Mean Score", f"{self.mean_score:.2f}")
|
|
105
|
+
if self.min_score is not None:
|
|
106
|
+
summary_table.add_row("Minimum Score", f"{self.min_score:.2f}")
|
|
107
|
+
if self.max_score is not None:
|
|
108
|
+
summary_table.add_row("Maximum Score", f"{self.max_score:.2f}")
|
|
109
|
+
if self.std_dev_score is not None:
|
|
110
|
+
summary_table.add_row("Standard Deviation", f"{self.std_dev_score:.2f}")
|
|
111
|
+
|
|
100
112
|
console.print(summary_table)
|
|
101
113
|
|
|
102
114
|
def print_results(self, console: Optional["Console"] = None):
|
|
@@ -116,9 +128,9 @@ class AccuracyResult:
|
|
|
116
128
|
title_justify="center",
|
|
117
129
|
)
|
|
118
130
|
for result in self.results:
|
|
119
|
-
results_table.add_row("
|
|
120
|
-
results_table.add_row("
|
|
121
|
-
results_table.add_row("Expected
|
|
131
|
+
results_table.add_row("Input", result.input)
|
|
132
|
+
results_table.add_row("Output", result.output)
|
|
133
|
+
results_table.add_row("Expected Output", result.expected_output)
|
|
122
134
|
results_table.add_row("Accuracy Score", f"{str(result.score)}/10")
|
|
123
135
|
results_table.add_row("Accuracy Reason", result.reason)
|
|
124
136
|
console.print(results_table)
|
|
@@ -126,61 +138,53 @@ class AccuracyResult:
|
|
|
126
138
|
|
|
127
139
|
@dataclass
|
|
128
140
|
class AccuracyEval:
|
|
129
|
-
"""
|
|
141
|
+
"""Interface to evaluate the accuracy of an Agent or Team, given a prompt and expected answer"""
|
|
142
|
+
|
|
143
|
+
# Input to evaluate
|
|
144
|
+
input: Union[str, Callable]
|
|
145
|
+
# Expected answer to the input
|
|
146
|
+
expected_output: Union[str, Callable]
|
|
147
|
+
# Agent to evaluate
|
|
148
|
+
agent: Optional[Agent] = None
|
|
149
|
+
# Team to evaluate
|
|
150
|
+
team: Optional[Team] = None
|
|
130
151
|
|
|
131
152
|
# Evaluation name
|
|
132
153
|
name: Optional[str] = None
|
|
133
|
-
# Evaluation UUID
|
|
134
|
-
eval_id:
|
|
154
|
+
# Evaluation UUID
|
|
155
|
+
eval_id: str = field(default_factory=lambda: str(uuid4()))
|
|
156
|
+
# Number of iterations to run
|
|
157
|
+
num_iterations: int = 1
|
|
158
|
+
# Result of the evaluation
|
|
159
|
+
result: Optional[AccuracyResult] = None
|
|
135
160
|
|
|
136
|
-
# Model
|
|
161
|
+
# Model for the evaluator agent
|
|
137
162
|
model: Optional[Model] = None
|
|
138
|
-
|
|
139
|
-
# Evaluate an Agent
|
|
140
|
-
agent: Optional[Agent] = None
|
|
141
|
-
# Question to evaluate (can also be provided with the run method)
|
|
142
|
-
question: Optional[Union[str, Callable]] = None
|
|
143
|
-
# Answer to evaluate (can also be provided with the run method)
|
|
144
|
-
answer: Optional[Union[str, Callable]] = None
|
|
145
|
-
# Expected Answer for the question (can also be provided with the run method)
|
|
146
|
-
expected_answer: Optional[Union[str, Callable]] = None
|
|
147
|
-
|
|
163
|
+
# Agent used to evaluate the answer
|
|
148
164
|
evaluator_agent: Optional[Agent] = None
|
|
149
165
|
# Guidelines for the evaluator agent
|
|
150
|
-
|
|
166
|
+
additional_guidelines: Optional[Union[str, List[str]]] = None
|
|
151
167
|
# Additional context to the evaluator agent
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
# Number of iterations to run
|
|
155
|
-
num_iterations: int = 3
|
|
156
|
-
# Result of the evaluation
|
|
157
|
-
result: Optional[AccuracyResult] = None
|
|
168
|
+
additional_context: Optional[str] = None
|
|
158
169
|
|
|
159
170
|
# Print summary of results
|
|
160
171
|
print_summary: bool = False
|
|
161
172
|
# Print detailed results
|
|
162
173
|
print_results: bool = False
|
|
163
|
-
#
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
def
|
|
176
|
-
|
|
177
|
-
self.debug_mode = True
|
|
178
|
-
set_log_level_to_debug()
|
|
179
|
-
logger.debug("Debug logs enabled")
|
|
180
|
-
else:
|
|
181
|
-
set_log_level_to_info()
|
|
182
|
-
|
|
183
|
-
def get_evaluator_agent(self, question: str, expected_answer: str) -> Agent:
|
|
174
|
+
# If set, results will be saved in the given file path
|
|
175
|
+
file_path_to_save_results: Optional[str] = None
|
|
176
|
+
# Enable debug logs
|
|
177
|
+
debug_mode: bool = getenv("AGNO_DEBUG", "false").lower() == "true"
|
|
178
|
+
# The database to store Evaluation results
|
|
179
|
+
db: Optional[Union[BaseDb, AsyncBaseDb]] = None
|
|
180
|
+
|
|
181
|
+
# Telemetry settings
|
|
182
|
+
# telemetry=True logs minimal telemetry for analytics
|
|
183
|
+
# This helps us improve our Evals and provide better support
|
|
184
|
+
telemetry: bool = True
|
|
185
|
+
|
|
186
|
+
def get_evaluator_agent(self) -> Agent:
|
|
187
|
+
"""Return the evaluator agent. If not provided, build it based on the evaluator fields and default instructions."""
|
|
184
188
|
if self.evaluator_agent is not None:
|
|
185
189
|
return self.evaluator_agent
|
|
186
190
|
|
|
@@ -189,269 +193,655 @@ class AccuracyEval:
|
|
|
189
193
|
try:
|
|
190
194
|
from agno.models.openai import OpenAIChat
|
|
191
195
|
|
|
192
|
-
model = OpenAIChat(id="
|
|
196
|
+
model = OpenAIChat(id="o4-mini")
|
|
193
197
|
except (ModuleNotFoundError, ImportError) as e:
|
|
194
198
|
logger.exception(e)
|
|
195
|
-
|
|
199
|
+
raise EvalError(
|
|
196
200
|
"Agno uses `openai` as the default model provider. Please run `pip install openai` to use the default evaluator."
|
|
197
201
|
)
|
|
198
|
-
exit(1)
|
|
199
202
|
|
|
200
|
-
|
|
201
|
-
if self.
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
203
|
+
additional_guidelines = ""
|
|
204
|
+
if self.additional_guidelines is not None:
|
|
205
|
+
additional_guidelines = "\n## Additional Guidelines\n"
|
|
206
|
+
if isinstance(self.additional_guidelines, str):
|
|
207
|
+
additional_guidelines += self.additional_guidelines
|
|
208
|
+
else:
|
|
209
|
+
additional_guidelines += "\n- ".join(self.additional_guidelines)
|
|
210
|
+
additional_guidelines += "\n"
|
|
211
|
+
|
|
212
|
+
additional_context = ""
|
|
213
|
+
if self.additional_context is not None and len(self.additional_context) > 0:
|
|
214
|
+
additional_context = "\n## Additional Context\n"
|
|
215
|
+
additional_context += self.additional_context
|
|
216
|
+
additional_context += "\n"
|
|
211
217
|
|
|
212
218
|
return Agent(
|
|
213
|
-
model=
|
|
219
|
+
model=model,
|
|
214
220
|
description=f"""\
|
|
215
|
-
You are an
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
4
|
|
239
|
-
|
|
240
|
-
3-4: Major inaccuracies or missing crucial information
|
|
241
|
-
5-6: Partially correct, but with significant omissions or errors
|
|
221
|
+
You are an expert judge tasked with comparing the quality of an AI Agent’s output to a user-provided expected output. You must assume the expected_output is correct - even if you personally disagree.
|
|
222
|
+
|
|
223
|
+
## Evaluation Inputs
|
|
224
|
+
- agent_input: The original task or query given to the Agent.
|
|
225
|
+
- expected_output: The correct response to the task (provided by the user).
|
|
226
|
+
- NOTE: You must assume the expected_output is correct - even if you personally disagree.
|
|
227
|
+
- agent_output: The response generated by the Agent.
|
|
228
|
+
|
|
229
|
+
## Evaluation Criteria
|
|
230
|
+
- Accuracy: How closely does the agent_output match the expected_output?
|
|
231
|
+
- Completeness: Does the agent_output include all the key elements of the expected_output?
|
|
232
|
+
|
|
233
|
+
## Instructions
|
|
234
|
+
1. Compare the agent_output only to the expected_output, not what you think the expected_output should be.
|
|
235
|
+
2. Do not judge the correctness of the expected_output itself. Your role is only to compare the two outputs, the user provided expected_output is correct.
|
|
236
|
+
3. Follow the additional guidelines if provided.
|
|
237
|
+
4. Provide a detailed analysis including:
|
|
238
|
+
- Specific similarities and differences
|
|
239
|
+
- Important points included or omitted
|
|
240
|
+
- Any inaccuracies, paraphrasing errors, or structural differences
|
|
241
|
+
5. Reference the criteria explicitly in your reasoning.
|
|
242
|
+
6. Assign a score from 1 to 10 (whole numbers only):
|
|
243
|
+
1-2: Completely incorrect or irrelevant.
|
|
244
|
+
3-4: Major inaccuracies or missing key information.
|
|
245
|
+
5-6: Partially correct, but with significant issues.
|
|
242
246
|
7-8: Mostly accurate and complete, with minor issues
|
|
243
|
-
9-10: Highly accurate and complete, matching the expected answer closely
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
+
9-10: Highly accurate and complete, matching the expected answer and given guidelines closely.
|
|
248
|
+
{additional_guidelines}{additional_context}
|
|
249
|
+
Remember: You must only compare the agent_output to the expected_output. The expected_output is correct as it was provided by the user.
|
|
250
|
+
""",
|
|
251
|
+
output_schema=AccuracyAgentResponse,
|
|
247
252
|
structured_outputs=True,
|
|
248
253
|
)
|
|
249
254
|
|
|
250
|
-
def
|
|
251
|
-
"""
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
if
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
logger.error(f"Failed to get question to evaluate: {e}")
|
|
280
|
-
return None
|
|
281
|
-
|
|
282
|
-
def get_answer_to_evaluate(
|
|
283
|
-
self, question: str, answer: Optional[Union[str, Callable]] = None
|
|
284
|
-
) -> Optional[RunResponse]:
|
|
285
|
-
"""Get the answer to evaluate.
|
|
286
|
-
|
|
287
|
-
Priority:
|
|
288
|
-
1. Answer provided with the run method
|
|
289
|
-
2. Answer provided with the eval
|
|
290
|
-
3. Answer from the agent
|
|
291
|
-
"""
|
|
255
|
+
def get_eval_expected_output(self) -> str:
|
|
256
|
+
"""Return the eval expected answer. If it is a callable, call it and return the resulting string"""
|
|
257
|
+
if callable(self.expected_output):
|
|
258
|
+
_output = self.expected_output()
|
|
259
|
+
if isinstance(_output, str):
|
|
260
|
+
return _output
|
|
261
|
+
else:
|
|
262
|
+
raise EvalError(f"The expected output needs to be or return a string, but it returned: {type(_output)}")
|
|
263
|
+
return self.expected_output
|
|
264
|
+
|
|
265
|
+
def get_eval_input(self) -> str:
|
|
266
|
+
"""Return the evaluation input. If it is a callable, call it and return the resulting string"""
|
|
267
|
+
if callable(self.input):
|
|
268
|
+
_input = self.input()
|
|
269
|
+
if isinstance(_input, str):
|
|
270
|
+
return _input
|
|
271
|
+
else:
|
|
272
|
+
raise EvalError(f"The eval input needs to be or return a string, but it returned: {type(_input)}")
|
|
273
|
+
return self.input
|
|
274
|
+
|
|
275
|
+
def evaluate_answer(
|
|
276
|
+
self,
|
|
277
|
+
input: str,
|
|
278
|
+
evaluator_agent: Agent,
|
|
279
|
+
evaluation_input: str,
|
|
280
|
+
evaluator_expected_output: str,
|
|
281
|
+
agent_output: str,
|
|
282
|
+
) -> Optional[AccuracyEvaluation]:
|
|
283
|
+
"""Orchestrate the evaluation process."""
|
|
292
284
|
try:
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
logger.error("Answer is not a string or callable")
|
|
305
|
-
|
|
306
|
-
# Get answer from the eval
|
|
307
|
-
if self.answer is not None:
|
|
308
|
-
if isinstance(self.answer, str):
|
|
309
|
-
return RunResponse(content=self.answer)
|
|
310
|
-
elif callable(self.answer):
|
|
311
|
-
_answer = self.answer()
|
|
312
|
-
if isinstance(_answer, str):
|
|
313
|
-
return RunResponse(content=_answer)
|
|
314
|
-
else:
|
|
315
|
-
logger.error("Answer is not a string")
|
|
316
|
-
else:
|
|
317
|
-
logger.error("Answer is not a string or callable")
|
|
318
|
-
|
|
319
|
-
# Get answer from the agent
|
|
320
|
-
if self.agent is not None and question is not None:
|
|
321
|
-
logger.debug("Getting answer from agent")
|
|
322
|
-
return self.agent.run(question)
|
|
285
|
+
response = evaluator_agent.run(evaluation_input, stream=False)
|
|
286
|
+
accuracy_agent_response = response.content
|
|
287
|
+
if accuracy_agent_response is None or not isinstance(accuracy_agent_response, AccuracyAgentResponse):
|
|
288
|
+
raise EvalError(f"Evaluator Agent returned an invalid response: {accuracy_agent_response}")
|
|
289
|
+
return AccuracyEvaluation(
|
|
290
|
+
input=input,
|
|
291
|
+
output=agent_output,
|
|
292
|
+
expected_output=evaluator_expected_output,
|
|
293
|
+
score=accuracy_agent_response.accuracy_score,
|
|
294
|
+
reason=accuracy_agent_response.accuracy_reason,
|
|
295
|
+
)
|
|
323
296
|
except Exception as e:
|
|
324
|
-
logger.
|
|
325
|
-
|
|
297
|
+
logger.exception(f"Failed to evaluate accuracy: {e}")
|
|
298
|
+
return None
|
|
326
299
|
|
|
327
|
-
def
|
|
328
|
-
|
|
300
|
+
async def aevaluate_answer(
|
|
301
|
+
self,
|
|
302
|
+
input: str,
|
|
303
|
+
evaluator_agent: Agent,
|
|
304
|
+
evaluation_input: str,
|
|
305
|
+
evaluator_expected_output: str,
|
|
306
|
+
agent_output: str,
|
|
307
|
+
) -> Optional[AccuracyEvaluation]:
|
|
308
|
+
"""Orchestrate the evaluation process asynchronously."""
|
|
329
309
|
try:
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
logger.error("Expected Answer is not a string or callable")
|
|
342
|
-
|
|
343
|
-
# Get the expected_answer from the eval
|
|
344
|
-
if self.expected_answer is not None:
|
|
345
|
-
if isinstance(self.expected_answer, str):
|
|
346
|
-
return self.expected_answer
|
|
347
|
-
elif callable(self.expected_answer):
|
|
348
|
-
_expected_answer = self.expected_answer()
|
|
349
|
-
if isinstance(_expected_answer, str):
|
|
350
|
-
return _expected_answer
|
|
351
|
-
else:
|
|
352
|
-
logger.error("Expected Answer is not a string")
|
|
353
|
-
else:
|
|
354
|
-
logger.error("Expected Answer is not a string or callable")
|
|
310
|
+
response = await evaluator_agent.arun(evaluation_input, stream=False)
|
|
311
|
+
accuracy_agent_response = response.content
|
|
312
|
+
if accuracy_agent_response is None or not isinstance(accuracy_agent_response, AccuracyAgentResponse):
|
|
313
|
+
raise EvalError(f"Evaluator Agent returned an invalid response: {accuracy_agent_response}")
|
|
314
|
+
return AccuracyEvaluation(
|
|
315
|
+
input=input,
|
|
316
|
+
output=agent_output,
|
|
317
|
+
expected_output=evaluator_expected_output,
|
|
318
|
+
score=accuracy_agent_response.accuracy_score,
|
|
319
|
+
reason=accuracy_agent_response.accuracy_reason,
|
|
320
|
+
)
|
|
355
321
|
except Exception as e:
|
|
356
|
-
logger.
|
|
357
|
-
|
|
322
|
+
logger.exception(f"Failed to evaluate accuracy asynchronously: {e}")
|
|
323
|
+
return None
|
|
358
324
|
|
|
359
325
|
def run(
|
|
360
326
|
self,
|
|
361
327
|
*,
|
|
362
|
-
question: Optional[Union[str, Callable]] = None,
|
|
363
|
-
expected_answer: Optional[Union[str, Callable]] = None,
|
|
364
|
-
answer: Optional[Union[str, Callable]] = None,
|
|
365
328
|
print_summary: bool = True,
|
|
366
329
|
print_results: bool = True,
|
|
367
330
|
) -> Optional[AccuracyResult]:
|
|
331
|
+
if isinstance(self.db, AsyncBaseDb):
|
|
332
|
+
raise ValueError("run() is not supported with an async DB. Please use arun() instead.")
|
|
333
|
+
|
|
334
|
+
if self.agent is None and self.team is None:
|
|
335
|
+
logger.error("You need to provide one of 'agent' or 'team' to run the evaluation.")
|
|
336
|
+
return None
|
|
337
|
+
|
|
338
|
+
if self.agent is not None and self.team is not None:
|
|
339
|
+
logger.error("Provide only one of 'agent' or 'team' to run the evaluation.")
|
|
340
|
+
return None
|
|
341
|
+
|
|
368
342
|
from rich.console import Console
|
|
369
343
|
from rich.live import Live
|
|
370
344
|
from rich.status import Status
|
|
371
345
|
|
|
372
|
-
self.
|
|
373
|
-
|
|
346
|
+
set_log_level_to_debug() if self.debug_mode else set_log_level_to_info()
|
|
347
|
+
|
|
374
348
|
self.result = AccuracyResult()
|
|
375
|
-
self.print_results = print_results
|
|
376
|
-
self.print_summary = print_summary
|
|
377
349
|
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
350
|
+
logger.debug(f"************ Evaluation Start: {self.eval_id} ************")
|
|
351
|
+
|
|
352
|
+
# Add a spinner while running the evaluations
|
|
353
|
+
console = Console()
|
|
354
|
+
with Live(console=console, transient=True) as live_log:
|
|
355
|
+
evaluator_agent = self.get_evaluator_agent()
|
|
356
|
+
eval_input = self.get_eval_input()
|
|
357
|
+
eval_expected_output = self.get_eval_expected_output()
|
|
358
|
+
|
|
359
|
+
for i in range(self.num_iterations):
|
|
360
|
+
status = Status(f"Running evaluation {i + 1}...", spinner="dots", speed=1.0, refresh_per_second=10)
|
|
361
|
+
live_log.update(status)
|
|
362
|
+
|
|
363
|
+
agent_session_id = f"eval_{self.eval_id}_{i + 1}"
|
|
364
|
+
|
|
365
|
+
if self.agent is not None:
|
|
366
|
+
agent_response = self.agent.run(input=eval_input, session_id=agent_session_id, stream=False)
|
|
367
|
+
output = agent_response.content
|
|
368
|
+
elif self.team is not None:
|
|
369
|
+
team_response = self.team.run(input=eval_input, session_id=agent_session_id, stream=False)
|
|
370
|
+
output = team_response.content
|
|
371
|
+
|
|
372
|
+
if not output:
|
|
373
|
+
logger.error(f"Failed to generate a valid answer on iteration {i + 1}: {output}")
|
|
374
|
+
continue
|
|
375
|
+
|
|
376
|
+
evaluation_input = dedent(f"""\
|
|
377
|
+
<agent_input>
|
|
378
|
+
{eval_input}
|
|
379
|
+
</agent_input>
|
|
380
|
+
|
|
381
|
+
<expected_output>
|
|
382
|
+
{eval_expected_output}
|
|
383
|
+
</expected_output>
|
|
384
|
+
|
|
385
|
+
<agent_output>
|
|
386
|
+
{output}
|
|
387
|
+
</agent_output>\
|
|
388
|
+
""")
|
|
389
|
+
logger.debug(f"Agent output #{i + 1}: {output}")
|
|
390
|
+
result = self.evaluate_answer(
|
|
391
|
+
input=eval_input,
|
|
392
|
+
evaluator_agent=evaluator_agent,
|
|
393
|
+
evaluation_input=evaluation_input,
|
|
394
|
+
evaluator_expected_output=eval_expected_output,
|
|
395
|
+
agent_output=output,
|
|
396
|
+
)
|
|
397
|
+
if result is None:
|
|
398
|
+
logger.error(f"Failed to evaluate accuracy on iteration {i + 1}")
|
|
399
|
+
continue
|
|
400
|
+
|
|
401
|
+
self.result.results.append(result)
|
|
402
|
+
self.result.compute_stats()
|
|
403
|
+
status.update(f"Eval iteration {i + 1} finished")
|
|
404
|
+
|
|
405
|
+
status.stop()
|
|
406
|
+
|
|
407
|
+
# Save result to file if requested
|
|
408
|
+
if self.file_path_to_save_results is not None and self.result is not None:
|
|
409
|
+
store_result_in_file(
|
|
410
|
+
file_path=self.file_path_to_save_results,
|
|
411
|
+
name=self.name,
|
|
412
|
+
eval_id=self.eval_id,
|
|
413
|
+
result=self.result,
|
|
414
|
+
)
|
|
415
|
+
|
|
416
|
+
# Print results if requested
|
|
417
|
+
if self.print_results or print_results:
|
|
418
|
+
self.result.print_results(console)
|
|
419
|
+
if self.print_summary or print_summary:
|
|
420
|
+
self.result.print_summary(console)
|
|
421
|
+
|
|
422
|
+
# Log results to the Agno DB if requested
|
|
423
|
+
if self.agent is not None:
|
|
424
|
+
agent_id = self.agent.id
|
|
425
|
+
team_id = None
|
|
426
|
+
model_id = self.agent.model.id if self.agent.model is not None else None
|
|
427
|
+
model_provider = self.agent.model.provider if self.agent.model is not None else None
|
|
428
|
+
evaluated_component_name = self.agent.name
|
|
429
|
+
elif self.team is not None:
|
|
430
|
+
agent_id = None
|
|
431
|
+
team_id = self.team.id
|
|
432
|
+
model_id = self.team.model.id if self.team.model is not None else None
|
|
433
|
+
model_provider = self.team.model.provider if self.team.model is not None else None
|
|
434
|
+
evaluated_component_name = self.team.name
|
|
435
|
+
|
|
436
|
+
if self.db:
|
|
437
|
+
log_eval_input = {
|
|
438
|
+
"additional_guidelines": self.additional_guidelines,
|
|
439
|
+
"additional_context": self.additional_context,
|
|
440
|
+
"num_iterations": self.num_iterations,
|
|
441
|
+
"expected_output": self.expected_output,
|
|
442
|
+
"input": self.input,
|
|
443
|
+
}
|
|
444
|
+
|
|
445
|
+
log_eval_run(
|
|
446
|
+
db=self.db,
|
|
447
|
+
run_id=self.eval_id, # type: ignore
|
|
448
|
+
run_data=asdict(self.result),
|
|
449
|
+
eval_type=EvalType.ACCURACY,
|
|
450
|
+
agent_id=agent_id,
|
|
451
|
+
team_id=team_id,
|
|
452
|
+
model_id=model_id,
|
|
453
|
+
model_provider=model_provider,
|
|
454
|
+
name=self.name if self.name is not None else None,
|
|
455
|
+
evaluated_component_name=evaluated_component_name,
|
|
456
|
+
eval_input=log_eval_input,
|
|
457
|
+
)
|
|
458
|
+
|
|
459
|
+
if self.telemetry:
|
|
460
|
+
from agno.api.evals import EvalRunCreate, create_eval_run_telemetry
|
|
461
|
+
|
|
462
|
+
create_eval_run_telemetry(
|
|
463
|
+
eval_run=EvalRunCreate(
|
|
464
|
+
run_id=self.eval_id,
|
|
465
|
+
eval_type=EvalType.ACCURACY,
|
|
466
|
+
data=self._get_telemetry_data(),
|
|
467
|
+
),
|
|
468
|
+
)
|
|
469
|
+
|
|
470
|
+
logger.debug(f"*********** Evaluation {self.eval_id} Finished ***********")
|
|
471
|
+
return self.result
|
|
472
|
+
|
|
473
|
+
async def arun(
|
|
474
|
+
self,
|
|
475
|
+
*,
|
|
476
|
+
print_summary: bool = True,
|
|
477
|
+
print_results: bool = True,
|
|
478
|
+
) -> Optional[AccuracyResult]:
|
|
479
|
+
if self.agent is None and self.team is None:
|
|
480
|
+
logger.error("You need to provide one of 'agent' or 'team' to run the evaluation.")
|
|
381
481
|
return None
|
|
382
482
|
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
)
|
|
386
|
-
if expected_answer_to_evaluate is None:
|
|
387
|
-
logger.error("No Expected Answer to evaluate.")
|
|
483
|
+
if self.agent is not None and self.team is not None:
|
|
484
|
+
logger.error("Provide only one of 'agent' or 'team' to run the evaluation.")
|
|
388
485
|
return None
|
|
389
486
|
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
logger.debug("***********************************************************")
|
|
487
|
+
from rich.console import Console
|
|
488
|
+
from rich.live import Live
|
|
489
|
+
from rich.status import Status
|
|
394
490
|
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
)
|
|
491
|
+
set_log_level_to_debug() if self.debug_mode else set_log_level_to_info()
|
|
492
|
+
|
|
493
|
+
self.result = AccuracyResult()
|
|
494
|
+
|
|
495
|
+
logger.debug(f"************ Evaluation Start: {self.eval_id} ************")
|
|
398
496
|
|
|
399
497
|
# Add a spinner while running the evaluations
|
|
400
498
|
console = Console()
|
|
401
499
|
with Live(console=console, transient=True) as live_log:
|
|
500
|
+
evaluator_agent = self.get_evaluator_agent()
|
|
501
|
+
eval_input = self.get_eval_input()
|
|
502
|
+
eval_expected_output = self.get_eval_expected_output()
|
|
503
|
+
|
|
402
504
|
             for i in range(self.num_iterations):
                 status = Status(f"Running evaluation {i + 1}...", spinner="dots", speed=1.0, refresh_per_second=10)
                 live_log.update(status)
 
-
-
+                agent_session_id = f"eval_{self.eval_id}_{i + 1}"
+
+                if self.agent is not None:
+                    agent_response = await self.agent.arun(input=eval_input, session_id=agent_session_id, stream=False)
+                    output = agent_response.content
+                elif self.team is not None:
+                    team_response = await self.team.arun(input=eval_input, session_id=agent_session_id, stream=False)
+                    output = team_response.content
+
+                if not output:
+                    logger.error(f"Failed to generate a valid answer on iteration {i + 1}: {output}")
+                    continue
+
+                evaluation_input = dedent(f"""\
+                    <agent_input>
+                    {eval_input}
+                    </agent_input>
+
+                    <expected_output>
+                    {eval_expected_output}
+                    </expected_output>
+
+                    <agent_output>
+                    {output}
+                    </agent_output>\
+                    """)
+                logger.debug(f"Agent output #{i + 1}: {output}")
+                result = await self.aevaluate_answer(
+                    input=eval_input,
+                    evaluator_agent=evaluator_agent,
+                    evaluation_input=evaluation_input,
+                    evaluator_expected_output=eval_expected_output,
+                    agent_output=output,
                 )
-                if
-                    logger.error("
+                if result is None:
+                    logger.error(f"Failed to evaluate accuracy on iteration {i + 1}")
                     continue
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                    status.update(f"Running evaluation {i + 1}... Done")
-            except Exception as e:
-                logger.exception(f"Failed to evaluate accuracy, run #{i + 1}: {e}")
-                return None
-
-            status.stop()
-
-        # -*- Save result to file if save_result_to_file is set
-        if self.save_result_to_file is not None and self.result is not None:
-            try:
-                import json
+                self.result.results.append(result)
+                self.result.compute_stats()
+                status.update(f"Eval iteration {i + 1} finished")
+
+            status.stop()
+
+        # Save result to file if requested
+        if self.file_path_to_save_results is not None and self.result is not None:
+            store_result_in_file(
+                file_path=self.file_path_to_save_results,
+                name=self.name,
+                eval_id=self.eval_id,
+                result=self.result,
+            )
+
+        # Print results if requested
+        if self.print_results or print_results:
+            self.result.print_results(console)
+        if self.print_summary or print_summary:
+            self.result.print_summary(console)
 
-
-
-
-
-
-
+        if self.agent is not None:
+            agent_id = self.agent.id
+            team_id = None
+            model_id = self.agent.model.id if self.agent.model is not None else None
+            model_provider = self.agent.model.provider if self.agent.model is not None else None
+            evaluated_component_name = self.agent.name
+        elif self.team is not None:
+            agent_id = None
+            team_id = self.team.id
+            model_id = self.team.model.id if self.team.model is not None else None
+            model_provider = self.team.model.provider if self.team.model is not None else None
+            evaluated_component_name = self.team.name
+
+        # Log results to the Agno DB if requested
+        if self.db:
+            log_eval_input = {
+                "additional_guidelines": self.additional_guidelines,
+                "additional_context": self.additional_context,
+                "num_iterations": self.num_iterations,
+                "expected_output": self.expected_output,
+                "input": self.input,
+            }
+            await async_log_eval(
+                db=self.db,
+                run_id=self.eval_id,  # type: ignore
+                run_data=asdict(self.result),
+                eval_type=EvalType.ACCURACY,
+                agent_id=agent_id,
+                model_id=model_id,
+                model_provider=model_provider,
+                name=self.name if self.name is not None else None,
+                evaluated_component_name=evaluated_component_name,
+                team_id=team_id,
+                workflow_id=None,
+                eval_input=log_eval_input,
+            )
+
+        if self.telemetry:
+            from agno.api.evals import EvalRunCreate, async_create_eval_run_telemetry
+
+            await async_create_eval_run_telemetry(
+                eval_run=EvalRunCreate(run_id=self.eval_id, eval_type=EvalType.ACCURACY),
+            )
+
+        logger.debug(f"*********** Evaluation {self.eval_id} Finished ***********")
+        return self.result
 
-
-
-
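For orientation, a minimal driver for the async path added above. This is a hedged sketch, not part of the diff: it assumes the enclosing class is AccuracyEval from agno.eval.accuracy and that an OpenAI model is available; the model id, sample question, and the avg_score field are illustrative assumptions.

    import asyncio

    from agno.agent import Agent
    from agno.eval.accuracy import AccuracyEval
    from agno.models.openai import OpenAIChat

    # Hypothetical setup: any Agent works; the loop above calls agent.arun() once per iteration.
    agent = Agent(model=OpenAIChat(id="gpt-4o"))

    evaluation = AccuracyEval(
        model=OpenAIChat(id="gpt-4o"),  # assumed: model used by the evaluator agent
        agent=agent,
        input="What is 10*5 then to the power of 2?",
        expected_output="2500",
        num_iterations=3,  # each iteration gets its own session_id: eval_<eval_id>_<i+1>
    )

    result = asyncio.run(evaluation.arun(print_results=True))
    if result is not None:
        print(result.avg_score)  # assumed field on AccuracyResult; illustrative

Passing a db (for example one of the backends under agno/db shown earlier in this diff) would additionally persist the run through async_log_eval, as the hunk above shows.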
+    def run_with_output(
+        self,
+        *,
+        output: str,
+        print_summary: bool = True,
+        print_results: bool = True,
+    ) -> Optional[AccuracyResult]:
+        """Run the evaluation logic against the given answer, instead of generating an answer with the Agent"""
+        # Generate unique run_id for this execution (don't modify self.eval_id due to concurrency)
+        run_id = str(uuid4())
+
+        set_log_level_to_debug() if self.debug_mode else set_log_level_to_info()
+
+        self.result = AccuracyResult()
+
+        logger.debug(f"************ Evaluation Start: {run_id} ************")
+
+        evaluator_agent = self.get_evaluator_agent()
+        eval_input = self.get_eval_input()
+        eval_expected_output = self.get_eval_expected_output()
+
+        evaluation_input = dedent(f"""\
+            <agent_input>
+            {eval_input}
+            </agent_input>
 
-
+            <expected_output>
+            {eval_expected_output}
+            </expected_output>
+
+            <agent_output>
+            {output}
+            </agent_output>\
+            """)
+
+        result = self.evaluate_answer(
+            input=eval_input,
+            evaluator_agent=evaluator_agent,
+            evaluation_input=evaluation_input,
+            evaluator_expected_output=eval_expected_output,
+            agent_output=output,
+        )
+
+        if result is not None:
+            self.result.results.append(result)
+            self.result.compute_stats()
+
+        # Print results if requested
+        if self.print_results or print_results:
+            self.result.print_results()
+        if self.print_summary or print_summary:
+            self.result.print_summary()
+
+        # Save result to file if requested
+        if self.file_path_to_save_results is not None:
+            store_result_in_file(
+                file_path=self.file_path_to_save_results,
+                name=self.name,
+                eval_id=self.eval_id,
+                result=self.result,
+            )
+        # Log results to the Agno DB if requested
+        if self.db:
+            if isinstance(self.db, AsyncBaseDb):
+                log_error("You are using an async DB in a non-async method. The evaluation won't be stored in the DB.")
+
+            else:
+                if self.agent is not None:
+                    agent_id = self.agent.id
+                    team_id = None
+                    model_id = self.agent.model.id if self.agent.model is not None else None
+                    model_provider = self.agent.model.provider if self.agent.model is not None else None
+                    evaluated_component_name = self.agent.name
+                elif self.team is not None:
+                    agent_id = None
+                    team_id = self.team.id
+                    model_id = self.team.model.id if self.team.model is not None else None
+                    model_provider = self.team.model.provider if self.team.model is not None else None
+                    evaluated_component_name = self.team.name
+                else:
+                    agent_id = None
+                    team_id = None
+                    model_id = None
+                    model_provider = None
+                    evaluated_component_name = None
+
+                log_eval_input = {
+                    "additional_guidelines": self.additional_guidelines,
+                    "additional_context": self.additional_context,
+                    "num_iterations": self.num_iterations,
+                    "expected_output": self.expected_output,
+                    "input": self.input,
+                }
+
+                log_eval_run(
+                    db=self.db,
+                    run_id=self.eval_id,  # type: ignore
+                    run_data=asdict(self.result),
+                    eval_type=EvalType.ACCURACY,
+                    name=self.name if self.name is not None else None,
+                    agent_id=agent_id,
+                    team_id=team_id,
+                    model_id=model_id,
+                    model_provider=model_provider,
+                    evaluated_component_name=evaluated_component_name,
+                    workflow_id=None,
+                    eval_input=log_eval_input,
+                )
+
+        if self.telemetry:
+            from agno.api.evals import EvalRunCreate, create_eval_run_telemetry
+
+            create_eval_run_telemetry(
+                eval_run=EvalRunCreate(
+                    run_id=self.eval_id,
+                    eval_type=EvalType.ACCURACY,
+                    data=self._get_telemetry_data(),
+                ),
+            )
+
+        logger.debug(f"*********** Evaluation End: {run_id} ***********")
+        return self.result
+
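Since run_with_output skips generation entirely, it can score answers produced elsewhere (another framework, a cached transcript, a human draft). A short hedged sketch, with the same caveats as above about names outside this diff being assumptions:

    from agno.eval.accuracy import AccuracyEval
    from agno.models.openai import OpenAIChat

    evaluation = AccuracyEval(
        model=OpenAIChat(id="gpt-4o"),  # assumed: model for the evaluator agent
        input="What is 10*5 then to the power of 2?",
        expected_output="2500",
    )

    # No Agent/Team call happens here; the given output is scored directly by the evaluator.
    result = evaluation.run_with_output(output="The result is 2500.", print_results=True)

Note that, per the hunk above, the synchronous variant refuses to persist results when given an AsyncBaseDb; the async variant that follows handles that case.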
+    async def arun_with_output(
+        self,
+        *,
+        output: str,
+        print_summary: bool = True,
+        print_results: bool = True,
+    ) -> Optional[AccuracyResult]:
+        """Run the evaluation logic against the given answer, instead of generating an answer with the Agent"""
+        # Generate unique run_id for this execution (don't modify self.eval_id due to concurrency)
+        run_id = str(uuid4())
+
+        set_log_level_to_debug() if self.debug_mode else set_log_level_to_info()
+
+        self.result = AccuracyResult()
+
+        logger.debug(f"************ Evaluation Start: {run_id} ************")
+
+        evaluator_agent = self.get_evaluator_agent()
+        eval_input = self.get_eval_input()
+        eval_expected_output = self.get_eval_expected_output()
+
+        evaluation_input = dedent(f"""\
+            <agent_input>
+            {eval_input}
+            </agent_input>
+
+            <expected_output>
+            {eval_expected_output}
+            </expected_output>
+
+            <agent_output>
+            {output}
+            </agent_output>\
+            """)
+
+        result = await self.aevaluate_answer(
+            input=eval_input,
+            evaluator_agent=evaluator_agent,
+            evaluation_input=evaluation_input,
+            evaluator_expected_output=eval_expected_output,
+            agent_output=output,
+        )
+
+        if result is not None:
+            self.result.results.append(result)
+            self.result.compute_stats()
+
+        # Print results if requested
+        if self.print_results or print_results:
+            self.result.print_results()
+        if self.print_summary or print_summary:
+            self.result.print_summary()
+
+        # Save result to file if requested
+        if self.file_path_to_save_results is not None:
+            store_result_in_file(
+                file_path=self.file_path_to_save_results,
+                name=self.name,
+                eval_id=self.eval_id,
+                result=self.result,
+            )
+        # Log results to the Agno DB if requested
+        if self.db:
+            if self.agent is not None:
+                agent_id = self.agent.id
+                team_id = None
+                model_id = self.agent.model.id if self.agent.model is not None else None
+                model_provider = self.agent.model.provider if self.agent.model is not None else None
+                evaluated_component_name = self.agent.name
+            elif self.team is not None:
+                agent_id = None
+                team_id = self.team.id
+                model_id = self.team.model.id if self.team.model is not None else None
+                model_provider = self.team.model.provider if self.team.model is not None else None
+                evaluated_component_name = self.team.name
+
+            log_eval_input = {
+                "additional_guidelines": self.additional_guidelines,
+                "additional_context": self.additional_context,
+                "num_iterations": self.num_iterations,
+                "expected_output": self.expected_output,
+                "input": self.input,
+            }
+
+            await async_log_eval(
+                db=self.db,
+                run_id=self.eval_id,  # type: ignore
+                run_data=asdict(self.result),
+                eval_type=EvalType.ACCURACY,
+                name=self.name if self.name is not None else None,
+                agent_id=agent_id,
+                team_id=team_id,
+                model_id=model_id,
+                model_provider=model_provider,
+                evaluated_component_name=evaluated_component_name,
+                workflow_id=None,
+                eval_input=log_eval_input,
+            )
+
+        logger.debug(f"*********** Evaluation End: {run_id} ***********")
         return self.result
+
+    def _get_telemetry_data(self) -> Dict[str, Any]:
+        """Get the telemetry data for the evaluation"""
+        return {
+            "agent_id": self.agent.id if self.agent else None,
+            "team_id": self.team.id if self.team else None,
+            "model_id": self.agent.model.id if self.agent and self.agent.model else None,
+            "model_provider": self.agent.model.provider if self.agent and self.agent.model else None,
+            "num_iterations": self.num_iterations,
+        }
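_get_telemetry_data resolves model_id and model_provider from self.agent only, so a team-backed eval reports None for both even when the team has a model. For an agent-backed eval the payload has the following shape (a sketch; all values illustrative):

    # Illustrative return value of _get_telemetry_data() for an agent-backed eval:
    {
        "agent_id": "demo-agent",    # self.agent.id
        "team_id": None,             # no team was set
        "model_id": "gpt-4o",        # self.agent.model.id
        "model_provider": "OpenAI",  # self.agent.model.provider
        "num_iterations": 3,
    }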