PyPI - agno - Versions diffs - 0.1.2__py3-none-any.whl → 2.3.13__py3-none-any.whl - Mend

agno 0.1.2 (py3-none-any.whl) → 2.3.13 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (723)

agno/__init__.py +8 -0
agno/agent/__init__.py +44 -5
agno/agent/agent.py +10531 -2975
agno/api/agent.py +14 -53
agno/api/api.py +7 -46
agno/api/evals.py +22 -0
agno/api/os.py +17 -0
agno/api/routes.py +6 -25
agno/api/schemas/__init__.py +9 -0
agno/api/schemas/agent.py +6 -9
agno/api/schemas/evals.py +16 -0
agno/api/schemas/os.py +14 -0
agno/api/schemas/team.py +10 -10
agno/api/schemas/utils.py +21 -0
agno/api/schemas/workflows.py +16 -0
agno/api/settings.py +53 -0
agno/api/team.py +22 -26
agno/api/workflow.py +28 -0
agno/cloud/aws/base.py +214 -0
agno/cloud/aws/s3/__init__.py +2 -0
agno/cloud/aws/s3/api_client.py +43 -0
agno/cloud/aws/s3/bucket.py +195 -0
agno/cloud/aws/s3/object.py +57 -0
agno/compression/__init__.py +3 -0
agno/compression/manager.py +247 -0
agno/culture/__init__.py +3 -0
agno/culture/manager.py +956 -0
agno/db/__init__.py +24 -0
agno/db/async_postgres/__init__.py +3 -0
agno/db/base.py +946 -0
agno/db/dynamo/__init__.py +3 -0
agno/db/dynamo/dynamo.py +2781 -0
agno/db/dynamo/schemas.py +442 -0
agno/db/dynamo/utils.py +743 -0
agno/db/firestore/__init__.py +3 -0
agno/db/firestore/firestore.py +2379 -0
agno/db/firestore/schemas.py +181 -0
agno/db/firestore/utils.py +376 -0
agno/db/gcs_json/__init__.py +3 -0
agno/db/gcs_json/gcs_json_db.py +1791 -0
agno/db/gcs_json/utils.py +228 -0
agno/db/in_memory/__init__.py +3 -0
agno/db/in_memory/in_memory_db.py +1312 -0
agno/db/in_memory/utils.py +230 -0
agno/db/json/__init__.py +3 -0
agno/db/json/json_db.py +1777 -0
agno/db/json/utils.py +230 -0
agno/db/migrations/manager.py +199 -0
agno/db/migrations/v1_to_v2.py +635 -0
agno/db/migrations/versions/v2_3_0.py +938 -0
agno/db/mongo/__init__.py +17 -0
agno/db/mongo/async_mongo.py +2760 -0
agno/db/mongo/mongo.py +2597 -0
agno/db/mongo/schemas.py +119 -0
agno/db/mongo/utils.py +276 -0
agno/db/mysql/__init__.py +4 -0
agno/db/mysql/async_mysql.py +2912 -0
agno/db/mysql/mysql.py +2923 -0
agno/db/mysql/schemas.py +186 -0
agno/db/mysql/utils.py +488 -0
agno/db/postgres/__init__.py +4 -0
agno/db/postgres/async_postgres.py +2579 -0
agno/db/postgres/postgres.py +2870 -0
agno/db/postgres/schemas.py +187 -0
agno/db/postgres/utils.py +442 -0
agno/db/redis/__init__.py +3 -0
agno/db/redis/redis.py +2141 -0
agno/db/redis/schemas.py +159 -0
agno/db/redis/utils.py +346 -0
agno/db/schemas/__init__.py +4 -0
agno/db/schemas/culture.py +120 -0
agno/db/schemas/evals.py +34 -0
agno/db/schemas/knowledge.py +40 -0
agno/db/schemas/memory.py +61 -0
agno/db/singlestore/__init__.py +3 -0
agno/db/singlestore/schemas.py +179 -0
agno/db/singlestore/singlestore.py +2877 -0
agno/db/singlestore/utils.py +384 -0
agno/db/sqlite/__init__.py +4 -0
agno/db/sqlite/async_sqlite.py +2911 -0
agno/db/sqlite/schemas.py +181 -0
agno/db/sqlite/sqlite.py +2908 -0
agno/db/sqlite/utils.py +429 -0
agno/db/surrealdb/__init__.py +3 -0
agno/db/surrealdb/metrics.py +292 -0
agno/db/surrealdb/models.py +334 -0
agno/db/surrealdb/queries.py +71 -0
agno/db/surrealdb/surrealdb.py +1908 -0
agno/db/surrealdb/utils.py +147 -0
agno/db/utils.py +118 -0
agno/eval/__init__.py +24 -0
agno/eval/accuracy.py +666 -276
agno/eval/agent_as_judge.py +861 -0
agno/eval/base.py +29 -0
agno/eval/performance.py +779 -0
agno/eval/reliability.py +241 -62
agno/eval/utils.py +120 -0
agno/exceptions.py +143 -1
agno/filters.py +354 -0
agno/guardrails/__init__.py +6 -0
agno/guardrails/base.py +19 -0
agno/guardrails/openai.py +144 -0
agno/guardrails/pii.py +94 -0
agno/guardrails/prompt_injection.py +52 -0
agno/hooks/__init__.py +3 -0
agno/hooks/decorator.py +164 -0
agno/integrations/discord/__init__.py +3 -0
agno/integrations/discord/client.py +203 -0
agno/knowledge/__init__.py +5 -1
agno/{document → knowledge}/chunking/agentic.py +22 -14
agno/{document → knowledge}/chunking/document.py +2 -2
agno/{document → knowledge}/chunking/fixed.py +7 -6
agno/knowledge/chunking/markdown.py +151 -0
agno/{document → knowledge}/chunking/recursive.py +15 -3
agno/knowledge/chunking/row.py +39 -0
agno/knowledge/chunking/semantic.py +91 -0
agno/knowledge/chunking/strategy.py +165 -0
agno/knowledge/content.py +74 -0
agno/knowledge/document/__init__.py +5 -0
agno/{document → knowledge/document}/base.py +12 -2
agno/knowledge/embedder/__init__.py +5 -0
agno/knowledge/embedder/aws_bedrock.py +343 -0
agno/knowledge/embedder/azure_openai.py +210 -0
agno/{embedder → knowledge/embedder}/base.py +8 -0
agno/knowledge/embedder/cohere.py +323 -0
agno/knowledge/embedder/fastembed.py +62 -0
agno/{embedder → knowledge/embedder}/fireworks.py +1 -1
agno/knowledge/embedder/google.py +258 -0
agno/knowledge/embedder/huggingface.py +94 -0
agno/knowledge/embedder/jina.py +182 -0
agno/knowledge/embedder/langdb.py +22 -0
agno/knowledge/embedder/mistral.py +206 -0
agno/knowledge/embedder/nebius.py +13 -0
agno/knowledge/embedder/ollama.py +154 -0
agno/knowledge/embedder/openai.py +195 -0
agno/knowledge/embedder/sentence_transformer.py +63 -0
agno/{embedder → knowledge/embedder}/together.py +1 -1
agno/knowledge/embedder/vllm.py +262 -0
agno/knowledge/embedder/voyageai.py +165 -0
agno/knowledge/knowledge.py +3006 -0
agno/knowledge/reader/__init__.py +7 -0
agno/knowledge/reader/arxiv_reader.py +81 -0
agno/knowledge/reader/base.py +95 -0
agno/knowledge/reader/csv_reader.py +164 -0
agno/knowledge/reader/docx_reader.py +82 -0
agno/knowledge/reader/field_labeled_csv_reader.py +290 -0
agno/knowledge/reader/firecrawl_reader.py +201 -0
agno/knowledge/reader/json_reader.py +88 -0
agno/knowledge/reader/markdown_reader.py +137 -0
agno/knowledge/reader/pdf_reader.py +431 -0
agno/knowledge/reader/pptx_reader.py +101 -0
agno/knowledge/reader/reader_factory.py +313 -0
agno/knowledge/reader/s3_reader.py +89 -0
agno/knowledge/reader/tavily_reader.py +193 -0
agno/knowledge/reader/text_reader.py +127 -0
agno/knowledge/reader/web_search_reader.py +325 -0
agno/knowledge/reader/website_reader.py +455 -0
agno/knowledge/reader/wikipedia_reader.py +91 -0
agno/knowledge/reader/youtube_reader.py +78 -0
agno/knowledge/remote_content/remote_content.py +88 -0
agno/knowledge/reranker/__init__.py +3 -0
agno/{reranker → knowledge/reranker}/base.py +1 -1
agno/{reranker → knowledge/reranker}/cohere.py +2 -2
agno/knowledge/reranker/infinity.py +195 -0
agno/knowledge/reranker/sentence_transformer.py +54 -0
agno/knowledge/types.py +39 -0
agno/knowledge/utils.py +234 -0
agno/media.py +439 -95
agno/memory/__init__.py +16 -3
agno/memory/manager.py +1474 -123
agno/memory/strategies/__init__.py +15 -0
agno/memory/strategies/base.py +66 -0
agno/memory/strategies/summarize.py +196 -0
agno/memory/strategies/types.py +37 -0
agno/models/aimlapi/__init__.py +5 -0
agno/models/aimlapi/aimlapi.py +62 -0
agno/models/anthropic/__init__.py +4 -0
agno/models/anthropic/claude.py +960 -496
agno/models/aws/__init__.py +15 -0
agno/models/aws/bedrock.py +686 -451
agno/models/aws/claude.py +190 -183
agno/models/azure/__init__.py +18 -1
agno/models/azure/ai_foundry.py +489 -0
agno/models/azure/openai_chat.py +89 -40
agno/models/base.py +2477 -550
agno/models/cerebras/__init__.py +12 -0
agno/models/cerebras/cerebras.py +565 -0
agno/models/cerebras/cerebras_openai.py +131 -0
agno/models/cohere/__init__.py +4 -0
agno/models/cohere/chat.py +306 -492
agno/models/cometapi/__init__.py +5 -0
agno/models/cometapi/cometapi.py +74 -0
agno/models/dashscope/__init__.py +5 -0
agno/models/dashscope/dashscope.py +90 -0
agno/models/deepinfra/__init__.py +5 -0
agno/models/deepinfra/deepinfra.py +45 -0
agno/models/deepseek/__init__.py +4 -0
agno/models/deepseek/deepseek.py +110 -9
agno/models/fireworks/__init__.py +4 -0
agno/models/fireworks/fireworks.py +19 -22
agno/models/google/__init__.py +3 -7
agno/models/google/gemini.py +1717 -662
agno/models/google/utils.py +22 -0
agno/models/groq/__init__.py +4 -0
agno/models/groq/groq.py +391 -666
agno/models/huggingface/__init__.py +4 -0
agno/models/huggingface/huggingface.py +266 -538
agno/models/ibm/__init__.py +5 -0
agno/models/ibm/watsonx.py +432 -0
agno/models/internlm/__init__.py +3 -0
agno/models/internlm/internlm.py +20 -3
agno/models/langdb/__init__.py +1 -0
agno/models/langdb/langdb.py +60 -0
agno/models/litellm/__init__.py +14 -0
agno/models/litellm/chat.py +503 -0
agno/models/litellm/litellm_openai.py +42 -0
agno/models/llama_cpp/__init__.py +5 -0
agno/models/llama_cpp/llama_cpp.py +22 -0
agno/models/lmstudio/__init__.py +5 -0
agno/models/lmstudio/lmstudio.py +25 -0
agno/models/message.py +361 -39
agno/models/meta/__init__.py +12 -0
agno/models/meta/llama.py +502 -0
agno/models/meta/llama_openai.py +79 -0
agno/models/metrics.py +120 -0
agno/models/mistral/__init__.py +4 -0
agno/models/mistral/mistral.py +293 -393
agno/models/nebius/__init__.py +3 -0
agno/models/nebius/nebius.py +53 -0
agno/models/nexus/__init__.py +3 -0
agno/models/nexus/nexus.py +22 -0
agno/models/nvidia/__init__.py +4 -0
agno/models/nvidia/nvidia.py +22 -3
agno/models/ollama/__init__.py +4 -2
agno/models/ollama/chat.py +257 -492
agno/models/openai/__init__.py +7 -0
agno/models/openai/chat.py +725 -770
agno/models/openai/like.py +16 -2
agno/models/openai/responses.py +1121 -0
agno/models/openrouter/__init__.py +4 -0
agno/models/openrouter/openrouter.py +62 -5
agno/models/perplexity/__init__.py +5 -0
agno/models/perplexity/perplexity.py +203 -0
agno/models/portkey/__init__.py +3 -0
agno/models/portkey/portkey.py +82 -0
agno/models/requesty/__init__.py +5 -0
agno/models/requesty/requesty.py +69 -0
agno/models/response.py +177 -7
agno/models/sambanova/__init__.py +4 -0
agno/models/sambanova/sambanova.py +23 -4
agno/models/siliconflow/__init__.py +5 -0
agno/models/siliconflow/siliconflow.py +42 -0
agno/models/together/__init__.py +4 -0
agno/models/together/together.py +21 -164
agno/models/utils.py +266 -0
agno/models/vercel/__init__.py +3 -0
agno/models/vercel/v0.py +43 -0
agno/models/vertexai/__init__.py +0 -1
agno/models/vertexai/claude.py +190 -0
agno/models/vllm/__init__.py +3 -0
agno/models/vllm/vllm.py +83 -0
agno/models/xai/__init__.py +2 -0
agno/models/xai/xai.py +111 -7
agno/os/__init__.py +3 -0
agno/os/app.py +1027 -0
agno/os/auth.py +244 -0
agno/os/config.py +126 -0
agno/os/interfaces/__init__.py +1 -0
agno/os/interfaces/a2a/__init__.py +3 -0
agno/os/interfaces/a2a/a2a.py +42 -0
agno/os/interfaces/a2a/router.py +249 -0
agno/os/interfaces/a2a/utils.py +924 -0
agno/os/interfaces/agui/__init__.py +3 -0
agno/os/interfaces/agui/agui.py +47 -0
agno/os/interfaces/agui/router.py +147 -0
agno/os/interfaces/agui/utils.py +574 -0
agno/os/interfaces/base.py +25 -0
agno/os/interfaces/slack/__init__.py +3 -0
agno/os/interfaces/slack/router.py +148 -0
agno/os/interfaces/slack/security.py +30 -0
agno/os/interfaces/slack/slack.py +47 -0
agno/os/interfaces/whatsapp/__init__.py +3 -0
agno/os/interfaces/whatsapp/router.py +210 -0
agno/os/interfaces/whatsapp/security.py +55 -0
agno/os/interfaces/whatsapp/whatsapp.py +36 -0
agno/os/mcp.py +293 -0
agno/os/middleware/__init__.py +9 -0
agno/os/middleware/jwt.py +797 -0
agno/os/router.py +258 -0
agno/os/routers/__init__.py +3 -0
agno/os/routers/agents/__init__.py +3 -0
agno/os/routers/agents/router.py +599 -0
agno/os/routers/agents/schema.py +261 -0
agno/os/routers/evals/__init__.py +3 -0
agno/os/routers/evals/evals.py +450 -0
agno/os/routers/evals/schemas.py +174 -0
agno/os/routers/evals/utils.py +231 -0
agno/os/routers/health.py +31 -0
agno/os/routers/home.py +52 -0
agno/os/routers/knowledge/__init__.py +3 -0
agno/os/routers/knowledge/knowledge.py +1008 -0
agno/os/routers/knowledge/schemas.py +178 -0
agno/os/routers/memory/__init__.py +3 -0
agno/os/routers/memory/memory.py +661 -0
agno/os/routers/memory/schemas.py +88 -0
agno/os/routers/metrics/__init__.py +3 -0
agno/os/routers/metrics/metrics.py +190 -0
agno/os/routers/metrics/schemas.py +47 -0
agno/os/routers/session/__init__.py +3 -0
agno/os/routers/session/session.py +997 -0
agno/os/routers/teams/__init__.py +3 -0
agno/os/routers/teams/router.py +512 -0
agno/os/routers/teams/schema.py +257 -0
agno/os/routers/traces/__init__.py +3 -0
agno/os/routers/traces/schemas.py +414 -0
agno/os/routers/traces/traces.py +499 -0
agno/os/routers/workflows/__init__.py +3 -0
agno/os/routers/workflows/router.py +624 -0
agno/os/routers/workflows/schema.py +75 -0
agno/os/schema.py +534 -0
agno/os/scopes.py +469 -0
agno/{playground → os}/settings.py +7 -15
agno/os/utils.py +973 -0
agno/reasoning/anthropic.py +80 -0
agno/reasoning/azure_ai_foundry.py +67 -0
agno/reasoning/deepseek.py +63 -0
agno/reasoning/default.py +97 -0
agno/reasoning/gemini.py +73 -0
agno/reasoning/groq.py +71 -0
agno/reasoning/helpers.py +24 -1
agno/reasoning/ollama.py +67 -0
agno/reasoning/openai.py +86 -0
agno/reasoning/step.py +2 -1
agno/reasoning/vertexai.py +76 -0
agno/run/__init__.py +6 -0
agno/run/agent.py +822 -0
agno/run/base.py +247 -0
agno/run/cancel.py +81 -0
agno/run/requirement.py +181 -0
agno/run/team.py +767 -0
agno/run/workflow.py +708 -0
agno/session/__init__.py +10 -0
agno/session/agent.py +260 -0
agno/session/summary.py +265 -0
agno/session/team.py +342 -0
agno/session/workflow.py +501 -0
agno/table.py +10 -0
agno/team/__init__.py +37 -0
agno/team/team.py +9536 -0
agno/tools/__init__.py +7 -0
agno/tools/agentql.py +120 -0
agno/tools/airflow.py +22 -12
agno/tools/api.py +122 -0
agno/tools/apify.py +276 -83
agno/tools/{arxiv_toolkit.py → arxiv.py} +20 -12
agno/tools/aws_lambda.py +28 -7
agno/tools/aws_ses.py +66 -0
agno/tools/baidusearch.py +11 -4
agno/tools/bitbucket.py +292 -0
agno/tools/brandfetch.py +213 -0
agno/tools/bravesearch.py +106 -0
agno/tools/brightdata.py +367 -0
agno/tools/browserbase.py +209 -0
agno/tools/calcom.py +32 -23
agno/tools/calculator.py +24 -37
agno/tools/cartesia.py +187 -0
agno/tools/{clickup_tool.py → clickup.py} +17 -28
agno/tools/confluence.py +91 -26
agno/tools/crawl4ai.py +139 -43
agno/tools/csv_toolkit.py +28 -22
agno/tools/dalle.py +36 -22
agno/tools/daytona.py +475 -0
agno/tools/decorator.py +169 -14
agno/tools/desi_vocal.py +23 -11
agno/tools/discord.py +32 -29
agno/tools/docker.py +716 -0
agno/tools/duckdb.py +76 -81
agno/tools/duckduckgo.py +43 -40
agno/tools/e2b.py +703 -0
agno/tools/eleven_labs.py +65 -54
agno/tools/email.py +13 -5
agno/tools/evm.py +129 -0
agno/tools/exa.py +324 -42
agno/tools/fal.py +39 -35
agno/tools/file.py +196 -30
agno/tools/file_generation.py +356 -0
agno/tools/financial_datasets.py +288 -0
agno/tools/firecrawl.py +108 -33
agno/tools/function.py +960 -122
agno/tools/giphy.py +34 -12
agno/tools/github.py +1294 -97
agno/tools/gmail.py +922 -0
agno/tools/google_bigquery.py +117 -0
agno/tools/google_drive.py +271 -0
agno/tools/google_maps.py +253 -0
agno/tools/googlecalendar.py +607 -107
agno/tools/googlesheets.py +377 -0
agno/tools/hackernews.py +20 -12
agno/tools/jina.py +24 -14
agno/tools/jira.py +48 -19
agno/tools/knowledge.py +218 -0
agno/tools/linear.py +82 -43
agno/tools/linkup.py +58 -0
agno/tools/local_file_system.py +15 -7
agno/tools/lumalab.py +41 -26
agno/tools/mcp/__init__.py +10 -0
agno/tools/mcp/mcp.py +331 -0
agno/tools/mcp/multi_mcp.py +347 -0
agno/tools/mcp/params.py +24 -0
agno/tools/mcp_toolbox.py +284 -0
agno/tools/mem0.py +193 -0
agno/tools/memory.py +419 -0
agno/tools/mlx_transcribe.py +11 -9
agno/tools/models/azure_openai.py +190 -0
agno/tools/models/gemini.py +203 -0
agno/tools/models/groq.py +158 -0
agno/tools/models/morph.py +186 -0
agno/tools/models/nebius.py +124 -0
agno/tools/models_labs.py +163 -82
agno/tools/moviepy_video.py +18 -13
agno/tools/nano_banana.py +151 -0
agno/tools/neo4j.py +134 -0
agno/tools/newspaper.py +15 -4
agno/tools/newspaper4k.py +19 -6
agno/tools/notion.py +204 -0
agno/tools/openai.py +181 -17
agno/tools/openbb.py +27 -20
agno/tools/opencv.py +321 -0
agno/tools/openweather.py +233 -0
agno/tools/oxylabs.py +385 -0
agno/tools/pandas.py +25 -15
agno/tools/parallel.py +314 -0
agno/tools/postgres.py +238 -185
agno/tools/pubmed.py +125 -13
agno/tools/python.py +48 -35
agno/tools/reasoning.py +283 -0
agno/tools/reddit.py +207 -29
agno/tools/redshift.py +406 -0
agno/tools/replicate.py +69 -26
agno/tools/resend.py +11 -6
agno/tools/scrapegraph.py +179 -19
agno/tools/searxng.py +23 -31
agno/tools/serpapi.py +15 -10
agno/tools/serper.py +255 -0
agno/tools/shell.py +23 -12
agno/tools/shopify.py +1519 -0
agno/tools/slack.py +56 -14
agno/tools/sleep.py +8 -6
agno/tools/spider.py +35 -11
agno/tools/spotify.py +919 -0
agno/tools/sql.py +34 -19
agno/tools/tavily.py +158 -8
agno/tools/telegram.py +18 -8
agno/tools/todoist.py +218 -0
agno/tools/toolkit.py +134 -9
agno/tools/trafilatura.py +388 -0
agno/tools/trello.py +25 -28
agno/tools/twilio.py +18 -9
agno/tools/user_control_flow.py +78 -0
agno/tools/valyu.py +228 -0
agno/tools/visualization.py +467 -0
agno/tools/webbrowser.py +28 -0
agno/tools/webex.py +76 -0
agno/tools/website.py +23 -19
agno/tools/webtools.py +45 -0
agno/tools/whatsapp.py +286 -0
agno/tools/wikipedia.py +28 -19
agno/tools/workflow.py +285 -0
agno/tools/{twitter.py → x.py} +142 -46
agno/tools/yfinance.py +41 -39
agno/tools/youtube.py +34 -17
agno/tools/zendesk.py +15 -5
agno/tools/zep.py +454 -0
agno/tools/zoom.py +86 -37
agno/tracing/__init__.py +12 -0
agno/tracing/exporter.py +157 -0
agno/tracing/schemas.py +276 -0
agno/tracing/setup.py +111 -0
agno/utils/agent.py +938 -0
agno/utils/audio.py +37 -1
agno/utils/certs.py +27 -0
agno/utils/code_execution.py +11 -0
agno/utils/common.py +103 -20
agno/utils/cryptography.py +22 -0
agno/utils/dttm.py +33 -0
agno/utils/events.py +700 -0
agno/utils/functions.py +107 -37
agno/utils/gemini.py +426 -0
agno/utils/hooks.py +171 -0
agno/utils/http.py +185 -0
agno/utils/json_schema.py +159 -37
agno/utils/knowledge.py +36 -0
agno/utils/location.py +19 -0
agno/utils/log.py +221 -8
agno/utils/mcp.py +214 -0
agno/utils/media.py +335 -14
agno/utils/merge_dict.py +22 -1
agno/utils/message.py +77 -2
agno/utils/models/ai_foundry.py +50 -0
agno/utils/models/claude.py +373 -0
agno/utils/models/cohere.py +94 -0
agno/utils/models/llama.py +85 -0
agno/utils/models/mistral.py +100 -0
agno/utils/models/openai_responses.py +140 -0
agno/utils/models/schema_utils.py +153 -0
agno/utils/models/watsonx.py +41 -0
agno/utils/openai.py +257 -0
agno/utils/pickle.py +1 -1
agno/utils/pprint.py +124 -8
agno/utils/print_response/agent.py +930 -0
agno/utils/print_response/team.py +1914 -0
agno/utils/print_response/workflow.py +1668 -0
agno/utils/prompts.py +111 -0
agno/utils/reasoning.py +108 -0
agno/utils/response.py +163 -0
agno/utils/serialize.py +32 -0
agno/utils/shell.py +4 -4
agno/utils/streamlit.py +487 -0
agno/utils/string.py +204 -51
agno/utils/team.py +139 -0
agno/utils/timer.py +9 -2
agno/utils/tokens.py +657 -0
agno/utils/tools.py +19 -1
agno/utils/whatsapp.py +305 -0
agno/utils/yaml_io.py +3 -3
agno/vectordb/__init__.py +2 -0
agno/vectordb/base.py +87 -9
agno/vectordb/cassandra/__init__.py +5 -1
agno/vectordb/cassandra/cassandra.py +383 -27
agno/vectordb/chroma/__init__.py +4 -0
agno/vectordb/chroma/chromadb.py +748 -83
agno/vectordb/clickhouse/__init__.py +7 -1
agno/vectordb/clickhouse/clickhousedb.py +554 -53
agno/vectordb/couchbase/__init__.py +3 -0
agno/vectordb/couchbase/couchbase.py +1446 -0
agno/vectordb/lancedb/__init__.py +5 -0
agno/vectordb/lancedb/lance_db.py +730 -98
agno/vectordb/langchaindb/__init__.py +5 -0
agno/vectordb/langchaindb/langchaindb.py +163 -0
agno/vectordb/lightrag/__init__.py +5 -0
agno/vectordb/lightrag/lightrag.py +388 -0
agno/vectordb/llamaindex/__init__.py +3 -0
agno/vectordb/llamaindex/llamaindexdb.py +166 -0
agno/vectordb/milvus/__init__.py +3 -0
agno/vectordb/milvus/milvus.py +966 -78
agno/vectordb/mongodb/__init__.py +9 -1
agno/vectordb/mongodb/mongodb.py +1175 -172
agno/vectordb/pgvector/__init__.py +8 -0
agno/vectordb/pgvector/pgvector.py +599 -115
agno/vectordb/pineconedb/__init__.py +5 -1
agno/vectordb/pineconedb/pineconedb.py +406 -43
agno/vectordb/qdrant/__init__.py +4 -0
agno/vectordb/qdrant/qdrant.py +914 -61
agno/vectordb/redis/__init__.py +9 -0
agno/vectordb/redis/redisdb.py +682 -0
agno/vectordb/singlestore/__init__.py +8 -1
agno/vectordb/singlestore/singlestore.py +771 -0
agno/vectordb/surrealdb/__init__.py +3 -0
agno/vectordb/surrealdb/surrealdb.py +663 -0
agno/vectordb/upstashdb/__init__.py +5 -0
agno/vectordb/upstashdb/upstashdb.py +718 -0
agno/vectordb/weaviate/__init__.py +8 -0
agno/vectordb/weaviate/index.py +15 -0
agno/vectordb/weaviate/weaviate.py +1009 -0
agno/workflow/__init__.py +23 -1
agno/workflow/agent.py +299 -0
agno/workflow/condition.py +759 -0
agno/workflow/loop.py +756 -0
agno/workflow/parallel.py +853 -0
agno/workflow/router.py +723 -0
agno/workflow/step.py +1564 -0
agno/workflow/steps.py +613 -0
agno/workflow/types.py +556 -0
agno/workflow/workflow.py +4327 -514
agno-2.3.13.dist-info/METADATA +639 -0
agno-2.3.13.dist-info/RECORD +613 -0
{agno-0.1.2.dist-info → agno-2.3.13.dist-info}/WHEEL +1 -1
agno-2.3.13.dist-info/licenses/LICENSE +201 -0
agno/api/playground.py +0 -91
agno/api/schemas/playground.py +0 -22
agno/api/schemas/user.py +0 -22
agno/api/schemas/workspace.py +0 -46
agno/api/user.py +0 -160
agno/api/workspace.py +0 -151
agno/cli/auth_server.py +0 -118
agno/cli/config.py +0 -275
agno/cli/console.py +0 -88
agno/cli/credentials.py +0 -23
agno/cli/entrypoint.py +0 -571
agno/cli/operator.py +0 -355
agno/cli/settings.py +0 -85
agno/cli/ws/ws_cli.py +0 -817
agno/constants.py +0 -13
agno/document/__init__.py +0 -1
agno/document/chunking/semantic.py +0 -47
agno/document/chunking/strategy.py +0 -31
agno/document/reader/__init__.py +0 -1
agno/document/reader/arxiv_reader.py +0 -41
agno/document/reader/base.py +0 -22
agno/document/reader/csv_reader.py +0 -84
agno/document/reader/docx_reader.py +0 -46
agno/document/reader/firecrawl_reader.py +0 -99
agno/document/reader/json_reader.py +0 -43
agno/document/reader/pdf_reader.py +0 -219
agno/document/reader/s3/pdf_reader.py +0 -46
agno/document/reader/s3/text_reader.py +0 -51
agno/document/reader/text_reader.py +0 -41
agno/document/reader/website_reader.py +0 -175
agno/document/reader/youtube_reader.py +0 -50
agno/embedder/__init__.py +0 -1
agno/embedder/azure_openai.py +0 -86
agno/embedder/cohere.py +0 -72
agno/embedder/fastembed.py +0 -37
agno/embedder/google.py +0 -73
agno/embedder/huggingface.py +0 -54
agno/embedder/mistral.py +0 -80
agno/embedder/ollama.py +0 -57
agno/embedder/openai.py +0 -74
agno/embedder/sentence_transformer.py +0 -38
agno/embedder/voyageai.py +0 -64
agno/eval/perf.py +0 -201
agno/file/__init__.py +0 -1
agno/file/file.py +0 -16
agno/file/local/csv.py +0 -32
agno/file/local/txt.py +0 -19
agno/infra/app.py +0 -240
agno/infra/base.py +0 -144
agno/infra/context.py +0 -20
agno/infra/db_app.py +0 -52
agno/infra/resource.py +0 -205
agno/infra/resources.py +0 -55
agno/knowledge/agent.py +0 -230
agno/knowledge/arxiv.py +0 -22
agno/knowledge/combined.py +0 -22
agno/knowledge/csv.py +0 -28
agno/knowledge/csv_url.py +0 -19
agno/knowledge/document.py +0 -20
agno/knowledge/docx.py +0 -30
agno/knowledge/json.py +0 -28
agno/knowledge/langchain.py +0 -71
agno/knowledge/llamaindex.py +0 -66
agno/knowledge/pdf.py +0 -28
agno/knowledge/pdf_url.py +0 -26
agno/knowledge/s3/base.py +0 -60
agno/knowledge/s3/pdf.py +0 -21
agno/knowledge/s3/text.py +0 -23
agno/knowledge/text.py +0 -30
agno/knowledge/website.py +0 -88
agno/knowledge/wikipedia.py +0 -31
agno/knowledge/youtube.py +0 -22
agno/memory/agent.py +0 -392
agno/memory/classifier.py +0 -104
agno/memory/db/__init__.py +0 -1
agno/memory/db/base.py +0 -42
agno/memory/db/mongodb.py +0 -189
agno/memory/db/postgres.py +0 -203
agno/memory/db/sqlite.py +0 -193
agno/memory/memory.py +0 -15
agno/memory/row.py +0 -36
agno/memory/summarizer.py +0 -192
agno/memory/summary.py +0 -19
agno/memory/workflow.py +0 -38
agno/models/google/gemini_openai.py +0 -26
agno/models/ollama/hermes.py +0 -221
agno/models/ollama/tools.py +0 -362
agno/models/vertexai/gemini.py +0 -595
agno/playground/__init__.py +0 -3
agno/playground/async_router.py +0 -421
agno/playground/deploy.py +0 -249
agno/playground/operator.py +0 -92
agno/playground/playground.py +0 -91
agno/playground/schemas.py +0 -76
agno/playground/serve.py +0 -55
agno/playground/sync_router.py +0 -405
agno/reasoning/agent.py +0 -68
agno/run/response.py +0 -112
agno/storage/agent/__init__.py +0 -0
agno/storage/agent/base.py +0 -38
agno/storage/agent/dynamodb.py +0 -350
agno/storage/agent/json.py +0 -92
agno/storage/agent/mongodb.py +0 -228
agno/storage/agent/postgres.py +0 -367
agno/storage/agent/session.py +0 -79
agno/storage/agent/singlestore.py +0 -303
agno/storage/agent/sqlite.py +0 -357
agno/storage/agent/yaml.py +0 -93
agno/storage/workflow/__init__.py +0 -0
agno/storage/workflow/base.py +0 -40
agno/storage/workflow/mongodb.py +0 -233
agno/storage/workflow/postgres.py +0 -366
agno/storage/workflow/session.py +0 -60
agno/storage/workflow/sqlite.py +0 -359
agno/tools/googlesearch.py +0 -88
agno/utils/defaults.py +0 -57
agno/utils/filesystem.py +0 -39
agno/utils/git.py +0 -52
agno/utils/json_io.py +0 -30
agno/utils/load_env.py +0 -19
agno/utils/py_io.py +0 -19
agno/utils/pyproject.py +0 -18
agno/utils/resource_filter.py +0 -31
agno/vectordb/singlestore/s2vectordb.py +0 -390
agno/vectordb/singlestore/s2vectordb2.py +0 -355
agno/workspace/__init__.py +0 -0
agno/workspace/config.py +0 -325
agno/workspace/enums.py +0 -6
agno/workspace/helpers.py +0 -48
agno/workspace/operator.py +0 -758
agno/workspace/settings.py +0 -63
agno-0.1.2.dist-info/LICENSE +0 -375
agno-0.1.2.dist-info/METADATA +0 -502
agno-0.1.2.dist-info/RECORD +0 -352
agno-0.1.2.dist-info/entry_points.txt +0 -3
/agno/{cli → db/migrations}/__init__.py +0 -0
/agno/{cli/ws → db/migrations/versions}/__init__.py +0 -0
/agno/{document/chunking/__init__.py → db/schemas/metrics.py} +0 -0
/agno/{document/reader/s3 → integrations}/__init__.py +0 -0
/agno/{file/local → knowledge/chunking}/__init__.py +0 -0
/agno/{infra → knowledge/remote_content}/__init__.py +0 -0
/agno/{knowledge/s3 → tools/models}/__init__.py +0 -0
/agno/{reranker → utils/models}/__init__.py +0 -0
/agno/{storage → utils/print_response}/__init__.py +0 -0
{agno-0.1.2.dist-info → agno-2.3.13.dist-info}/top_level.txt +0 -0

agno/vectordb/chroma/chromadb.py CHANGED Viewed

@@ -1,21 +1,23 @@
+import asyncio
+import json
 from hashlib import md5
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Mapping, Optional, Union, cast
 try:
     from chromadb import Client as ChromaDbClient
     from chromadb import PersistentClient as PersistentChromaDbClient
     from chromadb.api.client import ClientAPI
     from chromadb.api.models.Collection import Collection
-    from chromadb.api.types import GetResult, IncludeEnum, QueryResult
+    from chromadb.api.types import QueryResult
 except ImportError:
     raise ImportError("The `chromadb` package is not installed. Please install it via `pip install chromadb`.")
-from agno.document import Document
-from agno.embedder import Embedder
-from agno.embedder.openai import OpenAIEmbedder
-from agno.reranker.base import Reranker
-from agno.utils.log import logger
+from agno.filters import FilterExpr
+from agno.knowledge.document import Document
+from agno.knowledge.embedder import Embedder
+from agno.knowledge.reranker.base import Reranker
+from agno.utils.log import log_debug, log_error, log_info, log_warning, logger
 from agno.vectordb.base import VectorDb
 from agno.vectordb.distance import Distance
@@ -24,19 +26,39 @@ class ChromaDb(VectorDb):
     def __init__(
         self,
         collection: str,
-        embedder: Embedder = OpenAIEmbedder(),
+        name: Optional[str] = None,
+        description: Optional[str] = None,
+        id: Optional[str] = None,
+        embedder: Optional[Embedder] = None,
         distance: Distance = Distance.cosine,
         path: str = "tmp/chromadb",
         persistent_client: bool = False,
         reranker: Optional[Reranker] = None,
         **kwargs,
     ):
-        # Collection attributes
-        self.collection: str = collection
+        # Validate required parameters
+        if not collection:
+            raise ValueError("Collection name must be provided.")
+        # Dynamic ID generation based on unique identifiers
+        if id is None:
+            from agno.utils.string import generate_id
+            seed = f"{path}#{collection}"
+            id = generate_id(seed)
+        # Initialize base class with name, description, and generated ID
+        super().__init__(id=id, name=name, description=description)
+        # Collection attributes
+        self.collection_name: str = collection
         # Embedder for embedding the document contents
-        self.embedder: Embedder = embedder
+        if embedder is None:
+            from agno.knowledge.embedder.openai import OpenAIEmbedder
+            embedder = OpenAIEmbedder()
+            log_info("Embedder not provided, using OpenAIEmbedder as default.")
+        self.embedder: Embedder = embedder
         # Distance metric
         self.distance: Distance = distance
@@ -56,16 +78,54 @@ class ChromaDb(VectorDb):
         # Chroma client kwargs
         self.kwargs = kwargs
+    def _flatten_metadata(self, metadata: Dict[str, Any]) -> Dict[str, Union[str, int, float, bool]]:
+        """
+        Flatten nested metadata to ChromaDB-compatible format.
+        Args:
+            metadata: Dictionary that may contain nested structures
+        Returns:
+            Flattened dictionary with only primitive values
+        """
+        flattened: Dict[str, Any] = {}
+        def _flatten_recursive(obj: Any, prefix: str = "") -> None:
+            if isinstance(obj, dict):
+                if len(obj) == 0:
+                    # Handle empty dictionaries by converting to JSON string
+                    flattened[prefix] = json.dumps(obj)
+                else:
+                    for key, value in obj.items():
+                        new_key = f"{prefix}.{key}" if prefix else key
+                        _flatten_recursive(value, new_key)
+            elif isinstance(obj, (list, tuple)):
+                # Convert lists/tuples to JSON strings
+                flattened[prefix] = json.dumps(obj)
+            elif isinstance(obj, (str, int, float, bool)) or obj is None:
+                if obj is not None:  # ChromaDB doesn't accept None values
+                    flattened[prefix] = obj
+            else:
+                # Convert other complex types to JSON strings
+                try:
+                    flattened[prefix] = json.dumps(obj)
+                except (TypeError, ValueError):
+                    # If it can't be serialized, convert to string
+                    flattened[prefix] = str(obj)
+        _flatten_recursive(metadata)
+        return flattened
     @property
     def client(self) -> ClientAPI:
         if self._client is None:
             if not self.persistent_client:
-                logger.debug("Creating Chroma Client")
+                log_debug("Creating Chroma Client")
                 self._client = ChromaDbClient(
                     **self.kwargs,
                 )
             elif self.persistent_client:
-                logger.debug("Creating Persistent Chroma Client")
+                log_debug("Creating Persistent Chroma Client")
                 self._client = PersistentChromaDbClient(
                     path=self.path,
                     **self.kwargs,
@@ -74,32 +134,18 @@ class ChromaDb(VectorDb):
     def create(self) -> None:
         """Create the collection in ChromaDb."""
-        if not self.exists():
-            logger.debug(f"Creating collection: {self.collection}")
+        if self.exists():
+            log_debug(f"Collection already exists: {self.collection_name}")
+            self._collection = self.client.get_collection(name=self.collection_name)
+        else:
+            log_debug(f"Creating collection: {self.collection_name}")
             self._collection = self.client.create_collection(
-                name=self.collection, metadata={"hnsw:space": self.distance.value}
+                name=self.collection_name, metadata={"hnsw:space": self.distance.value}
             )
-        else:
-            logger.debug(f"Collection already exists: {self.collection}")
-            self._collection = self.client.get_collection(name=self.collection)
-    def doc_exists(self, document: Document) -> bool:
-        """Check if a document exists in the collection.
-        Args:
-            document (Document): Document to check.
-        Returns:
-            bool: True if document exists, False otherwise.
-        """
-        if self.client:
-            try:
-                collection: Collection = self.client.get_collection(name=self.collection)
-                collection_data: GetResult = collection.get(include=[IncludeEnum.documents])
-                if collection_data.get("documents") != []:
-                    return True
-            except Exception as e:
-                logger.error(f"Document does not exist: {e}")
-        return False
+    async def async_create(self) -> None:
+        """Create the collection asynchronously by running in a thread."""
+        await asyncio.to_thread(self.create)
     def name_exists(self, name: str) -> bool:
         """Check if a document with a given name exists in the collection.
@@ -107,121 +153,432 @@ class ChromaDb(VectorDb):
             name (str): Name of the document to check.
         Returns:
             bool: True if document exists, False otherwise."""
-        if self.client:
-            try:
-                collections: Collection = self.client.get_collection(name=self.collection)
-                for collection in collections:  # type: ignore
-                    if name in collection:
-                        return True
-            except Exception as e:
-                logger.error(f"Document with given name does not exist: {e}")
+        if not self.client:
+            logger.warning("Client not initialized")
+            return False
+        try:
+            collection: Collection = self.client.get_collection(name=self.collection_name)
+            result = collection.get(where=cast(Any, {"name": {"$eq": name}}), limit=1)
+            return len(result.get("ids", [])) > 0
+        except Exception as e:
+            logger.error(f"Error checking name existence: {e}")
         return False
-    def insert(self, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
+    async def async_name_exists(self, name: str) -> bool:
+        """Check if a document with given name exists asynchronously."""
+        return await asyncio.to_thread(self.name_exists, name)
+    def insert(self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
         """Insert documents into the collection.
         Args:
             documents (List[Document]): List of documents to insert
-            filters (Optional[Dict[str, Any]]): Filters to apply while inserting documents
+            filters (Optional[Dict[str, Any]]): Filters to merge with document metadata
         """
-        logger.debug(f"Inserting {len(documents)} documents")
+        log_info(f"Inserting {len(documents)} documents")
         ids: List = []
         docs: List = []
         docs_embeddings: List = []
+        docs_metadata: List = []
+        if not self._collection:
+            self._collection = self.client.get_collection(name=self.collection_name)
         for document in documents:
             document.embed(embedder=self.embedder)
             cleaned_content = document.content.replace("\x00", "\ufffd")
             doc_id = md5(cleaned_content.encode()).hexdigest()
+            # Handle metadata and filters
+            metadata = document.meta_data or {}
+            if filters:
+                metadata.update(filters)
+            # Add name, content_id to metadata
+            if document.name is not None:
+                metadata["name"] = document.name
+            if document.content_id is not None:
+                metadata["content_id"] = document.content_id
+            metadata["content_hash"] = content_hash
+            # Flatten metadata for ChromaDB compatibility
+            flattened_metadata = self._flatten_metadata(metadata)
+            docs_embeddings.append(document.embedding)
+            docs.append(cleaned_content)
+            ids.append(doc_id)
+            docs_metadata.append(flattened_metadata)
+            log_debug(f"Prepared document: {document.id} | {document.name} | {flattened_metadata}")
+        if self._collection is None:
+            logger.warning("Collection does not exist")
+        else:
+            if len(docs) > 0:
+                self._collection.add(ids=ids, embeddings=docs_embeddings, documents=docs, metadatas=docs_metadata)
+                log_debug(f"Committed {len(docs)} documents")
+    async def async_insert(
+        self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
+    ) -> None:
+        """Insert documents asynchronously by running in a thread."""
+        log_info(f"Async Inserting {len(documents)} documents")
+        ids: List = []
+        docs: List = []
+        docs_embeddings: List = []
+        docs_metadata: List = []
+        if not self._collection:
+            self._collection = self.client.get_collection(name=self.collection_name)
+        if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
+            # Use batch embedding when enabled and supported
+            try:
+                # Extract content from all documents
+                doc_contents = [doc.content for doc in documents]
+                # Get batch embeddings and usage
+                embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
+                # Process documents with pre-computed embeddings
+                for j, doc in enumerate(documents):
+                    try:
+                        if j < len(embeddings):
+                            doc.embedding = embeddings[j]
+                            doc.usage = usages[j] if j < len(usages) else None
+                    except Exception as e:
+                        logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")
+            except Exception as e:
+                # Check if this is a rate limit error - don't fall back as it would make things worse
+                error_str = str(e).lower()
+                is_rate_limit = any(
+                    phrase in error_str
+                    for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
+                )
+                if is_rate_limit:
+                    logger.error(f"Rate limit detected during batch embedding. {e}")
+                    raise e
+                else:
+                    logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
+                    # Fall back to individual embedding
+                    embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
+                    await asyncio.gather(*embed_tasks, return_exceptions=True)
+        else:
+            # Use individual embedding
+            try:
+                embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
+                await asyncio.gather(*embed_tasks, return_exceptions=True)
+            except Exception as e:
+                log_error(f"Error processing document: {e}")
+        for document in documents:
+            cleaned_content = document.content.replace("\x00", "\ufffd")
+            # Include content_hash in ID to ensure uniqueness across different content hashes
+            base_id = document.id or md5(cleaned_content.encode()).hexdigest()
+            doc_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
+            # Handle metadata and filters
+            metadata = document.meta_data or {}
+            if filters:
+                metadata.update(filters)
+            # Add name, content_id to metadata
+            if document.name is not None:
+                metadata["name"] = document.name
+            if document.content_id is not None:
+                metadata["content_id"] = document.content_id
+            metadata["content_hash"] = content_hash
+            # Flatten metadata for ChromaDB compatibility
+            flattened_metadata = self._flatten_metadata(metadata)
             docs_embeddings.append(document.embedding)
             docs.append(cleaned_content)
             ids.append(doc_id)
-            logger.debug(f"Inserted document: {document.id} | {document.name} | {document.meta_data}")
+            docs_metadata.append(flattened_metadata)
+            log_debug(f"Prepared document: {document.id} | {document.name} | {flattened_metadata}")
-        if len(docs) > 0 and self._collection is not None:
-            self._collection.add(ids=ids, embeddings=docs_embeddings, documents=docs)
-            logger.debug(f"Committed {len(docs)} documents")
+        if self._collection is None:
+            logger.warning("Collection does not exist")
         else:
-            logger.error("Collection does not exist")
+            if len(docs) > 0:
+                self._collection.add(ids=ids, embeddings=docs_embeddings, documents=docs, metadatas=docs_metadata)
+                log_debug(f"Committed {len(docs)} documents")
     def upsert_available(self) -> bool:
         """Check if upsert is available in ChromaDB."""
         return True
-    def upsert(self, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
+    def upsert(self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
         """Upsert documents into the collection.
         Args:
             documents (List[Document]): List of documents to upsert
             filters (Optional[Dict[str, Any]]): Filters to apply while upserting
         """
-        logger.debug(f"Upserting {len(documents)} documents")
+        try:
+            if self.content_hash_exists(content_hash):
+                self._delete_by_content_hash(content_hash)
+            self._upsert(content_hash, documents, filters)
+        except Exception as e:
+            logger.error(f"Error upserting documents by content hash: {e}")
+            raise
+    def _upsert(self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
+        """Upsert documents into the collection.
+        Args:
+            documents (List[Document]): List of documents to upsert
+            filters (Optional[Dict[str, Any]]): Filters to apply while upserting
+        """
+        log_info(f"Upserting {len(documents)} documents")
         ids: List = []
         docs: List = []
         docs_embeddings: List = []
+        docs_metadata: List = []
+        if not self._collection:
+            self._collection = self.client.get_collection(name=self.collection_name)
         for document in documents:
             document.embed(embedder=self.embedder)
             cleaned_content = document.content.replace("\x00", "\ufffd")
             doc_id = md5(cleaned_content.encode()).hexdigest()
+            # Handle metadata and filters
+            metadata = document.meta_data or {}
+            if filters:
+                metadata.update(filters)
+            # Add name, content_id to metadata
+            if document.name is not None:
+                metadata["name"] = document.name
+            if document.content_id is not None:
+                metadata["content_id"] = document.content_id
+            metadata["content_hash"] = content_hash
+            # Flatten metadata for ChromaDB compatibility
+            flattened_metadata = self._flatten_metadata(metadata)
             docs_embeddings.append(document.embedding)
             docs.append(cleaned_content)
             ids.append(doc_id)
-            logger.debug(f"Upserted document: {document.id} | {document.name} | {document.meta_data}")
+            docs_metadata.append(flattened_metadata)
+            log_debug(f"Upserted document: {document.id} | {document.name} | {flattened_metadata}")
+        if self._collection is None:
+            logger.warning("Collection does not exist")
+        else:
+            if len(docs) > 0:
+                self._collection.upsert(ids=ids, embeddings=docs_embeddings, documents=docs, metadatas=docs_metadata)
+                log_debug(f"Committed {len(docs)} documents")
+    async def _async_upsert(
+        self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
+    ) -> None:
+        """Upsert documents into the collection.
+        Args:
+            documents (List[Document]): List of documents to upsert
+            filters (Optional[Dict[str, Any]]): Filters to apply while upserting
+        """
+        log_info(f"Async Upserting {len(documents)} documents")
+        ids: List = []
+        docs: List = []
+        docs_embeddings: List = []
+        docs_metadata: List = []
+        if not self._collection:
+            self._collection = self.client.get_collection(name=self.collection_name)
+        if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
+            # Use batch embedding when enabled and supported
+            try:
+                # Extract content from all documents
+                doc_contents = [doc.content for doc in documents]
+                # Get batch embeddings and usage
+                embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
-        if len(docs) > 0 and self._collection is not None:
-            self._collection.upsert(ids=ids, embeddings=docs_embeddings, documents=docs)
-            logger.debug(f"Committed {len(docs)} documents")
+                # Process documents with pre-computed embeddings
+                for j, doc in enumerate(documents):
+                    try:
+                        if j < len(embeddings):
+                            doc.embedding = embeddings[j]
+                            doc.usage = usages[j] if j < len(usages) else None
+                    except Exception as e:
+                        logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")
+            except Exception as e:
+                # Check if this is a rate limit error - don't fall back as it would make things worse
+                error_str = str(e).lower()
+                is_rate_limit = any(
+                    phrase in error_str
+                    for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
+                )
+                if is_rate_limit:
+                    logger.error(f"Rate limit detected during batch embedding. {e}")
+                    raise e
+                else:
+                    logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
+                    # Fall back to individual embedding
+                    embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
+                    await asyncio.gather(*embed_tasks, return_exceptions=True)
         else:
-            logger.error("Collection does not exist")
+            # Use individual embedding
+            embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
+            await asyncio.gather(*embed_tasks, return_exceptions=True)
+        for document in documents:
+            cleaned_content = document.content.replace("\x00", "\ufffd")
+            # Include content_hash in ID to ensure uniqueness across different content hashes
+            base_id = document.id or md5(cleaned_content.encode()).hexdigest()
+            doc_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
-    def search(self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None) -> List[Document]:
+            # Handle metadata and filters
+            metadata = document.meta_data or {}
+            if filters:
+                metadata.update(filters)
+            # Add name, content_id to metadata
+            if document.name is not None:
+                metadata["name"] = document.name
+            if document.content_id is not None:
+                metadata["content_id"] = document.content_id
+            metadata["content_hash"] = content_hash
+            # Flatten metadata for ChromaDB compatibility
+            flattened_metadata = self._flatten_metadata(metadata)
+            docs_embeddings.append(document.embedding)
+            docs.append(cleaned_content)
+            ids.append(doc_id)
+            docs_metadata.append(flattened_metadata)
+            log_debug(f"Upserted document: {document.id} | {document.name} | {flattened_metadata}")
+        if self._collection is None:
+            logger.warning("Collection does not exist")
+        else:
+            if len(docs) > 0:
+                self._collection.upsert(ids=ids, embeddings=docs_embeddings, documents=docs, metadatas=docs_metadata)
+                log_debug(f"Committed {len(docs)} documents")
+    async def async_upsert(
+        self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
+    ) -> None:
+        """Upsert documents asynchronously by running in a thread."""
+        try:
+            if self.content_hash_exists(content_hash):
+                self._delete_by_content_hash(content_hash)
+            await self._async_upsert(content_hash, documents, filters)
+        except Exception as e:
+            logger.error(f"Error upserting documents by content hash: {e}")
+            raise
+    def search(
+        self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
+    ) -> List[Document]:
         """Search the collection for a query.
         Args:
             query (str): Query to search for.
             limit (int): Number of results to return.
-            filters (Optional[Dict[str, Any]]): Filters to apply while searching.
+            filters (Optional[Union[Dict[str, Any], List[FilterExpr]]]): Filters to apply while searching.
+                Supports ChromaDB's filtering operators:
+                - $eq, $ne: Equality/Inequality
+                - $gt, $gte, $lt, $lte: Numeric comparisons
+                - $in, $nin: List inclusion/exclusion
+                - $and, $or: Logical operators
         Returns:
             List[Document]: List of search results.
         """
+        if isinstance(filters, list):
+            log_warning("Filter Expressions are not yet supported in ChromaDB. No filters will be applied.")
+            filters = None
         query_embedding = self.embedder.get_embedding(query)
         if query_embedding is None:
             logger.error(f"Error getting embedding for Query: {query}")
             return []
         if not self._collection:
-            self._collection = self.client.get_collection(name=self.collection)
+            self._collection = self.client.get_collection(name=self.collection_name)
+        # Convert simple filters to ChromaDB's format if needed
+        where_filter = self._convert_filters(filters) if filters else None
         result: QueryResult = self._collection.query(
             query_embeddings=query_embedding,
             n_results=limit,
+            where=where_filter,  # Add where filter
+            include=["metadatas", "documents", "embeddings", "distances", "uris"],
         )
         # Build search results
         search_results: List[Document] = []
-        ids = result.get("ids", [[]])[0]
-        metadata = result.get("metadatas", [[]])[0]  # type: ignore
-        documents = result.get("documents", [[]])[0]  # type: ignore
-        embeddings = result.get("embeddings")
-        distances = result.get("distances", [[]])[0]  # type: ignore
-        uris = result.get("uris")
-        data = result.get("data")
-        metadata["distances"] = distances  # type: ignore
-        metadata["uris"] = uris  # type: ignore
-        metadata["data"] = data  # type: ignore
+        ids_list = result.get("ids", [[]])  # type: ignore
+        metadata_list = result.get("metadatas", [[{}]])  # type: ignore
+        documents_list = result.get("documents", [[]])  # type: ignore
+        embeddings_list = result.get("embeddings")  # type: ignore
+        distances_list = result.get("distances", [[]])  # type: ignore
+        if not ids_list or not metadata_list or not documents_list or embeddings_list is None or not distances_list:
+            return search_results
+        ids = ids_list[0]
+        metadata = [dict(m) if m else {} for m in metadata_list[0]]  # Convert to mutable dicts
+        documents = documents_list[0]
+        embeddings_raw = embeddings_list[0] if embeddings_list else []
+        embeddings = []
+        for e in embeddings_raw:
+            if hasattr(e, "tolist") and callable(getattr(e, "tolist", None)):
+                try:
+                    embeddings.append(list(cast(Any, e).tolist()))
+                except (AttributeError, TypeError):
+                    embeddings.append(list(e) if isinstance(e, (list, tuple)) else [])
+            elif isinstance(e, (list, tuple)):
+                embeddings.append([float(x) for x in e if isinstance(x, (int, float))])
+            elif isinstance(e, (int, float)):
+                embeddings.append([float(e)])
+            else:
+                embeddings.append([])
+        distances = distances_list[0]
+        for idx, distance in enumerate(distances):
+            if idx < len(metadata):
+                metadata[idx]["distances"] = distance
         try:
-            # Use zip to iterate over multiple lists simultaneously
-            for id_, distance, metadata, document in zip(ids, distances, metadata, documents):
+            for idx, (id_, doc_metadata, document) in enumerate(zip(ids, metadata, documents)):
+                # Extract the fields we added to metadata
+                name_val = doc_metadata.pop("name", None)
+                content_id_val = doc_metadata.pop("content_id", None)
+                # Convert types to match Document constructor expectations
+                name = str(name_val) if name_val is not None and not isinstance(name_val, str) else name_val
+                content_id = (
+                    str(content_id_val)
+                    if content_id_val is not None and not isinstance(content_id_val, str)
+                    else content_id_val
+                )
+                content = str(document) if document is not None else ""
+                embedding = embeddings[idx] if idx < len(embeddings) else None
                 search_results.append(
                     Document(
                         id=id_,
-                        meta_data=metadata,
-                        content=document,
-                        embedding=embeddings,  # type: ignore
+                        name=name,
+                        meta_data=doc_metadata,
+                        content=content,
+                        embedding=embedding,
+                        content_id=content_id,
                     )
                 )
         except Exception as e:
@@ -230,28 +587,68 @@ class ChromaDb(VectorDb):
         if self.reranker:
             search_results = self.reranker.rerank(query=query, documents=search_results)
+        log_info(f"Found {len(search_results)} documents")
         return search_results
+    def _convert_filters(self, filters: Dict[str, Any]) -> Dict[str, Any]:
+        """Convert simple filters to ChromaDB's filter format.
+        Handles conversion of simple key-value filters to ChromaDB's operator format
+        when needed.
+        """
+        if not filters:
+            return {}
+        # If filters already use ChromaDB operators ($eq, $ne, etc.), return as is
+        if any(key.startswith("$") for key in filters.keys()):
+            return filters
+        # Convert simple key-value pairs to ChromaDB's format
+        converted = {}
+        for key, value in filters.items():
+            if isinstance(value, (list, tuple)):
+                # Convert lists to $in operator
+                converted[key] = {"$in": list(value)}
+            else:
+                # Convert simple equality to $eq
+                converted[key] = {"$eq": value}
+        return converted
+    async def async_search(
+        self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
+    ) -> List[Document]:
+        """Search asynchronously by running in a thread."""
+        return await asyncio.to_thread(self.search, query, limit, filters)
     def drop(self) -> None:
         """Delete the collection."""
         if self.exists():
-            logger.debug(f"Deleting collection: {self.collection}")
-            self.client.delete_collection(name=self.collection)
+            log_debug(f"Deleting collection: {self.collection_name}")
+            self.client.delete_collection(name=self.collection_name)
+    async def async_drop(self) -> None:
+        """Drop the collection asynchronously by running in a thread."""
+        await asyncio.to_thread(self.drop)
     def exists(self) -> bool:
         """Check if the collection exists."""
         try:
-            self.client.get_collection(name=self.collection)
+            self.client.get_collection(name=self.collection_name)
             return True
         except Exception as e:
-            logger.debug(f"Collection does not exist: {e}")
+            log_debug(f"Collection does not exist: {e}")
         return False
+    async def async_exists(self) -> bool:
+        """Check if collection exists asynchronously by running in a thread."""
+        return await asyncio.to_thread(self.exists)
     def get_count(self) -> int:
         """Get the count of documents in the collection."""
         if self.exists():
             try:
-                collection: Collection = self.client.get_collection(name=self.collection)
+                collection: Collection = self.client.get_collection(name=self.collection_name)
                 return collection.count()
             except Exception as e:
                 logger.error(f"Error getting count: {e}")
@@ -262,8 +659,276 @@ class ChromaDb(VectorDb):
     def delete(self) -> bool:
         try:
-            self.client.delete_collection(name=self.collection)
+            self.client.delete_collection(name=self.collection_name)
             return True
         except Exception as e:
             logger.error(f"Error clearing collection: {e}")
             return False
+    def delete_by_id(self, id: str) -> bool:
+        """Delete document by ID."""
+        if not self.client:
+            logger.error("Client not initialized")
+            return False
+        try:
+            collection: Collection = self.client.get_collection(name=self.collection_name)
+            # Check if document exists
+            if not self.id_exists(id):
+                log_info(f"Document with ID '{id}' not found")
+                return False
+            # Delete the document
+            collection.delete(ids=[id])
+            log_info(f"Deleted document with ID '{id}'")
+            return True
+        except Exception as e:
+            logger.error(f"Error deleting document by ID '{id}': {e}")
+            return False
+    def delete_by_name(self, name: str) -> bool:
+        """Delete documents by name."""
+        if not self.client:
+            logger.error("Client not initialized")
+            return False
+        try:
+            collection: Collection = self.client.get_collection(name=self.collection_name)
+            # Find all documents with the given name
+            result = collection.get(where=cast(Any, {"name": {"$eq": name}}))
+            ids_to_delete = result.get("ids", [])
+            if not ids_to_delete:
+                log_info(f"No documents found with name '{name}'")
+                return False
+            # Delete all matching documents
+            collection.delete(ids=ids_to_delete)
+            log_info(f"Deleted {len(ids_to_delete)} documents with name '{name}'")
+            return True
+        except Exception as e:
+            logger.error(f"Error deleting documents by name '{name}': {e}")
+            return False
+    def delete_by_metadata(self, metadata: Dict[str, Any]) -> bool:
+        """Delete documents by metadata."""
+        if not self.client:
+            logger.error("Client not initialized")
+            return False
+        try:
+            collection: Collection = self.client.get_collection(name=self.collection_name)
+            # Build where clause for metadata filtering
+            where_clause = {}
+            for key, value in metadata.items():
+                where_clause[key] = {"$eq": value}
+            # Find all documents with the matching metadata
+            result = collection.get(where=cast(Any, where_clause))
+            ids_to_delete = result.get("ids", [])
+            if not ids_to_delete:
+                log_info(f"No documents found with metadata '{metadata}'")
+                return False
+            # Delete all matching documents
+            collection.delete(ids=ids_to_delete)
+            log_info(f"Deleted {len(ids_to_delete)} documents with metadata '{metadata}'")
+            return True
+        except Exception as e:
+            logger.error(f"Error deleting documents by metadata '{metadata}': {e}")
+            return False
+    def delete_by_content_id(self, content_id: str) -> bool:
+        """Delete documents by content ID."""
+        if not self.client:
+            logger.error("Client not initialized")
+            return False
+        try:
+            collection: Collection = self.client.get_collection(name=self.collection_name)
+            # Find all documents with the given content_id
+            result = collection.get(where=cast(Any, {"content_id": {"$eq": content_id}}))
+            ids_to_delete = result.get("ids", [])
+            if not ids_to_delete:
+                log_info(f"No documents found with content_id '{content_id}'")
+                return False
+            # Delete all matching documents
+            collection.delete(ids=ids_to_delete)
+            log_info(f"Deleted {len(ids_to_delete)} documents with content_id '{content_id}'")
+            return True
+        except Exception as e:
+            logger.error(f"Error deleting documents by content_id '{content_id}': {e}")
+            return False
+    def _delete_by_content_hash(self, content_hash: str) -> bool:
+        """Delete documents by content hash."""
+        if not self.client:
+            logger.error("Client not initialized")
+            return False
+        try:
+            collection: Collection = self.client.get_collection(name=self.collection_name)
+            # Find all documents with the given content_hash
+            result = collection.get(where=cast(Any, {"content_hash": {"$eq": content_hash}}))
+            ids_to_delete = result.get("ids", [])
+            if not ids_to_delete:
+                log_info(f"No documents found with content_hash '{content_hash}'")
+                return False
+            # Delete all matching documents
+            collection.delete(ids=ids_to_delete)
+            log_info(f"Deleted {len(ids_to_delete)} documents with content_hash '{content_hash}'")
+            return True
+        except Exception as e:
+            logger.error(f"Error deleting documents by content_hash '{content_hash}': {e}")
+            return False
+    def id_exists(self, id: str) -> bool:
+        """Check if a document with the given ID exists in the collection.
+        Args:
+            id (str): The document ID to check.
+        Returns:
+            bool: True if the document exists, False otherwise.
+        """
+        if not self.client:
+            logger.error("Client not initialized")
+            return False
+        try:
+            collection: Collection = self.client.get_collection(name=self.collection_name)
+            # Try to get the document by ID
+            result = collection.get(ids=[id])
+            found_ids = result.get("ids", [])
+            # Return True if the document was found
+            return len(found_ids) > 0
+        except Exception as e:
+            logger.error(f"Error checking if ID '{id}' exists: {e}")
+            return False
+    def content_hash_exists(self, content_hash: str) -> bool:
+        """Check if documents with the given content hash exist."""
+        if not self.client:
+            logger.error("Client not initialized")
+            return False
+        try:
+            collection: Collection = self.client.get_collection(name=self.collection_name)
+            # Try to query for documents with the given content_hash
+            try:
+                result = collection.get(where=cast(Any, {"content_hash": {"$eq": content_hash}}))
+                # Safely extract ids from result
+                if hasattr(result, "get") and callable(result.get):
+                    found_ids = result.get("ids", [])
+                elif hasattr(result, "__getitem__") and "ids" in result:
+                    found_ids = result["ids"]
+                else:
+                    found_ids = []
+                # Return True if any documents were found
+                if isinstance(found_ids, (list, tuple)):
+                    return len(found_ids) > 0
+                elif isinstance(found_ids, int):
+                    # Some ChromaDB versions might return a count instead of a list
+                    return found_ids > 0
+                else:
+                    return False
+            except TypeError as te:
+                if "object of type 'int' has no len()" in str(te):
+                    # Known issue with ChromaDB 0.5.0 - internal bug
+                    # As a workaround, assume content doesn't exist to allow processing to continue
+                    logger.warning(
+                        f"ChromaDB internal error (version 0.5.0 bug): {te}. Assuming content_hash '{content_hash}' does not exist."
+                    )
+                    return False
+                else:
+                    raise te
+        except Exception as e:
+            logger.error(f"Error checking if content_hash '{content_hash}' exists: {e}")
+            return False
+    def update_metadata(self, content_id: str, metadata: Dict[str, Any]) -> None:
+        """
+        Update the metadata for documents with the given content_id.
+        Args:
+            content_id (str): The content ID to update
+            metadata (Dict[str, Any]): The metadata to update
+        """
+        try:
+            if not self.client:
+                logger.error("Client not initialized")
+                return
+            collection: Collection = self.client.get_collection(name=self.collection_name)
+            # Find documents with the given content_id
+            try:
+                result = collection.get(where=cast(Any, {"content_id": {"$eq": content_id}}))
+                # Extract IDs and current metadata
+                if hasattr(result, "get") and callable(result.get):
+                    ids = result.get("ids", [])
+                    current_metadatas = result.get("metadatas", [])
+                elif hasattr(result, "__getitem__"):
+                    ids = result.get("ids", []) if "ids" in result else []
+                    current_metadatas = result.get("metadatas", []) if "metadatas" in result else []
+                else:
+                    ids = []
+                    current_metadatas = []
+                if not ids:
+                    logger.debug(f"No documents found with content_id: {content_id}")
+                    return
+                # Flatten the new metadata first
+                flattened_new_metadata = self._flatten_metadata(metadata)
+                # Merge metadata for each document
+                updated_metadatas = []
+                for i, current_meta in enumerate(current_metadatas or []):
+                    if current_meta is None:
+                        meta_dict: Dict[str, Any] = {}
+                    else:
+                        meta_dict = dict(current_meta)  # Convert Mapping to dict
+                    # Update with flattened metadata
+                    meta_dict.update(flattened_new_metadata)
+                    updated_metadatas.append(meta_dict)
+                # Convert to the expected type for ChromaDB
+                chroma_metadatas = cast(List[Mapping[str, Union[str, int, float, bool]]], updated_metadatas)
+                chroma_metadatas = [{k: v for k, v in m.items() if k and v} for m in chroma_metadatas]
+                collection.update(ids=ids, metadatas=chroma_metadatas)  # type: ignore
+                logger.debug(f"Updated metadata for {len(ids)} documents with content_id: {content_id}")
+            except TypeError as te:
+                if "object of type 'int' has no len()" in str(te):
+                    logger.warning(
+                        f"ChromaDB internal error (version 0.5.0 bug): {te}. Cannot update metadata for content_id '{content_id}'."
+                    )
+                    return
+                else:
+                    raise te
+        except Exception as e:
+            logger.error(f"Error updating metadata for content_id '{content_id}': {e}")
+            raise
+    def get_supported_search_types(self) -> List[str]:
+        """Get the supported search types for this vector database."""
+        return []  # ChromaDb doesn't use SearchType enum

agno 0.1.2__py3-none-any.whl → 2.3.13__py3-none-any.whl

agno 0.1.2py3-none-any.whl → 2.3.13py3-none-any.whl