agno 0.1.2 (py3-none-any.whl) → 2.3.13 (py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/__init__.py +8 -0
- agno/agent/__init__.py +44 -5
- agno/agent/agent.py +10531 -2975
- agno/api/agent.py +14 -53
- agno/api/api.py +7 -46
- agno/api/evals.py +22 -0
- agno/api/os.py +17 -0
- agno/api/routes.py +6 -25
- agno/api/schemas/__init__.py +9 -0
- agno/api/schemas/agent.py +6 -9
- agno/api/schemas/evals.py +16 -0
- agno/api/schemas/os.py +14 -0
- agno/api/schemas/team.py +10 -10
- agno/api/schemas/utils.py +21 -0
- agno/api/schemas/workflows.py +16 -0
- agno/api/settings.py +53 -0
- agno/api/team.py +22 -26
- agno/api/workflow.py +28 -0
- agno/cloud/aws/base.py +214 -0
- agno/cloud/aws/s3/__init__.py +2 -0
- agno/cloud/aws/s3/api_client.py +43 -0
- agno/cloud/aws/s3/bucket.py +195 -0
- agno/cloud/aws/s3/object.py +57 -0
- agno/compression/__init__.py +3 -0
- agno/compression/manager.py +247 -0
- agno/culture/__init__.py +3 -0
- agno/culture/manager.py +956 -0
- agno/db/__init__.py +24 -0
- agno/db/async_postgres/__init__.py +3 -0
- agno/db/base.py +946 -0
- agno/db/dynamo/__init__.py +3 -0
- agno/db/dynamo/dynamo.py +2781 -0
- agno/db/dynamo/schemas.py +442 -0
- agno/db/dynamo/utils.py +743 -0
- agno/db/firestore/__init__.py +3 -0
- agno/db/firestore/firestore.py +2379 -0
- agno/db/firestore/schemas.py +181 -0
- agno/db/firestore/utils.py +376 -0
- agno/db/gcs_json/__init__.py +3 -0
- agno/db/gcs_json/gcs_json_db.py +1791 -0
- agno/db/gcs_json/utils.py +228 -0
- agno/db/in_memory/__init__.py +3 -0
- agno/db/in_memory/in_memory_db.py +1312 -0
- agno/db/in_memory/utils.py +230 -0
- agno/db/json/__init__.py +3 -0
- agno/db/json/json_db.py +1777 -0
- agno/db/json/utils.py +230 -0
- agno/db/migrations/manager.py +199 -0
- agno/db/migrations/v1_to_v2.py +635 -0
- agno/db/migrations/versions/v2_3_0.py +938 -0
- agno/db/mongo/__init__.py +17 -0
- agno/db/mongo/async_mongo.py +2760 -0
- agno/db/mongo/mongo.py +2597 -0
- agno/db/mongo/schemas.py +119 -0
- agno/db/mongo/utils.py +276 -0
- agno/db/mysql/__init__.py +4 -0
- agno/db/mysql/async_mysql.py +2912 -0
- agno/db/mysql/mysql.py +2923 -0
- agno/db/mysql/schemas.py +186 -0
- agno/db/mysql/utils.py +488 -0
- agno/db/postgres/__init__.py +4 -0
- agno/db/postgres/async_postgres.py +2579 -0
- agno/db/postgres/postgres.py +2870 -0
- agno/db/postgres/schemas.py +187 -0
- agno/db/postgres/utils.py +442 -0
- agno/db/redis/__init__.py +3 -0
- agno/db/redis/redis.py +2141 -0
- agno/db/redis/schemas.py +159 -0
- agno/db/redis/utils.py +346 -0
- agno/db/schemas/__init__.py +4 -0
- agno/db/schemas/culture.py +120 -0
- agno/db/schemas/evals.py +34 -0
- agno/db/schemas/knowledge.py +40 -0
- agno/db/schemas/memory.py +61 -0
- agno/db/singlestore/__init__.py +3 -0
- agno/db/singlestore/schemas.py +179 -0
- agno/db/singlestore/singlestore.py +2877 -0
- agno/db/singlestore/utils.py +384 -0
- agno/db/sqlite/__init__.py +4 -0
- agno/db/sqlite/async_sqlite.py +2911 -0
- agno/db/sqlite/schemas.py +181 -0
- agno/db/sqlite/sqlite.py +2908 -0
- agno/db/sqlite/utils.py +429 -0
- agno/db/surrealdb/__init__.py +3 -0
- agno/db/surrealdb/metrics.py +292 -0
- agno/db/surrealdb/models.py +334 -0
- agno/db/surrealdb/queries.py +71 -0
- agno/db/surrealdb/surrealdb.py +1908 -0
- agno/db/surrealdb/utils.py +147 -0
- agno/db/utils.py +118 -0
- agno/eval/__init__.py +24 -0
- agno/eval/accuracy.py +666 -276
- agno/eval/agent_as_judge.py +861 -0
- agno/eval/base.py +29 -0
- agno/eval/performance.py +779 -0
- agno/eval/reliability.py +241 -62
- agno/eval/utils.py +120 -0
- agno/exceptions.py +143 -1
- agno/filters.py +354 -0
- agno/guardrails/__init__.py +6 -0
- agno/guardrails/base.py +19 -0
- agno/guardrails/openai.py +144 -0
- agno/guardrails/pii.py +94 -0
- agno/guardrails/prompt_injection.py +52 -0
- agno/hooks/__init__.py +3 -0
- agno/hooks/decorator.py +164 -0
- agno/integrations/discord/__init__.py +3 -0
- agno/integrations/discord/client.py +203 -0
- agno/knowledge/__init__.py +5 -1
- agno/{document → knowledge}/chunking/agentic.py +22 -14
- agno/{document → knowledge}/chunking/document.py +2 -2
- agno/{document → knowledge}/chunking/fixed.py +7 -6
- agno/knowledge/chunking/markdown.py +151 -0
- agno/{document → knowledge}/chunking/recursive.py +15 -3
- agno/knowledge/chunking/row.py +39 -0
- agno/knowledge/chunking/semantic.py +91 -0
- agno/knowledge/chunking/strategy.py +165 -0
- agno/knowledge/content.py +74 -0
- agno/knowledge/document/__init__.py +5 -0
- agno/{document → knowledge/document}/base.py +12 -2
- agno/knowledge/embedder/__init__.py +5 -0
- agno/knowledge/embedder/aws_bedrock.py +343 -0
- agno/knowledge/embedder/azure_openai.py +210 -0
- agno/{embedder → knowledge/embedder}/base.py +8 -0
- agno/knowledge/embedder/cohere.py +323 -0
- agno/knowledge/embedder/fastembed.py +62 -0
- agno/{embedder → knowledge/embedder}/fireworks.py +1 -1
- agno/knowledge/embedder/google.py +258 -0
- agno/knowledge/embedder/huggingface.py +94 -0
- agno/knowledge/embedder/jina.py +182 -0
- agno/knowledge/embedder/langdb.py +22 -0
- agno/knowledge/embedder/mistral.py +206 -0
- agno/knowledge/embedder/nebius.py +13 -0
- agno/knowledge/embedder/ollama.py +154 -0
- agno/knowledge/embedder/openai.py +195 -0
- agno/knowledge/embedder/sentence_transformer.py +63 -0
- agno/{embedder → knowledge/embedder}/together.py +1 -1
- agno/knowledge/embedder/vllm.py +262 -0
- agno/knowledge/embedder/voyageai.py +165 -0
- agno/knowledge/knowledge.py +3006 -0
- agno/knowledge/reader/__init__.py +7 -0
- agno/knowledge/reader/arxiv_reader.py +81 -0
- agno/knowledge/reader/base.py +95 -0
- agno/knowledge/reader/csv_reader.py +164 -0
- agno/knowledge/reader/docx_reader.py +82 -0
- agno/knowledge/reader/field_labeled_csv_reader.py +290 -0
- agno/knowledge/reader/firecrawl_reader.py +201 -0
- agno/knowledge/reader/json_reader.py +88 -0
- agno/knowledge/reader/markdown_reader.py +137 -0
- agno/knowledge/reader/pdf_reader.py +431 -0
- agno/knowledge/reader/pptx_reader.py +101 -0
- agno/knowledge/reader/reader_factory.py +313 -0
- agno/knowledge/reader/s3_reader.py +89 -0
- agno/knowledge/reader/tavily_reader.py +193 -0
- agno/knowledge/reader/text_reader.py +127 -0
- agno/knowledge/reader/web_search_reader.py +325 -0
- agno/knowledge/reader/website_reader.py +455 -0
- agno/knowledge/reader/wikipedia_reader.py +91 -0
- agno/knowledge/reader/youtube_reader.py +78 -0
- agno/knowledge/remote_content/remote_content.py +88 -0
- agno/knowledge/reranker/__init__.py +3 -0
- agno/{reranker → knowledge/reranker}/base.py +1 -1
- agno/{reranker → knowledge/reranker}/cohere.py +2 -2
- agno/knowledge/reranker/infinity.py +195 -0
- agno/knowledge/reranker/sentence_transformer.py +54 -0
- agno/knowledge/types.py +39 -0
- agno/knowledge/utils.py +234 -0
- agno/media.py +439 -95
- agno/memory/__init__.py +16 -3
- agno/memory/manager.py +1474 -123
- agno/memory/strategies/__init__.py +15 -0
- agno/memory/strategies/base.py +66 -0
- agno/memory/strategies/summarize.py +196 -0
- agno/memory/strategies/types.py +37 -0
- agno/models/aimlapi/__init__.py +5 -0
- agno/models/aimlapi/aimlapi.py +62 -0
- agno/models/anthropic/__init__.py +4 -0
- agno/models/anthropic/claude.py +960 -496
- agno/models/aws/__init__.py +15 -0
- agno/models/aws/bedrock.py +686 -451
- agno/models/aws/claude.py +190 -183
- agno/models/azure/__init__.py +18 -1
- agno/models/azure/ai_foundry.py +489 -0
- agno/models/azure/openai_chat.py +89 -40
- agno/models/base.py +2477 -550
- agno/models/cerebras/__init__.py +12 -0
- agno/models/cerebras/cerebras.py +565 -0
- agno/models/cerebras/cerebras_openai.py +131 -0
- agno/models/cohere/__init__.py +4 -0
- agno/models/cohere/chat.py +306 -492
- agno/models/cometapi/__init__.py +5 -0
- agno/models/cometapi/cometapi.py +74 -0
- agno/models/dashscope/__init__.py +5 -0
- agno/models/dashscope/dashscope.py +90 -0
- agno/models/deepinfra/__init__.py +5 -0
- agno/models/deepinfra/deepinfra.py +45 -0
- agno/models/deepseek/__init__.py +4 -0
- agno/models/deepseek/deepseek.py +110 -9
- agno/models/fireworks/__init__.py +4 -0
- agno/models/fireworks/fireworks.py +19 -22
- agno/models/google/__init__.py +3 -7
- agno/models/google/gemini.py +1717 -662
- agno/models/google/utils.py +22 -0
- agno/models/groq/__init__.py +4 -0
- agno/models/groq/groq.py +391 -666
- agno/models/huggingface/__init__.py +4 -0
- agno/models/huggingface/huggingface.py +266 -538
- agno/models/ibm/__init__.py +5 -0
- agno/models/ibm/watsonx.py +432 -0
- agno/models/internlm/__init__.py +3 -0
- agno/models/internlm/internlm.py +20 -3
- agno/models/langdb/__init__.py +1 -0
- agno/models/langdb/langdb.py +60 -0
- agno/models/litellm/__init__.py +14 -0
- agno/models/litellm/chat.py +503 -0
- agno/models/litellm/litellm_openai.py +42 -0
- agno/models/llama_cpp/__init__.py +5 -0
- agno/models/llama_cpp/llama_cpp.py +22 -0
- agno/models/lmstudio/__init__.py +5 -0
- agno/models/lmstudio/lmstudio.py +25 -0
- agno/models/message.py +361 -39
- agno/models/meta/__init__.py +12 -0
- agno/models/meta/llama.py +502 -0
- agno/models/meta/llama_openai.py +79 -0
- agno/models/metrics.py +120 -0
- agno/models/mistral/__init__.py +4 -0
- agno/models/mistral/mistral.py +293 -393
- agno/models/nebius/__init__.py +3 -0
- agno/models/nebius/nebius.py +53 -0
- agno/models/nexus/__init__.py +3 -0
- agno/models/nexus/nexus.py +22 -0
- agno/models/nvidia/__init__.py +4 -0
- agno/models/nvidia/nvidia.py +22 -3
- agno/models/ollama/__init__.py +4 -2
- agno/models/ollama/chat.py +257 -492
- agno/models/openai/__init__.py +7 -0
- agno/models/openai/chat.py +725 -770
- agno/models/openai/like.py +16 -2
- agno/models/openai/responses.py +1121 -0
- agno/models/openrouter/__init__.py +4 -0
- agno/models/openrouter/openrouter.py +62 -5
- agno/models/perplexity/__init__.py +5 -0
- agno/models/perplexity/perplexity.py +203 -0
- agno/models/portkey/__init__.py +3 -0
- agno/models/portkey/portkey.py +82 -0
- agno/models/requesty/__init__.py +5 -0
- agno/models/requesty/requesty.py +69 -0
- agno/models/response.py +177 -7
- agno/models/sambanova/__init__.py +4 -0
- agno/models/sambanova/sambanova.py +23 -4
- agno/models/siliconflow/__init__.py +5 -0
- agno/models/siliconflow/siliconflow.py +42 -0
- agno/models/together/__init__.py +4 -0
- agno/models/together/together.py +21 -164
- agno/models/utils.py +266 -0
- agno/models/vercel/__init__.py +3 -0
- agno/models/vercel/v0.py +43 -0
- agno/models/vertexai/__init__.py +0 -1
- agno/models/vertexai/claude.py +190 -0
- agno/models/vllm/__init__.py +3 -0
- agno/models/vllm/vllm.py +83 -0
- agno/models/xai/__init__.py +2 -0
- agno/models/xai/xai.py +111 -7
- agno/os/__init__.py +3 -0
- agno/os/app.py +1027 -0
- agno/os/auth.py +244 -0
- agno/os/config.py +126 -0
- agno/os/interfaces/__init__.py +1 -0
- agno/os/interfaces/a2a/__init__.py +3 -0
- agno/os/interfaces/a2a/a2a.py +42 -0
- agno/os/interfaces/a2a/router.py +249 -0
- agno/os/interfaces/a2a/utils.py +924 -0
- agno/os/interfaces/agui/__init__.py +3 -0
- agno/os/interfaces/agui/agui.py +47 -0
- agno/os/interfaces/agui/router.py +147 -0
- agno/os/interfaces/agui/utils.py +574 -0
- agno/os/interfaces/base.py +25 -0
- agno/os/interfaces/slack/__init__.py +3 -0
- agno/os/interfaces/slack/router.py +148 -0
- agno/os/interfaces/slack/security.py +30 -0
- agno/os/interfaces/slack/slack.py +47 -0
- agno/os/interfaces/whatsapp/__init__.py +3 -0
- agno/os/interfaces/whatsapp/router.py +210 -0
- agno/os/interfaces/whatsapp/security.py +55 -0
- agno/os/interfaces/whatsapp/whatsapp.py +36 -0
- agno/os/mcp.py +293 -0
- agno/os/middleware/__init__.py +9 -0
- agno/os/middleware/jwt.py +797 -0
- agno/os/router.py +258 -0
- agno/os/routers/__init__.py +3 -0
- agno/os/routers/agents/__init__.py +3 -0
- agno/os/routers/agents/router.py +599 -0
- agno/os/routers/agents/schema.py +261 -0
- agno/os/routers/evals/__init__.py +3 -0
- agno/os/routers/evals/evals.py +450 -0
- agno/os/routers/evals/schemas.py +174 -0
- agno/os/routers/evals/utils.py +231 -0
- agno/os/routers/health.py +31 -0
- agno/os/routers/home.py +52 -0
- agno/os/routers/knowledge/__init__.py +3 -0
- agno/os/routers/knowledge/knowledge.py +1008 -0
- agno/os/routers/knowledge/schemas.py +178 -0
- agno/os/routers/memory/__init__.py +3 -0
- agno/os/routers/memory/memory.py +661 -0
- agno/os/routers/memory/schemas.py +88 -0
- agno/os/routers/metrics/__init__.py +3 -0
- agno/os/routers/metrics/metrics.py +190 -0
- agno/os/routers/metrics/schemas.py +47 -0
- agno/os/routers/session/__init__.py +3 -0
- agno/os/routers/session/session.py +997 -0
- agno/os/routers/teams/__init__.py +3 -0
- agno/os/routers/teams/router.py +512 -0
- agno/os/routers/teams/schema.py +257 -0
- agno/os/routers/traces/__init__.py +3 -0
- agno/os/routers/traces/schemas.py +414 -0
- agno/os/routers/traces/traces.py +499 -0
- agno/os/routers/workflows/__init__.py +3 -0
- agno/os/routers/workflows/router.py +624 -0
- agno/os/routers/workflows/schema.py +75 -0
- agno/os/schema.py +534 -0
- agno/os/scopes.py +469 -0
- agno/{playground → os}/settings.py +7 -15
- agno/os/utils.py +973 -0
- agno/reasoning/anthropic.py +80 -0
- agno/reasoning/azure_ai_foundry.py +67 -0
- agno/reasoning/deepseek.py +63 -0
- agno/reasoning/default.py +97 -0
- agno/reasoning/gemini.py +73 -0
- agno/reasoning/groq.py +71 -0
- agno/reasoning/helpers.py +24 -1
- agno/reasoning/ollama.py +67 -0
- agno/reasoning/openai.py +86 -0
- agno/reasoning/step.py +2 -1
- agno/reasoning/vertexai.py +76 -0
- agno/run/__init__.py +6 -0
- agno/run/agent.py +822 -0
- agno/run/base.py +247 -0
- agno/run/cancel.py +81 -0
- agno/run/requirement.py +181 -0
- agno/run/team.py +767 -0
- agno/run/workflow.py +708 -0
- agno/session/__init__.py +10 -0
- agno/session/agent.py +260 -0
- agno/session/summary.py +265 -0
- agno/session/team.py +342 -0
- agno/session/workflow.py +501 -0
- agno/table.py +10 -0
- agno/team/__init__.py +37 -0
- agno/team/team.py +9536 -0
- agno/tools/__init__.py +7 -0
- agno/tools/agentql.py +120 -0
- agno/tools/airflow.py +22 -12
- agno/tools/api.py +122 -0
- agno/tools/apify.py +276 -83
- agno/tools/{arxiv_toolkit.py → arxiv.py} +20 -12
- agno/tools/aws_lambda.py +28 -7
- agno/tools/aws_ses.py +66 -0
- agno/tools/baidusearch.py +11 -4
- agno/tools/bitbucket.py +292 -0
- agno/tools/brandfetch.py +213 -0
- agno/tools/bravesearch.py +106 -0
- agno/tools/brightdata.py +367 -0
- agno/tools/browserbase.py +209 -0
- agno/tools/calcom.py +32 -23
- agno/tools/calculator.py +24 -37
- agno/tools/cartesia.py +187 -0
- agno/tools/{clickup_tool.py → clickup.py} +17 -28
- agno/tools/confluence.py +91 -26
- agno/tools/crawl4ai.py +139 -43
- agno/tools/csv_toolkit.py +28 -22
- agno/tools/dalle.py +36 -22
- agno/tools/daytona.py +475 -0
- agno/tools/decorator.py +169 -14
- agno/tools/desi_vocal.py +23 -11
- agno/tools/discord.py +32 -29
- agno/tools/docker.py +716 -0
- agno/tools/duckdb.py +76 -81
- agno/tools/duckduckgo.py +43 -40
- agno/tools/e2b.py +703 -0
- agno/tools/eleven_labs.py +65 -54
- agno/tools/email.py +13 -5
- agno/tools/evm.py +129 -0
- agno/tools/exa.py +324 -42
- agno/tools/fal.py +39 -35
- agno/tools/file.py +196 -30
- agno/tools/file_generation.py +356 -0
- agno/tools/financial_datasets.py +288 -0
- agno/tools/firecrawl.py +108 -33
- agno/tools/function.py +960 -122
- agno/tools/giphy.py +34 -12
- agno/tools/github.py +1294 -97
- agno/tools/gmail.py +922 -0
- agno/tools/google_bigquery.py +117 -0
- agno/tools/google_drive.py +271 -0
- agno/tools/google_maps.py +253 -0
- agno/tools/googlecalendar.py +607 -107
- agno/tools/googlesheets.py +377 -0
- agno/tools/hackernews.py +20 -12
- agno/tools/jina.py +24 -14
- agno/tools/jira.py +48 -19
- agno/tools/knowledge.py +218 -0
- agno/tools/linear.py +82 -43
- agno/tools/linkup.py +58 -0
- agno/tools/local_file_system.py +15 -7
- agno/tools/lumalab.py +41 -26
- agno/tools/mcp/__init__.py +10 -0
- agno/tools/mcp/mcp.py +331 -0
- agno/tools/mcp/multi_mcp.py +347 -0
- agno/tools/mcp/params.py +24 -0
- agno/tools/mcp_toolbox.py +284 -0
- agno/tools/mem0.py +193 -0
- agno/tools/memory.py +419 -0
- agno/tools/mlx_transcribe.py +11 -9
- agno/tools/models/azure_openai.py +190 -0
- agno/tools/models/gemini.py +203 -0
- agno/tools/models/groq.py +158 -0
- agno/tools/models/morph.py +186 -0
- agno/tools/models/nebius.py +124 -0
- agno/tools/models_labs.py +163 -82
- agno/tools/moviepy_video.py +18 -13
- agno/tools/nano_banana.py +151 -0
- agno/tools/neo4j.py +134 -0
- agno/tools/newspaper.py +15 -4
- agno/tools/newspaper4k.py +19 -6
- agno/tools/notion.py +204 -0
- agno/tools/openai.py +181 -17
- agno/tools/openbb.py +27 -20
- agno/tools/opencv.py +321 -0
- agno/tools/openweather.py +233 -0
- agno/tools/oxylabs.py +385 -0
- agno/tools/pandas.py +25 -15
- agno/tools/parallel.py +314 -0
- agno/tools/postgres.py +238 -185
- agno/tools/pubmed.py +125 -13
- agno/tools/python.py +48 -35
- agno/tools/reasoning.py +283 -0
- agno/tools/reddit.py +207 -29
- agno/tools/redshift.py +406 -0
- agno/tools/replicate.py +69 -26
- agno/tools/resend.py +11 -6
- agno/tools/scrapegraph.py +179 -19
- agno/tools/searxng.py +23 -31
- agno/tools/serpapi.py +15 -10
- agno/tools/serper.py +255 -0
- agno/tools/shell.py +23 -12
- agno/tools/shopify.py +1519 -0
- agno/tools/slack.py +56 -14
- agno/tools/sleep.py +8 -6
- agno/tools/spider.py +35 -11
- agno/tools/spotify.py +919 -0
- agno/tools/sql.py +34 -19
- agno/tools/tavily.py +158 -8
- agno/tools/telegram.py +18 -8
- agno/tools/todoist.py +218 -0
- agno/tools/toolkit.py +134 -9
- agno/tools/trafilatura.py +388 -0
- agno/tools/trello.py +25 -28
- agno/tools/twilio.py +18 -9
- agno/tools/user_control_flow.py +78 -0
- agno/tools/valyu.py +228 -0
- agno/tools/visualization.py +467 -0
- agno/tools/webbrowser.py +28 -0
- agno/tools/webex.py +76 -0
- agno/tools/website.py +23 -19
- agno/tools/webtools.py +45 -0
- agno/tools/whatsapp.py +286 -0
- agno/tools/wikipedia.py +28 -19
- agno/tools/workflow.py +285 -0
- agno/tools/{twitter.py → x.py} +142 -46
- agno/tools/yfinance.py +41 -39
- agno/tools/youtube.py +34 -17
- agno/tools/zendesk.py +15 -5
- agno/tools/zep.py +454 -0
- agno/tools/zoom.py +86 -37
- agno/tracing/__init__.py +12 -0
- agno/tracing/exporter.py +157 -0
- agno/tracing/schemas.py +276 -0
- agno/tracing/setup.py +111 -0
- agno/utils/agent.py +938 -0
- agno/utils/audio.py +37 -1
- agno/utils/certs.py +27 -0
- agno/utils/code_execution.py +11 -0
- agno/utils/common.py +103 -20
- agno/utils/cryptography.py +22 -0
- agno/utils/dttm.py +33 -0
- agno/utils/events.py +700 -0
- agno/utils/functions.py +107 -37
- agno/utils/gemini.py +426 -0
- agno/utils/hooks.py +171 -0
- agno/utils/http.py +185 -0
- agno/utils/json_schema.py +159 -37
- agno/utils/knowledge.py +36 -0
- agno/utils/location.py +19 -0
- agno/utils/log.py +221 -8
- agno/utils/mcp.py +214 -0
- agno/utils/media.py +335 -14
- agno/utils/merge_dict.py +22 -1
- agno/utils/message.py +77 -2
- agno/utils/models/ai_foundry.py +50 -0
- agno/utils/models/claude.py +373 -0
- agno/utils/models/cohere.py +94 -0
- agno/utils/models/llama.py +85 -0
- agno/utils/models/mistral.py +100 -0
- agno/utils/models/openai_responses.py +140 -0
- agno/utils/models/schema_utils.py +153 -0
- agno/utils/models/watsonx.py +41 -0
- agno/utils/openai.py +257 -0
- agno/utils/pickle.py +1 -1
- agno/utils/pprint.py +124 -8
- agno/utils/print_response/agent.py +930 -0
- agno/utils/print_response/team.py +1914 -0
- agno/utils/print_response/workflow.py +1668 -0
- agno/utils/prompts.py +111 -0
- agno/utils/reasoning.py +108 -0
- agno/utils/response.py +163 -0
- agno/utils/serialize.py +32 -0
- agno/utils/shell.py +4 -4
- agno/utils/streamlit.py +487 -0
- agno/utils/string.py +204 -51
- agno/utils/team.py +139 -0
- agno/utils/timer.py +9 -2
- agno/utils/tokens.py +657 -0
- agno/utils/tools.py +19 -1
- agno/utils/whatsapp.py +305 -0
- agno/utils/yaml_io.py +3 -3
- agno/vectordb/__init__.py +2 -0
- agno/vectordb/base.py +87 -9
- agno/vectordb/cassandra/__init__.py +5 -1
- agno/vectordb/cassandra/cassandra.py +383 -27
- agno/vectordb/chroma/__init__.py +4 -0
- agno/vectordb/chroma/chromadb.py +748 -83
- agno/vectordb/clickhouse/__init__.py +7 -1
- agno/vectordb/clickhouse/clickhousedb.py +554 -53
- agno/vectordb/couchbase/__init__.py +3 -0
- agno/vectordb/couchbase/couchbase.py +1446 -0
- agno/vectordb/lancedb/__init__.py +5 -0
- agno/vectordb/lancedb/lance_db.py +730 -98
- agno/vectordb/langchaindb/__init__.py +5 -0
- agno/vectordb/langchaindb/langchaindb.py +163 -0
- agno/vectordb/lightrag/__init__.py +5 -0
- agno/vectordb/lightrag/lightrag.py +388 -0
- agno/vectordb/llamaindex/__init__.py +3 -0
- agno/vectordb/llamaindex/llamaindexdb.py +166 -0
- agno/vectordb/milvus/__init__.py +3 -0
- agno/vectordb/milvus/milvus.py +966 -78
- agno/vectordb/mongodb/__init__.py +9 -1
- agno/vectordb/mongodb/mongodb.py +1175 -172
- agno/vectordb/pgvector/__init__.py +8 -0
- agno/vectordb/pgvector/pgvector.py +599 -115
- agno/vectordb/pineconedb/__init__.py +5 -1
- agno/vectordb/pineconedb/pineconedb.py +406 -43
- agno/vectordb/qdrant/__init__.py +4 -0
- agno/vectordb/qdrant/qdrant.py +914 -61
- agno/vectordb/redis/__init__.py +9 -0
- agno/vectordb/redis/redisdb.py +682 -0
- agno/vectordb/singlestore/__init__.py +8 -1
- agno/vectordb/singlestore/singlestore.py +771 -0
- agno/vectordb/surrealdb/__init__.py +3 -0
- agno/vectordb/surrealdb/surrealdb.py +663 -0
- agno/vectordb/upstashdb/__init__.py +5 -0
- agno/vectordb/upstashdb/upstashdb.py +718 -0
- agno/vectordb/weaviate/__init__.py +8 -0
- agno/vectordb/weaviate/index.py +15 -0
- agno/vectordb/weaviate/weaviate.py +1009 -0
- agno/workflow/__init__.py +23 -1
- agno/workflow/agent.py +299 -0
- agno/workflow/condition.py +759 -0
- agno/workflow/loop.py +756 -0
- agno/workflow/parallel.py +853 -0
- agno/workflow/router.py +723 -0
- agno/workflow/step.py +1564 -0
- agno/workflow/steps.py +613 -0
- agno/workflow/types.py +556 -0
- agno/workflow/workflow.py +4327 -514
- agno-2.3.13.dist-info/METADATA +639 -0
- agno-2.3.13.dist-info/RECORD +613 -0
- {agno-0.1.2.dist-info → agno-2.3.13.dist-info}/WHEEL +1 -1
- agno-2.3.13.dist-info/licenses/LICENSE +201 -0
- agno/api/playground.py +0 -91
- agno/api/schemas/playground.py +0 -22
- agno/api/schemas/user.py +0 -22
- agno/api/schemas/workspace.py +0 -46
- agno/api/user.py +0 -160
- agno/api/workspace.py +0 -151
- agno/cli/auth_server.py +0 -118
- agno/cli/config.py +0 -275
- agno/cli/console.py +0 -88
- agno/cli/credentials.py +0 -23
- agno/cli/entrypoint.py +0 -571
- agno/cli/operator.py +0 -355
- agno/cli/settings.py +0 -85
- agno/cli/ws/ws_cli.py +0 -817
- agno/constants.py +0 -13
- agno/document/__init__.py +0 -1
- agno/document/chunking/semantic.py +0 -47
- agno/document/chunking/strategy.py +0 -31
- agno/document/reader/__init__.py +0 -1
- agno/document/reader/arxiv_reader.py +0 -41
- agno/document/reader/base.py +0 -22
- agno/document/reader/csv_reader.py +0 -84
- agno/document/reader/docx_reader.py +0 -46
- agno/document/reader/firecrawl_reader.py +0 -99
- agno/document/reader/json_reader.py +0 -43
- agno/document/reader/pdf_reader.py +0 -219
- agno/document/reader/s3/pdf_reader.py +0 -46
- agno/document/reader/s3/text_reader.py +0 -51
- agno/document/reader/text_reader.py +0 -41
- agno/document/reader/website_reader.py +0 -175
- agno/document/reader/youtube_reader.py +0 -50
- agno/embedder/__init__.py +0 -1
- agno/embedder/azure_openai.py +0 -86
- agno/embedder/cohere.py +0 -72
- agno/embedder/fastembed.py +0 -37
- agno/embedder/google.py +0 -73
- agno/embedder/huggingface.py +0 -54
- agno/embedder/mistral.py +0 -80
- agno/embedder/ollama.py +0 -57
- agno/embedder/openai.py +0 -74
- agno/embedder/sentence_transformer.py +0 -38
- agno/embedder/voyageai.py +0 -64
- agno/eval/perf.py +0 -201
- agno/file/__init__.py +0 -1
- agno/file/file.py +0 -16
- agno/file/local/csv.py +0 -32
- agno/file/local/txt.py +0 -19
- agno/infra/app.py +0 -240
- agno/infra/base.py +0 -144
- agno/infra/context.py +0 -20
- agno/infra/db_app.py +0 -52
- agno/infra/resource.py +0 -205
- agno/infra/resources.py +0 -55
- agno/knowledge/agent.py +0 -230
- agno/knowledge/arxiv.py +0 -22
- agno/knowledge/combined.py +0 -22
- agno/knowledge/csv.py +0 -28
- agno/knowledge/csv_url.py +0 -19
- agno/knowledge/document.py +0 -20
- agno/knowledge/docx.py +0 -30
- agno/knowledge/json.py +0 -28
- agno/knowledge/langchain.py +0 -71
- agno/knowledge/llamaindex.py +0 -66
- agno/knowledge/pdf.py +0 -28
- agno/knowledge/pdf_url.py +0 -26
- agno/knowledge/s3/base.py +0 -60
- agno/knowledge/s3/pdf.py +0 -21
- agno/knowledge/s3/text.py +0 -23
- agno/knowledge/text.py +0 -30
- agno/knowledge/website.py +0 -88
- agno/knowledge/wikipedia.py +0 -31
- agno/knowledge/youtube.py +0 -22
- agno/memory/agent.py +0 -392
- agno/memory/classifier.py +0 -104
- agno/memory/db/__init__.py +0 -1
- agno/memory/db/base.py +0 -42
- agno/memory/db/mongodb.py +0 -189
- agno/memory/db/postgres.py +0 -203
- agno/memory/db/sqlite.py +0 -193
- agno/memory/memory.py +0 -15
- agno/memory/row.py +0 -36
- agno/memory/summarizer.py +0 -192
- agno/memory/summary.py +0 -19
- agno/memory/workflow.py +0 -38
- agno/models/google/gemini_openai.py +0 -26
- agno/models/ollama/hermes.py +0 -221
- agno/models/ollama/tools.py +0 -362
- agno/models/vertexai/gemini.py +0 -595
- agno/playground/__init__.py +0 -3
- agno/playground/async_router.py +0 -421
- agno/playground/deploy.py +0 -249
- agno/playground/operator.py +0 -92
- agno/playground/playground.py +0 -91
- agno/playground/schemas.py +0 -76
- agno/playground/serve.py +0 -55
- agno/playground/sync_router.py +0 -405
- agno/reasoning/agent.py +0 -68
- agno/run/response.py +0 -112
- agno/storage/agent/__init__.py +0 -0
- agno/storage/agent/base.py +0 -38
- agno/storage/agent/dynamodb.py +0 -350
- agno/storage/agent/json.py +0 -92
- agno/storage/agent/mongodb.py +0 -228
- agno/storage/agent/postgres.py +0 -367
- agno/storage/agent/session.py +0 -79
- agno/storage/agent/singlestore.py +0 -303
- agno/storage/agent/sqlite.py +0 -357
- agno/storage/agent/yaml.py +0 -93
- agno/storage/workflow/__init__.py +0 -0
- agno/storage/workflow/base.py +0 -40
- agno/storage/workflow/mongodb.py +0 -233
- agno/storage/workflow/postgres.py +0 -366
- agno/storage/workflow/session.py +0 -60
- agno/storage/workflow/sqlite.py +0 -359
- agno/tools/googlesearch.py +0 -88
- agno/utils/defaults.py +0 -57
- agno/utils/filesystem.py +0 -39
- agno/utils/git.py +0 -52
- agno/utils/json_io.py +0 -30
- agno/utils/load_env.py +0 -19
- agno/utils/py_io.py +0 -19
- agno/utils/pyproject.py +0 -18
- agno/utils/resource_filter.py +0 -31
- agno/vectordb/singlestore/s2vectordb.py +0 -390
- agno/vectordb/singlestore/s2vectordb2.py +0 -355
- agno/workspace/__init__.py +0 -0
- agno/workspace/config.py +0 -325
- agno/workspace/enums.py +0 -6
- agno/workspace/helpers.py +0 -48
- agno/workspace/operator.py +0 -758
- agno/workspace/settings.py +0 -63
- agno-0.1.2.dist-info/LICENSE +0 -375
- agno-0.1.2.dist-info/METADATA +0 -502
- agno-0.1.2.dist-info/RECORD +0 -352
- agno-0.1.2.dist-info/entry_points.txt +0 -3
- /agno/{cli → db/migrations}/__init__.py +0 -0
- /agno/{cli/ws → db/migrations/versions}/__init__.py +0 -0
- /agno/{document/chunking/__init__.py → db/schemas/metrics.py} +0 -0
- /agno/{document/reader/s3 → integrations}/__init__.py +0 -0
- /agno/{file/local → knowledge/chunking}/__init__.py +0 -0
- /agno/{infra → knowledge/remote_content}/__init__.py +0 -0
- /agno/{knowledge/s3 → tools/models}/__init__.py +0 -0
- /agno/{reranker → utils/models}/__init__.py +0 -0
- /agno/{storage → utils/print_response}/__init__.py +0 -0
- {agno-0.1.2.dist-info → agno-2.3.13.dist-info}/top_level.txt +0 -0
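The renames above trace the 2.x restructure: `document/`, `embedder/`, and `reranker/` now live under `knowledge/`, `playground/` gives way to `os/`, and the old `storage/` backends are superseded by the new `db/` package. Below is a minimal sketch of what the moves mean for imports; the 2.3.13 paths appear verbatim in the mongodb.py diff that follows, while the 0.1.2 paths are inferred from the left-hand side of the renames.

```python
# agno 0.1.2 layout (inferred from the left-hand side of the renames above):
#   agno/document/base.py, agno/embedder/base.py, agno/reranker/base.py

# agno 2.3.13 equivalents -- both imports appear verbatim in the
# mongodb.py diff below:
from agno.knowledge.document import Document
from agno.knowledge.embedder import Embedder
```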
agno/vectordb/mongodb/mongodb.py
CHANGED
|
@@ -1,12 +1,16 @@
|
|
|
1
|
+
import asyncio
|
|
1
2
|
import time
|
|
2
|
-
from typing import Any, Dict, List, Optional
|
|
3
|
+
from typing import Any, Dict, List, Optional, Union
|
|
3
4
|
|
|
4
|
-
from
|
|
5
|
-
|
|
6
|
-
from agno.
|
|
7
|
-
from agno.
|
|
5
|
+
from bson import ObjectId
|
|
6
|
+
|
|
7
|
+
from agno.filters import FilterExpr
|
|
8
|
+
from agno.knowledge.document import Document
|
|
9
|
+
from agno.knowledge.embedder import Embedder
|
|
10
|
+
from agno.utils.log import log_debug, log_info, log_warning, logger
|
|
8
11
|
from agno.vectordb.base import VectorDb
|
|
9
12
|
from agno.vectordb.distance import Distance
|
|
13
|
+
from agno.vectordb.search import SearchType
|
|
10
14
|
|
|
11
15
|
try:
|
|
12
16
|
from hashlib import md5
|
|
@@ -14,7 +18,7 @@ try:
|
|
|
14
18
|
except ImportError:
|
|
15
19
|
raise ImportError("`hashlib` not installed. Please install using `pip install hashlib`")
|
|
16
20
|
try:
|
|
17
|
-
from pymongo import MongoClient, errors
|
|
21
|
+
from pymongo import AsyncMongoClient, MongoClient, errors
|
|
18
22
|
from pymongo.collection import Collection
|
|
19
23
|
from pymongo.operations import SearchIndexModel
|
|
20
24
|
|
|
@@ -22,7 +26,7 @@ except ImportError:
|
|
|
22
26
|
raise ImportError("`pymongo` not installed. Please install using `pip install pymongo`")
|
|
23
27
|
|
|
24
28
|
|
|
25
|
-
class
|
|
29
|
+
class MongoDb(VectorDb):
|
|
26
30
|
"""
|
|
27
31
|
MongoDB Vector Database implementation with elegant handling of Atlas Search index creation.
|
|
28
32
|
"""
|
|
@@ -30,215 +34,536 @@ class MongoDBVector(VectorDb):
|
|
|
30
34
|
def __init__(
|
|
31
35
|
self,
|
|
32
36
|
collection_name: str,
|
|
37
|
+
name: Optional[str] = None,
|
|
38
|
+
description: Optional[str] = None,
|
|
39
|
+
id: Optional[str] = None,
|
|
33
40
|
db_url: Optional[str] = "mongodb://localhost:27017/",
|
|
34
|
-
database: str = "
|
|
35
|
-
embedder: Embedder =
|
|
41
|
+
database: str = "agno",
|
|
42
|
+
embedder: Optional[Embedder] = None,
|
|
36
43
|
distance_metric: str = Distance.cosine,
|
|
37
44
|
overwrite: bool = False,
|
|
38
|
-
|
|
39
|
-
|
|
45
|
+
wait_until_index_ready_in_seconds: Optional[float] = 3,
|
|
46
|
+
wait_after_insert_in_seconds: Optional[float] = 3,
|
|
47
|
+
max_pool_size: int = 100,
|
|
48
|
+
retry_writes: bool = True,
|
|
49
|
+
client: Optional[MongoClient] = None,
|
|
50
|
+
search_index_name: Optional[str] = "vector_index_1",
|
|
51
|
+
cosmos_compatibility: Optional[bool] = False,
|
|
52
|
+
search_type: SearchType = SearchType.vector,
|
|
53
|
+
hybrid_vector_weight: float = 0.5,
|
|
54
|
+
hybrid_keyword_weight: float = 0.5,
|
|
55
|
+
hybrid_rank_constant: int = 60,
|
|
40
56
|
**kwargs,
|
|
41
57
|
):
|
|
42
58
|
"""
|
|
43
|
-
Initialize the
|
|
59
|
+
Initialize the MongoDb with MongoDB collection details.
|
|
44
60
|
|
|
45
61
|
Args:
|
|
46
62
|
collection_name (str): Name of the MongoDB collection.
|
|
63
|
+
name (Optional[str]): Name of the vector database.
|
|
64
|
+
description (Optional[str]): Description of the vector database.
|
|
47
65
|
db_url (Optional[str]): MongoDB connection string.
|
|
48
66
|
database (str): Database name.
|
|
49
67
|
embedder (Embedder): Embedder instance for generating embeddings.
|
|
50
68
|
distance_metric (str): Distance metric for similarity.
|
|
51
69
|
overwrite (bool): Overwrite existing collection and index if True.
|
|
52
|
-
|
|
70
|
+
wait_until_index_ready_in_seconds (float): Time in seconds to wait until the index is ready.
|
|
71
|
+
wait_after_insert_in_seconds (float): Time in seconds to wait after inserting documents.
|
|
72
|
+
max_pool_size (int): Maximum number of connections in the connection pool
|
|
73
|
+
retry_writes (bool): Whether to retry write operations
|
|
74
|
+
client (Optional[MongoClient]): An existing MongoClient instance.
|
|
75
|
+
search_index_name (str): Name of the search index (default: "vector_index_1")
|
|
76
|
+
cosmos_compatibility (bool): Whether to use Azure Cosmos DB Mongovcore compatibility mode.
|
|
77
|
+
search_type: The search type to use when searching for documents.
|
|
78
|
+
hybrid_vector_weight (float): Default weight for vector search results in hybrid search.
|
|
79
|
+
hybrid_keyword_weight (float): Default weight for keyword search results in hybrid search.
|
|
80
|
+
hybrid_rank_constant (int): Default rank constant (k) for Reciprocal Rank Fusion in hybrid search. This constant is added to the rank before taking the reciprocal, helping to smooth scores. A common value is 60.
|
|
53
81
|
**kwargs: Additional arguments for MongoClient.
|
|
54
82
|
"""
|
|
83
|
+
# Validate required parameters
|
|
55
84
|
if not collection_name:
|
|
56
85
|
raise ValueError("Collection name must not be empty.")
|
|
86
|
+
if not database:
|
|
87
|
+
raise ValueError("Database name must not be empty.")
|
|
88
|
+
|
|
89
|
+
# Dynamic ID generation based on unique identifiers
|
|
90
|
+
if id is None:
|
|
91
|
+
from agno.utils.string import generate_id
|
|
92
|
+
|
|
93
|
+
connection_identifier = db_url or "mongodb://localhost:27017/"
|
|
94
|
+
seed = f"{connection_identifier}#{database}#{collection_name}"
|
|
95
|
+
id = generate_id(seed)
|
|
96
|
+
|
|
57
97
|
self.collection_name = collection_name
|
|
98
|
+
# Initialize base class with name, description, and generated ID
|
|
99
|
+
super().__init__(id=id, name=name, description=description)
|
|
100
|
+
|
|
58
101
|
self.database = database
|
|
102
|
+
self.search_index_name = search_index_name
|
|
103
|
+
self.cosmos_compatibility = cosmos_compatibility
|
|
104
|
+
self.search_type = search_type
|
|
105
|
+
self.hybrid_vector_weight = hybrid_vector_weight
|
|
106
|
+
self.hybrid_keyword_weight = hybrid_keyword_weight
|
|
107
|
+
self.hybrid_rank_constant = hybrid_rank_constant
|
|
108
|
+
|
|
109
|
+
if embedder is None:
|
|
110
|
+
from agno.knowledge.embedder.openai import OpenAIEmbedder
|
|
111
|
+
|
|
112
|
+
embedder = OpenAIEmbedder()
|
|
113
|
+
log_info("Embedder not provided, using OpenAIEmbedder as default.")
|
|
59
114
|
self.embedder = embedder
|
|
115
|
+
|
|
60
116
|
self.distance_metric = distance_metric
|
|
61
117
|
self.connection_string = db_url
|
|
62
118
|
self.overwrite = overwrite
|
|
63
|
-
self.
|
|
64
|
-
self.
|
|
119
|
+
self.wait_until_index_ready_in_seconds = wait_until_index_ready_in_seconds
|
|
120
|
+
self.wait_after_insert_in_seconds = wait_after_insert_in_seconds
|
|
65
121
|
self.kwargs = kwargs
|
|
122
|
+
self.kwargs.update(
|
|
123
|
+
{
|
|
124
|
+
"maxPoolSize": max_pool_size,
|
|
125
|
+
"retryWrites": retry_writes,
|
|
126
|
+
"serverSelectionTimeoutMS": 5000, # 5 second timeout
|
|
127
|
+
}
|
|
128
|
+
)
|
|
129
|
+
|
|
130
|
+
self._client = client
|
|
131
|
+
self._db = None
|
|
132
|
+
self._collection: Optional[Collection] = None
|
|
66
133
|
|
|
67
|
-
self.
|
|
68
|
-
self.
|
|
69
|
-
self.
|
|
134
|
+
self._async_client: Optional[AsyncMongoClient] = None
|
|
135
|
+
self._async_db = None
|
|
136
|
+
self._async_collection: Optional[Collection] = None
|
|
70
137
|
|
|
71
138
|
def _get_client(self) -> MongoClient:
|
|
72
139
|
"""Create or retrieve the MongoDB client."""
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
140
|
+
if self._client is None:
|
|
141
|
+
if self.cosmos_compatibility:
|
|
142
|
+
try:
|
|
143
|
+
log_debug("Creating MongoDB Client for Azure Cosmos DB")
|
|
144
|
+
# Cosmos DB specific settings
|
|
145
|
+
cosmos_kwargs = {
|
|
146
|
+
"retryWrites": False,
|
|
147
|
+
"ssl": True,
|
|
148
|
+
"tlsAllowInvalidCertificates": True,
|
|
149
|
+
"maxPoolSize": 100,
|
|
150
|
+
"maxIdleTimeMS": 30000,
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
# Suppress UserWarning about CosmosDB
|
|
154
|
+
import warnings
|
|
155
|
+
|
|
156
|
+
with warnings.catch_warnings():
|
|
157
|
+
warnings.filterwarnings(
|
|
158
|
+
"ignore", category=UserWarning, message=".*connected to a CosmosDB cluster.*"
|
|
159
|
+
)
|
|
160
|
+
self._client = MongoClient(self.connection_string, **cosmos_kwargs) # type: ignore
|
|
161
|
+
|
|
162
|
+
self._client.admin.command("ping")
|
|
163
|
+
|
|
164
|
+
log_info("Connected to Azure Cosmos DB successfully.")
|
|
165
|
+
self._db = self._client.get_database(self.database) # type: ignore
|
|
166
|
+
log_info(f"Using database: {self.database}")
|
|
167
|
+
|
|
168
|
+
except errors.ConnectionFailure as e:
|
|
169
|
+
raise ConnectionError(f"Failed to connect to Azure Cosmos DB: {e}")
|
|
170
|
+
except Exception as e:
|
|
171
|
+
logger.error(f"An error occurred while connecting to Azure Cosmos DB: {e}")
|
|
172
|
+
raise
|
|
173
|
+
else:
|
|
174
|
+
try:
|
|
175
|
+
log_debug("Creating MongoDB Client")
|
|
176
|
+
self._client = MongoClient(self.connection_string, **self.kwargs)
|
|
177
|
+
# Trigger a connection to verify the client
|
|
178
|
+
self._client.admin.command("ping")
|
|
179
|
+
log_info("Connected to MongoDB successfully.")
|
|
180
|
+
self._db = self._client[self.database] # type: ignore
|
|
181
|
+
except errors.ConnectionFailure as e:
|
|
182
|
+
logger.error(f"Failed to connect to MongoDB: {e}")
|
|
183
|
+
raise ConnectionError(f"Failed to connect to MongoDB: {e}")
|
|
184
|
+
except Exception as e:
|
|
185
|
+
logger.error(f"An error occurred while connecting to MongoDB: {e}")
|
|
186
|
+
raise
|
|
187
|
+
return self._client
|
|
188
|
+
|
|
189
|
+
async def _get_async_client(self) -> AsyncMongoClient:
|
|
190
|
+
"""Create or retrieve the async MongoDB client."""
|
|
191
|
+
if self._async_client is None:
|
|
192
|
+
log_debug("Creating Async MongoDB Client")
|
|
193
|
+
self._async_client = AsyncMongoClient(
|
|
194
|
+
self.connection_string,
|
|
195
|
+
maxPoolSize=self.kwargs.get("maxPoolSize", 100),
|
|
196
|
+
retryWrites=self.kwargs.get("retryWrites", True),
|
|
197
|
+
serverSelectionTimeoutMS=5000,
|
|
198
|
+
)
|
|
199
|
+
# Verify connection
|
|
200
|
+
try:
|
|
201
|
+
await self._async_client.admin.command("ping")
|
|
202
|
+
log_info("Connected to MongoDB asynchronously.")
|
|
203
|
+
except Exception as e:
|
|
204
|
+
logger.error(f"Failed to connect to MongoDB asynchronously: {e}")
|
|
205
|
+
raise
|
|
206
|
+
return self._async_client
|
|
86
207
|
|
|
87
208
|
def _get_or_create_collection(self) -> Collection:
|
|
88
209
|
"""Get or create the MongoDB collection, handling Atlas Search index creation."""
|
|
89
|
-
|
|
90
|
-
self._collection = self._db[self.collection_name]
|
|
210
|
+
self._collection = self._db[self.collection_name] # type: ignore
|
|
91
211
|
|
|
92
212
|
if not self.collection_exists():
|
|
93
|
-
|
|
94
|
-
self._db.create_collection(self.collection_name)
|
|
213
|
+
log_info(f"Creating collection '{self.collection_name}'.")
|
|
214
|
+
self._db.create_collection(self.collection_name) # type: ignore
|
|
95
215
|
self._create_search_index()
|
|
96
216
|
else:
|
|
97
|
-
|
|
217
|
+
log_info(f"Using existing collection '{self.collection_name}'.")
|
|
98
218
|
# check if index exists
|
|
99
|
-
|
|
219
|
+
log_info(f"Checking if search index '{self.collection_name}' exists.")
|
|
100
220
|
if not self._search_index_exists():
|
|
101
|
-
|
|
221
|
+
log_info(f"Search index '{self.collection_name}' does not exist. Creating it.")
|
|
102
222
|
self._create_search_index()
|
|
103
|
-
if self.
|
|
223
|
+
if self.wait_until_index_ready_in_seconds and not self.cosmos_compatibility:
|
|
104
224
|
self._wait_for_index_ready()
|
|
225
|
+
else:
|
|
226
|
+
log_info("Using existing vector search index.")
|
|
227
|
+
return self._collection # type: ignore
|
|
228
|
+
|
|
229
|
+
def _get_collection(self) -> Collection:
|
|
230
|
+
"""Get or create the MongoDB collection."""
|
|
231
|
+
if self._collection is None:
|
|
232
|
+
if self._client is None:
|
|
233
|
+
self._get_client()
|
|
234
|
+
self._collection = self._db[self.collection_name] # type: ignore
|
|
235
|
+
log_info(f"Using collection: {self.collection_name}")
|
|
105
236
|
return self._collection
|
|
106
237
|
|
|
238
|
+
async def _get_async_collection(self):
|
|
239
|
+
"""Get or create the async MongoDB collection."""
|
|
240
|
+
if self._async_collection is None:
|
|
241
|
+
client = await self._get_async_client()
|
|
242
|
+
self._async_db = client[self.database] # type: ignore
|
|
243
|
+
self._async_collection = self._async_db[self.collection_name] # type: ignore
|
|
244
|
+
return self._async_collection
|
|
245
|
+
|
|
107
246
|
def _create_search_index(self, overwrite: bool = True) -> None:
|
|
108
|
-
"""Create or overwrite the Atlas Search index."""
|
|
109
|
-
index_name = "vector_index_1"
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
logger.info(f"Dropping existing search index '{index_name}'.")
|
|
113
|
-
self._collection.drop_search_index(index_name)
|
|
247
|
+
"""Create or overwrite the Atlas Search index with proper error handling."""
|
|
248
|
+
index_name = self.search_index_name or "vector_index_1"
|
|
249
|
+
max_retries = 3
|
|
250
|
+
retry_delay = 5
|
|
114
251
|
|
|
115
|
-
|
|
252
|
+
if self.cosmos_compatibility:
|
|
253
|
+
try:
|
|
254
|
+
collection = self._get_collection()
|
|
116
255
|
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
"
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
256
|
+
# Handle overwrite if requested
|
|
257
|
+
if overwrite and index_name in collection.index_information():
|
|
258
|
+
log_info(f"Dropping existing index '{index_name}'")
|
|
259
|
+
collection.drop_index(index_name)
|
|
260
|
+
|
|
261
|
+
embedding_dim = getattr(self.embedder, "dimensions", 1536)
|
|
262
|
+
log_info(f"Creating vector search index '{index_name}'")
|
|
263
|
+
|
|
264
|
+
# Create vector search index using Cosmos DB IVF format
|
|
265
|
+
collection.create_index(
|
|
266
|
+
[("embedding", "cosmosSearch")],
|
|
267
|
+
name=index_name,
|
|
268
|
+
cosmosSearchOptions={
|
|
269
|
+
"kind": "vector-ivf",
|
|
270
|
+
"numLists": 1,
|
|
271
|
+
"dimensions": embedding_dim,
|
|
272
|
+
"similarity": self._get_cosmos_similarity_metric(),
|
|
273
|
+
},
|
|
274
|
+
)
|
|
275
|
+
|
|
276
|
+
log_info(f"Created vector search index '{index_name}' successfully")
|
|
277
|
+
|
|
278
|
+
except Exception as e:
|
|
279
|
+
logger.error(f"Error creating vector search index: {e}")
|
|
280
|
+
raise
|
|
281
|
+
else:
|
|
282
|
+
for attempt in range(max_retries):
|
|
283
|
+
try:
|
|
284
|
+
if overwrite and self._search_index_exists():
|
|
285
|
+
log_info(f"Dropping existing search index '{index_name}'.")
|
|
286
|
+
try:
|
|
287
|
+
collection = self._get_collection()
|
|
288
|
+
collection.drop_search_index(index_name)
|
|
289
|
+
# Wait longer after index deletion
|
|
290
|
+
time.sleep(retry_delay * 2)
|
|
291
|
+
except errors.OperationFailure as e:
|
|
292
|
+
if "Index already requested to be deleted" in str(e):
|
|
293
|
+
log_info("Index is already being deleted, waiting...")
|
|
294
|
+
time.sleep(retry_delay * 2) # Wait longer for deletion to complete
|
|
295
|
+
else:
|
|
296
|
+
raise
|
|
297
|
+
|
|
298
|
+
# Verify index is gone before creating new one
|
|
299
|
+
retries = 3
|
|
300
|
+
while retries > 0 and self._search_index_exists():
|
|
301
|
+
log_info("Waiting for index deletion to complete...")
|
|
302
|
+
time.sleep(retry_delay)
|
|
303
|
+
retries -= 1
|
|
304
|
+
|
|
305
|
+
log_info(f"Creating search index '{index_name}'.")
|
|
306
|
+
|
|
307
|
+
# Get embedding dimension from embedder
|
|
308
|
+
embedding_dim = getattr(self.embedder, "dimensions", 1536)
|
|
309
|
+
|
|
310
|
+
search_index_model = SearchIndexModel(
|
|
311
|
+
definition={
|
|
312
|
+
"fields": [
|
|
313
|
+
{
|
|
314
|
+
"type": "vector",
|
|
315
|
+
"numDimensions": embedding_dim,
|
|
316
|
+
"path": "embedding",
|
|
317
|
+
"similarity": self.distance_metric,
|
|
318
|
+
},
|
|
319
|
+
]
|
|
125
320
|
},
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
type="vectorSearch",
|
|
130
|
-
)
|
|
321
|
+
name=index_name,
|
|
322
|
+
type="vectorSearch",
|
|
323
|
+
)
|
|
131
324
|
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
325
|
+
collection = self._get_collection()
|
|
326
|
+
collection.create_search_index(model=search_index_model)
|
|
327
|
+
|
|
328
|
+
if self.wait_until_index_ready_in_seconds:
|
|
329
|
+
self._wait_for_index_ready()
|
|
330
|
+
|
|
331
|
+
log_info(f"Search index '{index_name}' created successfully.")
|
|
332
|
+
return
|
|
333
|
+
|
|
334
|
+
except errors.OperationFailure as e:
|
|
335
|
+
if "Duplicate Index" in str(e) and attempt < max_retries - 1:
|
|
336
|
+
logger.warning(f"Index already exists, retrying... (attempt {attempt + 1})")
|
|
337
|
+
time.sleep(retry_delay * (attempt + 1))
|
|
338
|
+
continue
|
|
339
|
+
logger.error(f"Failed to create search index: {e}")
|
|
340
|
+
raise
|
|
341
|
+
except Exception as e:
|
|
342
|
+
logger.error(f"Unexpected error creating search index: {e}")
|
|
343
|
+
raise
|
|
344
|
+
|
|
345
|
+
async def _create_search_index_async(self) -> None:
|
|
346
|
+
"""Create the Atlas Search index asynchronously."""
|
|
347
|
+
index_name = self.search_index_name
|
|
348
|
+
max_retries = 3
|
|
349
|
+
retry_delay = 5
|
|
350
|
+
|
|
351
|
+
for attempt in range(max_retries):
|
|
352
|
+
try:
|
|
353
|
+
collection = await self._get_async_collection()
|
|
354
|
+
|
|
355
|
+
# Get embedding dimension from embedder
|
|
356
|
+
embedding_dim = getattr(self.embedder, "dimensions", 1536)
|
|
357
|
+
|
|
358
|
+
search_index_model = SearchIndexModel(
|
|
359
|
+
definition={
|
|
360
|
+
"fields": [
|
|
361
|
+
{
|
|
362
|
+
"type": "vector",
|
|
363
|
+
"numDimensions": embedding_dim,
|
|
364
|
+
"path": "embedding",
|
|
365
|
+
"similarity": self.distance_metric,
|
|
366
|
+
},
|
|
367
|
+
]
|
|
368
|
+
},
|
|
369
|
+
name=index_name,
|
|
370
|
+
type="vectorSearch",
|
|
371
|
+
)
|
|
372
|
+
|
|
373
|
+
await collection.create_search_index(model=search_index_model)
|
|
374
|
+
log_info(f"Search index '{index_name}' created successfully.")
|
|
375
|
+
return
|
|
376
|
+
|
|
377
|
+
except Exception as e:
|
|
378
|
+
if attempt < max_retries - 1:
|
|
379
|
+
await asyncio.sleep(retry_delay * (attempt + 1))
|
|
380
|
+
continue
|
|
381
|
+
logger.error(f"Failed to create search index: {e}")
|
|
382
|
+
raise
|
|
138
383
|
|
|
139
384
|
def _search_index_exists(self) -> bool:
|
|
140
385
|
"""Check if the search index exists."""
|
|
141
|
-
index_name =
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
386
|
+
index_name = self.search_index_name
|
|
387
|
+
if self.cosmos_compatibility:
|
|
388
|
+
index_name = self.search_index_name or "vector_index_1"
|
|
389
|
+
try:
|
|
390
|
+
collection = self._get_collection()
|
|
391
|
+
indexes = collection.index_information()
|
|
392
|
+
|
|
393
|
+
for idx_name, idx_info in indexes.items():
|
|
394
|
+
if idx_name == index_name:
|
|
395
|
+
key_info = idx_info.get("key", [])
|
|
396
|
+
for key_value_pair in key_info:
|
|
397
|
+
# Ensure we have a tuple/list with exactly 2 elements
|
|
398
|
+
if isinstance(key_value_pair, (tuple, list)) and len(key_value_pair) == 2:
|
|
399
|
+
key, value = key_value_pair
|
|
400
|
+
if key == "embedding" and value == "cosmosSearch":
|
|
401
|
+
log_debug(f"Found existing vector search index: {index_name}")
|
|
402
|
+
return True
|
|
403
|
+
|
|
404
|
+
log_debug(f"Vector search index '{index_name}' not found")
|
|
405
|
+
return False
|
|
406
|
+
except Exception as e:
|
|
407
|
+
logger.error(f"Error checking search index existence: {e}")
|
|
408
|
+
return False
|
|
409
|
+
else:
|
|
410
|
+
try:
|
|
411
|
+
collection = self._get_collection()
|
|
412
|
+
indexes = list(collection.list_search_indexes()) # type: ignore
|
|
413
|
+
exists = any(index["name"] == index_name for index in indexes) # type: ignore
|
|
414
|
+
return exists
|
|
415
|
+
except Exception as e:
|
|
416
|
+
logger.error(f"Error checking search index existence: {e}")
|
|
417
|
+
return False
|
|
149
418
|
|
|
150
419
|
def _wait_for_index_ready(self) -> None:
|
|
151
420
|
"""Wait until the Atlas Search index is ready."""
|
|
152
|
-
|
|
153
|
-
index_name = "vector_index_1"
|
|
421
|
+
index_name = self.search_index_name
|
|
154
422
|
while True:
|
|
155
423
|
try:
|
|
156
424
|
if self._search_index_exists():
|
|
157
|
-
|
|
425
|
+
log_info(f"Search index '{index_name}' is ready.")
|
|
158
426
|
break
|
|
159
427
|
except Exception as e:
|
|
160
428
|
logger.error(f"Error checking index status: {e}")
|
|
161
|
-
if time.time() - start_time > self.wait_until_index_ready: # type: ignore
|
|
162
429
|
raise TimeoutError("Timeout waiting for search index to become ready.")
|
|
163
430
|
time.sleep(1)
|
|
164
431
|
|
|
432
|
+
async def _wait_for_index_ready_async(self) -> None:
|
|
433
|
+
"""Wait until the Atlas Search index is ready asynchronously."""
|
|
434
|
+
start_time = time.time()
|
|
435
|
+
index_name = self.search_index_name
|
|
436
|
+
while True:
|
|
437
|
+
try:
|
|
438
|
+
collection = await self._get_async_collection()
|
|
439
|
+
indexes = await collection.list_search_indexes()
|
|
440
|
+
if any(index["name"] == index_name for index in indexes):
|
|
441
|
+
log_info(f"Search index '{index_name}' is ready.")
|
|
442
|
+
break
|
|
443
|
+
except Exception as e:
|
|
444
|
+
logger.error(f"Error checking index status asynchronously: {e}")
|
|
445
|
+
import traceback
|
|
446
|
+
|
|
447
|
+
logger.error(f"Traceback: {traceback.format_exc()}")
|
|
448
|
+
|
|
449
|
+
if time.time() - start_time > self.wait_until_index_ready_in_seconds: # type: ignore
|
|
450
|
+
raise TimeoutError("Timeout waiting for search index to become ready.")
|
|
451
|
+
await asyncio.sleep(1)
|
|
452
|
+
|
|
165
453
|
def collection_exists(self) -> bool:
|
|
166
454
|
"""Check if the collection exists in the database."""
|
|
167
|
-
|
|
455
|
+
if self._db is None:
|
|
456
|
+
self._get_client()
|
|
457
|
+
return self.collection_name in self._db.list_collection_names() # type: ignore
|
|
168
458
|
|
|
169
459
|
def create(self) -> None:
|
|
170
460
|
"""Create the MongoDB collection and indexes if they do not exist."""
|
|
171
461
|
self._get_or_create_collection()
|
|
172
462
|
|
|
173
|
-
def
|
|
174
|
-
"""
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
463
|
+
async def async_create(self) -> None:
|
|
464
|
+
"""Create the MongoDB collection and indexes asynchronously."""
|
|
465
|
+
await self._get_async_collection()
|
|
466
|
+
|
|
467
|
+
if not await self.async_exists():
|
|
468
|
+
log_info(f"Creating collection '{self.collection_name}' asynchronously.")
|
|
469
|
+
await self._async_db.create_collection(self.collection_name) # type: ignore
|
|
470
|
+
await self._create_search_index_async()
|
|
471
|
+
if self.wait_until_index_ready_in_seconds:
|
|
472
|
+
await self._wait_for_index_ready_async()
|
|
183
473
|
|
|
184
474
|
def name_exists(self, name: str) -> bool:
|
|
185
475
|
"""Check if a document with a given name exists in the collection."""
|
|
186
476
|
try:
|
|
187
|
-
|
|
188
|
-
|
|
477
|
+
collection = self._get_collection()
|
|
478
|
+
exists = collection.find_one({"name": name}) is not None
|
|
479
|
+
log_debug(f"Document with name '{name}' {'exists' if exists else 'does not exist'}")
|
|
189
480
|
return exists
|
|
190
481
|
except Exception as e:
|
|
191
482
|
logger.error(f"Error checking document name existence: {e}")
|
|
192
483
|
return False
|
|
193
484
|
|
|
194
485
|
def id_exists(self, id: str) -> bool:
|
|
195
|
-
"""Check if a document with
|
|
486
|
+
"""Check if a document with the given ID exists in the collection.
|
|
487
|
+
|
|
488
|
+
Args:
|
|
489
|
+
id (str): The document ID to check.
|
|
490
|
+
|
|
491
|
+
Returns:
|
|
492
|
+
bool: True if the document exists, False otherwise.
|
|
493
|
+
"""
|
|
196
494
|
try:
|
|
197
|
-
|
|
198
|
-
|
|
495
|
+
collection = self._get_collection()
|
|
496
|
+
result = collection.find_one({"_id": id})
|
|
497
|
+
exists = result is not None
|
|
498
|
+
log_debug(f"Document with ID '{id}' {'exists' if exists else 'does not exist'}")
|
|
199
499
|
return exists
|
|
200
500
|
except Exception as e:
|
|
201
501
|
logger.error(f"Error checking document ID existence: {e}")
|
|
202
502
|
return False
|
|
203
503
|
|
|
204
|
-
def
|
|
504
|
+
def content_hash_exists(self, content_hash: str) -> bool:
|
|
505
|
+
"""Check if documents with the given content hash exist in the collection.
|
|
506
|
+
|
|
507
|
+
Args:
|
|
508
|
+
content_hash (str): The content hash to check.
|
|
509
|
+
|
|
510
|
+
Returns:
|
|
511
|
+
bool: True if documents with the content hash exist, False otherwise.
|
|
512
|
+
"""
|
|
513
|
+
try:
|
|
514
|
+
collection = self._get_collection()
|
|
515
|
+
result = collection.find_one({"content_hash": content_hash})
|
|
516
|
+
exists = result is not None
|
|
517
|
+
log_debug(f"Document with content_hash '{content_hash}' {'exists' if exists else 'does not exist'}")
|
|
518
|
+
return exists
|
|
519
|
+
except Exception as e:
|
|
520
|
+
logger.error(f"Error checking content_hash existence: {e}")
|
|
521
|
+
return False
|
|
522
|
+
|
|
523
|
+
def insert(self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
|
|
205
524
|
"""Insert documents into the MongoDB collection."""
|
|
206
|
-
|
|
525
|
+
log_debug(f"Inserting {len(documents)} documents")
|
|
526
|
+
collection = self._get_collection()
|
|
207
527
|
|
|
208
528
|
prepared_docs = []
|
|
209
529
|
for document in documents:
|
|
210
530
|
try:
|
|
211
|
-
|
|
531
|
+
document.embed(embedder=self.embedder)
|
|
532
|
+
if document.embedding is None:
|
|
533
|
+
raise ValueError(f"Failed to generate embedding for document: {document.id}")
|
|
534
|
+
doc_data = self.prepare_doc(content_hash, document, filters)
|
|
212
535
|
prepared_docs.append(doc_data)
|
|
213
536
|
except ValueError as e:
|
|
214
537
|
logger.error(f"Error preparing document '{document.name}': {e}")
|
|
215
538
|
|
|
216
539
|
if prepared_docs:
|
|
217
540
|
try:
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
if self.wait_after_insert and self.wait_after_insert > 0:
|
|
223
|
-
time.sleep(self.wait_after_insert)
|
|
541
|
+
collection.insert_many(prepared_docs, ordered=False)
|
|
542
|
+
log_info(f"Inserted {len(prepared_docs)} documents successfully.")
|
|
543
|
+
if self.wait_after_insert_in_seconds and self.wait_after_insert_in_seconds > 0:
|
|
544
|
+
time.sleep(self.wait_after_insert_in_seconds)
|
|
224
545
|
except errors.BulkWriteError as e:
|
|
225
546
|
logger.warning(f"Bulk write error while inserting documents: {e.details}")
|
|
226
547
|
except Exception as e:
|
|
227
548
|
logger.error(f"Error inserting documents: {e}")
|
|
228
549
|
|
|
229
|
-
def upsert(self, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
|
|
550
|
+
def upsert(self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
|
|
230
551
|
"""Upsert documents into the MongoDB collection."""
|
|
231
|
-
|
|
552
|
+
log_info(f"Upserting {len(documents)} documents")
|
|
553
|
+
collection = self._get_collection()
|
|
232
554
|
|
|
233
555
|
for document in documents:
|
|
234
556
|
try:
|
|
235
|
-
|
|
236
|
-
|
|
557
|
+
document.embed(embedder=self.embedder)
|
|
558
|
+
if document.embedding is None:
|
|
559
|
+
raise ValueError(f"Failed to generate embedding for document: {document.id}")
|
|
560
|
+
doc_data = self.prepare_doc(content_hash, document, filters)
|
|
561
|
+
collection.update_one(
|
|
237
562
|
{"_id": doc_data["_id"]},
|
|
238
563
|
{"$set": doc_data},
|
|
239
564
|
upsert=True,
|
|
240
565
|
)
|
|
241
|
-
|
|
566
|
+
log_info(f"Upserted document: {doc_data['_id']}")
|
|
242
567
|
except Exception as e:
|
|
243
568
|
logger.error(f"Error upserting document '{document.name}': {e}")
|
|
244
569
|
|
|
@@ -246,55 +571,142 @@ class MongoDBVector(VectorDb):
        """Indicate that upsert functionality is available."""
        return True

    def search(
        self,
        query: str,
        limit: int = 5,
        filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None,
        min_score: float = 0.0,
    ) -> List[Document]:
        """Search for documents using vector similarity."""
        if isinstance(filters, List):
            log_warning("Filters Expressions are not supported in MongoDB. No filters will be applied.")
            filters = None
        if self.search_type == SearchType.hybrid:
            return self.hybrid_search(query, limit=limit, filters=filters)

        query_embedding = self.embedder.get_embedding(query)
        if query_embedding is None:
            logger.error(f"Failed to generate embedding for query: {query}")
            return []

        if self.cosmos_compatibility:
            # Azure Cosmos DB Mongo vCore compatibility mode
            try:
                collection = self._get_collection()

                # Construct the search pipeline
                search_stage = {
                    "$search": {
                        "cosmosSearch": {"vector": query_embedding, "path": "embedding", "k": limit, "nProbes": 2},
                        "returnStoredSource": True,
                    }
                }

                pipeline = [
                    search_stage,
                    {
                        "$project": {
                            "similarityScore": {"$meta": "searchScore"},
                            "_id": 1,
                            "name": 1,
                            "content": 1,
                            "meta_data": 1,
                        }
                    },
                ]

                results = list(collection.aggregate(pipeline))
                docs = [
                    Document(
                        id=str(doc["_id"]),
                        name=doc.get("name"),
                        content=doc["content"],
                        meta_data={**doc.get("meta_data", {}), "score": doc.get("similarityScore", 0.0)},
                        content_id=doc.get("content_id"),
                    )
                    for doc in results
                ]

                log_info(f"Search completed. Found {len(docs)} documents.")
                return docs

            except Exception as e:
                logger.error(f"Error during vector search: {e}")
                return []
        else:
            # MongoDB Atlas Search
            try:
                collection = self._get_collection()
                pipeline = [
                    {
                        "$vectorSearch": {
                            "index": self.search_index_name,
                            "limit": limit,
                            "numCandidates": min(limit * 4, 100),
                            "queryVector": query_embedding,
                            "path": "embedding",
                        }
                    },
                    {"$set": {"score": {"$meta": "vectorSearchScore"}}},
                ]

                match_filters = {}
                if min_score > 0:
                    match_filters["score"] = {"$gte": min_score}

                # Handle filters if provided
                if filters:
                    # MongoDB uses dot notation for nested fields, so we need to prepend meta_data. if needed
                    mongo_filters = {}
                    for key, value in filters.items():
                        # If the key doesn't already include a dot notation for meta_data
                        if not key.startswith("meta_data.") and "." not in key:
                            mongo_filters[f"meta_data.{key}"] = value
                        else:
                            mongo_filters[key] = value

                    match_filters.update(mongo_filters)

                if match_filters:
                    pipeline.append({"$match": match_filters})  # type: ignore

                pipeline.append({"$project": {"embedding": 0}})

                results = list(collection.aggregate(pipeline))  # type: ignore

                docs = []
                for doc in results:
                    # Convert ObjectIds to strings before creating Document
                    clean_doc = self._convert_objectids_to_strings(doc)
                    document = Document(
                        id=str(clean_doc["_id"]),
                        name=clean_doc.get("name"),
                        content=clean_doc["content"],
                        meta_data={**clean_doc.get("meta_data", {}), "score": clean_doc.get("score", 0.0)},
                        content_id=clean_doc.get("content_id"),
                    )
                    docs.append(document)

                log_info(f"Search completed. Found {len(docs)} documents.")
                return docs

            except Exception as e:
                logger.error(f"Error during search: {e}")
                raise
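
A hedged usage sketch of the search path; the import path and constructor arguments below are assumptions, since the `MongoDBVector` initializer is not part of this hunk. Note how a plain filter key and an explicit `meta_data.`-prefixed key select the same documents:

# Hypothetical usage sketch; names marked below are assumptions.
from agno.vectordb.mongodb import MongoDBVector  # import path assumed

vector_db = MongoDBVector(
    collection_name="recipes",          # assumed parameter names
    db_url="mongodb+srv://...",
    search_index_name="vector_index_1",
)

# Plain keys are rewritten to "meta_data.<key>" before the $match stage,
# so both calls below apply the same filter.
docs = vector_db.search("thai curry", limit=5, filters={"cuisine": "thai"}, min_score=0.5)
docs = vector_db.search("thai curry", limit=5, filters={"meta_data.cuisine": "thai"})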

    def vector_search(self, query: str, limit: int = 5) -> List[Document]:
        """Perform a vector-based search."""
        log_debug("Performing vector search.")
        return self.search(query, limit=limit)

    def keyword_search(self, query: str, limit: int = 5) -> List[Document]:
        """Perform a keyword-based search."""
        try:
            collection = self._get_collection()
            cursor = collection.find(
                {"content": {"$regex": query, "$options": "i"}},
                {"_id": 1, "name": 1, "content": 1, "meta_data": 1, "content_id": 1},
            ).limit(limit)
            results = [
                Document(
@@ -302,42 +714,244 @@ class MongoDBVector(VectorDb):
                    name=doc.get("name"),
                    content=doc["content"],
                    meta_data=doc.get("meta_data", {}),
                    content_id=doc.get("content_id"),
                )
                for doc in cursor
            ]
            log_debug(f"Keyword search completed. Found {len(results)} documents.")
            return results
        except Exception as e:
            logger.error(f"Error during keyword search: {e}")
            return []
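
One caveat worth noting: the keyword path passes the raw query straight into `$regex`, so queries containing regex metacharacters can fail or over-match. A sketch of the escaping a caller might apply (this is an illustration, not package behavior):

# Caveat sketch: escape metacharacters if a literal keyword match is wanted.
import re

query = "c++ tutorial"
safe = re.escape(query)  # escapes "+" and other regex metacharacters
raw_filter = {"content": {"$regex": safe, "$options": "i"}}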
    def hybrid_search(
        self,
        query: str,
        limit: int = 5,
        filters: Optional[Dict[str, Any]] = None,
    ) -> List[Document]:
        """
        Perform a hybrid search combining vector and keyword-based searches using Reciprocal Rank Fusion.

        Weights for vector and keyword search are configured at the instance level (hybrid_vector_weight, hybrid_keyword_weight).
        The rank constant k is used in the RRF formula `1 / (rank + k)` to smooth scores.

        Reference: https://www.mongodb.com/docs/atlas/atlas-vector-search/tutorials/reciprocal-rank-fusion
        """

        if self.cosmos_compatibility:
            log_warning("Hybrid search is not implemented for Cosmos DB compatibility mode. Returning empty list.")
            return []

        log_debug(f"Performing hybrid search for query: '{query}' with limit: {limit}")

        query_embedding = self.embedder.get_embedding(query)
        if query_embedding is None:
            logger.error(f"Failed to generate embedding for query: {query}")
            return []

        collection = self._get_collection()

        k = self.hybrid_rank_constant

        mongo_filters = {}
        if filters:
            for key, value in filters.items():
                # If the key doesn't already include a dot notation for meta_data
                if not key.startswith("meta_data.") and "." not in key:
                    mongo_filters[f"meta_data.{key}"] = value
                else:
                    mongo_filters[key] = value

        pipeline = [
            # Vector Search Branch
            {
                "$vectorSearch": {
                    "index": self.search_index_name,
                    "path": "embedding",
                    "queryVector": query_embedding,
                    "numCandidates": min(limit * 10, 200),
                    "limit": limit * 2,
                }
            },
            {"$group": {"_id": None, "docs": {"$push": "$$ROOT"}}},
            {"$unwind": {"path": "$docs", "includeArrayIndex": "rank"}},
            {
                "$addFields": {
                    "_id": "$docs._id",
                    "name": "$docs.name",
                    "content": "$docs.content",
                    "meta_data": "$docs.meta_data",
                    "content_id": "$docs.content_id",
                    "vs_score": {
                        "$divide": [
                            self.hybrid_vector_weight,
                            {"$add": ["$rank", k, 1]},
                        ]
                    },
                    "fts_score": 0.0,  # Ensure fts_score exists with a default value
                }
            },
            {
                "$project": {
                    "_id": 1,
                    "name": 1,
                    "content": 1,
                    "meta_data": 1,
                    "content_id": 1,
                    "vs_score": 1,
                    # Now fts_score is included with its value (0.0 here)
                    "fts_score": 1,
                }
            },
            # Union with Keyword Search Branch
            {
                "$unionWith": {
                    "coll": self.collection_name,
                    "pipeline": [
                        {
                            "$search": {
                                "index": "default",
                                "text": {"query": query, "path": "content"},
                            }
                        },
                        {"$limit": limit * 2},
                        {"$group": {"_id": None, "docs": {"$push": "$$ROOT"}}},
                        {"$unwind": {"path": "$docs", "includeArrayIndex": "rank"}},
                        {
                            "$addFields": {
                                "_id": "$docs._id",
                                "name": "$docs.name",
                                "content": "$docs.content",
                                "meta_data": "$docs.meta_data",
                                "content_id": "$docs.content_id",
                                "vs_score": 0.0,
                                "fts_score": {
                                    "$divide": [
                                        self.hybrid_keyword_weight,
                                        {"$add": ["$rank", k, 1]},
                                    ]
                                },
                            }
                        },
                        {
                            "$project": {
                                "_id": 1,
                                "name": 1,
                                "content": 1,
                                "meta_data": 1,
                                "content_id": 1,
                                "vs_score": 1,
                                "fts_score": 1,
                            }
                        },
                    ],
                }
            },
            # Combine and Rank
            {
                "$group": {
                    "_id": "$_id",
                    "name": {"$first": "$name"},
                    "content": {"$first": "$content"},
                    "meta_data": {"$first": "$meta_data"},
                    "content_id": {"$first": "$content_id"},
                    "vs_score": {"$sum": "$vs_score"},
                    "fts_score": {"$sum": "$fts_score"},
                }
            },
            {
                "$project": {
                    "_id": 1,
                    "name": 1,
                    "content": 1,
                    "meta_data": 1,
                    "content_id": 1,
                    "score": {"$add": ["$vs_score", "$fts_score"]},
                }
            },
            {"$sort": {"score": -1}},
            {"$limit": limit},
        ]

        # Apply filters if provided
        if mongo_filters:
            pipeline.append({"$match": mongo_filters})

        try:
            from typing import Mapping, Sequence, cast

            results = list(collection.aggregate(cast(Sequence[Mapping[str, Any]], pipeline)))

            docs = []
            for doc in results:
                # Convert ObjectIds to strings before creating Document
                clean_doc = self._convert_objectids_to_strings(doc)
                document = Document(
                    id=str(clean_doc["_id"]),
                    name=clean_doc.get("name"),
                    content=clean_doc["content"],
                    meta_data={**clean_doc.get("meta_data", {}), "score": clean_doc.get("score", 0.0)},
                    content_id=clean_doc.get("content_id"),
                )
                docs.append(document)

            log_info(f"Hybrid search completed. Found {len(docs)} documents.")
            return docs
        except errors.OperationFailure as e:
            logger.error(
                f"Error during hybrid search, potentially due to missing or misconfigured Atlas Search index for text search: {e}"
            )
            logger.error(f"Details: {e.details}")
            return []
        except Exception as e:
            logger.error(f"Error during hybrid search: {e}")
            import traceback

            logger.error(f"Traceback: {traceback.format_exc()}")
            return []
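
The two `$divide` stages implement weighted Reciprocal Rank Fusion: a document at zero-based `rank` in a branch contributes `weight / (rank + k + 1)` to its fused score. A small sketch of the arithmetic, using an illustrative `k = 60` and equal weights (the real values come from `hybrid_rank_constant`, `hybrid_vector_weight` and `hybrid_keyword_weight`):

# Illustrative RRF arithmetic, mirroring the pipeline's weight / (rank + k + 1).
k, vector_weight, keyword_weight = 60, 0.5, 0.5

vector_ranking = ["doc_a", "doc_b", "doc_c"]   # zero-based ranks 0, 1, 2
keyword_ranking = ["doc_b", "doc_a"]

scores = {}
for rank, doc in enumerate(vector_ranking):
    scores[doc] = scores.get(doc, 0.0) + vector_weight / (rank + k + 1)
for rank, doc in enumerate(keyword_ranking):
    scores[doc] = scores.get(doc, 0.0) + keyword_weight / (rank + k + 1)

# doc_a: 0.5/61 + 0.5/62 ≈ 0.01626; doc_b: 0.5/62 + 0.5/61 ≈ 0.01626; doc_c: 0.5/63
for doc, score in sorted(scores.items(), key=lambda kv: -kv[1]):
    print(doc, round(score, 5))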
    def drop(self) -> None:
        """Drop the collection and clean up indexes."""
        collection = self._get_collection()
        index_name = self.search_index_name or "vector_index_1"

        if self.exists():
            if self.cosmos_compatibility:
                # Cosmos DB specific handling
                try:
                    # Drop the index if it exists
                    if self._search_index_exists():
                        log_info(f"Dropping index '{index_name}'")
                        try:
                            collection.drop_index(index_name)
                        except Exception as e:
                            logger.error(f"Error dropping index: {e}")

                except Exception as e:
                    logger.error(f"Error dropping collection: {e}")
                    raise
            else:
                # MongoDB Atlas specific handling
                try:
                    if self._search_index_exists():
                        collection.drop_search_index(index_name)
                        time.sleep(2)

                except Exception as e:
                    logger.error(f"Error dropping collection: {e}")
                    raise

            # Drop the collection
            collection.drop()
            time.sleep(2)

            log_info(f"Collection '{self.collection_name}' dropped successfully")

    def exists(self) -> bool:
        """Check if the MongoDB collection exists."""
        exists = self.collection_exists()
        log_debug(f"Collection '{self.collection_name}' existence: {exists}")
        return exists
    def optimize(self) -> None:
@@ -345,24 +959,31 @@ class MongoDBVector(VectorDb):
        pass

    def delete(self) -> bool:
        """Delete all documents from the collection."""
        if self.exists():
            try:
                collection = self._get_collection()
                result = collection.delete_many({})
                # Consider any deletion (even 0) as success
                success = result.deleted_count >= 0
                log_info(f"Deleted {result.deleted_count} documents from collection.")
                return success
            except Exception as e:
                logger.error(f"Error deleting documents: {e}")
                return False
        # Return True if collection doesn't exist (nothing to delete)
        return True

    def prepare_doc(
        self, content_hash: str, document: Document, filters: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """Prepare a document for insertion or upsertion into MongoDB."""

        # Add filters to document metadata if provided
        if filters:
            meta_data = document.meta_data.copy() if document.meta_data else {}
            meta_data.update(filters)
            document.meta_data = meta_data

        cleaned_content = document.content.replace("\x00", "\ufffd")
        doc_id = md5(cleaned_content.encode("utf-8")).hexdigest()
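
Because `doc_id` is the md5 of the cleaned content, identical chunks always map to the same `_id`; this is what makes the upsert path idempotent and lets duplicate inserts surface as bulk-write errors rather than duplicate rows. The property in isolation:

# Sketch of the deterministic-ID property used above.
from hashlib import md5

a = md5("same chunk".encode("utf-8")).hexdigest()
b = md5("same chunk".encode("utf-8")).hexdigest()
assert a == b  # re-ingesting unchanged content targets the same document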
@@ -372,16 +993,398 @@ class MongoDBVector(VectorDb):
            "content": cleaned_content,
            "meta_data": document.meta_data,
            "embedding": document.embedding,
            "content_id": document.content_id,
            "content_hash": content_hash,
        }
        log_debug(f"Prepared document: {doc_data['_id']}")
        return doc_data

    def get_count(self) -> int:
        """Get the count of documents in the MongoDB collection."""
        try:
            collection = self._get_collection()
            count = collection.count_documents({})
            log_debug(f"Collection '{self.collection_name}' has {count} documents.")
            return count
        except Exception as e:
            logger.error(f"Error getting document count: {e}")
            return 0
    async def async_insert(
        self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
    ) -> None:
        """Insert documents asynchronously."""
        log_debug(f"Inserting {len(documents)} documents asynchronously")
        collection = await self._get_async_collection()

        if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
            # Use batch embedding when enabled and supported
            try:
                # Extract content from all documents
                doc_contents = [doc.content for doc in documents]

                # Get batch embeddings and usage
                embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)

                # Process documents with pre-computed embeddings
                for j, doc in enumerate(documents):
                    try:
                        if j < len(embeddings):
                            doc.embedding = embeddings[j]
                            doc.usage = usages[j] if j < len(usages) else None
                    except Exception as e:
                        logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")

            except Exception as e:
                # Check if this is a rate limit error - don't fall back as it would make things worse
                error_str = str(e).lower()
                is_rate_limit = any(
                    phrase in error_str
                    for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
                )

                if is_rate_limit:
                    logger.error(f"Rate limit detected during batch embedding. {e}")
                    raise e
                else:
                    logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
                    # Fall back to individual embedding
                    embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
                    await asyncio.gather(*embed_tasks, return_exceptions=True)
        else:
            # Use individual embedding
            embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
            await asyncio.gather(*embed_tasks, return_exceptions=True)

        prepared_docs = []
        for document in documents:
            try:
                doc_data = self.prepare_doc(content_hash, document, filters)
                prepared_docs.append(doc_data)
            except ValueError as e:
                logger.error(f"Error preparing document '{document.name}': {e}")

        if prepared_docs:
            try:
                await collection.insert_many(prepared_docs, ordered=False)
                log_info(f"Inserted {len(prepared_docs)} documents successfully.")
                if self.wait_after_insert_in_seconds and self.wait_after_insert_in_seconds > 0:
                    await asyncio.sleep(self.wait_after_insert_in_seconds)
            except errors.BulkWriteError as e:
                logger.warning(f"Bulk write error while inserting documents: {e.details}")
            except Exception as e:
                logger.error(f"Error inserting documents asynchronously: {e}")
    async def async_upsert(
        self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
    ) -> None:
        """Upsert documents asynchronously."""
        log_info(f"Upserting {len(documents)} documents asynchronously")
        collection = await self._get_async_collection()

        if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
            # Use batch embedding when enabled and supported
            try:
                # Extract content from all documents
                doc_contents = [doc.content for doc in documents]

                # Get batch embeddings and usage
                embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)

                # Process documents with pre-computed embeddings
                for j, doc in enumerate(documents):
                    try:
                        if j < len(embeddings):
                            doc.embedding = embeddings[j]
                            doc.usage = usages[j] if j < len(usages) else None
                    except Exception as e:
                        logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")

            except Exception as e:
                # Check if this is a rate limit error - don't fall back as it would make things worse
                error_str = str(e).lower()
                is_rate_limit = any(
                    phrase in error_str
                    for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
                )

                if is_rate_limit:
                    logger.error(f"Rate limit detected during batch embedding. {e}")
                    raise e
                else:
                    logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
                    # Fall back to individual embedding
                    embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
                    await asyncio.gather(*embed_tasks, return_exceptions=True)
        else:
            # Use individual embedding
            embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
            await asyncio.gather(*embed_tasks, return_exceptions=True)

        for document in documents:
            try:
                doc_data = self.prepare_doc(content_hash, document, filters)
                await collection.update_one(
                    {"_id": doc_data["_id"]},
                    {"$set": doc_data},
                    upsert=True,
                )
                log_info(f"Upserted document: {doc_data['_id']}")
            except Exception as e:
                logger.error(f"Error upserting document '{document.name}' asynchronously: {e}")
    async def async_search(
        self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
    ) -> List[Document]:
        """Search for documents asynchronously."""
        if isinstance(filters, List):
            log_warning("Filters Expressions are not supported in MongoDB. No filters will be applied.")
            filters = None
        query_embedding = self.embedder.get_embedding(query)
        if query_embedding is None:
            logger.error(f"Failed to generate embedding for query: {query}")
            return []

        try:
            collection = await self._get_async_collection()
            pipeline = [
                {
                    "$vectorSearch": {
                        "index": self.search_index_name,
                        "limit": limit,
                        "numCandidates": min(limit * 4, 100),
                        "queryVector": query_embedding,
                        "path": "embedding",
                    }
                },
                {"$set": {"score": {"$meta": "vectorSearchScore"}}},
            ]

            # Handle filters if provided
            if filters:
                # MongoDB uses dot notation for nested fields, so we need to prepend meta_data. if needed
                mongo_filters = {}
                for key, value in filters.items():
                    # If the key doesn't already include a dot notation for meta_data
                    if not key.startswith("meta_data.") and "." not in key:
                        mongo_filters[f"meta_data.{key}"] = value
                    else:
                        mongo_filters[key] = value

                pipeline.append({"$match": mongo_filters})

            pipeline.append({"$project": {"embedding": 0}})

            # With AsyncMongoClient, aggregate() returns a coroutine that resolves to a cursor
            # We need to await it first to get the cursor
            cursor = await collection.aggregate(pipeline)

            # Now we can iterate over the cursor to get results
            results = []
            async for doc in cursor:
                results.append(doc)
                if len(results) >= limit:
                    break

            docs = [
                Document(
                    id=str(doc["_id"]),
                    name=doc.get("name"),
                    content=doc["content"],
                    meta_data={**doc.get("meta_data", {}), "score": doc.get("score", 0.0)},
                    content_id=doc.get("content_id"),
                )
                for doc in results
            ]

            log_info(f"Async search completed. Found {len(docs)} documents.")
            return docs

        except Exception as e:
            logger.error(f"Error during async search: {e}")
            # Include traceback for better debugging
            import traceback

            logger.error(f"Traceback: {traceback.format_exc()}")
            raise
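
A hedged usage sketch for the async path; as in the synchronous example above, the import path and constructor arguments are assumptions:

# Hypothetical async usage sketch; names marked below are assumptions.
import asyncio

from agno.vectordb.mongodb import MongoDBVector  # import path assumed

async def main() -> None:
    vector_db = MongoDBVector(collection_name="recipes", db_url="mongodb+srv://...")  # assumed args
    docs = await vector_db.async_search("thai curry", limit=3, filters={"cuisine": "thai"})
    for doc in docs:
        print(doc.meta_data.get("score"), doc.name)

asyncio.run(main())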
    async def async_drop(self) -> None:
        """Drop the collection asynchronously."""
        if await self.async_exists():
            try:
                collection = await self._get_async_collection()
                await collection.drop()
                log_info(f"Collection '{self.collection_name}' dropped asynchronously")
            except Exception as e:
                logger.error(f"Error dropping collection asynchronously: {e}")
                raise

    async def async_exists(self) -> bool:
        """Check if the collection exists asynchronously."""
        try:
            client = await self._get_async_client()
            collection_names = await client[self.database].list_collection_names()
            exists = self.collection_name in collection_names
            log_debug(f"Collection '{self.collection_name}' existence (async): {exists}")
            return exists
        except Exception as e:
            logger.error(f"Error checking collection existence asynchronously: {e}")
            return False

    async def async_name_exists(self, name: str) -> bool:
        """Check if a document with a given name exists asynchronously."""
        try:
            collection = await self._get_async_collection()
            exists = await collection.find_one({"name": name}) is not None
            log_debug(f"Document with name '{name}' {'exists' if exists else 'does not exist'} (async)")
            return exists
        except Exception as e:
            logger.error(f"Error checking document name existence asynchronously: {e}")
            return False

    def _get_cosmos_similarity_metric(self) -> str:
        """Convert MongoDB distance metric to Cosmos DB format."""
        # Cosmos DB supports: COS (cosine), L2 (Euclidean), IP (inner product)
        metric_mapping = {"cosine": "COS", "euclidean": "L2", "dotProduct": "IP"}
        return metric_mapping.get(self.distance_metric, "COS")

    def _convert_objectids_to_strings(self, obj: Any) -> Any:
        """
        Recursively convert MongoDB ObjectIds to strings in any data structure.

        Args:
            obj: Any object that might contain ObjectIds

        Returns:
            The same object with ObjectIds converted to strings
        """
        if isinstance(obj, ObjectId):
            return str(obj)
        elif isinstance(obj, dict):
            return {key: self._convert_objectids_to_strings(value) for key, value in obj.items()}
        elif isinstance(obj, list):
            return [self._convert_objectids_to_strings(item) for item in obj]
        elif isinstance(obj, tuple):
            return tuple(self._convert_objectids_to_strings(item) for item in obj)
        else:
            return obj
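
A quick illustration of what the recursive conversion produces (`bson` ships with pymongo; the ObjectId values below are made up):

# Illustration of the recursive ObjectId-to-string conversion.
from bson import ObjectId

raw = {"_id": ObjectId("64d2f8a0c2a4f0b1d3e4f5a6"), "refs": [ObjectId("64d2f8a0c2a4f0b1d3e4f5a7")]}
# After conversion every ObjectId, however deeply nested, is a plain string:
# {"_id": "64d2f8a0c2a4f0b1d3e4f5a6", "refs": ["64d2f8a0c2a4f0b1d3e4f5a7"]}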
    def delete_by_id(self, id: str) -> bool:
        """Delete document by ID."""
        try:
            collection = self._get_collection()
            result = collection.delete_one({"_id": id})

            if result.deleted_count > 0:
                log_info(
                    f"Deleted {result.deleted_count} document(s) with ID '{id}' from collection '{self.collection_name}'."
                )
                return True
            else:
                log_info(f"No documents found with ID '{id}' to delete.")
                return True
        except Exception as e:
            logger.error(f"Error deleting document with ID '{id}': {e}")
            return False

    def delete_by_name(self, name: str) -> bool:
        """Delete documents by name."""
        try:
            collection = self._get_collection()
            result = collection.delete_many({"name": name})

            log_info(
                f"Deleted {result.deleted_count} document(s) with name '{name}' from collection '{self.collection_name}'."
            )
            return True
        except Exception as e:
            logger.error(f"Error deleting documents with name '{name}': {e}")
            return False

    def delete_by_metadata(self, metadata: Dict[str, Any]) -> bool:
        """Delete documents by metadata."""
        try:
            collection = self._get_collection()

            # Build MongoDB query for metadata matching
            mongo_filters = {}
            for key, value in metadata.items():
                # Use dot notation for nested metadata fields
                mongo_filters[f"meta_data.{key}"] = value

            result = collection.delete_many(mongo_filters)

            log_info(
                f"Deleted {result.deleted_count} document(s) with metadata '{metadata}' from collection '{self.collection_name}'."
            )
            return True
        except Exception as e:
            logger.error(f"Error deleting documents with metadata '{metadata}': {e}")
            return False

    def _delete_by_content_hash(self, content_hash: str) -> bool:
        """Delete documents by content hash.

        Args:
            content_hash (str): The content hash to delete.

        Returns:
            bool: True if documents were deleted successfully, False otherwise.
        """
        try:
            collection = self._get_collection()
            result = collection.delete_many({"content_hash": content_hash})
            log_info(f"Deleted {result.deleted_count} documents with content_hash '{content_hash}'")
            return True
        except Exception as e:
            logger.error(f"Error deleting documents by content_hash '{content_hash}': {e}")
            return False

    def delete_by_content_id(self, content_id: str) -> bool:
        """Delete documents by content ID."""
        try:
            collection = self._get_collection()
            result = collection.delete_many({"content_id": content_id})

            log_info(
                f"Deleted {result.deleted_count} document(s) with content_id '{content_id}' from collection '{self.collection_name}'."
            )
            return True
        except Exception as e:
            logger.error(f"Error deleting documents with content_id '{content_id}': {e}")
            return False
    def update_metadata(self, content_id: str, metadata: Dict[str, Any]) -> None:
        """
        Update the metadata for documents with the given content_id.

        Args:
            content_id (str): The content ID to update
            metadata (Dict[str, Any]): The metadata to update
        """
        try:
            collection = self._client[self.database][self.collection_name]  # type: ignore

            # Create query filter for content_id
            filter_query = {"content_id": content_id}

            update_operations = {}
            for key, value in metadata.items():
                update_operations[f"meta_data.{key}"] = value
                update_operations[f"filters.{key}"] = value

            # Update documents
            result = collection.update_many(filter_query, {"$set": update_operations})

            if result.matched_count == 0:
                logger.debug(f"No documents found with content_id: {content_id}")
            else:
                logger.debug(f"Updated metadata for {result.matched_count} documents with content_id: {content_id}")

        except Exception as e:
            logger.error(f"Error updating metadata for content_id '{content_id}': {e}")
            raise
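
The dot-notation keys matter here: `$set` on `meta_data.<key>` merges a single field into the embedded document, whereas `$set` on `meta_data` itself would replace the whole subdocument. An illustration with plain dicts:

# Why update_metadata builds "meta_data.<key>" paths instead of setting "meta_data".
doc = {"meta_data": {"topic": "old", "lang": "en"}}

merge = {"$set": {"meta_data.topic": "new"}}         # -> {"topic": "new", "lang": "en"}
replace = {"$set": {"meta_data": {"topic": "new"}}}  # -> {"topic": "new"} ("lang" lost)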
    def get_supported_search_types(self) -> List[str]:
        """Get the supported search types for this vector database."""
        return [SearchType.vector, SearchType.hybrid]