agno 2.2.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/__init__.py +8 -0
- agno/agent/__init__.py +51 -0
- agno/agent/agent.py +10405 -0
- agno/api/__init__.py +0 -0
- agno/api/agent.py +28 -0
- agno/api/api.py +40 -0
- agno/api/evals.py +22 -0
- agno/api/os.py +17 -0
- agno/api/routes.py +13 -0
- agno/api/schemas/__init__.py +9 -0
- agno/api/schemas/agent.py +16 -0
- agno/api/schemas/evals.py +16 -0
- agno/api/schemas/os.py +14 -0
- agno/api/schemas/response.py +6 -0
- agno/api/schemas/team.py +16 -0
- agno/api/schemas/utils.py +21 -0
- agno/api/schemas/workflows.py +16 -0
- agno/api/settings.py +53 -0
- agno/api/team.py +30 -0
- agno/api/workflow.py +28 -0
- agno/cloud/aws/base.py +214 -0
- agno/cloud/aws/s3/__init__.py +2 -0
- agno/cloud/aws/s3/api_client.py +43 -0
- agno/cloud/aws/s3/bucket.py +195 -0
- agno/cloud/aws/s3/object.py +57 -0
- agno/culture/__init__.py +3 -0
- agno/culture/manager.py +956 -0
- agno/db/__init__.py +24 -0
- agno/db/async_postgres/__init__.py +3 -0
- agno/db/base.py +598 -0
- agno/db/dynamo/__init__.py +3 -0
- agno/db/dynamo/dynamo.py +2042 -0
- agno/db/dynamo/schemas.py +314 -0
- agno/db/dynamo/utils.py +743 -0
- agno/db/firestore/__init__.py +3 -0
- agno/db/firestore/firestore.py +1795 -0
- agno/db/firestore/schemas.py +140 -0
- agno/db/firestore/utils.py +376 -0
- agno/db/gcs_json/__init__.py +3 -0
- agno/db/gcs_json/gcs_json_db.py +1335 -0
- agno/db/gcs_json/utils.py +228 -0
- agno/db/in_memory/__init__.py +3 -0
- agno/db/in_memory/in_memory_db.py +1160 -0
- agno/db/in_memory/utils.py +230 -0
- agno/db/json/__init__.py +3 -0
- agno/db/json/json_db.py +1328 -0
- agno/db/json/utils.py +230 -0
- agno/db/migrations/__init__.py +0 -0
- agno/db/migrations/v1_to_v2.py +635 -0
- agno/db/mongo/__init__.py +17 -0
- agno/db/mongo/async_mongo.py +2026 -0
- agno/db/mongo/mongo.py +1982 -0
- agno/db/mongo/schemas.py +87 -0
- agno/db/mongo/utils.py +259 -0
- agno/db/mysql/__init__.py +3 -0
- agno/db/mysql/mysql.py +2308 -0
- agno/db/mysql/schemas.py +138 -0
- agno/db/mysql/utils.py +355 -0
- agno/db/postgres/__init__.py +4 -0
- agno/db/postgres/async_postgres.py +1927 -0
- agno/db/postgres/postgres.py +2260 -0
- agno/db/postgres/schemas.py +139 -0
- agno/db/postgres/utils.py +442 -0
- agno/db/redis/__init__.py +3 -0
- agno/db/redis/redis.py +1660 -0
- agno/db/redis/schemas.py +123 -0
- agno/db/redis/utils.py +346 -0
- agno/db/schemas/__init__.py +4 -0
- agno/db/schemas/culture.py +120 -0
- agno/db/schemas/evals.py +33 -0
- agno/db/schemas/knowledge.py +40 -0
- agno/db/schemas/memory.py +46 -0
- agno/db/schemas/metrics.py +0 -0
- agno/db/singlestore/__init__.py +3 -0
- agno/db/singlestore/schemas.py +130 -0
- agno/db/singlestore/singlestore.py +2272 -0
- agno/db/singlestore/utils.py +384 -0
- agno/db/sqlite/__init__.py +4 -0
- agno/db/sqlite/async_sqlite.py +2293 -0
- agno/db/sqlite/schemas.py +133 -0
- agno/db/sqlite/sqlite.py +2288 -0
- agno/db/sqlite/utils.py +431 -0
- agno/db/surrealdb/__init__.py +3 -0
- agno/db/surrealdb/metrics.py +292 -0
- agno/db/surrealdb/models.py +309 -0
- agno/db/surrealdb/queries.py +71 -0
- agno/db/surrealdb/surrealdb.py +1353 -0
- agno/db/surrealdb/utils.py +147 -0
- agno/db/utils.py +116 -0
- agno/debug.py +18 -0
- agno/eval/__init__.py +14 -0
- agno/eval/accuracy.py +834 -0
- agno/eval/performance.py +773 -0
- agno/eval/reliability.py +306 -0
- agno/eval/utils.py +119 -0
- agno/exceptions.py +161 -0
- agno/filters.py +354 -0
- agno/guardrails/__init__.py +6 -0
- agno/guardrails/base.py +19 -0
- agno/guardrails/openai.py +144 -0
- agno/guardrails/pii.py +94 -0
- agno/guardrails/prompt_injection.py +52 -0
- agno/integrations/__init__.py +0 -0
- agno/integrations/discord/__init__.py +3 -0
- agno/integrations/discord/client.py +203 -0
- agno/knowledge/__init__.py +5 -0
- agno/knowledge/chunking/__init__.py +0 -0
- agno/knowledge/chunking/agentic.py +79 -0
- agno/knowledge/chunking/document.py +91 -0
- agno/knowledge/chunking/fixed.py +57 -0
- agno/knowledge/chunking/markdown.py +151 -0
- agno/knowledge/chunking/recursive.py +63 -0
- agno/knowledge/chunking/row.py +39 -0
- agno/knowledge/chunking/semantic.py +86 -0
- agno/knowledge/chunking/strategy.py +165 -0
- agno/knowledge/content.py +74 -0
- agno/knowledge/document/__init__.py +5 -0
- agno/knowledge/document/base.py +58 -0
- agno/knowledge/embedder/__init__.py +5 -0
- agno/knowledge/embedder/aws_bedrock.py +343 -0
- agno/knowledge/embedder/azure_openai.py +210 -0
- agno/knowledge/embedder/base.py +23 -0
- agno/knowledge/embedder/cohere.py +323 -0
- agno/knowledge/embedder/fastembed.py +62 -0
- agno/knowledge/embedder/fireworks.py +13 -0
- agno/knowledge/embedder/google.py +258 -0
- agno/knowledge/embedder/huggingface.py +94 -0
- agno/knowledge/embedder/jina.py +182 -0
- agno/knowledge/embedder/langdb.py +22 -0
- agno/knowledge/embedder/mistral.py +206 -0
- agno/knowledge/embedder/nebius.py +13 -0
- agno/knowledge/embedder/ollama.py +154 -0
- agno/knowledge/embedder/openai.py +195 -0
- agno/knowledge/embedder/sentence_transformer.py +63 -0
- agno/knowledge/embedder/together.py +13 -0
- agno/knowledge/embedder/vllm.py +262 -0
- agno/knowledge/embedder/voyageai.py +165 -0
- agno/knowledge/knowledge.py +1988 -0
- agno/knowledge/reader/__init__.py +7 -0
- agno/knowledge/reader/arxiv_reader.py +81 -0
- agno/knowledge/reader/base.py +95 -0
- agno/knowledge/reader/csv_reader.py +166 -0
- agno/knowledge/reader/docx_reader.py +82 -0
- agno/knowledge/reader/field_labeled_csv_reader.py +292 -0
- agno/knowledge/reader/firecrawl_reader.py +201 -0
- agno/knowledge/reader/json_reader.py +87 -0
- agno/knowledge/reader/markdown_reader.py +137 -0
- agno/knowledge/reader/pdf_reader.py +431 -0
- agno/knowledge/reader/pptx_reader.py +101 -0
- agno/knowledge/reader/reader_factory.py +313 -0
- agno/knowledge/reader/s3_reader.py +89 -0
- agno/knowledge/reader/tavily_reader.py +194 -0
- agno/knowledge/reader/text_reader.py +115 -0
- agno/knowledge/reader/web_search_reader.py +372 -0
- agno/knowledge/reader/website_reader.py +455 -0
- agno/knowledge/reader/wikipedia_reader.py +59 -0
- agno/knowledge/reader/youtube_reader.py +78 -0
- agno/knowledge/remote_content/__init__.py +0 -0
- agno/knowledge/remote_content/remote_content.py +88 -0
- agno/knowledge/reranker/__init__.py +3 -0
- agno/knowledge/reranker/base.py +14 -0
- agno/knowledge/reranker/cohere.py +64 -0
- agno/knowledge/reranker/infinity.py +195 -0
- agno/knowledge/reranker/sentence_transformer.py +54 -0
- agno/knowledge/types.py +39 -0
- agno/knowledge/utils.py +189 -0
- agno/media.py +462 -0
- agno/memory/__init__.py +3 -0
- agno/memory/manager.py +1327 -0
- agno/models/__init__.py +0 -0
- agno/models/aimlapi/__init__.py +5 -0
- agno/models/aimlapi/aimlapi.py +45 -0
- agno/models/anthropic/__init__.py +5 -0
- agno/models/anthropic/claude.py +757 -0
- agno/models/aws/__init__.py +15 -0
- agno/models/aws/bedrock.py +701 -0
- agno/models/aws/claude.py +378 -0
- agno/models/azure/__init__.py +18 -0
- agno/models/azure/ai_foundry.py +485 -0
- agno/models/azure/openai_chat.py +131 -0
- agno/models/base.py +2175 -0
- agno/models/cerebras/__init__.py +12 -0
- agno/models/cerebras/cerebras.py +501 -0
- agno/models/cerebras/cerebras_openai.py +112 -0
- agno/models/cohere/__init__.py +5 -0
- agno/models/cohere/chat.py +389 -0
- agno/models/cometapi/__init__.py +5 -0
- agno/models/cometapi/cometapi.py +57 -0
- agno/models/dashscope/__init__.py +5 -0
- agno/models/dashscope/dashscope.py +91 -0
- agno/models/deepinfra/__init__.py +5 -0
- agno/models/deepinfra/deepinfra.py +28 -0
- agno/models/deepseek/__init__.py +5 -0
- agno/models/deepseek/deepseek.py +61 -0
- agno/models/defaults.py +1 -0
- agno/models/fireworks/__init__.py +5 -0
- agno/models/fireworks/fireworks.py +26 -0
- agno/models/google/__init__.py +5 -0
- agno/models/google/gemini.py +1085 -0
- agno/models/groq/__init__.py +5 -0
- agno/models/groq/groq.py +556 -0
- agno/models/huggingface/__init__.py +5 -0
- agno/models/huggingface/huggingface.py +491 -0
- agno/models/ibm/__init__.py +5 -0
- agno/models/ibm/watsonx.py +422 -0
- agno/models/internlm/__init__.py +3 -0
- agno/models/internlm/internlm.py +26 -0
- agno/models/langdb/__init__.py +1 -0
- agno/models/langdb/langdb.py +48 -0
- agno/models/litellm/__init__.py +14 -0
- agno/models/litellm/chat.py +468 -0
- agno/models/litellm/litellm_openai.py +25 -0
- agno/models/llama_cpp/__init__.py +5 -0
- agno/models/llama_cpp/llama_cpp.py +22 -0
- agno/models/lmstudio/__init__.py +5 -0
- agno/models/lmstudio/lmstudio.py +25 -0
- agno/models/message.py +434 -0
- agno/models/meta/__init__.py +12 -0
- agno/models/meta/llama.py +475 -0
- agno/models/meta/llama_openai.py +78 -0
- agno/models/metrics.py +120 -0
- agno/models/mistral/__init__.py +5 -0
- agno/models/mistral/mistral.py +432 -0
- agno/models/nebius/__init__.py +3 -0
- agno/models/nebius/nebius.py +54 -0
- agno/models/nexus/__init__.py +3 -0
- agno/models/nexus/nexus.py +22 -0
- agno/models/nvidia/__init__.py +5 -0
- agno/models/nvidia/nvidia.py +28 -0
- agno/models/ollama/__init__.py +5 -0
- agno/models/ollama/chat.py +441 -0
- agno/models/openai/__init__.py +9 -0
- agno/models/openai/chat.py +883 -0
- agno/models/openai/like.py +27 -0
- agno/models/openai/responses.py +1050 -0
- agno/models/openrouter/__init__.py +5 -0
- agno/models/openrouter/openrouter.py +66 -0
- agno/models/perplexity/__init__.py +5 -0
- agno/models/perplexity/perplexity.py +187 -0
- agno/models/portkey/__init__.py +3 -0
- agno/models/portkey/portkey.py +81 -0
- agno/models/requesty/__init__.py +5 -0
- agno/models/requesty/requesty.py +52 -0
- agno/models/response.py +199 -0
- agno/models/sambanova/__init__.py +5 -0
- agno/models/sambanova/sambanova.py +28 -0
- agno/models/siliconflow/__init__.py +5 -0
- agno/models/siliconflow/siliconflow.py +25 -0
- agno/models/together/__init__.py +5 -0
- agno/models/together/together.py +25 -0
- agno/models/utils.py +266 -0
- agno/models/vercel/__init__.py +3 -0
- agno/models/vercel/v0.py +26 -0
- agno/models/vertexai/__init__.py +0 -0
- agno/models/vertexai/claude.py +70 -0
- agno/models/vllm/__init__.py +3 -0
- agno/models/vllm/vllm.py +78 -0
- agno/models/xai/__init__.py +3 -0
- agno/models/xai/xai.py +113 -0
- agno/os/__init__.py +3 -0
- agno/os/app.py +876 -0
- agno/os/auth.py +57 -0
- agno/os/config.py +104 -0
- agno/os/interfaces/__init__.py +1 -0
- agno/os/interfaces/a2a/__init__.py +3 -0
- agno/os/interfaces/a2a/a2a.py +42 -0
- agno/os/interfaces/a2a/router.py +250 -0
- agno/os/interfaces/a2a/utils.py +924 -0
- agno/os/interfaces/agui/__init__.py +3 -0
- agno/os/interfaces/agui/agui.py +47 -0
- agno/os/interfaces/agui/router.py +144 -0
- agno/os/interfaces/agui/utils.py +534 -0
- agno/os/interfaces/base.py +25 -0
- agno/os/interfaces/slack/__init__.py +3 -0
- agno/os/interfaces/slack/router.py +148 -0
- agno/os/interfaces/slack/security.py +30 -0
- agno/os/interfaces/slack/slack.py +47 -0
- agno/os/interfaces/whatsapp/__init__.py +3 -0
- agno/os/interfaces/whatsapp/router.py +211 -0
- agno/os/interfaces/whatsapp/security.py +53 -0
- agno/os/interfaces/whatsapp/whatsapp.py +36 -0
- agno/os/mcp.py +292 -0
- agno/os/middleware/__init__.py +7 -0
- agno/os/middleware/jwt.py +233 -0
- agno/os/router.py +1763 -0
- agno/os/routers/__init__.py +3 -0
- agno/os/routers/evals/__init__.py +3 -0
- agno/os/routers/evals/evals.py +430 -0
- agno/os/routers/evals/schemas.py +142 -0
- agno/os/routers/evals/utils.py +162 -0
- agno/os/routers/health.py +31 -0
- agno/os/routers/home.py +52 -0
- agno/os/routers/knowledge/__init__.py +3 -0
- agno/os/routers/knowledge/knowledge.py +997 -0
- agno/os/routers/knowledge/schemas.py +178 -0
- agno/os/routers/memory/__init__.py +3 -0
- agno/os/routers/memory/memory.py +515 -0
- agno/os/routers/memory/schemas.py +62 -0
- agno/os/routers/metrics/__init__.py +3 -0
- agno/os/routers/metrics/metrics.py +190 -0
- agno/os/routers/metrics/schemas.py +47 -0
- agno/os/routers/session/__init__.py +3 -0
- agno/os/routers/session/session.py +997 -0
- agno/os/schema.py +1055 -0
- agno/os/settings.py +43 -0
- agno/os/utils.py +630 -0
- agno/py.typed +0 -0
- agno/reasoning/__init__.py +0 -0
- agno/reasoning/anthropic.py +80 -0
- agno/reasoning/azure_ai_foundry.py +67 -0
- agno/reasoning/deepseek.py +63 -0
- agno/reasoning/default.py +97 -0
- agno/reasoning/gemini.py +73 -0
- agno/reasoning/groq.py +71 -0
- agno/reasoning/helpers.py +63 -0
- agno/reasoning/ollama.py +67 -0
- agno/reasoning/openai.py +86 -0
- agno/reasoning/step.py +31 -0
- agno/reasoning/vertexai.py +76 -0
- agno/run/__init__.py +6 -0
- agno/run/agent.py +787 -0
- agno/run/base.py +229 -0
- agno/run/cancel.py +81 -0
- agno/run/messages.py +32 -0
- agno/run/team.py +753 -0
- agno/run/workflow.py +708 -0
- agno/session/__init__.py +10 -0
- agno/session/agent.py +295 -0
- agno/session/summary.py +265 -0
- agno/session/team.py +392 -0
- agno/session/workflow.py +205 -0
- agno/team/__init__.py +37 -0
- agno/team/team.py +8793 -0
- agno/tools/__init__.py +10 -0
- agno/tools/agentql.py +120 -0
- agno/tools/airflow.py +69 -0
- agno/tools/api.py +122 -0
- agno/tools/apify.py +314 -0
- agno/tools/arxiv.py +127 -0
- agno/tools/aws_lambda.py +53 -0
- agno/tools/aws_ses.py +66 -0
- agno/tools/baidusearch.py +89 -0
- agno/tools/bitbucket.py +292 -0
- agno/tools/brandfetch.py +213 -0
- agno/tools/bravesearch.py +106 -0
- agno/tools/brightdata.py +367 -0
- agno/tools/browserbase.py +209 -0
- agno/tools/calcom.py +255 -0
- agno/tools/calculator.py +151 -0
- agno/tools/cartesia.py +187 -0
- agno/tools/clickup.py +244 -0
- agno/tools/confluence.py +240 -0
- agno/tools/crawl4ai.py +158 -0
- agno/tools/csv_toolkit.py +185 -0
- agno/tools/dalle.py +110 -0
- agno/tools/daytona.py +475 -0
- agno/tools/decorator.py +262 -0
- agno/tools/desi_vocal.py +108 -0
- agno/tools/discord.py +161 -0
- agno/tools/docker.py +716 -0
- agno/tools/duckdb.py +379 -0
- agno/tools/duckduckgo.py +91 -0
- agno/tools/e2b.py +703 -0
- agno/tools/eleven_labs.py +196 -0
- agno/tools/email.py +67 -0
- agno/tools/evm.py +129 -0
- agno/tools/exa.py +396 -0
- agno/tools/fal.py +127 -0
- agno/tools/file.py +240 -0
- agno/tools/file_generation.py +350 -0
- agno/tools/financial_datasets.py +288 -0
- agno/tools/firecrawl.py +143 -0
- agno/tools/function.py +1187 -0
- agno/tools/giphy.py +93 -0
- agno/tools/github.py +1760 -0
- agno/tools/gmail.py +922 -0
- agno/tools/google_bigquery.py +117 -0
- agno/tools/google_drive.py +270 -0
- agno/tools/google_maps.py +253 -0
- agno/tools/googlecalendar.py +674 -0
- agno/tools/googlesearch.py +98 -0
- agno/tools/googlesheets.py +377 -0
- agno/tools/hackernews.py +77 -0
- agno/tools/jina.py +101 -0
- agno/tools/jira.py +170 -0
- agno/tools/knowledge.py +218 -0
- agno/tools/linear.py +426 -0
- agno/tools/linkup.py +58 -0
- agno/tools/local_file_system.py +90 -0
- agno/tools/lumalab.py +183 -0
- agno/tools/mcp/__init__.py +10 -0
- agno/tools/mcp/mcp.py +331 -0
- agno/tools/mcp/multi_mcp.py +347 -0
- agno/tools/mcp/params.py +24 -0
- agno/tools/mcp_toolbox.py +284 -0
- agno/tools/mem0.py +193 -0
- agno/tools/memori.py +339 -0
- agno/tools/memory.py +419 -0
- agno/tools/mlx_transcribe.py +139 -0
- agno/tools/models/__init__.py +0 -0
- agno/tools/models/azure_openai.py +190 -0
- agno/tools/models/gemini.py +203 -0
- agno/tools/models/groq.py +158 -0
- agno/tools/models/morph.py +186 -0
- agno/tools/models/nebius.py +124 -0
- agno/tools/models_labs.py +195 -0
- agno/tools/moviepy_video.py +349 -0
- agno/tools/neo4j.py +134 -0
- agno/tools/newspaper.py +46 -0
- agno/tools/newspaper4k.py +93 -0
- agno/tools/notion.py +204 -0
- agno/tools/openai.py +202 -0
- agno/tools/openbb.py +160 -0
- agno/tools/opencv.py +321 -0
- agno/tools/openweather.py +233 -0
- agno/tools/oxylabs.py +385 -0
- agno/tools/pandas.py +102 -0
- agno/tools/parallel.py +314 -0
- agno/tools/postgres.py +257 -0
- agno/tools/pubmed.py +188 -0
- agno/tools/python.py +205 -0
- agno/tools/reasoning.py +283 -0
- agno/tools/reddit.py +467 -0
- agno/tools/replicate.py +117 -0
- agno/tools/resend.py +62 -0
- agno/tools/scrapegraph.py +222 -0
- agno/tools/searxng.py +152 -0
- agno/tools/serpapi.py +116 -0
- agno/tools/serper.py +255 -0
- agno/tools/shell.py +53 -0
- agno/tools/slack.py +136 -0
- agno/tools/sleep.py +20 -0
- agno/tools/spider.py +116 -0
- agno/tools/sql.py +154 -0
- agno/tools/streamlit/__init__.py +0 -0
- agno/tools/streamlit/components.py +113 -0
- agno/tools/tavily.py +254 -0
- agno/tools/telegram.py +48 -0
- agno/tools/todoist.py +218 -0
- agno/tools/tool_registry.py +1 -0
- agno/tools/toolkit.py +146 -0
- agno/tools/trafilatura.py +388 -0
- agno/tools/trello.py +274 -0
- agno/tools/twilio.py +186 -0
- agno/tools/user_control_flow.py +78 -0
- agno/tools/valyu.py +228 -0
- agno/tools/visualization.py +467 -0
- agno/tools/webbrowser.py +28 -0
- agno/tools/webex.py +76 -0
- agno/tools/website.py +54 -0
- agno/tools/webtools.py +45 -0
- agno/tools/whatsapp.py +286 -0
- agno/tools/wikipedia.py +63 -0
- agno/tools/workflow.py +278 -0
- agno/tools/x.py +335 -0
- agno/tools/yfinance.py +257 -0
- agno/tools/youtube.py +184 -0
- agno/tools/zendesk.py +82 -0
- agno/tools/zep.py +454 -0
- agno/tools/zoom.py +382 -0
- agno/utils/__init__.py +0 -0
- agno/utils/agent.py +820 -0
- agno/utils/audio.py +49 -0
- agno/utils/certs.py +27 -0
- agno/utils/code_execution.py +11 -0
- agno/utils/common.py +132 -0
- agno/utils/dttm.py +13 -0
- agno/utils/enum.py +22 -0
- agno/utils/env.py +11 -0
- agno/utils/events.py +696 -0
- agno/utils/format_str.py +16 -0
- agno/utils/functions.py +166 -0
- agno/utils/gemini.py +426 -0
- agno/utils/hooks.py +57 -0
- agno/utils/http.py +74 -0
- agno/utils/json_schema.py +234 -0
- agno/utils/knowledge.py +36 -0
- agno/utils/location.py +19 -0
- agno/utils/log.py +255 -0
- agno/utils/mcp.py +214 -0
- agno/utils/media.py +352 -0
- agno/utils/merge_dict.py +41 -0
- agno/utils/message.py +118 -0
- agno/utils/models/__init__.py +0 -0
- agno/utils/models/ai_foundry.py +43 -0
- agno/utils/models/claude.py +358 -0
- agno/utils/models/cohere.py +87 -0
- agno/utils/models/llama.py +78 -0
- agno/utils/models/mistral.py +98 -0
- agno/utils/models/openai_responses.py +140 -0
- agno/utils/models/schema_utils.py +153 -0
- agno/utils/models/watsonx.py +41 -0
- agno/utils/openai.py +257 -0
- agno/utils/pickle.py +32 -0
- agno/utils/pprint.py +178 -0
- agno/utils/print_response/__init__.py +0 -0
- agno/utils/print_response/agent.py +842 -0
- agno/utils/print_response/team.py +1724 -0
- agno/utils/print_response/workflow.py +1668 -0
- agno/utils/prompts.py +111 -0
- agno/utils/reasoning.py +108 -0
- agno/utils/response.py +163 -0
- agno/utils/response_iterator.py +17 -0
- agno/utils/safe_formatter.py +24 -0
- agno/utils/serialize.py +32 -0
- agno/utils/shell.py +22 -0
- agno/utils/streamlit.py +487 -0
- agno/utils/string.py +231 -0
- agno/utils/team.py +139 -0
- agno/utils/timer.py +41 -0
- agno/utils/tools.py +102 -0
- agno/utils/web.py +23 -0
- agno/utils/whatsapp.py +305 -0
- agno/utils/yaml_io.py +25 -0
- agno/vectordb/__init__.py +3 -0
- agno/vectordb/base.py +127 -0
- agno/vectordb/cassandra/__init__.py +5 -0
- agno/vectordb/cassandra/cassandra.py +501 -0
- agno/vectordb/cassandra/extra_param_mixin.py +11 -0
- agno/vectordb/cassandra/index.py +13 -0
- agno/vectordb/chroma/__init__.py +5 -0
- agno/vectordb/chroma/chromadb.py +929 -0
- agno/vectordb/clickhouse/__init__.py +9 -0
- agno/vectordb/clickhouse/clickhousedb.py +835 -0
- agno/vectordb/clickhouse/index.py +9 -0
- agno/vectordb/couchbase/__init__.py +3 -0
- agno/vectordb/couchbase/couchbase.py +1442 -0
- agno/vectordb/distance.py +7 -0
- agno/vectordb/lancedb/__init__.py +6 -0
- agno/vectordb/lancedb/lance_db.py +995 -0
- agno/vectordb/langchaindb/__init__.py +5 -0
- agno/vectordb/langchaindb/langchaindb.py +163 -0
- agno/vectordb/lightrag/__init__.py +5 -0
- agno/vectordb/lightrag/lightrag.py +388 -0
- agno/vectordb/llamaindex/__init__.py +3 -0
- agno/vectordb/llamaindex/llamaindexdb.py +166 -0
- agno/vectordb/milvus/__init__.py +4 -0
- agno/vectordb/milvus/milvus.py +1182 -0
- agno/vectordb/mongodb/__init__.py +9 -0
- agno/vectordb/mongodb/mongodb.py +1417 -0
- agno/vectordb/pgvector/__init__.py +12 -0
- agno/vectordb/pgvector/index.py +23 -0
- agno/vectordb/pgvector/pgvector.py +1462 -0
- agno/vectordb/pineconedb/__init__.py +5 -0
- agno/vectordb/pineconedb/pineconedb.py +747 -0
- agno/vectordb/qdrant/__init__.py +5 -0
- agno/vectordb/qdrant/qdrant.py +1134 -0
- agno/vectordb/redis/__init__.py +9 -0
- agno/vectordb/redis/redisdb.py +694 -0
- agno/vectordb/search.py +7 -0
- agno/vectordb/singlestore/__init__.py +10 -0
- agno/vectordb/singlestore/index.py +41 -0
- agno/vectordb/singlestore/singlestore.py +763 -0
- agno/vectordb/surrealdb/__init__.py +3 -0
- agno/vectordb/surrealdb/surrealdb.py +699 -0
- agno/vectordb/upstashdb/__init__.py +5 -0
- agno/vectordb/upstashdb/upstashdb.py +718 -0
- agno/vectordb/weaviate/__init__.py +8 -0
- agno/vectordb/weaviate/index.py +15 -0
- agno/vectordb/weaviate/weaviate.py +1005 -0
- agno/workflow/__init__.py +23 -0
- agno/workflow/agent.py +299 -0
- agno/workflow/condition.py +738 -0
- agno/workflow/loop.py +735 -0
- agno/workflow/parallel.py +824 -0
- agno/workflow/router.py +702 -0
- agno/workflow/step.py +1432 -0
- agno/workflow/steps.py +592 -0
- agno/workflow/types.py +520 -0
- agno/workflow/workflow.py +4321 -0
- agno-2.2.13.dist-info/METADATA +614 -0
- agno-2.2.13.dist-info/RECORD +575 -0
- agno-2.2.13.dist-info/WHEEL +5 -0
- agno-2.2.13.dist-info/licenses/LICENSE +201 -0
- agno-2.2.13.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,1134 @@
|
|
|
1
|
+
from hashlib import md5
|
|
2
|
+
from typing import Any, Dict, List, Optional, Union
|
|
3
|
+
|
|
4
|
+
try:
|
|
5
|
+
from qdrant_client import AsyncQdrantClient, QdrantClient # noqa: F401
|
|
6
|
+
from qdrant_client.http import models
|
|
7
|
+
except ImportError:
|
|
8
|
+
raise ImportError(
|
|
9
|
+
"The `qdrant-client` package is not installed. Please install it via `pip install qdrant-client`."
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
from agno.filters import FilterExpr
|
|
13
|
+
from agno.knowledge.document import Document
|
|
14
|
+
from agno.knowledge.embedder import Embedder
|
|
15
|
+
from agno.knowledge.reranker.base import Reranker
|
|
16
|
+
from agno.utils.log import log_debug, log_error, log_info, log_warning
|
|
17
|
+
from agno.vectordb.base import VectorDb
|
|
18
|
+
from agno.vectordb.distance import Distance
|
|
19
|
+
from agno.vectordb.search import SearchType
|
|
20
|
+
|
|
21
|
+
DEFAULT_DENSE_VECTOR_NAME = "dense"
|
|
22
|
+
DEFAULT_SPARSE_VECTOR_NAME = "sparse"
|
|
23
|
+
DEFAULT_SPARSE_MODEL = "Qdrant/bm25"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class Qdrant(VectorDb):
|
|
27
|
+
"""Vector DB implementation powered by Qdrant - https://qdrant.tech/"""
|
|
28
|
+
|
|
29
|
+
    def __init__(
        self,
        collection: str,
        name: Optional[str] = None,
        description: Optional[str] = None,
        id: Optional[str] = None,
        embedder: Optional[Embedder] = None,
        distance: Distance = Distance.cosine,
        location: Optional[str] = None,
        url: Optional[str] = None,
        port: Optional[int] = 6333,
        grpc_port: int = 6334,
        prefer_grpc: bool = False,
        https: Optional[bool] = None,
        api_key: Optional[str] = None,
        prefix: Optional[str] = None,
        timeout: Optional[float] = None,
        host: Optional[str] = None,
        path: Optional[str] = None,
        reranker: Optional[Reranker] = None,
        search_type: SearchType = SearchType.vector,
        dense_vector_name: str = DEFAULT_DENSE_VECTOR_NAME,
        sparse_vector_name: str = DEFAULT_SPARSE_VECTOR_NAME,
        hybrid_fusion_strategy: models.Fusion = models.Fusion.RRF,
        fastembed_kwargs: Optional[dict] = None,
        **kwargs,
    ):
        """
        Args:
            collection (str): Name of the Qdrant collection.
            name (Optional[str]): Name of the vector database.
            description (Optional[str]): Description of the vector database.
            id (Optional[str]): Stable identifier for this vector DB. When omitted, one is
                derived from the host/location/url and collection name (see below).
            embedder (Optional[Embedder]): Optional embedder for automatic vector generation.
            distance (Distance): Distance metric to use (default: cosine).
            location (Optional[str]): `":memory:"` for in-memory, or str used as `url`. If `None`, use default host/port.
            url (Optional[str]): Full URL (scheme, host, port, prefix). Overrides host/port if provided.
            port (Optional[int]): REST API port (default: 6333).
            grpc_port (int): gRPC interface port (default: 6334).
            prefer_grpc (bool): Prefer gRPC over REST if True.
            https (Optional[bool]): Use HTTPS if True.
            api_key (Optional[str]): API key for Qdrant Cloud authentication.
            prefix (Optional[str]): URL path prefix (e.g., "service/v1").
            timeout (Optional[float]): Request timeout (REST: default 5s, gRPC: unlimited).
            host (Optional[str]): Qdrant host (default: "localhost" if not specified).
            path (Optional[str]): Path for local persistence (QdrantLocal).
            reranker (Optional[Reranker]): Optional reranker for result refinement.
            search_type (SearchType): Whether to use vector, keyword or hybrid search.
            dense_vector_name (str): Dense vector name.
            sparse_vector_name (str): Sparse vector name.
            hybrid_fusion_strategy (models.Fusion): Strategy for hybrid fusion.
            fastembed_kwargs (Optional[dict]): Keyword args for `fastembed.SparseTextEmbedding.__init__()`.
            **kwargs: Keyword args for `qdrant_client.QdrantClient.__init__()`.

        Raises:
            ValueError: If `collection` is empty.
            ImportError: If keyword/hybrid search is requested but `fastembed` is not installed.
        """
        # Validate required parameters
        if not collection:
            raise ValueError("Collection name must be provided.")

        # Dynamic ID generation based on unique identifiers.
        # NOTE(review): assumes generate_id is deterministic for a given seed, so the
        # same host+collection pair always maps to the same DB id — confirm in agno.utils.string.
        if id is None:
            from agno.utils.string import generate_id

            host_identifier = host or location or url or "localhost"
            seed = f"{host_identifier}#{collection}"
            id = generate_id(seed)

        # Initialize base class with name, description, and generated ID
        super().__init__(id=id, name=name, description=description)

        # Collection attributes
        self.collection: str = collection

        # Embedder for embedding the document contents; falls back to OpenAI embeddings
        # when the caller does not supply one.
        if embedder is None:
            from agno.knowledge.embedder.openai import OpenAIEmbedder

            embedder = OpenAIEmbedder()
            log_info("Embedder not provided, using OpenAIEmbedder as default.")

        self.embedder: Embedder = embedder
        self.dimensions: Optional[int] = self.embedder.dimensions

        # Distance metric
        self.distance: Distance = distance

        # Qdrant client instance (created lazily by the `client` property)
        self._client: Optional[QdrantClient] = None

        # Qdrant async client instance (created lazily by the `async_client` property)
        self._async_client: Optional[AsyncQdrantClient] = None

        # Qdrant client arguments, stored verbatim and forwarded on client construction
        self.location: Optional[str] = location
        self.url: Optional[str] = url
        self.port: Optional[int] = port
        self.grpc_port: int = grpc_port
        self.prefer_grpc: bool = prefer_grpc
        self.https: Optional[bool] = https
        self.api_key: Optional[str] = api_key
        self.prefix: Optional[str] = prefix
        self.timeout: Optional[float] = timeout
        self.host: Optional[str] = host
        self.path: Optional[str] = path

        # Reranker instance
        self.reranker: Optional[Reranker] = reranker

        # Extra Qdrant client kwargs
        self.kwargs = kwargs

        self.search_type = search_type
        self.dense_vector_name = dense_vector_name
        self.sparse_vector_name = sparse_vector_name
        self.hybrid_fusion_strategy = hybrid_fusion_strategy

        # TODO(v2.0.0): Remove backward compatibility for unnamed vectors
        # TODO(v2.0.0): Make named vectors mandatory and simplify the codebase
        self.use_named_vectors = search_type in [SearchType.hybrid]

        # Keyword and hybrid search need a sparse encoder; fastembed is an optional extra,
        # so fail fast with an actionable message when it is missing.
        if self.search_type in [SearchType.keyword, SearchType.hybrid]:
            try:
                from fastembed import SparseTextEmbedding  # type: ignore

                default_kwargs = {"model_name": DEFAULT_SPARSE_MODEL}
                if fastembed_kwargs:
                    default_kwargs.update(fastembed_kwargs)

                # Type ignore for mypy as SparseTextEmbedding constructor accepts flexible kwargs
                self.sparse_encoder = SparseTextEmbedding(**default_kwargs)  # type: ignore

            except ImportError as e:
                raise ImportError(
                    "To use keyword/hybrid search, install the `fastembed` extra with `pip install fastembed`."
                ) from e
|
|
163
|
+
@property
|
|
164
|
+
def client(self) -> QdrantClient:
|
|
165
|
+
if self._client is None:
|
|
166
|
+
log_debug("Creating Qdrant Client")
|
|
167
|
+
self._client = QdrantClient(
|
|
168
|
+
location=self.location,
|
|
169
|
+
url=self.url,
|
|
170
|
+
port=self.port,
|
|
171
|
+
grpc_port=self.grpc_port,
|
|
172
|
+
prefer_grpc=self.prefer_grpc,
|
|
173
|
+
https=self.https,
|
|
174
|
+
api_key=self.api_key,
|
|
175
|
+
prefix=self.prefix,
|
|
176
|
+
timeout=int(self.timeout) if self.timeout is not None else None,
|
|
177
|
+
host=self.host,
|
|
178
|
+
path=self.path,
|
|
179
|
+
**self.kwargs,
|
|
180
|
+
)
|
|
181
|
+
return self._client
|
|
182
|
+
|
|
183
|
+
@property
|
|
184
|
+
def async_client(self) -> AsyncQdrantClient:
|
|
185
|
+
"""Get or create the async Qdrant client."""
|
|
186
|
+
if self._async_client is None:
|
|
187
|
+
log_debug("Creating Async Qdrant Client")
|
|
188
|
+
self._async_client = AsyncQdrantClient(
|
|
189
|
+
location=self.location,
|
|
190
|
+
url=self.url,
|
|
191
|
+
port=self.port,
|
|
192
|
+
grpc_port=self.grpc_port,
|
|
193
|
+
prefer_grpc=self.prefer_grpc,
|
|
194
|
+
https=self.https,
|
|
195
|
+
api_key=self.api_key,
|
|
196
|
+
prefix=self.prefix,
|
|
197
|
+
timeout=int(self.timeout) if self.timeout is not None else None,
|
|
198
|
+
host=self.host,
|
|
199
|
+
path=self.path,
|
|
200
|
+
**self.kwargs,
|
|
201
|
+
)
|
|
202
|
+
return self._async_client
|
|
203
|
+
|
|
204
|
+
def create(self) -> None:
|
|
205
|
+
_distance = models.Distance.COSINE
|
|
206
|
+
if self.distance == Distance.l2:
|
|
207
|
+
_distance = models.Distance.EUCLID
|
|
208
|
+
elif self.distance == Distance.max_inner_product:
|
|
209
|
+
_distance = models.Distance.DOT
|
|
210
|
+
|
|
211
|
+
if not self.exists():
|
|
212
|
+
log_debug(f"Creating collection: {self.collection}")
|
|
213
|
+
|
|
214
|
+
# Configure vectors based on search type
|
|
215
|
+
if self.search_type == SearchType.vector:
|
|
216
|
+
# Maintain backward compatibility with unnamed vectors
|
|
217
|
+
vectors_config = models.VectorParams(size=self.dimensions or 1536, distance=_distance)
|
|
218
|
+
else:
|
|
219
|
+
# Use named vectors for hybrid search
|
|
220
|
+
vectors_config = {
|
|
221
|
+
self.dense_vector_name: models.VectorParams(size=self.dimensions or 1536, distance=_distance)
|
|
222
|
+
} # type: ignore
|
|
223
|
+
|
|
224
|
+
self.client.create_collection(
|
|
225
|
+
collection_name=self.collection,
|
|
226
|
+
vectors_config=vectors_config,
|
|
227
|
+
sparse_vectors_config={self.sparse_vector_name: models.SparseVectorParams()}
|
|
228
|
+
if self.search_type in [SearchType.keyword, SearchType.hybrid]
|
|
229
|
+
else None,
|
|
230
|
+
)
|
|
231
|
+
|
|
232
|
+
async def async_create(self) -> None:
    """Create the collection asynchronously."""
    # Map the generic Distance enum onto Qdrant's metric names;
    # cosine is the default for anything not listed.
    distance_by_kind = {
        Distance.l2: models.Distance.EUCLID,
        Distance.max_inner_product: models.Distance.DOT,
    }
    _distance = distance_by_kind.get(self.distance, models.Distance.COSINE)

    if await self.async_exists():
        return

    log_debug(f"Creating collection asynchronously: {self.collection}")

    dense_params = models.VectorParams(size=self.dimensions or 1536, distance=_distance)
    if self.search_type == SearchType.vector:
        # Unnamed vector keeps backward compatibility with older collections.
        vectors_config = dense_params
    else:
        # Hybrid/keyword search uses a named dense vector.
        vectors_config = {self.dense_vector_name: dense_params}  # type: ignore

    sparse_config = None
    if self.search_type in [SearchType.keyword, SearchType.hybrid]:
        sparse_config = {self.sparse_vector_name: models.SparseVectorParams()}

    await self.async_client.create_collection(
        collection_name=self.collection,
        vectors_config=vectors_config,
        sparse_vectors_config=sparse_config,
    )
def doc_exists(self, document: Document) -> bool:
    """
    Validating if the document exists or not

    Args:
        document (Document): Document to validate
    """
    if not self.client:
        return False
    # The point id is the md5 of the NUL-sanitized content, mirroring insert().
    sanitized = document.content.replace("\x00", "\ufffd")
    point_id = md5(sanitized.encode()).hexdigest()
    retrieved = self.client.retrieve(
        collection_name=self.collection,
        ids=[point_id],
    )
    return len(retrieved) > 0
async def async_doc_exists(self, document: Document) -> bool:
    """Check if a document exists asynchronously."""
    # The point id is the md5 of the NUL-sanitized content, mirroring insert().
    sanitized = document.content.replace("\x00", "\ufffd")
    point_id = md5(sanitized.encode()).hexdigest()
    retrieved = await self.async_client.retrieve(
        collection_name=self.collection,
        ids=[point_id],
    )
    return len(retrieved) > 0
def name_exists(self, name: str) -> bool:
    """
    Validates if a document with the given name exists in the collection.

    Args:
        name (str): The name of the document to check.

    Returns:
        bool: True if a document with the given name exists, False otherwise.
    """
    if not self.client:
        return False
    # scroll() returns (records, next_page_offset); one record is enough.
    records, _offset = self.client.scroll(
        collection_name=self.collection,
        scroll_filter=models.Filter(
            must=[models.FieldCondition(key="name", match=models.MatchValue(value=name))]
        ),
        limit=1,
    )
    return len(records) > 0
async def async_name_exists(self, name: str) -> bool:  # type: ignore[override]
    """
    Asynchronously validates if a document with the given name exists in the collection.

    Args:
        name (str): The name of the document to check.

    Returns:
        bool: True if a document with the given name exists, False otherwise.
    """
    if not self.async_client:
        return False
    # scroll() returns (records, next_page_offset); one record is enough.
    records, _offset = await self.async_client.scroll(
        collection_name=self.collection,
        scroll_filter=models.Filter(
            must=[models.FieldCondition(key="name", match=models.MatchValue(value=name))]
        ),
        limit=1,
    )
    return len(records) > 0
def insert(
    self,
    content_hash: str,
    documents: List[Document],
    filters: Optional[Dict[str, Any]] = None,
    batch_size: int = 10,
) -> None:
    """
    Insert documents into the database.

    Args:
        content_hash (str): Hash of the source content, stored on every point.
        documents (List[Document]): List of documents to insert
        filters (Optional[Dict[str, Any]]): Filters to apply while inserting documents
        batch_size (int): Batch size for inserting documents
            NOTE(review): currently unused — all points go in one upsert call.
    """
    log_debug(f"Inserting {len(documents)} documents")
    points = []
    for document in documents:
        # Qdrant payloads cannot contain NUL bytes; replace them up-front.
        cleaned_content = document.content.replace("\x00", "\ufffd")
        # Deterministic point id derived from content, so re-inserting the
        # same content overwrites the existing point.
        doc_id = md5(cleaned_content.encode()).hexdigest()

        # Fixed: removed a dead `use_named_vectors` block that read
        # document.embedding before the document was embedded; its result
        # was unconditionally overwritten by the branch below.
        if self.search_type == SearchType.vector:
            # For vector search, maintain backward compatibility with unnamed vectors
            document.embed(embedder=self.embedder)
            vector = document.embedding  # type: ignore
        else:
            # For other search types, use named vectors
            vector = {}
            if self.search_type in [SearchType.hybrid]:
                document.embed(embedder=self.embedder)
                vector[self.dense_vector_name] = document.embedding

            if self.search_type in [SearchType.keyword, SearchType.hybrid]:
                vector[self.sparse_vector_name] = next(
                    iter(self.sparse_encoder.embed([document.content]))
                ).as_object()  # type: ignore

        # Payload carries everything needed to rebuild a Document on search.
        payload = {
            "name": document.name,
            "meta_data": document.meta_data,
            "content": cleaned_content,
            "usage": document.usage,
            "content_id": document.content_id,
            "content_hash": content_hash,
        }

        # Add filters as metadata if provided
        if filters:
            # Merge filters with existing metadata
            if "meta_data" not in payload:
                payload["meta_data"] = {}
            payload["meta_data"].update(filters)  # type: ignore

        points.append(
            models.PointStruct(
                id=doc_id,
                vector=vector,  # type: ignore
                payload=payload,
            )
        )
        log_debug(f"Inserted document: {document.name} ({document.meta_data})")
    if len(points) > 0:
        # wait=False: fire-and-forget; Qdrant applies the upsert asynchronously.
        self.client.upsert(collection_name=self.collection, wait=False, points=points)
        log_debug(f"Upsert {len(points)} documents")
async def async_insert(
    self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
) -> None:
    """
    Insert documents asynchronously.

    Args:
        content_hash (str): Hash of the source content, stored on every point.
        documents (List[Document]): List of documents to insert
        filters (Optional[Dict[str, Any]]): Filters to apply while inserting documents
    """
    import asyncio

    log_debug(f"Inserting {len(documents)} documents asynchronously")

    # Embed documents up-front when the search type needs dense vectors.
    if self.search_type in [SearchType.vector, SearchType.hybrid]:
        if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
            # Use batch embedding when enabled and supported
            try:
                doc_contents = [doc.content for doc in documents]
                embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
                # Assign pre-computed embeddings back onto the documents.
                for j, doc in enumerate(documents):
                    try:
                        if j < len(embeddings):
                            doc.embedding = embeddings[j]
                            doc.usage = usages[j] if j < len(usages) else None
                    except Exception as e:
                        log_error(f"Error assigning batch embedding to document '{doc.name}': {e}")
            except Exception as e:
                # Rate-limit errors must NOT trigger the per-document fallback:
                # that would only multiply the request volume.
                error_str = str(e).lower()
                is_rate_limit = any(
                    phrase in error_str
                    for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
                )
                if is_rate_limit:
                    log_error(f"Rate limit detected during batch embedding. {e}")
                    raise e
                log_warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
                # Fall back to individual embedding (we already know the
                # search type needs dense vectors — no re-check per document).
                for doc in documents:
                    doc.embed(embedder=self.embedder)
        else:
            # Use individual embedding
            for doc in documents:
                doc.embed(embedder=self.embedder)

    async def process_document(document):
        # Build one PointStruct for a single (already embedded) document.
        cleaned_content = document.content.replace("\x00", "\ufffd")
        doc_id = md5(cleaned_content.encode()).hexdigest()

        if self.search_type == SearchType.vector:
            # For vector search, maintain backward compatibility with unnamed vectors
            vector = document.embedding  # Already embedded above
        else:
            # For other search types, use named vectors
            vector = {}
            if self.search_type in [SearchType.hybrid]:
                vector[self.dense_vector_name] = document.embedding  # Already embedded above

            # Fixed: the sparse embedding was previously computed twice per
            # document (a duplicate block re-assigned the same key); it is
            # now computed exactly once.
            if self.search_type in [SearchType.keyword, SearchType.hybrid]:
                vector[self.sparse_vector_name] = next(
                    iter(self.sparse_encoder.embed([document.content]))
                ).as_object()  # type: ignore

        # Payload carries everything needed to rebuild a Document on search.
        payload = {
            "name": document.name,
            "meta_data": document.meta_data,
            "content": cleaned_content,
            "usage": document.usage,
            "content_id": document.content_id,
            "content_hash": content_hash,
        }

        # Add filters as metadata if provided
        if filters:
            # Merge filters with existing metadata
            if "meta_data" not in payload:
                payload["meta_data"] = {}
            payload["meta_data"].update(filters)

        log_debug(f"Inserted document asynchronously: {document.name} ({document.meta_data})")
        return models.PointStruct(  # type: ignore
            id=doc_id,
            vector=vector,  # type: ignore
            payload=payload,
        )

    # Process all documents in parallel
    points = await asyncio.gather(*[process_document(doc) for doc in documents])

    if len(points) > 0:
        await self.async_client.upsert(collection_name=self.collection, wait=False, points=points)
        log_debug(f"Upserted {len(points)} documents asynchronously")
def upsert(self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
    """
    Upsert documents: remove any previous points sharing this content hash,
    then insert the new versions.

    Args:
        content_hash (str): Hash identifying the source content.
        documents (List[Document]): List of documents to upsert
        filters (Optional[Dict[str, Any]]): Filters to apply while upserting
    """
    log_debug("Redirecting the request to insert")
    if self.content_hash_exists(content_hash):
        # Drop the stale points before re-inserting the new version.
        self._delete_by_content_hash(content_hash)
    self.insert(content_hash=content_hash, documents=documents, filters=filters)
async def async_upsert(
    self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
) -> None:
    """Upsert documents asynchronously.

    NOTE(review): unlike the sync ``upsert``, this does NOT delete existing
    points with the same content hash before inserting — it relies on
    ``async_insert``'s deterministic (content-md5) point ids to overwrite.
    Points from a previous version whose content changed are left behind;
    confirm whether that is intended.
    """
    log_debug("Redirecting the async request to async_insert")
    await self.async_insert(content_hash=content_hash, documents=documents, filters=filters)
def search(
    self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
) -> List[Document]:
    """
    Search for documents in the collection.

    Args:
        query (str): Query to search for
        limit (int): Number of search results to return
        filters (Optional[Dict[str, Any]]): Filters to apply while searching
    """
    if isinstance(filters, List):
        log_warning("Filters Expressions are not supported in Qdrant. No filters will be applied.")
        filters = None

    formatted_filters = self._format_filters(filters or {})  # type: ignore

    # Dispatch to the runner matching the configured search type.
    runners = {
        SearchType.vector: self._run_vector_search_sync,
        SearchType.keyword: self._run_keyword_search_sync,
        SearchType.hybrid: self._run_hybrid_search_sync,
    }
    runner = runners.get(self.search_type)
    if runner is None:
        raise ValueError(f"Unsupported search type: {self.search_type}")

    raw_results = runner(query, limit, formatted_filters)  # type: ignore
    return self._build_search_results(raw_results, query)
async def async_search(
    self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
) -> List[Document]:
    """Search asynchronously, dispatching on the configured search type."""
    if isinstance(filters, List):
        log_warning("Filters Expressions are not supported in Qdrant. No filters will be applied.")
        filters = None

    formatted_filters = self._format_filters(filters or {})  # type: ignore

    # Dispatch to the async runner matching the configured search type.
    runners = {
        SearchType.vector: self._run_vector_search_async,
        SearchType.keyword: self._run_keyword_search_async,
        SearchType.hybrid: self._run_hybrid_search_async,
    }
    runner = runners.get(self.search_type)
    if runner is None:
        raise ValueError(f"Unsupported search type: {self.search_type}")

    raw_results = await runner(query, limit, formatted_filters)  # type: ignore
    return self._build_search_results(raw_results, query)
def _run_hybrid_search_sync(
    self,
    query: str,
    limit: int,
    filters: Optional[Union[Dict[str, Any], List[FilterExpr]]],
) -> List[models.ScoredPoint]:
    """Run a fused dense+sparse (hybrid) query and return the scored points."""
    dense_query = self.embedder.get_embedding(query)
    sparse_query = next(iter(self.sparse_encoder.embed([query]))).as_object()

    # Prefetch both legs, then fuse with the configured strategy.
    prefetches = [
        models.Prefetch(
            query=models.SparseVector(**sparse_query),  # type: ignore
            limit=limit,
            using=self.sparse_vector_name,
        ),
        models.Prefetch(query=dense_query, limit=limit, using=self.dense_vector_name),
    ]
    response = self.client.query_points(
        collection_name=self.collection,
        prefetch=prefetches,
        query=models.FusionQuery(fusion=self.hybrid_fusion_strategy),
        with_vectors=True,
        with_payload=True,
        limit=limit,
        query_filter=filters,
    )
    return response.points
def _run_vector_search_sync(
    self,
    query: str,
    limit: int,
    filters: Optional[Union[Dict[str, Any], List[FilterExpr]]],
) -> List[models.ScoredPoint]:
    """Run a dense-vector query and return the scored points.

    Consolidates the two previously duplicated ``query_points`` calls that
    differed only by the ``using`` argument.
    """
    dense_embedding = self.embedder.get_embedding(query)

    query_kwargs: Dict[str, Any] = {
        "collection_name": self.collection,
        "query": dense_embedding,
        "with_vectors": True,
        "with_payload": True,
        "limit": limit,
        "query_filter": filters,
    }
    # TODO(v2.0.0): Remove this conditional and always use named vectors
    if self.use_named_vectors:
        query_kwargs["using"] = self.dense_vector_name
    # else: backward compatibility mode — query the unnamed vector.

    call = self.client.query_points(**query_kwargs)
    return call.points
def _run_keyword_search_sync(
    self,
    query: str,
    limit: int,
    filters: Optional[Union[Dict[str, Any], List[FilterExpr]]],
) -> List[models.ScoredPoint]:
    """Run a sparse-vector (keyword) query and return the scored points."""
    sparse_query = next(iter(self.sparse_encoder.embed([query]))).as_object()
    response = self.client.query_points(
        collection_name=self.collection,
        query=models.SparseVector(**sparse_query),  # type: ignore
        with_vectors=True,
        with_payload=True,
        limit=limit,
        using=self.sparse_vector_name,
        query_filter=filters,
    )
    return response.points
async def _run_vector_search_async(
    self,
    query: str,
    limit: int,
    filters: Optional[Dict[str, Any]],
) -> List[models.ScoredPoint]:
    """Run a dense-vector query asynchronously and return the scored points.

    Consolidates the two previously duplicated ``query_points`` calls that
    differed only by the ``using`` argument.
    """
    dense_embedding = self.embedder.get_embedding(query)

    query_kwargs: Dict[str, Any] = {
        "collection_name": self.collection,
        "query": dense_embedding,
        "with_vectors": True,
        "with_payload": True,
        "limit": limit,
        "query_filter": filters,
    }
    # TODO(v2.0.0): Remove this conditional and always use named vectors
    if self.use_named_vectors:
        query_kwargs["using"] = self.dense_vector_name
    # else: backward compatibility mode — query the unnamed vector.

    call = await self.async_client.query_points(**query_kwargs)
    return call.points
async def _run_keyword_search_async(
    self,
    query: str,
    limit: int,
    filters: Optional[Dict[str, Any]],
) -> List[models.ScoredPoint]:
    """Run a sparse-vector (keyword) query asynchronously and return the scored points."""
    sparse_query = next(iter(self.sparse_encoder.embed([query]))).as_object()
    response = await self.async_client.query_points(
        collection_name=self.collection,
        query=models.SparseVector(**sparse_query),  # type: ignore
        with_vectors=True,
        with_payload=True,
        limit=limit,
        using=self.sparse_vector_name,
        query_filter=filters,
    )
    return response.points
async def _run_hybrid_search_async(
    self,
    query: str,
    limit: int,
    filters: Optional[Union[Dict[str, Any], List[FilterExpr]]],
) -> List[models.ScoredPoint]:
    """Run a fused dense+sparse (hybrid) query asynchronously and return the scored points."""
    dense_query = self.embedder.get_embedding(query)
    sparse_query = next(iter(self.sparse_encoder.embed([query]))).as_object()

    # Prefetch both legs, then fuse with the configured strategy.
    prefetches = [
        models.Prefetch(
            query=models.SparseVector(**sparse_query),  # type: ignore
            limit=limit,
            using=self.sparse_vector_name,
        ),
        models.Prefetch(query=dense_query, limit=limit, using=self.dense_vector_name),
    ]
    response = await self.async_client.query_points(
        collection_name=self.collection,
        prefetch=prefetches,
        query=models.FusionQuery(fusion=self.hybrid_fusion_strategy),
        with_vectors=True,
        with_payload=True,
        limit=limit,
        query_filter=filters,
    )
    return response.points
def _build_search_results(self, results, query: str) -> List[Document]:
    """Convert Qdrant scored points into Documents, reranking when configured."""
    documents: List[Document] = []

    for point in results:
        payload = point.payload
        # Points without a payload cannot be rebuilt into Documents.
        if payload is None:
            continue
        documents.append(
            Document(
                name=payload["name"],
                meta_data=payload["meta_data"],
                content=payload["content"],
                embedder=self.embedder,
                embedding=point.vector,  # type: ignore
                usage=payload.get("usage"),
                content_id=payload.get("content_id"),
            )
        )

    if self.reranker:
        documents = self.reranker.rerank(query=query, documents=documents)

    log_info(f"Found {len(documents)} documents")
    return documents
def _format_filters(self, filters: Optional[Dict[str, Any]]) -> Optional[models.Filter]:
    """Translate a flat or one-level-nested filter dict into a Qdrant Filter (AND semantics)."""
    if not filters:
        return None

    conditions = []
    for key, value in filters.items():
        # Bare field names (no dot, no explicit meta_data prefix) are assumed
        # to live under the meta_data payload key.
        if "." not in key and not key.startswith("meta_data."):
            key = f"meta_data.{key}"

        if isinstance(value, dict):
            # Nested dict: one exact-match condition per sub-key.
            conditions.extend(
                models.FieldCondition(key=f"{key}.{sub_key}", match=models.MatchValue(value=sub_value))
                for sub_key, sub_value in value.items()
            )
        else:
            # Direct key/value pair → exact-match condition.
            conditions.append(models.FieldCondition(key=key, match=models.MatchValue(value=value)))

    return models.Filter(must=conditions) if conditions else None  # type: ignore
def optimize(self) -> None:
    """No-op for Qdrant.

    NOTE(review): presumably Qdrant manages index optimization server-side,
    so there is nothing to do client-side — confirm against the base class
    contract for this method.
    """
    pass
def drop(self) -> None:
    """Delete the collection if it exists; otherwise do nothing."""
    if not self.exists():
        return
    log_debug(f"Deleting collection: {self.collection}")
    self.client.delete_collection(self.collection)
async def async_drop(self) -> None:
    """Drop the collection asynchronously."""
    if not await self.async_exists():
        return
    log_debug(f"Deleting collection asynchronously: {self.collection}")
    await self.async_client.delete_collection(self.collection)
def exists(self) -> bool:
    """Check if the collection exists."""
    collection_found: bool = self.client.collection_exists(collection_name=self.collection)
    return collection_found
async def async_exists(self) -> bool:
    """Check if the collection exists asynchronously."""
    collection_found: bool = await self.async_client.collection_exists(collection_name=self.collection)
    return collection_found
def get_count(self) -> int:
    """Return the exact number of points in the collection."""
    result: models.CountResult = self.client.count(collection_name=self.collection, exact=True)
    return result.count
def point_exists(self, id: str) -> bool:
    """Check if a point with the given ID exists in the collection."""
    try:
        log_info(f"Checking if point with ID '{id}' (type: {type(id)}) exists in collection '{self.collection}'")
        # Retrieve by id only — payload/vectors are not needed for existence.
        matches = self.client.retrieve(
            collection_name=self.collection, ids=[id], with_payload=False, with_vectors=False
        )
        log_info(f"Retrieved {len(matches)} points for ID '{id}'")
        if matches:
            log_info(f"Found point with ID: {matches[0].id} (type: {type(matches[0].id)})")
        return len(matches) > 0
    except Exception as e:
        log_info(f"Error checking if point {id} exists: {e}")
        return False
def delete(self) -> bool:
    """Drop the whole collection; returns the client's success flag."""
    return self.client.delete_collection(collection_name=self.collection)
def delete_by_id(self, id: str) -> bool:
    """Delete a single point by ID; returns True when the point is absent or deleted."""
    try:
        if not self.point_exists(id):
            # Nothing to delete — a missing point counts as success.
            log_warning(f"Point with ID {id} does not exist")
            return True

        self.client.delete(
            collection_name=self.collection,
            points_selector=models.PointIdsList(points=[id]),
            wait=True,  # block until the deletion is applied
        )
        return True
    except Exception as e:
        log_info(f"Error deleting point with ID {id}: {e}")
        return False
def delete_by_name(self, name: str) -> bool:
    """Delete all points that have the specified name in their payload (precise match)."""
    try:
        log_info(f"Attempting to delete all points with name: {name}")

        # Exact match on the top-level "name" payload field.
        name_filter = models.Filter(
            must=[models.FieldCondition(key="name", match=models.MatchValue(value=name))]
        )

        # Count first so the logs can report how many points are removed.
        matched = self.client.count(collection_name=self.collection, count_filter=name_filter, exact=True)
        if matched.count == 0:
            log_warning(f"No points found with name: {name}")
            return True

        log_info(f"Found {matched.count} points to delete with name: {name}")

        outcome = self.client.delete(
            collection_name=self.collection,
            points_selector=name_filter,
            wait=True,  # block until the deletion is applied
        )

        if outcome.status != models.UpdateStatus.COMPLETED:
            log_warning(f"Deletion failed for name {name}. Status: {outcome.status}")
            return False

        log_info(f"Successfully deleted {matched.count} points with name: {name}")
        return True
    except Exception as e:
        log_warning(f"Error deleting points with name {name}: {e}")
        return False
def delete_by_metadata(self, metadata: Dict[str, Any]) -> bool:
    """Delete all points where the given metadata is contained in the meta_data payload field."""
    try:
        log_info(f"Attempting to delete all points with metadata: {metadata}")

        # AND together one exact-match condition per key/value pair;
        # metadata lives under the "meta_data" payload key.
        metadata_filter = models.Filter(
            must=[  # type: ignore
                models.FieldCondition(key=f"meta_data.{key}", match=models.MatchValue(value=value))
                for key, value in metadata.items()
            ]
        )

        # Count first so the logs can report how many points are removed.
        matched = self.client.count(collection_name=self.collection, count_filter=metadata_filter, exact=True)
        if matched.count == 0:
            log_warning(f"No points found with metadata: {metadata}")
            return True

        log_info(f"Found {matched.count} points to delete with metadata: {metadata}")

        outcome = self.client.delete(
            collection_name=self.collection,
            points_selector=metadata_filter,
            wait=True,  # block until the deletion is applied
        )

        if outcome.status != models.UpdateStatus.COMPLETED:
            log_warning(f"Deletion failed for metadata {metadata}. Status: {outcome.status}")
            return False

        log_info(f"Successfully deleted {matched.count} points with metadata: {metadata}")
        return True
    except Exception as e:
        log_warning(f"Error deleting points with metadata {metadata}: {e}")
        return False
def delete_by_content_id(self, content_id: str) -> bool:
    """Delete all points that have the specified content_id in their payload."""
    try:
        log_info(f"Attempting to delete all points with content_id: {content_id}")

        # Exact match on the top-level "content_id" payload field.
        content_filter = models.Filter(
            must=[models.FieldCondition(key="content_id", match=models.MatchValue(value=content_id))]
        )

        # Count first so the logs can report how many points are removed.
        matched = self.client.count(collection_name=self.collection, count_filter=content_filter, exact=True)
        if matched.count == 0:
            log_warning(f"No points found with content_id: {content_id}")
            return True

        log_info(f"Found {matched.count} points to delete with content_id: {content_id}")

        outcome = self.client.delete(
            collection_name=self.collection,
            points_selector=content_filter,
            wait=True,  # block until the deletion is applied
        )

        if outcome.status != models.UpdateStatus.COMPLETED:
            log_warning(f"Deletion failed for content_id {content_id}. Status: {outcome.status}")
            return False

        log_info(f"Successfully deleted {matched.count} points with content_id: {content_id}")
        return True
    except Exception as e:
        log_warning(f"Error deleting points with content_id {content_id}: {e}")
        return False
def id_exists(self, id: str) -> bool:
    """Check if a point with the given ID exists in the collection.

    Args:
        id (str): The ID to check.

    Returns:
        bool: True if the point exists, False otherwise.
    """
    try:
        # Payload and vectors are irrelevant here; fetch the bare point only.
        found = self.client.retrieve(
            collection_name=self.collection, ids=[id], with_payload=False, with_vectors=False
        )
        return bool(found)
    except Exception as e:
        log_info(f"Error checking if point {id} exists: {e}")
        return False
|
|
978
|
+
def content_hash_exists(self, content_hash: str) -> bool:
    """Check if any points with the given content hash exist in the collection.

    Args:
        content_hash (str): The content hash to check.

    Returns:
        bool: True if points with the content hash exist, False otherwise.
    """
    try:
        # Exact count of points whose payload carries this content_hash.
        selector = models.Filter(
            must=[models.FieldCondition(key="content_hash", match=models.MatchValue(value=content_hash))]
        )
        total = self.client.count(collection_name=self.collection, count_filter=selector, exact=True).count
        return total > 0
    except Exception as e:
        log_info(f"Error checking if content_hash {content_hash} exists: {e}")
        return False
|
|
1000
|
+
def _delete_by_content_hash(self, content_hash: str) -> bool:
    """Delete all points that have the specified content_hash in their payload.

    Args:
        content_hash (str): The content hash to delete.

    Returns:
        bool: True if points were deleted successfully, False otherwise.
    """
    try:
        log_info(f"Attempting to delete all points with content_hash: {content_hash}")

        # Selector matching every point tagged with this content_hash.
        selector = models.Filter(
            must=[models.FieldCondition(key="content_hash", match=models.MatchValue(value=content_hash))]
        )

        # Count matches up front so we can report how many points are removed.
        matched = self.client.count(collection_name=self.collection, count_filter=selector, exact=True).count
        if matched == 0:
            log_warning(f"No points found with content_hash: {content_hash}")
            return True

        log_info(f"Found {matched} points to delete with content_hash: {content_hash}")

        # Delete synchronously so the status below reflects the final outcome.
        outcome = self.client.delete(
            collection_name=self.collection,
            points_selector=selector,
            wait=True,
        )

        if outcome.status != models.UpdateStatus.COMPLETED:
            log_warning(f"Deletion failed for content_hash {content_hash}. Status: {outcome.status}")
            return False

        log_info(f"Successfully deleted {matched} points with content_hash: {content_hash}")
        return True

    except Exception as e:
        log_warning(f"Error deleting points with content_hash {content_hash}: {e}")
        return False
|
|
1045
|
+
def update_metadata(self, content_id: str, metadata: Dict[str, Any]) -> None:
    """
    Update the metadata for documents with the given content_id.

    Collects every point whose payload carries ``content_id`` — following the
    scroll cursor so matches beyond the first page are included (the previous
    implementation issued a single scroll with limit=10000 and silently
    skipped anything past that) — merges the new metadata into each point's
    payload and its nested "filters" mapping, and writes the result back.

    Args:
        content_id (str): The content ID to update
        metadata (Dict[str, Any]): The metadata to update

    Raises:
        Exception: Re-raises any error from the underlying client after logging.
    """
    try:
        if not self.client:
            log_error("Client not initialized")
            return

        # Create filter for content_id
        filter_condition = models.Filter(
            must=[models.FieldCondition(key="content_id", match=models.MatchValue(value=content_id))]
        )

        # Page through all matching points; scroll returns (points, next_offset)
        # and next_offset is None once the last page has been read.
        points = []
        offset = None
        while True:
            batch, offset = self.client.scroll(
                collection_name=self.collection,
                scroll_filter=filter_condition,
                limit=10000,
                offset=offset,
                with_payload=True,
                with_vectors=False,
            )
            points.extend(batch)
            if offset is None:
                break

        if not points:
            log_error(f"No documents found with content_id: {content_id}")
            return

        for point in points:
            current_payload = point.payload or {}

            # Merge new metadata into the top-level payload.
            updated_payload = current_payload.copy()
            updated_payload.update(metadata)

            # Keep the nested "filters" mapping in sync with the metadata.
            if "filters" not in updated_payload:
                updated_payload["filters"] = {}
            if isinstance(updated_payload["filters"], dict):
                updated_payload["filters"].update(metadata)
            else:
                # Copy rather than alias so later caller-side mutation of
                # `metadata` cannot leak into the stored payload.
                updated_payload["filters"] = dict(metadata)

            # set_payload merges keys into the existing payload server-side.
            self.client.set_payload(
                collection_name=self.collection, payload=updated_payload, points=[point.id]
            )

        log_debug(f"Updated metadata for {len(points)} documents with content_id: {content_id}")

    except Exception as e:
        log_error(f"Error updating metadata for content_id '{content_id}': {e}")
        raise
|
|
1110
|
+
def close(self) -> None:
    """Close the Qdrant client connections."""
    if self._client is None:
        return
    try:
        self._client.close()
        log_debug("Qdrant client closed successfully")
    except Exception as e:
        # Best-effort shutdown: log and continue so the reference is cleared.
        log_debug(f"Error closing Qdrant client: {e}")
    finally:
        self._client = None
|
|
1121
|
+
async def async_close(self) -> None:
    """Close the Qdrant client connections asynchronously."""
    if self._async_client is None:
        return
    try:
        await self._async_client.close()
        log_debug("Async Qdrant client closed successfully")
    except Exception as e:
        # Best-effort shutdown: log and continue so the reference is cleared.
        log_debug(f"Error closing async Qdrant client: {e}")
    finally:
        self._async_client = None
|
|
1132
|
+
def get_supported_search_types(self) -> List[str]:
    """Get the supported search types for this vector database."""
    # Vector, keyword, and hybrid search are all available for Qdrant.
    supported = (SearchType.vector, SearchType.keyword, SearchType.hybrid)
    return list(supported)
|