agno 0.1.2__py3-none-any.whl → 2.3.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/__init__.py +8 -0
- agno/agent/__init__.py +44 -5
- agno/agent/agent.py +10531 -2975
- agno/api/agent.py +14 -53
- agno/api/api.py +7 -46
- agno/api/evals.py +22 -0
- agno/api/os.py +17 -0
- agno/api/routes.py +6 -25
- agno/api/schemas/__init__.py +9 -0
- agno/api/schemas/agent.py +6 -9
- agno/api/schemas/evals.py +16 -0
- agno/api/schemas/os.py +14 -0
- agno/api/schemas/team.py +10 -10
- agno/api/schemas/utils.py +21 -0
- agno/api/schemas/workflows.py +16 -0
- agno/api/settings.py +53 -0
- agno/api/team.py +22 -26
- agno/api/workflow.py +28 -0
- agno/cloud/aws/base.py +214 -0
- agno/cloud/aws/s3/__init__.py +2 -0
- agno/cloud/aws/s3/api_client.py +43 -0
- agno/cloud/aws/s3/bucket.py +195 -0
- agno/cloud/aws/s3/object.py +57 -0
- agno/compression/__init__.py +3 -0
- agno/compression/manager.py +247 -0
- agno/culture/__init__.py +3 -0
- agno/culture/manager.py +956 -0
- agno/db/__init__.py +24 -0
- agno/db/async_postgres/__init__.py +3 -0
- agno/db/base.py +946 -0
- agno/db/dynamo/__init__.py +3 -0
- agno/db/dynamo/dynamo.py +2781 -0
- agno/db/dynamo/schemas.py +442 -0
- agno/db/dynamo/utils.py +743 -0
- agno/db/firestore/__init__.py +3 -0
- agno/db/firestore/firestore.py +2379 -0
- agno/db/firestore/schemas.py +181 -0
- agno/db/firestore/utils.py +376 -0
- agno/db/gcs_json/__init__.py +3 -0
- agno/db/gcs_json/gcs_json_db.py +1791 -0
- agno/db/gcs_json/utils.py +228 -0
- agno/db/in_memory/__init__.py +3 -0
- agno/db/in_memory/in_memory_db.py +1312 -0
- agno/db/in_memory/utils.py +230 -0
- agno/db/json/__init__.py +3 -0
- agno/db/json/json_db.py +1777 -0
- agno/db/json/utils.py +230 -0
- agno/db/migrations/manager.py +199 -0
- agno/db/migrations/v1_to_v2.py +635 -0
- agno/db/migrations/versions/v2_3_0.py +938 -0
- agno/db/mongo/__init__.py +17 -0
- agno/db/mongo/async_mongo.py +2760 -0
- agno/db/mongo/mongo.py +2597 -0
- agno/db/mongo/schemas.py +119 -0
- agno/db/mongo/utils.py +276 -0
- agno/db/mysql/__init__.py +4 -0
- agno/db/mysql/async_mysql.py +2912 -0
- agno/db/mysql/mysql.py +2923 -0
- agno/db/mysql/schemas.py +186 -0
- agno/db/mysql/utils.py +488 -0
- agno/db/postgres/__init__.py +4 -0
- agno/db/postgres/async_postgres.py +2579 -0
- agno/db/postgres/postgres.py +2870 -0
- agno/db/postgres/schemas.py +187 -0
- agno/db/postgres/utils.py +442 -0
- agno/db/redis/__init__.py +3 -0
- agno/db/redis/redis.py +2141 -0
- agno/db/redis/schemas.py +159 -0
- agno/db/redis/utils.py +346 -0
- agno/db/schemas/__init__.py +4 -0
- agno/db/schemas/culture.py +120 -0
- agno/db/schemas/evals.py +34 -0
- agno/db/schemas/knowledge.py +40 -0
- agno/db/schemas/memory.py +61 -0
- agno/db/singlestore/__init__.py +3 -0
- agno/db/singlestore/schemas.py +179 -0
- agno/db/singlestore/singlestore.py +2877 -0
- agno/db/singlestore/utils.py +384 -0
- agno/db/sqlite/__init__.py +4 -0
- agno/db/sqlite/async_sqlite.py +2911 -0
- agno/db/sqlite/schemas.py +181 -0
- agno/db/sqlite/sqlite.py +2908 -0
- agno/db/sqlite/utils.py +429 -0
- agno/db/surrealdb/__init__.py +3 -0
- agno/db/surrealdb/metrics.py +292 -0
- agno/db/surrealdb/models.py +334 -0
- agno/db/surrealdb/queries.py +71 -0
- agno/db/surrealdb/surrealdb.py +1908 -0
- agno/db/surrealdb/utils.py +147 -0
- agno/db/utils.py +118 -0
- agno/eval/__init__.py +24 -0
- agno/eval/accuracy.py +666 -276
- agno/eval/agent_as_judge.py +861 -0
- agno/eval/base.py +29 -0
- agno/eval/performance.py +779 -0
- agno/eval/reliability.py +241 -62
- agno/eval/utils.py +120 -0
- agno/exceptions.py +143 -1
- agno/filters.py +354 -0
- agno/guardrails/__init__.py +6 -0
- agno/guardrails/base.py +19 -0
- agno/guardrails/openai.py +144 -0
- agno/guardrails/pii.py +94 -0
- agno/guardrails/prompt_injection.py +52 -0
- agno/hooks/__init__.py +3 -0
- agno/hooks/decorator.py +164 -0
- agno/integrations/discord/__init__.py +3 -0
- agno/integrations/discord/client.py +203 -0
- agno/knowledge/__init__.py +5 -1
- agno/{document → knowledge}/chunking/agentic.py +22 -14
- agno/{document → knowledge}/chunking/document.py +2 -2
- agno/{document → knowledge}/chunking/fixed.py +7 -6
- agno/knowledge/chunking/markdown.py +151 -0
- agno/{document → knowledge}/chunking/recursive.py +15 -3
- agno/knowledge/chunking/row.py +39 -0
- agno/knowledge/chunking/semantic.py +91 -0
- agno/knowledge/chunking/strategy.py +165 -0
- agno/knowledge/content.py +74 -0
- agno/knowledge/document/__init__.py +5 -0
- agno/{document → knowledge/document}/base.py +12 -2
- agno/knowledge/embedder/__init__.py +5 -0
- agno/knowledge/embedder/aws_bedrock.py +343 -0
- agno/knowledge/embedder/azure_openai.py +210 -0
- agno/{embedder → knowledge/embedder}/base.py +8 -0
- agno/knowledge/embedder/cohere.py +323 -0
- agno/knowledge/embedder/fastembed.py +62 -0
- agno/{embedder → knowledge/embedder}/fireworks.py +1 -1
- agno/knowledge/embedder/google.py +258 -0
- agno/knowledge/embedder/huggingface.py +94 -0
- agno/knowledge/embedder/jina.py +182 -0
- agno/knowledge/embedder/langdb.py +22 -0
- agno/knowledge/embedder/mistral.py +206 -0
- agno/knowledge/embedder/nebius.py +13 -0
- agno/knowledge/embedder/ollama.py +154 -0
- agno/knowledge/embedder/openai.py +195 -0
- agno/knowledge/embedder/sentence_transformer.py +63 -0
- agno/{embedder → knowledge/embedder}/together.py +1 -1
- agno/knowledge/embedder/vllm.py +262 -0
- agno/knowledge/embedder/voyageai.py +165 -0
- agno/knowledge/knowledge.py +3006 -0
- agno/knowledge/reader/__init__.py +7 -0
- agno/knowledge/reader/arxiv_reader.py +81 -0
- agno/knowledge/reader/base.py +95 -0
- agno/knowledge/reader/csv_reader.py +164 -0
- agno/knowledge/reader/docx_reader.py +82 -0
- agno/knowledge/reader/field_labeled_csv_reader.py +290 -0
- agno/knowledge/reader/firecrawl_reader.py +201 -0
- agno/knowledge/reader/json_reader.py +88 -0
- agno/knowledge/reader/markdown_reader.py +137 -0
- agno/knowledge/reader/pdf_reader.py +431 -0
- agno/knowledge/reader/pptx_reader.py +101 -0
- agno/knowledge/reader/reader_factory.py +313 -0
- agno/knowledge/reader/s3_reader.py +89 -0
- agno/knowledge/reader/tavily_reader.py +193 -0
- agno/knowledge/reader/text_reader.py +127 -0
- agno/knowledge/reader/web_search_reader.py +325 -0
- agno/knowledge/reader/website_reader.py +455 -0
- agno/knowledge/reader/wikipedia_reader.py +91 -0
- agno/knowledge/reader/youtube_reader.py +78 -0
- agno/knowledge/remote_content/remote_content.py +88 -0
- agno/knowledge/reranker/__init__.py +3 -0
- agno/{reranker → knowledge/reranker}/base.py +1 -1
- agno/{reranker → knowledge/reranker}/cohere.py +2 -2
- agno/knowledge/reranker/infinity.py +195 -0
- agno/knowledge/reranker/sentence_transformer.py +54 -0
- agno/knowledge/types.py +39 -0
- agno/knowledge/utils.py +234 -0
- agno/media.py +439 -95
- agno/memory/__init__.py +16 -3
- agno/memory/manager.py +1474 -123
- agno/memory/strategies/__init__.py +15 -0
- agno/memory/strategies/base.py +66 -0
- agno/memory/strategies/summarize.py +196 -0
- agno/memory/strategies/types.py +37 -0
- agno/models/aimlapi/__init__.py +5 -0
- agno/models/aimlapi/aimlapi.py +62 -0
- agno/models/anthropic/__init__.py +4 -0
- agno/models/anthropic/claude.py +960 -496
- agno/models/aws/__init__.py +15 -0
- agno/models/aws/bedrock.py +686 -451
- agno/models/aws/claude.py +190 -183
- agno/models/azure/__init__.py +18 -1
- agno/models/azure/ai_foundry.py +489 -0
- agno/models/azure/openai_chat.py +89 -40
- agno/models/base.py +2477 -550
- agno/models/cerebras/__init__.py +12 -0
- agno/models/cerebras/cerebras.py +565 -0
- agno/models/cerebras/cerebras_openai.py +131 -0
- agno/models/cohere/__init__.py +4 -0
- agno/models/cohere/chat.py +306 -492
- agno/models/cometapi/__init__.py +5 -0
- agno/models/cometapi/cometapi.py +74 -0
- agno/models/dashscope/__init__.py +5 -0
- agno/models/dashscope/dashscope.py +90 -0
- agno/models/deepinfra/__init__.py +5 -0
- agno/models/deepinfra/deepinfra.py +45 -0
- agno/models/deepseek/__init__.py +4 -0
- agno/models/deepseek/deepseek.py +110 -9
- agno/models/fireworks/__init__.py +4 -0
- agno/models/fireworks/fireworks.py +19 -22
- agno/models/google/__init__.py +3 -7
- agno/models/google/gemini.py +1717 -662
- agno/models/google/utils.py +22 -0
- agno/models/groq/__init__.py +4 -0
- agno/models/groq/groq.py +391 -666
- agno/models/huggingface/__init__.py +4 -0
- agno/models/huggingface/huggingface.py +266 -538
- agno/models/ibm/__init__.py +5 -0
- agno/models/ibm/watsonx.py +432 -0
- agno/models/internlm/__init__.py +3 -0
- agno/models/internlm/internlm.py +20 -3
- agno/models/langdb/__init__.py +1 -0
- agno/models/langdb/langdb.py +60 -0
- agno/models/litellm/__init__.py +14 -0
- agno/models/litellm/chat.py +503 -0
- agno/models/litellm/litellm_openai.py +42 -0
- agno/models/llama_cpp/__init__.py +5 -0
- agno/models/llama_cpp/llama_cpp.py +22 -0
- agno/models/lmstudio/__init__.py +5 -0
- agno/models/lmstudio/lmstudio.py +25 -0
- agno/models/message.py +361 -39
- agno/models/meta/__init__.py +12 -0
- agno/models/meta/llama.py +502 -0
- agno/models/meta/llama_openai.py +79 -0
- agno/models/metrics.py +120 -0
- agno/models/mistral/__init__.py +4 -0
- agno/models/mistral/mistral.py +293 -393
- agno/models/nebius/__init__.py +3 -0
- agno/models/nebius/nebius.py +53 -0
- agno/models/nexus/__init__.py +3 -0
- agno/models/nexus/nexus.py +22 -0
- agno/models/nvidia/__init__.py +4 -0
- agno/models/nvidia/nvidia.py +22 -3
- agno/models/ollama/__init__.py +4 -2
- agno/models/ollama/chat.py +257 -492
- agno/models/openai/__init__.py +7 -0
- agno/models/openai/chat.py +725 -770
- agno/models/openai/like.py +16 -2
- agno/models/openai/responses.py +1121 -0
- agno/models/openrouter/__init__.py +4 -0
- agno/models/openrouter/openrouter.py +62 -5
- agno/models/perplexity/__init__.py +5 -0
- agno/models/perplexity/perplexity.py +203 -0
- agno/models/portkey/__init__.py +3 -0
- agno/models/portkey/portkey.py +82 -0
- agno/models/requesty/__init__.py +5 -0
- agno/models/requesty/requesty.py +69 -0
- agno/models/response.py +177 -7
- agno/models/sambanova/__init__.py +4 -0
- agno/models/sambanova/sambanova.py +23 -4
- agno/models/siliconflow/__init__.py +5 -0
- agno/models/siliconflow/siliconflow.py +42 -0
- agno/models/together/__init__.py +4 -0
- agno/models/together/together.py +21 -164
- agno/models/utils.py +266 -0
- agno/models/vercel/__init__.py +3 -0
- agno/models/vercel/v0.py +43 -0
- agno/models/vertexai/__init__.py +0 -1
- agno/models/vertexai/claude.py +190 -0
- agno/models/vllm/__init__.py +3 -0
- agno/models/vllm/vllm.py +83 -0
- agno/models/xai/__init__.py +2 -0
- agno/models/xai/xai.py +111 -7
- agno/os/__init__.py +3 -0
- agno/os/app.py +1027 -0
- agno/os/auth.py +244 -0
- agno/os/config.py +126 -0
- agno/os/interfaces/__init__.py +1 -0
- agno/os/interfaces/a2a/__init__.py +3 -0
- agno/os/interfaces/a2a/a2a.py +42 -0
- agno/os/interfaces/a2a/router.py +249 -0
- agno/os/interfaces/a2a/utils.py +924 -0
- agno/os/interfaces/agui/__init__.py +3 -0
- agno/os/interfaces/agui/agui.py +47 -0
- agno/os/interfaces/agui/router.py +147 -0
- agno/os/interfaces/agui/utils.py +574 -0
- agno/os/interfaces/base.py +25 -0
- agno/os/interfaces/slack/__init__.py +3 -0
- agno/os/interfaces/slack/router.py +148 -0
- agno/os/interfaces/slack/security.py +30 -0
- agno/os/interfaces/slack/slack.py +47 -0
- agno/os/interfaces/whatsapp/__init__.py +3 -0
- agno/os/interfaces/whatsapp/router.py +210 -0
- agno/os/interfaces/whatsapp/security.py +55 -0
- agno/os/interfaces/whatsapp/whatsapp.py +36 -0
- agno/os/mcp.py +293 -0
- agno/os/middleware/__init__.py +9 -0
- agno/os/middleware/jwt.py +797 -0
- agno/os/router.py +258 -0
- agno/os/routers/__init__.py +3 -0
- agno/os/routers/agents/__init__.py +3 -0
- agno/os/routers/agents/router.py +599 -0
- agno/os/routers/agents/schema.py +261 -0
- agno/os/routers/evals/__init__.py +3 -0
- agno/os/routers/evals/evals.py +450 -0
- agno/os/routers/evals/schemas.py +174 -0
- agno/os/routers/evals/utils.py +231 -0
- agno/os/routers/health.py +31 -0
- agno/os/routers/home.py +52 -0
- agno/os/routers/knowledge/__init__.py +3 -0
- agno/os/routers/knowledge/knowledge.py +1008 -0
- agno/os/routers/knowledge/schemas.py +178 -0
- agno/os/routers/memory/__init__.py +3 -0
- agno/os/routers/memory/memory.py +661 -0
- agno/os/routers/memory/schemas.py +88 -0
- agno/os/routers/metrics/__init__.py +3 -0
- agno/os/routers/metrics/metrics.py +190 -0
- agno/os/routers/metrics/schemas.py +47 -0
- agno/os/routers/session/__init__.py +3 -0
- agno/os/routers/session/session.py +997 -0
- agno/os/routers/teams/__init__.py +3 -0
- agno/os/routers/teams/router.py +512 -0
- agno/os/routers/teams/schema.py +257 -0
- agno/os/routers/traces/__init__.py +3 -0
- agno/os/routers/traces/schemas.py +414 -0
- agno/os/routers/traces/traces.py +499 -0
- agno/os/routers/workflows/__init__.py +3 -0
- agno/os/routers/workflows/router.py +624 -0
- agno/os/routers/workflows/schema.py +75 -0
- agno/os/schema.py +534 -0
- agno/os/scopes.py +469 -0
- agno/{playground → os}/settings.py +7 -15
- agno/os/utils.py +973 -0
- agno/reasoning/anthropic.py +80 -0
- agno/reasoning/azure_ai_foundry.py +67 -0
- agno/reasoning/deepseek.py +63 -0
- agno/reasoning/default.py +97 -0
- agno/reasoning/gemini.py +73 -0
- agno/reasoning/groq.py +71 -0
- agno/reasoning/helpers.py +24 -1
- agno/reasoning/ollama.py +67 -0
- agno/reasoning/openai.py +86 -0
- agno/reasoning/step.py +2 -1
- agno/reasoning/vertexai.py +76 -0
- agno/run/__init__.py +6 -0
- agno/run/agent.py +822 -0
- agno/run/base.py +247 -0
- agno/run/cancel.py +81 -0
- agno/run/requirement.py +181 -0
- agno/run/team.py +767 -0
- agno/run/workflow.py +708 -0
- agno/session/__init__.py +10 -0
- agno/session/agent.py +260 -0
- agno/session/summary.py +265 -0
- agno/session/team.py +342 -0
- agno/session/workflow.py +501 -0
- agno/table.py +10 -0
- agno/team/__init__.py +37 -0
- agno/team/team.py +9536 -0
- agno/tools/__init__.py +7 -0
- agno/tools/agentql.py +120 -0
- agno/tools/airflow.py +22 -12
- agno/tools/api.py +122 -0
- agno/tools/apify.py +276 -83
- agno/tools/{arxiv_toolkit.py → arxiv.py} +20 -12
- agno/tools/aws_lambda.py +28 -7
- agno/tools/aws_ses.py +66 -0
- agno/tools/baidusearch.py +11 -4
- agno/tools/bitbucket.py +292 -0
- agno/tools/brandfetch.py +213 -0
- agno/tools/bravesearch.py +106 -0
- agno/tools/brightdata.py +367 -0
- agno/tools/browserbase.py +209 -0
- agno/tools/calcom.py +32 -23
- agno/tools/calculator.py +24 -37
- agno/tools/cartesia.py +187 -0
- agno/tools/{clickup_tool.py → clickup.py} +17 -28
- agno/tools/confluence.py +91 -26
- agno/tools/crawl4ai.py +139 -43
- agno/tools/csv_toolkit.py +28 -22
- agno/tools/dalle.py +36 -22
- agno/tools/daytona.py +475 -0
- agno/tools/decorator.py +169 -14
- agno/tools/desi_vocal.py +23 -11
- agno/tools/discord.py +32 -29
- agno/tools/docker.py +716 -0
- agno/tools/duckdb.py +76 -81
- agno/tools/duckduckgo.py +43 -40
- agno/tools/e2b.py +703 -0
- agno/tools/eleven_labs.py +65 -54
- agno/tools/email.py +13 -5
- agno/tools/evm.py +129 -0
- agno/tools/exa.py +324 -42
- agno/tools/fal.py +39 -35
- agno/tools/file.py +196 -30
- agno/tools/file_generation.py +356 -0
- agno/tools/financial_datasets.py +288 -0
- agno/tools/firecrawl.py +108 -33
- agno/tools/function.py +960 -122
- agno/tools/giphy.py +34 -12
- agno/tools/github.py +1294 -97
- agno/tools/gmail.py +922 -0
- agno/tools/google_bigquery.py +117 -0
- agno/tools/google_drive.py +271 -0
- agno/tools/google_maps.py +253 -0
- agno/tools/googlecalendar.py +607 -107
- agno/tools/googlesheets.py +377 -0
- agno/tools/hackernews.py +20 -12
- agno/tools/jina.py +24 -14
- agno/tools/jira.py +48 -19
- agno/tools/knowledge.py +218 -0
- agno/tools/linear.py +82 -43
- agno/tools/linkup.py +58 -0
- agno/tools/local_file_system.py +15 -7
- agno/tools/lumalab.py +41 -26
- agno/tools/mcp/__init__.py +10 -0
- agno/tools/mcp/mcp.py +331 -0
- agno/tools/mcp/multi_mcp.py +347 -0
- agno/tools/mcp/params.py +24 -0
- agno/tools/mcp_toolbox.py +284 -0
- agno/tools/mem0.py +193 -0
- agno/tools/memory.py +419 -0
- agno/tools/mlx_transcribe.py +11 -9
- agno/tools/models/azure_openai.py +190 -0
- agno/tools/models/gemini.py +203 -0
- agno/tools/models/groq.py +158 -0
- agno/tools/models/morph.py +186 -0
- agno/tools/models/nebius.py +124 -0
- agno/tools/models_labs.py +163 -82
- agno/tools/moviepy_video.py +18 -13
- agno/tools/nano_banana.py +151 -0
- agno/tools/neo4j.py +134 -0
- agno/tools/newspaper.py +15 -4
- agno/tools/newspaper4k.py +19 -6
- agno/tools/notion.py +204 -0
- agno/tools/openai.py +181 -17
- agno/tools/openbb.py +27 -20
- agno/tools/opencv.py +321 -0
- agno/tools/openweather.py +233 -0
- agno/tools/oxylabs.py +385 -0
- agno/tools/pandas.py +25 -15
- agno/tools/parallel.py +314 -0
- agno/tools/postgres.py +238 -185
- agno/tools/pubmed.py +125 -13
- agno/tools/python.py +48 -35
- agno/tools/reasoning.py +283 -0
- agno/tools/reddit.py +207 -29
- agno/tools/redshift.py +406 -0
- agno/tools/replicate.py +69 -26
- agno/tools/resend.py +11 -6
- agno/tools/scrapegraph.py +179 -19
- agno/tools/searxng.py +23 -31
- agno/tools/serpapi.py +15 -10
- agno/tools/serper.py +255 -0
- agno/tools/shell.py +23 -12
- agno/tools/shopify.py +1519 -0
- agno/tools/slack.py +56 -14
- agno/tools/sleep.py +8 -6
- agno/tools/spider.py +35 -11
- agno/tools/spotify.py +919 -0
- agno/tools/sql.py +34 -19
- agno/tools/tavily.py +158 -8
- agno/tools/telegram.py +18 -8
- agno/tools/todoist.py +218 -0
- agno/tools/toolkit.py +134 -9
- agno/tools/trafilatura.py +388 -0
- agno/tools/trello.py +25 -28
- agno/tools/twilio.py +18 -9
- agno/tools/user_control_flow.py +78 -0
- agno/tools/valyu.py +228 -0
- agno/tools/visualization.py +467 -0
- agno/tools/webbrowser.py +28 -0
- agno/tools/webex.py +76 -0
- agno/tools/website.py +23 -19
- agno/tools/webtools.py +45 -0
- agno/tools/whatsapp.py +286 -0
- agno/tools/wikipedia.py +28 -19
- agno/tools/workflow.py +285 -0
- agno/tools/{twitter.py → x.py} +142 -46
- agno/tools/yfinance.py +41 -39
- agno/tools/youtube.py +34 -17
- agno/tools/zendesk.py +15 -5
- agno/tools/zep.py +454 -0
- agno/tools/zoom.py +86 -37
- agno/tracing/__init__.py +12 -0
- agno/tracing/exporter.py +157 -0
- agno/tracing/schemas.py +276 -0
- agno/tracing/setup.py +111 -0
- agno/utils/agent.py +938 -0
- agno/utils/audio.py +37 -1
- agno/utils/certs.py +27 -0
- agno/utils/code_execution.py +11 -0
- agno/utils/common.py +103 -20
- agno/utils/cryptography.py +22 -0
- agno/utils/dttm.py +33 -0
- agno/utils/events.py +700 -0
- agno/utils/functions.py +107 -37
- agno/utils/gemini.py +426 -0
- agno/utils/hooks.py +171 -0
- agno/utils/http.py +185 -0
- agno/utils/json_schema.py +159 -37
- agno/utils/knowledge.py +36 -0
- agno/utils/location.py +19 -0
- agno/utils/log.py +221 -8
- agno/utils/mcp.py +214 -0
- agno/utils/media.py +335 -14
- agno/utils/merge_dict.py +22 -1
- agno/utils/message.py +77 -2
- agno/utils/models/ai_foundry.py +50 -0
- agno/utils/models/claude.py +373 -0
- agno/utils/models/cohere.py +94 -0
- agno/utils/models/llama.py +85 -0
- agno/utils/models/mistral.py +100 -0
- agno/utils/models/openai_responses.py +140 -0
- agno/utils/models/schema_utils.py +153 -0
- agno/utils/models/watsonx.py +41 -0
- agno/utils/openai.py +257 -0
- agno/utils/pickle.py +1 -1
- agno/utils/pprint.py +124 -8
- agno/utils/print_response/agent.py +930 -0
- agno/utils/print_response/team.py +1914 -0
- agno/utils/print_response/workflow.py +1668 -0
- agno/utils/prompts.py +111 -0
- agno/utils/reasoning.py +108 -0
- agno/utils/response.py +163 -0
- agno/utils/serialize.py +32 -0
- agno/utils/shell.py +4 -4
- agno/utils/streamlit.py +487 -0
- agno/utils/string.py +204 -51
- agno/utils/team.py +139 -0
- agno/utils/timer.py +9 -2
- agno/utils/tokens.py +657 -0
- agno/utils/tools.py +19 -1
- agno/utils/whatsapp.py +305 -0
- agno/utils/yaml_io.py +3 -3
- agno/vectordb/__init__.py +2 -0
- agno/vectordb/base.py +87 -9
- agno/vectordb/cassandra/__init__.py +5 -1
- agno/vectordb/cassandra/cassandra.py +383 -27
- agno/vectordb/chroma/__init__.py +4 -0
- agno/vectordb/chroma/chromadb.py +748 -83
- agno/vectordb/clickhouse/__init__.py +7 -1
- agno/vectordb/clickhouse/clickhousedb.py +554 -53
- agno/vectordb/couchbase/__init__.py +3 -0
- agno/vectordb/couchbase/couchbase.py +1446 -0
- agno/vectordb/lancedb/__init__.py +5 -0
- agno/vectordb/lancedb/lance_db.py +730 -98
- agno/vectordb/langchaindb/__init__.py +5 -0
- agno/vectordb/langchaindb/langchaindb.py +163 -0
- agno/vectordb/lightrag/__init__.py +5 -0
- agno/vectordb/lightrag/lightrag.py +388 -0
- agno/vectordb/llamaindex/__init__.py +3 -0
- agno/vectordb/llamaindex/llamaindexdb.py +166 -0
- agno/vectordb/milvus/__init__.py +3 -0
- agno/vectordb/milvus/milvus.py +966 -78
- agno/vectordb/mongodb/__init__.py +9 -1
- agno/vectordb/mongodb/mongodb.py +1175 -172
- agno/vectordb/pgvector/__init__.py +8 -0
- agno/vectordb/pgvector/pgvector.py +599 -115
- agno/vectordb/pineconedb/__init__.py +5 -1
- agno/vectordb/pineconedb/pineconedb.py +406 -43
- agno/vectordb/qdrant/__init__.py +4 -0
- agno/vectordb/qdrant/qdrant.py +914 -61
- agno/vectordb/redis/__init__.py +9 -0
- agno/vectordb/redis/redisdb.py +682 -0
- agno/vectordb/singlestore/__init__.py +8 -1
- agno/vectordb/singlestore/singlestore.py +771 -0
- agno/vectordb/surrealdb/__init__.py +3 -0
- agno/vectordb/surrealdb/surrealdb.py +663 -0
- agno/vectordb/upstashdb/__init__.py +5 -0
- agno/vectordb/upstashdb/upstashdb.py +718 -0
- agno/vectordb/weaviate/__init__.py +8 -0
- agno/vectordb/weaviate/index.py +15 -0
- agno/vectordb/weaviate/weaviate.py +1009 -0
- agno/workflow/__init__.py +23 -1
- agno/workflow/agent.py +299 -0
- agno/workflow/condition.py +759 -0
- agno/workflow/loop.py +756 -0
- agno/workflow/parallel.py +853 -0
- agno/workflow/router.py +723 -0
- agno/workflow/step.py +1564 -0
- agno/workflow/steps.py +613 -0
- agno/workflow/types.py +556 -0
- agno/workflow/workflow.py +4327 -514
- agno-2.3.13.dist-info/METADATA +639 -0
- agno-2.3.13.dist-info/RECORD +613 -0
- {agno-0.1.2.dist-info → agno-2.3.13.dist-info}/WHEEL +1 -1
- agno-2.3.13.dist-info/licenses/LICENSE +201 -0
- agno/api/playground.py +0 -91
- agno/api/schemas/playground.py +0 -22
- agno/api/schemas/user.py +0 -22
- agno/api/schemas/workspace.py +0 -46
- agno/api/user.py +0 -160
- agno/api/workspace.py +0 -151
- agno/cli/auth_server.py +0 -118
- agno/cli/config.py +0 -275
- agno/cli/console.py +0 -88
- agno/cli/credentials.py +0 -23
- agno/cli/entrypoint.py +0 -571
- agno/cli/operator.py +0 -355
- agno/cli/settings.py +0 -85
- agno/cli/ws/ws_cli.py +0 -817
- agno/constants.py +0 -13
- agno/document/__init__.py +0 -1
- agno/document/chunking/semantic.py +0 -47
- agno/document/chunking/strategy.py +0 -31
- agno/document/reader/__init__.py +0 -1
- agno/document/reader/arxiv_reader.py +0 -41
- agno/document/reader/base.py +0 -22
- agno/document/reader/csv_reader.py +0 -84
- agno/document/reader/docx_reader.py +0 -46
- agno/document/reader/firecrawl_reader.py +0 -99
- agno/document/reader/json_reader.py +0 -43
- agno/document/reader/pdf_reader.py +0 -219
- agno/document/reader/s3/pdf_reader.py +0 -46
- agno/document/reader/s3/text_reader.py +0 -51
- agno/document/reader/text_reader.py +0 -41
- agno/document/reader/website_reader.py +0 -175
- agno/document/reader/youtube_reader.py +0 -50
- agno/embedder/__init__.py +0 -1
- agno/embedder/azure_openai.py +0 -86
- agno/embedder/cohere.py +0 -72
- agno/embedder/fastembed.py +0 -37
- agno/embedder/google.py +0 -73
- agno/embedder/huggingface.py +0 -54
- agno/embedder/mistral.py +0 -80
- agno/embedder/ollama.py +0 -57
- agno/embedder/openai.py +0 -74
- agno/embedder/sentence_transformer.py +0 -38
- agno/embedder/voyageai.py +0 -64
- agno/eval/perf.py +0 -201
- agno/file/__init__.py +0 -1
- agno/file/file.py +0 -16
- agno/file/local/csv.py +0 -32
- agno/file/local/txt.py +0 -19
- agno/infra/app.py +0 -240
- agno/infra/base.py +0 -144
- agno/infra/context.py +0 -20
- agno/infra/db_app.py +0 -52
- agno/infra/resource.py +0 -205
- agno/infra/resources.py +0 -55
- agno/knowledge/agent.py +0 -230
- agno/knowledge/arxiv.py +0 -22
- agno/knowledge/combined.py +0 -22
- agno/knowledge/csv.py +0 -28
- agno/knowledge/csv_url.py +0 -19
- agno/knowledge/document.py +0 -20
- agno/knowledge/docx.py +0 -30
- agno/knowledge/json.py +0 -28
- agno/knowledge/langchain.py +0 -71
- agno/knowledge/llamaindex.py +0 -66
- agno/knowledge/pdf.py +0 -28
- agno/knowledge/pdf_url.py +0 -26
- agno/knowledge/s3/base.py +0 -60
- agno/knowledge/s3/pdf.py +0 -21
- agno/knowledge/s3/text.py +0 -23
- agno/knowledge/text.py +0 -30
- agno/knowledge/website.py +0 -88
- agno/knowledge/wikipedia.py +0 -31
- agno/knowledge/youtube.py +0 -22
- agno/memory/agent.py +0 -392
- agno/memory/classifier.py +0 -104
- agno/memory/db/__init__.py +0 -1
- agno/memory/db/base.py +0 -42
- agno/memory/db/mongodb.py +0 -189
- agno/memory/db/postgres.py +0 -203
- agno/memory/db/sqlite.py +0 -193
- agno/memory/memory.py +0 -15
- agno/memory/row.py +0 -36
- agno/memory/summarizer.py +0 -192
- agno/memory/summary.py +0 -19
- agno/memory/workflow.py +0 -38
- agno/models/google/gemini_openai.py +0 -26
- agno/models/ollama/hermes.py +0 -221
- agno/models/ollama/tools.py +0 -362
- agno/models/vertexai/gemini.py +0 -595
- agno/playground/__init__.py +0 -3
- agno/playground/async_router.py +0 -421
- agno/playground/deploy.py +0 -249
- agno/playground/operator.py +0 -92
- agno/playground/playground.py +0 -91
- agno/playground/schemas.py +0 -76
- agno/playground/serve.py +0 -55
- agno/playground/sync_router.py +0 -405
- agno/reasoning/agent.py +0 -68
- agno/run/response.py +0 -112
- agno/storage/agent/__init__.py +0 -0
- agno/storage/agent/base.py +0 -38
- agno/storage/agent/dynamodb.py +0 -350
- agno/storage/agent/json.py +0 -92
- agno/storage/agent/mongodb.py +0 -228
- agno/storage/agent/postgres.py +0 -367
- agno/storage/agent/session.py +0 -79
- agno/storage/agent/singlestore.py +0 -303
- agno/storage/agent/sqlite.py +0 -357
- agno/storage/agent/yaml.py +0 -93
- agno/storage/workflow/__init__.py +0 -0
- agno/storage/workflow/base.py +0 -40
- agno/storage/workflow/mongodb.py +0 -233
- agno/storage/workflow/postgres.py +0 -366
- agno/storage/workflow/session.py +0 -60
- agno/storage/workflow/sqlite.py +0 -359
- agno/tools/googlesearch.py +0 -88
- agno/utils/defaults.py +0 -57
- agno/utils/filesystem.py +0 -39
- agno/utils/git.py +0 -52
- agno/utils/json_io.py +0 -30
- agno/utils/load_env.py +0 -19
- agno/utils/py_io.py +0 -19
- agno/utils/pyproject.py +0 -18
- agno/utils/resource_filter.py +0 -31
- agno/vectordb/singlestore/s2vectordb.py +0 -390
- agno/vectordb/singlestore/s2vectordb2.py +0 -355
- agno/workspace/__init__.py +0 -0
- agno/workspace/config.py +0 -325
- agno/workspace/enums.py +0 -6
- agno/workspace/helpers.py +0 -48
- agno/workspace/operator.py +0 -758
- agno/workspace/settings.py +0 -63
- agno-0.1.2.dist-info/LICENSE +0 -375
- agno-0.1.2.dist-info/METADATA +0 -502
- agno-0.1.2.dist-info/RECORD +0 -352
- agno-0.1.2.dist-info/entry_points.txt +0 -3
- /agno/{cli → db/migrations}/__init__.py +0 -0
- /agno/{cli/ws → db/migrations/versions}/__init__.py +0 -0
- /agno/{document/chunking/__init__.py → db/schemas/metrics.py} +0 -0
- /agno/{document/reader/s3 → integrations}/__init__.py +0 -0
- /agno/{file/local → knowledge/chunking}/__init__.py +0 -0
- /agno/{infra → knowledge/remote_content}/__init__.py +0 -0
- /agno/{knowledge/s3 → tools/models}/__init__.py +0 -0
- /agno/{reranker → utils/models}/__init__.py +0 -0
- /agno/{storage → utils/print_response}/__init__.py +0 -0
- {agno-0.1.2.dist-info → agno-2.3.13.dist-info}/top_level.txt +0 -0
agno/vectordb/milvus/milvus.py
CHANGED
|
@@ -1,27 +1,44 @@
|
|
|
1
|
+
import json
|
|
1
2
|
from hashlib import md5
|
|
2
|
-
from typing import Any, Dict, List, Optional
|
|
3
|
+
from typing import Any, Dict, List, Optional, Union
|
|
3
4
|
|
|
4
5
|
try:
|
|
5
|
-
|
|
6
|
+
import asyncio
|
|
7
|
+
|
|
8
|
+
from pymilvus import AsyncMilvusClient, MilvusClient # type: ignore
|
|
6
9
|
except ImportError:
|
|
7
10
|
raise ImportError("The `pymilvus` package is not installed. Please install it via `pip install pymilvus`.")
|
|
8
11
|
|
|
9
|
-
from agno.
|
|
10
|
-
from agno.
|
|
11
|
-
from agno.embedder
|
|
12
|
-
from agno.
|
|
12
|
+
from agno.filters import FilterExpr
|
|
13
|
+
from agno.knowledge.document import Document
|
|
14
|
+
from agno.knowledge.embedder import Embedder
|
|
15
|
+
from agno.knowledge.reranker.base import Reranker
|
|
16
|
+
from agno.utils.log import log_debug, log_error, log_info, log_warning
|
|
13
17
|
from agno.vectordb.base import VectorDb
|
|
14
18
|
from agno.vectordb.distance import Distance
|
|
19
|
+
from agno.vectordb.search import SearchType
|
|
20
|
+
|
|
21
|
+
MILVUS_DISTANCE_MAP = {
|
|
22
|
+
Distance.cosine: "COSINE",
|
|
23
|
+
Distance.l2: "L2",
|
|
24
|
+
Distance.max_inner_product: "IP",
|
|
25
|
+
}
|
|
15
26
|
|
|
16
27
|
|
|
17
28
|
class Milvus(VectorDb):
|
|
18
29
|
def __init__(
|
|
19
30
|
self,
|
|
20
31
|
collection: str,
|
|
21
|
-
|
|
32
|
+
name: Optional[str] = None,
|
|
33
|
+
description: Optional[str] = None,
|
|
34
|
+
id: Optional[str] = None,
|
|
35
|
+
embedder: Optional[Embedder] = None,
|
|
22
36
|
distance: Distance = Distance.cosine,
|
|
23
37
|
uri: str = "http://localhost:19530",
|
|
24
38
|
token: Optional[str] = None,
|
|
39
|
+
search_type: SearchType = SearchType.vector,
|
|
40
|
+
reranker: Optional[Reranker] = None,
|
|
41
|
+
sparse_vector_dimensions: int = 10000,
|
|
25
42
|
**kwargs,
|
|
26
43
|
):
|
|
27
44
|
"""
|
|
@@ -29,6 +46,8 @@ class Milvus(VectorDb):
|
|
|
29
46
|
|
|
30
47
|
Args:
|
|
31
48
|
collection (str): Name of the Milvus collection.
|
|
49
|
+
name (Optional[str]): Name of the vector database.
|
|
50
|
+
description (Optional[str]): Description of the vector database.
|
|
32
51
|
embedder (Embedder): Embedder to use for embedding documents.
|
|
33
52
|
distance (Distance): Distance metric to use for vector similarity.
|
|
34
53
|
uri (Optional[str]): URI of the Milvus server.
|
|
@@ -46,21 +65,48 @@ class Milvus(VectorDb):
|
|
|
46
65
|
[Public Endpoint and API key](https://docs.zilliz.com/docs/on-zilliz-cloud-console#cluster-details)
|
|
47
66
|
in Zilliz Cloud.
|
|
48
67
|
token (Optional[str]): Token for authentication with the Milvus server.
|
|
68
|
+
search_type (SearchType): Type of search to perform (vector, keyword, or hybrid)
|
|
69
|
+
reranker (Optional[Reranker]): Reranker to use for hybrid search results
|
|
49
70
|
**kwargs: Additional keyword arguments to pass to the MilvusClient.
|
|
50
71
|
"""
|
|
72
|
+
# Validate required parameters
|
|
73
|
+
if not collection:
|
|
74
|
+
raise ValueError("Collection name must be provided.")
|
|
75
|
+
|
|
76
|
+
# Dynamic ID generation based on unique identifiers
|
|
77
|
+
if id is None:
|
|
78
|
+
from agno.utils.string import generate_id
|
|
79
|
+
|
|
80
|
+
seed = f"{uri or 'milvus'}#{collection}"
|
|
81
|
+
id = generate_id(seed)
|
|
82
|
+
|
|
83
|
+
# Initialize base class with name, description, and generated ID
|
|
84
|
+
super().__init__(id=id, name=name, description=description)
|
|
85
|
+
|
|
51
86
|
self.collection: str = collection
|
|
87
|
+
|
|
88
|
+
if embedder is None:
|
|
89
|
+
from agno.knowledge.embedder.openai import OpenAIEmbedder
|
|
90
|
+
|
|
91
|
+
embedder = OpenAIEmbedder()
|
|
92
|
+
log_info("Embedder not provided, using OpenAIEmbedder as default.")
|
|
52
93
|
self.embedder: Embedder = embedder
|
|
53
94
|
self.dimensions: Optional[int] = self.embedder.dimensions
|
|
95
|
+
|
|
54
96
|
self.distance: Distance = distance
|
|
55
97
|
self.uri: str = uri
|
|
56
98
|
self.token: Optional[str] = token
|
|
57
99
|
self._client: Optional[MilvusClient] = None
|
|
100
|
+
self._async_client: Optional[AsyncMilvusClient] = None
|
|
101
|
+
self.search_type: SearchType = search_type
|
|
102
|
+
self.reranker: Optional[Reranker] = reranker
|
|
103
|
+
self.sparse_vector_dimensions = sparse_vector_dimensions
|
|
58
104
|
self.kwargs = kwargs
|
|
59
105
|
|
|
60
106
|
@property
|
|
61
107
|
def client(self) -> MilvusClient:
|
|
62
108
|
if self._client is None:
|
|
63
|
-
|
|
109
|
+
log_debug("Creating Milvus Client")
|
|
64
110
|
self._client = MilvusClient(
|
|
65
111
|
uri=self.uri,
|
|
66
112
|
token=self.token,
|
|
@@ -68,39 +114,210 @@ class Milvus(VectorDb):
|
|
|
68
114
|
)
|
|
69
115
|
return self._client
|
|
70
116
|
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
if self.
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
logger.debug(f"Creating collection: {self.collection}")
|
|
80
|
-
self.client.create_collection(
|
|
81
|
-
collection_name=self.collection,
|
|
82
|
-
dimension=self.dimensions,
|
|
83
|
-
metric_type=_distance,
|
|
84
|
-
id_type="string",
|
|
85
|
-
max_length=65_535,
|
|
117
|
+
@property
|
|
118
|
+
def async_client(self) -> AsyncMilvusClient:
|
|
119
|
+
if not hasattr(self, "_async_client") or self._async_client is None:
|
|
120
|
+
log_debug("Creating Async Milvus Client")
|
|
121
|
+
self._async_client = AsyncMilvusClient(
|
|
122
|
+
uri=self.uri,
|
|
123
|
+
token=self.token,
|
|
124
|
+
**self.kwargs,
|
|
86
125
|
)
|
|
126
|
+
return self._async_client
|
|
87
127
|
|
|
88
|
-
def
|
|
128
|
+
def _get_sparse_vector(self, text: str) -> Dict[int, float]:
|
|
89
129
|
"""
|
|
90
|
-
|
|
130
|
+
Convert text into a sparse vector representation using a simple TF-IDF-like scoring.
|
|
131
|
+
|
|
132
|
+
This method creates a sparse vector by:
|
|
133
|
+
1. Converting text to lowercase and splitting into words
|
|
134
|
+
2. Computing word frequencies
|
|
135
|
+
3. Creating a hash-based word ID (modulo 10000)
|
|
136
|
+
4. Computing a TF-IDF-like score for each word
|
|
91
137
|
|
|
92
138
|
Args:
|
|
93
|
-
|
|
139
|
+
text: Input text to convert to sparse vector
|
|
140
|
+
|
|
141
|
+
Returns:
|
|
142
|
+
Dictionary mapping word IDs (int) to their TF-IDF-like scores (float)
|
|
94
143
|
"""
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
144
|
+
from collections import Counter
|
|
145
|
+
|
|
146
|
+
import numpy as np
|
|
147
|
+
|
|
148
|
+
# Simple word-based sparse vector creation
|
|
149
|
+
words = text.lower().split()
|
|
150
|
+
word_counts = Counter(words)
|
|
151
|
+
|
|
152
|
+
# Create sparse vector (word_id: tf-idf_score)
|
|
153
|
+
sparse_vector = {}
|
|
154
|
+
for word, count in word_counts.items():
|
|
155
|
+
word_id = hash(word) % self.sparse_vector_dimensions
|
|
156
|
+
# Simple tf-idf-like score
|
|
157
|
+
score = count * np.log(1 + len(words))
|
|
158
|
+
sparse_vector[word_id] = float(score)
|
|
159
|
+
|
|
160
|
+
return sparse_vector
|
|
161
|
+
|
|
162
|
+
def _create_hybrid_schema(self) -> Any:
|
|
163
|
+
"""Create a schema for hybrid collection with all necessary fields."""
|
|
164
|
+
from pymilvus import DataType
|
|
165
|
+
|
|
166
|
+
schema = MilvusClient.create_schema(
|
|
167
|
+
auto_id=False,
|
|
168
|
+
enable_dynamic_field=True,
|
|
169
|
+
)
|
|
170
|
+
|
|
171
|
+
# Define field configurations
|
|
172
|
+
fields = [
|
|
173
|
+
("id", DataType.VARCHAR, 128, True), # (name, type, max_length, is_primary)
|
|
174
|
+
("name", DataType.VARCHAR, 1000, False),
|
|
175
|
+
("content", DataType.VARCHAR, 65535, False),
|
|
176
|
+
("content_id", DataType.VARCHAR, 1000, False),
|
|
177
|
+
("content_hash", DataType.VARCHAR, 1000, False),
|
|
178
|
+
("text", DataType.VARCHAR, 1000, False),
|
|
179
|
+
("meta_data", DataType.VARCHAR, 65535, False),
|
|
180
|
+
("usage", DataType.VARCHAR, 65535, False),
|
|
181
|
+
]
|
|
182
|
+
|
|
183
|
+
# Add VARCHAR fields
|
|
184
|
+
for field_name, datatype, max_length, is_primary in fields:
|
|
185
|
+
schema.add_field(field_name=field_name, datatype=datatype, max_length=max_length, is_primary=is_primary)
|
|
186
|
+
|
|
187
|
+
# Add vector fields
|
|
188
|
+
schema.add_field(field_name="dense_vector", datatype=DataType.FLOAT_VECTOR, dim=self.dimensions)
|
|
189
|
+
schema.add_field(field_name="sparse_vector", datatype=DataType.SPARSE_FLOAT_VECTOR)
|
|
190
|
+
|
|
191
|
+
return schema
|
|
192
|
+
|
|
193
|
+
def _prepare_hybrid_index_params(self) -> Any:
|
|
194
|
+
"""Prepare index parameters for both dense and sparse vectors."""
|
|
195
|
+
index_params = self.client.prepare_index_params()
|
|
196
|
+
|
|
197
|
+
# Add indexes for both vector types
|
|
198
|
+
index_params.add_index(
|
|
199
|
+
field_name="dense_vector",
|
|
200
|
+
index_name="dense_index",
|
|
201
|
+
index_type="IVF_FLAT",
|
|
202
|
+
metric_type=self._get_metric_type(),
|
|
203
|
+
params={"nlist": 1024},
|
|
204
|
+
)
|
|
205
|
+
|
|
206
|
+
index_params.add_index(
|
|
207
|
+
field_name="sparse_vector",
|
|
208
|
+
index_name="sparse_index",
|
|
209
|
+
index_type="SPARSE_INVERTED_INDEX",
|
|
210
|
+
metric_type="IP",
|
|
211
|
+
params={"drop_ratio_build": 0.2},
|
|
212
|
+
)
|
|
213
|
+
|
|
214
|
+
return index_params
|
|
215
|
+
|
|
216
|
+
def _prepare_document_data(
|
|
217
|
+
self, content_hash: str, document: Document, include_vectors: bool = True
|
|
218
|
+
) -> Dict[str, Union[str, List[float], Dict[int, float], None]]:
|
|
219
|
+
"""
|
|
220
|
+
Prepare document data for insertion.
|
|
221
|
+
|
|
222
|
+
Args:
|
|
223
|
+
document: Document to prepare data for
|
|
224
|
+
include_vectors: Whether to include vector data
|
|
225
|
+
|
|
226
|
+
Returns:
|
|
227
|
+
Dictionary with document data where values can be strings, vectors (List[float]),
|
|
228
|
+
sparse vectors (Dict[int, float]), or None
|
|
229
|
+
"""
|
|
230
|
+
|
|
231
|
+
cleaned_content = document.content.replace("\x00", "\ufffd")
|
|
232
|
+
# Include content_hash in ID to ensure uniqueness across different content hashes
|
|
233
|
+
base_id = document.id or md5(cleaned_content.encode()).hexdigest()
|
|
234
|
+
doc_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
|
|
235
|
+
|
|
236
|
+
# Convert dictionary fields to JSON strings
|
|
237
|
+
meta_data_str = json.dumps(document.meta_data) if document.meta_data else "{}"
|
|
238
|
+
usage_str = json.dumps(document.usage) if document.usage else "{}"
|
|
239
|
+
|
|
240
|
+
data: Dict[str, Union[str, List[float], Dict[int, float], None]] = {
|
|
241
|
+
"id": doc_id,
|
|
242
|
+
"text": cleaned_content,
|
|
243
|
+
"name": document.name,
|
|
244
|
+
"content_id": document.content_id,
|
|
245
|
+
"meta_data": meta_data_str,
|
|
246
|
+
"content": cleaned_content,
|
|
247
|
+
"usage": usage_str,
|
|
248
|
+
"content_hash": content_hash,
|
|
249
|
+
}
|
|
250
|
+
|
|
251
|
+
if include_vectors:
|
|
252
|
+
if self.search_type == SearchType.hybrid:
|
|
253
|
+
data.update(
|
|
254
|
+
{
|
|
255
|
+
"dense_vector": document.embedding, # List[float] or None # Dict[int, float]
|
|
256
|
+
"sparse_vector": self._get_sparse_vector(cleaned_content),
|
|
257
|
+
}
|
|
258
|
+
)
|
|
259
|
+
else:
|
|
260
|
+
vector_data: Optional[List[float]] = document.embedding
|
|
261
|
+
data["vector"] = vector_data
|
|
262
|
+
|
|
263
|
+
return data
|
|
264
|
+
|
|
265
|
+
def _create_hybrid_collection(self) -> None:
|
|
266
|
+
"""Create a collection specifically for hybrid search."""
|
|
267
|
+
log_debug(f"Creating hybrid collection: {self.collection}")
|
|
268
|
+
|
|
269
|
+
schema = self._create_hybrid_schema()
|
|
270
|
+
index_params = self._prepare_hybrid_index_params()
|
|
271
|
+
|
|
272
|
+
self.client.create_collection(collection_name=self.collection, schema=schema, index_params=index_params)
|
|
273
|
+
|
|
274
|
+
async def _async_create_hybrid_collection(self) -> None:
|
|
275
|
+
"""Create a hybrid collection asynchronously."""
|
|
276
|
+
log_debug(f"Creating hybrid collection asynchronously: {self.collection}")
|
|
277
|
+
|
|
278
|
+
schema = self._create_hybrid_schema()
|
|
279
|
+
index_params = self._prepare_hybrid_index_params()
|
|
280
|
+
|
|
281
|
+
await self.async_client.create_collection(
|
|
282
|
+
collection_name=self.collection, schema=schema, index_params=index_params
|
|
283
|
+
)
|
|
284
|
+
|
|
285
|
+
def create(self) -> None:
|
|
286
|
+
"""Create a collection based on search type if it doesn't exist."""
|
|
287
|
+
if self.exists():
|
|
288
|
+
return
|
|
289
|
+
|
|
290
|
+
if self.search_type == SearchType.hybrid:
|
|
291
|
+
self._create_hybrid_collection()
|
|
292
|
+
return
|
|
293
|
+
|
|
294
|
+
_distance = self._get_metric_type()
|
|
295
|
+
log_debug(f"Creating collection: {self.collection}")
|
|
296
|
+
self.client.create_collection(
|
|
297
|
+
collection_name=self.collection,
|
|
298
|
+
dimension=self.dimensions,
|
|
299
|
+
metric_type=_distance,
|
|
300
|
+
id_type="string",
|
|
301
|
+
max_length=65_535,
|
|
302
|
+
)
|
|
303
|
+
|
|
304
|
+
async def async_create(self) -> None:
|
|
305
|
+
"""Create collection asynchronously based on search type."""
|
|
306
|
+
# Use the synchronous client to check if collection exists
|
|
307
|
+
if not self.client.has_collection(self.collection):
|
|
308
|
+
if self.search_type == SearchType.hybrid:
|
|
309
|
+
await self._async_create_hybrid_collection()
|
|
310
|
+
else:
|
|
311
|
+
# Original async create logic for regular vector search
|
|
312
|
+
_distance = self._get_metric_type()
|
|
313
|
+
log_debug(f"Creating collection asynchronously: {self.collection}")
|
|
314
|
+
await self.async_client.create_collection(
|
|
315
|
+
collection_name=self.collection,
|
|
316
|
+
dimension=self.dimensions,
|
|
317
|
+
metric_type=_distance,
|
|
318
|
+
id_type="string",
|
|
319
|
+
max_length=65_535,
|
|
320
|
+
)
|
|
104
321
|
|
|
105
322
|
def name_exists(self, name: str) -> bool:
|
|
106
323
|
"""
|
|
@@ -119,7 +336,7 @@ class Milvus(VectorDb):
|
|
|
119
336
|
filter=expr,
|
|
120
337
|
limit=1,
|
|
121
338
|
)
|
|
122
|
-
return len(scroll_result[0]) > 0
|
|
339
|
+
return len(scroll_result) > 0 and len(scroll_result[0]) > 0
|
|
123
340
|
return False
|
|
124
341
|
|
|
125
342
|
def id_exists(self, id: str) -> bool:
|
|
@@ -131,33 +348,186 @@ class Milvus(VectorDb):
|
|
|
131
348
|
return len(collection_points) > 0
|
|
132
349
|
return False
|
|
133
350
|
|
|
134
|
-
def
|
|
351
|
+
def content_hash_exists(self, content_hash: str) -> bool:
|
|
135
352
|
"""
|
|
136
|
-
|
|
353
|
+
Check if a document with the given content hash exists.
|
|
137
354
|
|
|
138
355
|
Args:
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
356
|
+
content_hash (str): The content hash to check.
|
|
357
|
+
|
|
358
|
+
Returns:
|
|
359
|
+
bool: True if a document with the given content hash exists, False otherwise.
|
|
142
360
|
"""
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
cleaned_content = document.content.replace("\x00", "\ufffd")
|
|
147
|
-
doc_id = md5(cleaned_content.encode()).hexdigest()
|
|
148
|
-
data = {
|
|
149
|
-
"id": doc_id,
|
|
150
|
-
"vector": document.embedding,
|
|
151
|
-
"name": document.name,
|
|
152
|
-
"meta_data": document.meta_data,
|
|
153
|
-
"content": cleaned_content,
|
|
154
|
-
"usage": document.usage,
|
|
155
|
-
}
|
|
156
|
-
self.client.insert(
|
|
361
|
+
if self.client:
|
|
362
|
+
expr = f'content_hash == "{content_hash}"'
|
|
363
|
+
scroll_result = self.client.query(
|
|
157
364
|
collection_name=self.collection,
|
|
158
|
-
|
|
365
|
+
filter=expr,
|
|
366
|
+
limit=1,
|
|
159
367
|
)
|
|
160
|
-
|
|
368
|
+
return len(scroll_result) > 0 and len(scroll_result[0]) > 0
|
|
369
|
+
return False
|
|
370
|
+
|
|
371
|
+
def _delete_by_content_hash(self, content_hash: str) -> bool:
|
|
372
|
+
"""
|
|
373
|
+
Delete documents by content hash.
|
|
374
|
+
|
|
375
|
+
Args:
|
|
376
|
+
content_hash (str): The content hash to delete.
|
|
377
|
+
|
|
378
|
+
Returns:
|
|
379
|
+
bool: True if documents were deleted, False otherwise.
|
|
380
|
+
"""
|
|
381
|
+
if self.client:
|
|
382
|
+
expr = f'content_hash == "{content_hash}"'
|
|
383
|
+
self.client.delete(collection_name=self.collection, filter=expr)
|
|
384
|
+
log_info(f"Deleted documents with content_hash '{content_hash}' from collection '{self.collection}'.")
|
|
385
|
+
return True
|
|
386
|
+
return False
|
|
387
|
+
|
|
388
|
+
def _insert_hybrid_document(self, content_hash: str, document: Document) -> None:
|
|
389
|
+
"""Insert a document with both dense and sparse vectors."""
|
|
390
|
+
data = self._prepare_document_data(content_hash=content_hash, document=document, include_vectors=True)
|
|
391
|
+
document.embed(embedder=self.embedder)
|
|
392
|
+
self.client.insert(
|
|
393
|
+
collection_name=self.collection,
|
|
394
|
+
data=data,
|
|
395
|
+
)
|
|
396
|
+
log_debug(f"Inserted hybrid document: {document.name} ({document.meta_data})")
|
|
397
|
+
|
|
398
|
+
async def _async_insert_hybrid_document(self, content_hash: str, document: Document) -> None:
|
|
399
|
+
"""Insert a document with both dense and sparse vectors asynchronously."""
|
|
400
|
+
data = self._prepare_document_data(content_hash=content_hash, document=document, include_vectors=True)
|
|
401
|
+
|
|
402
|
+
await self.async_client.insert(
|
|
403
|
+
collection_name=self.collection,
|
|
404
|
+
data=data,
|
|
405
|
+
)
|
|
406
|
+
log_debug(f"Inserted hybrid document asynchronously: {document.name} ({document.meta_data})")
|
|
407
|
+
|
|
408
|
+
def insert(self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
|
|
409
|
+
"""Insert documents based on search type."""
|
|
410
|
+
log_debug(f"Inserting {len(documents)} documents")
|
|
411
|
+
|
|
412
|
+
if self.search_type == SearchType.hybrid:
|
|
413
|
+
for document in documents:
|
|
414
|
+
self._insert_hybrid_document(content_hash=content_hash, document=document)
|
|
415
|
+
else:
|
|
416
|
+
for document in documents:
|
|
417
|
+
document.embed(embedder=self.embedder)
|
|
418
|
+
if not document.embedding:
|
|
419
|
+
log_debug(f"Skipping document without embedding: {document.name} ({document.meta_data})")
|
|
420
|
+
continue
|
|
421
|
+
cleaned_content = document.content.replace("\x00", "\ufffd")
|
|
422
|
+
doc_id = md5(cleaned_content.encode()).hexdigest()
|
|
423
|
+
|
|
424
|
+
meta_data = document.meta_data or {}
|
|
425
|
+
if filters:
|
|
426
|
+
meta_data.update(filters)
|
|
427
|
+
|
|
428
|
+
data = {
|
|
429
|
+
"id": doc_id,
|
|
430
|
+
"vector": document.embedding,
|
|
431
|
+
"name": document.name,
|
|
432
|
+
"content_id": document.content_id,
|
|
433
|
+
"meta_data": meta_data,
|
|
434
|
+
"content": cleaned_content,
|
|
435
|
+
"usage": document.usage,
|
|
436
|
+
"content_hash": content_hash,
|
|
437
|
+
}
|
|
438
|
+
self.client.insert(
|
|
439
|
+
collection_name=self.collection,
|
|
440
|
+
data=data,
|
|
441
|
+
)
|
|
442
|
+
log_debug(f"Inserted document: {document.name} ({meta_data})")
|
|
443
|
+
|
|
444
|
+
log_info(f"Inserted {len(documents)} documents")
|
|
445
|
+
|
|
446
|
+
async def async_insert(
|
|
447
|
+
self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
|
|
448
|
+
) -> None:
|
|
449
|
+
"""Insert documents asynchronously based on search type."""
|
|
450
|
+
log_info(f"Inserting {len(documents)} documents asynchronously")
|
|
451
|
+
|
|
452
|
+
if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
|
|
453
|
+
# Use batch embedding when enabled and supported
|
|
454
|
+
try:
|
|
455
|
+
# Extract content from all documents
|
|
456
|
+
doc_contents = [doc.content for doc in documents]
|
|
457
|
+
|
|
458
|
+
# Get batch embeddings and usage
|
|
459
|
+
embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
|
|
460
|
+
|
|
461
|
+
# Process documents with pre-computed embeddings
|
|
462
|
+
for j, doc in enumerate(documents):
|
|
463
|
+
try:
|
|
464
|
+
if j < len(embeddings):
|
|
465
|
+
doc.embedding = embeddings[j]
|
|
466
|
+
doc.usage = usages[j] if j < len(usages) else None
|
|
467
|
+
except Exception as e:
|
|
468
|
+
log_error(f"Error assigning batch embedding to document '{doc.name}': {e}")
|
|
469
|
+
|
|
470
|
+
except Exception as e:
|
|
471
|
+
# Check if this is a rate limit error - don't fall back as it would make things worse
|
|
472
|
+
error_str = str(e).lower()
|
|
473
|
+
is_rate_limit = any(
|
|
474
|
+
phrase in error_str
|
|
475
|
+
for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
|
|
476
|
+
)
|
|
477
|
+
|
|
478
|
+
if is_rate_limit:
|
|
479
|
+
log_error(f"Rate limit detected during batch embedding. {e}")
|
|
480
|
+
raise e
|
|
481
|
+
else:
|
|
482
|
+
log_error(f"Async batch embedding failed, falling back to individual embeddings: {e}")
|
|
483
|
+
# Fall back to individual embedding
|
|
484
|
+
embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
|
|
485
|
+
await asyncio.gather(*embed_tasks, return_exceptions=True)
|
|
486
|
+
else:
|
|
487
|
+
# Use individual embedding
|
|
488
|
+
embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
|
|
489
|
+
await asyncio.gather(*embed_tasks, return_exceptions=True)
|
|
490
|
+
|
|
491
|
+
if self.search_type == SearchType.hybrid:
|
|
492
|
+
await asyncio.gather(
|
|
493
|
+
*[self._async_insert_hybrid_document(content_hash=content_hash, document=doc) for doc in documents]
|
|
494
|
+
)
|
|
495
|
+
else:
|
|
496
|
+
|
|
497
|
+
async def process_document(document):
|
|
498
|
+
document.embed(embedder=self.embedder)
|
|
499
|
+
if not document.embedding:
|
|
500
|
+
log_debug(f"Skipping document without embedding: {document.name} ({document.meta_data})")
|
|
501
|
+
return None
|
|
502
|
+
cleaned_content = document.content.replace("\x00", "\ufffd")
|
|
503
|
+
# Include content_hash in ID to ensure uniqueness across different content hashes
|
|
504
|
+
base_id = document.id or md5(cleaned_content.encode()).hexdigest()
|
|
505
|
+
doc_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
|
|
506
|
+
|
|
507
|
+
meta_data = document.meta_data or {}
|
|
508
|
+
if filters:
|
|
509
|
+
meta_data.update(filters)
|
|
510
|
+
|
|
511
|
+
data = {
|
|
512
|
+
"id": doc_id,
|
|
513
|
+
"vector": document.embedding,
|
|
514
|
+
"name": document.name,
|
|
515
|
+
"content_id": document.content_id,
|
|
516
|
+
"meta_data": meta_data,
|
|
517
|
+
"content": cleaned_content,
|
|
518
|
+
"usage": document.usage,
|
|
519
|
+
"content_hash": content_hash,
|
|
520
|
+
}
|
|
521
|
+
await self.async_client.insert(
|
|
522
|
+
collection_name=self.collection,
|
|
523
|
+
data=data,
|
|
524
|
+
)
|
|
525
|
+
log_debug(f"Inserted document asynchronously: {document.name} ({document.meta_data})")
|
|
526
|
+
return data
|
|
527
|
+
|
|
528
|
+
await asyncio.gather(*[process_document(doc) for doc in documents])
|
|
529
|
+
|
|
530
|
+
log_info(f"Inserted {len(documents)} documents asynchronously")
|
|
161
531
|
|
|
162
532
|
def upsert_available(self) -> bool:
|
|
163
533
|
"""
|
|
@@ -168,7 +538,7 @@ class Milvus(VectorDb):
|
|
|
168
538
|
"""
|
|
169
539
|
return True
|
|
170
540
|
|
|
171
|
-
def upsert(self, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
|
|
541
|
+
def upsert(self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
|
|
172
542
|
"""
|
|
173
543
|
Upsert documents into the database.
|
|
174
544
|
|
|
@@ -176,37 +546,133 @@ class Milvus(VectorDb):
|
|
|
176
546
|
documents (List[Document]): List of documents to upsert
|
|
177
547
|
filters (Optional[Dict[str, Any]]): Filters to apply while upserting
|
|
178
548
|
"""
|
|
179
|
-
|
|
549
|
+
log_debug(f"Upserting {len(documents)} documents")
|
|
180
550
|
for document in documents:
|
|
181
551
|
document.embed(embedder=self.embedder)
|
|
182
552
|
cleaned_content = document.content.replace("\x00", "\ufffd")
|
|
183
553
|
doc_id = md5(cleaned_content.encode()).hexdigest()
|
|
554
|
+
|
|
555
|
+
meta_data = document.meta_data or {}
|
|
556
|
+
if filters:
|
|
557
|
+
meta_data.update(filters)
|
|
558
|
+
|
|
184
559
|
data = {
|
|
185
560
|
"id": doc_id,
|
|
186
561
|
"vector": document.embedding,
|
|
187
562
|
"name": document.name,
|
|
563
|
+
"content_id": document.content_id,
|
|
188
564
|
"meta_data": document.meta_data,
|
|
189
565
|
"content": cleaned_content,
|
|
190
566
|
"usage": document.usage,
|
|
567
|
+
"content_hash": content_hash,
|
|
191
568
|
}
|
|
192
569
|
self.client.upsert(
|
|
193
570
|
collection_name=self.collection,
|
|
194
571
|
data=data,
|
|
195
572
|
)
|
|
196
|
-
|
|
573
|
+
log_debug(f"Upserted document: {document.name} ({document.meta_data})")
|
|
574
|
+
|
|
575
|
+
async def async_upsert(
|
|
576
|
+
self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
|
|
577
|
+
) -> None:
|
|
578
|
+
log_debug(f"Upserting {len(documents)} documents asynchronously")
|
|
579
|
+
|
|
580
|
+
if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
|
|
581
|
+
# Use batch embedding when enabled and supported
|
|
582
|
+
try:
|
|
583
|
+
# Extract content from all documents
|
|
584
|
+
doc_contents = [doc.content for doc in documents]
|
|
585
|
+
|
|
586
|
+
# Get batch embeddings and usage
|
|
587
|
+
embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
|
|
588
|
+
|
|
589
|
+
# Process documents with pre-computed embeddings
|
|
590
|
+
for j, doc in enumerate(documents):
|
|
591
|
+
try:
|
|
592
|
+
if j < len(embeddings):
|
|
593
|
+
doc.embedding = embeddings[j]
|
|
594
|
+
doc.usage = usages[j] if j < len(usages) else None
|
|
595
|
+
except Exception as e:
|
|
596
|
+
log_error(f"Error assigning batch embedding to document '{doc.name}': {e}")
|
|
597
|
+
|
|
598
|
+
except Exception as e:
|
|
599
|
+
# Check if this is a rate limit error - don't fall back as it would make things worse
|
|
600
|
+
error_str = str(e).lower()
|
|
601
|
+
is_rate_limit = any(
|
|
602
|
+
phrase in error_str
|
|
603
|
+
for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
|
|
604
|
+
)
|
|
605
|
+
|
|
606
|
+
if is_rate_limit:
|
|
607
|
+
log_error(f"Rate limit detected during batch embedding. {e}")
|
|
608
|
+
raise e
|
|
609
|
+
else:
|
|
610
|
+
log_error(f"Async batch embedding failed, falling back to individual embeddings: {e}")
|
|
611
|
+
# Fall back to individual embedding
|
|
612
|
+
embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
|
|
613
|
+
await asyncio.gather(*embed_tasks, return_exceptions=True)
|
|
614
|
+
else:
|
|
615
|
+
# Use individual embedding
|
|
616
|
+
embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
|
|
617
|
+
await asyncio.gather(*embed_tasks, return_exceptions=True)
|
|
618
|
+
|
|
619
|
+
async def process_document(document):
|
|
620
|
+
cleaned_content = document.content.replace("\x00", "\ufffd")
|
|
621
|
+
doc_id = md5(cleaned_content.encode()).hexdigest()
|
|
622
|
+
data = {
|
|
623
|
+
"id": doc_id,
|
|
624
|
+
"vector": document.embedding,
|
|
625
|
+
"name": document.name,
|
|
626
|
+
"content_id": document.content_id,
|
|
627
|
+
"meta_data": document.meta_data,
|
|
628
|
+
"content": cleaned_content,
|
|
629
|
+
"usage": document.usage,
|
|
630
|
+
"content_hash": content_hash,
|
|
631
|
+
}
|
|
632
|
+
await self.async_client.upsert(
|
|
633
|
+
collection_name=self.collection,
|
|
634
|
+
data=data,
|
|
635
|
+
)
|
|
636
|
+
log_debug(f"Upserted document asynchronously: {document.name} ({document.meta_data})")
|
|
637
|
+
return data
|
|
197
638
|
|
|
198
|
-
|
|
639
|
+
# Process all documents in parallel
|
|
640
|
+
await asyncio.gather(*[process_document(doc) for doc in documents])
|
|
641
|
+
|
|
642
|
+
log_debug(f"Upserted {len(documents)} documents asynchronously in parallel")
|
|
643
|
+
|
|
644
|
+
def _get_metric_type(self) -> str:
|
|
645
|
+
"""
|
|
646
|
+
Get the Milvus metric type string for the current distance setting.
|
|
647
|
+
|
|
648
|
+
Returns:
|
|
649
|
+
Milvus metric type string, defaults to "COSINE" if distance not found
|
|
199
650
|
"""
|
|
200
|
-
|
|
651
|
+
return MILVUS_DISTANCE_MAP.get(self.distance, "COSINE")
|
|
652
|
+
|
|
653
|
+
def search(
|
|
654
|
+
self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
|
|
655
|
+
) -> List[Document]:
|
|
656
|
+
"""
|
|
657
|
+
Search for documents matching the query.
|
|
201
658
|
|
|
202
659
|
Args:
|
|
203
|
-
query (str): Query to search for
|
|
204
|
-
limit (int):
|
|
205
|
-
filters (Optional[Dict[str, Any]]): Filters to apply
|
|
660
|
+
query (str): Query string to search for
|
|
661
|
+
limit (int): Maximum number of results to return
|
|
662
|
+
filters (Optional[Dict[str, Any]]): Filters to apply to the search
|
|
663
|
+
|
|
664
|
+
Returns:
|
|
665
|
+
List[Document]: List of matching documents
|
|
206
666
|
"""
|
|
667
|
+
if isinstance(filters, List):
|
|
668
|
+
log_warning("Filters Expressions are not supported in Milvus. No filters will be applied.")
|
|
669
|
+
filters = None
|
|
670
|
+
if self.search_type == SearchType.hybrid:
|
|
671
|
+
return self.hybrid_search(query, limit)
|
|
672
|
+
|
|
207
673
|
query_embedding = self.embedder.get_embedding(query)
|
|
208
674
|
if query_embedding is None:
|
|
209
|
-
|
|
675
|
+
log_error(f"Error getting embedding for Query: {query}")
|
|
210
676
|
return []
|
|
211
677
|
|
|
212
678
|
results = self.client.search(
|
|
@@ -226,25 +692,273 @@ class Milvus(VectorDb):
|
|
|
226
692
|
name=result["entity"].get("name", None),
|
|
227
693
|
meta_data=result["entity"].get("meta_data", {}),
|
|
228
694
|
content=result["entity"].get("content", ""),
|
|
695
|
+
content_id=result["entity"].get("content_id", None),
|
|
696
|
+
embedder=self.embedder,
|
|
697
|
+
embedding=result["entity"].get("vector", None),
|
|
698
|
+
usage=result["entity"].get("usage", None),
|
|
699
|
+
)
|
|
700
|
+
)
|
|
701
|
+
|
|
702
|
+
# Apply reranker if available
|
|
703
|
+
if self.reranker and search_results:
|
|
704
|
+
search_results = self.reranker.rerank(query=query, documents=search_results)
|
|
705
|
+
search_results = search_results[:limit]
|
|
706
|
+
|
|
707
|
+
log_info(f"Found {len(search_results)} documents")
|
|
708
|
+
return search_results
|
|
709
|
+
|
|
710
|
+
async def async_search(
|
|
711
|
+
self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
|
|
712
|
+
) -> List[Document]:
|
|
713
|
+
if isinstance(filters, List):
|
|
714
|
+
log_warning("Filters Expressions are not supported in Milvus. No filters will be applied.")
|
|
715
|
+
filters = None
|
|
716
|
+
if self.search_type == SearchType.hybrid:
|
|
717
|
+
return await self.async_hybrid_search(query, limit, filters)
|
|
718
|
+
|
|
719
|
+
query_embedding = self.embedder.get_embedding(query)
|
|
720
|
+
if query_embedding is None:
|
|
721
|
+
log_error(f"Error getting embedding for Query: {query}")
|
|
722
|
+
return []
|
|
723
|
+
|
|
724
|
+
results = await self.async_client.search(
|
|
725
|
+
collection_name=self.collection,
|
|
726
|
+
data=[query_embedding],
|
|
727
|
+
filter=self._build_expr(filters),
|
|
728
|
+
output_fields=["*"],
|
|
729
|
+
limit=limit,
|
|
730
|
+
)
|
|
731
|
+
|
|
732
|
+
# Build search results
|
|
733
|
+
search_results: List[Document] = []
|
|
734
|
+
for result in results[0]:
|
|
735
|
+
search_results.append(
|
|
736
|
+
Document(
|
|
737
|
+
id=result["id"],
|
|
738
|
+
name=result["entity"].get("name", None),
|
|
739
|
+
meta_data=result["entity"].get("meta_data", {}),
|
|
740
|
+
content=result["entity"].get("content", ""),
|
|
741
|
+
content_id=result["entity"].get("content_id", None),
|
|
229
742
|
embedder=self.embedder,
|
|
230
743
|
embedding=result["entity"].get("vector", None),
|
|
231
744
|
usage=result["entity"].get("usage", None),
|
|
232
745
|
)
|
|
233
746
|
)
|
|
234
747
|
|
|
748
|
+
log_info(f"Found {len(search_results)} documents")
|
|
235
749
|
return search_results
|
|
236
750
|
|
|
751
|
+
def hybrid_search(
|
|
752
|
+
self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
|
|
753
|
+
) -> List[Document]:
|
|
754
|
+
"""
|
|
755
|
+
Perform a hybrid search combining dense and sparse vector similarity.
|
|
756
|
+
|
|
757
|
+
Args:
|
|
758
|
+
query (str): Query string to search for
|
|
759
|
+
limit (int): Maximum number of results to return
|
|
760
|
+
filters (Optional[Dict[str, Any]]): Filters to apply to the search
|
|
761
|
+
|
|
762
|
+
Returns:
|
|
763
|
+
List[Document]: List of matching documents
|
|
764
|
+
"""
|
|
765
|
+
from pymilvus import AnnSearchRequest, RRFRanker
|
|
766
|
+
|
|
767
|
+
# Get query embeddings
|
|
768
|
+
dense_vector = self.embedder.get_embedding(query)
|
|
769
|
+
sparse_vector = self._get_sparse_vector(query)
|
|
770
|
+
|
|
771
|
+
if dense_vector is None:
|
|
772
|
+
log_error(f"Error getting dense embedding for Query: {query}")
|
|
773
|
+
return []
|
|
774
|
+
|
|
775
|
+
if self._client is None:
|
|
776
|
+
log_error("Milvus client not initialized")
|
|
777
|
+
return []
|
|
778
|
+
|
|
779
|
+
try:
|
|
780
|
+
# Refer to docs for details- https://milvus.io/docs/multi-vector-search.md
|
|
781
|
+
|
|
782
|
+
# Create search request for dense vectors
|
|
783
|
+
dense_search_param = {
|
|
784
|
+
"data": [dense_vector],
|
|
785
|
+
"anns_field": "dense_vector",
|
|
786
|
+
"param": {"metric_type": self._get_metric_type(), "params": {"nprobe": 10}},
|
|
787
|
+
"limit": limit
|
|
788
|
+
* 2, # Fetch more candidates for better reranking quality - each vector search returns 2x results which are then merged and reranked
|
|
789
|
+
}
|
|
790
|
+
|
|
791
|
+
# Create search request for sparse vectors
|
|
792
|
+
sparse_search_param = {
|
|
793
|
+
"data": [sparse_vector],
|
|
794
|
+
"anns_field": "sparse_vector",
|
|
795
|
+
"param": {"metric_type": "IP", "params": {"drop_ratio_build": 0.2}},
|
|
796
|
+
"limit": limit * 2, # Match dense search limit to ensure balanced candidate pool for reranking
|
|
797
|
+
}
|
|
798
|
+
|
|
799
|
+
# Create search requests
|
|
800
|
+
dense_request = AnnSearchRequest(**dense_search_param)
|
|
801
|
+
sparse_request = AnnSearchRequest(**sparse_search_param)
|
|
802
|
+
reqs = [dense_request, sparse_request]
|
|
803
|
+
|
|
804
|
+
# Use RRFRanker for balanced importance between vectors
|
|
805
|
+
ranker = RRFRanker(60) # Default k=60
|
|
806
|
+
|
|
807
|
+
log_info("Performing hybrid search")
|
|
808
|
+
results = self._client.hybrid_search(
|
|
809
|
+
collection_name=self.collection, reqs=reqs, ranker=ranker, limit=limit, output_fields=["*"]
|
|
810
|
+
)
|
|
811
|
+
|
|
812
|
+
# Build search results
|
|
813
|
+
search_results: List[Document] = []
|
|
814
|
+
for hits in results:
|
|
815
|
+
for hit in hits:
|
|
816
|
+
entity = hit.get("entity", {})
|
|
817
|
+
meta_data = json.loads(entity.get("meta_data", "{}")) if entity.get("meta_data") else {}
|
|
818
|
+
usage = json.loads(entity.get("usage", "{}")) if entity.get("usage") else None
|
|
819
|
+
|
|
820
|
+
search_results.append(
|
|
821
|
+
Document(
|
|
822
|
+
id=hit.get("id"),
|
|
823
|
+
name=entity.get("name", None),
|
|
824
|
+
meta_data=meta_data, # Now a dictionary
|
|
825
|
+
content=entity.get("content", ""),
|
|
826
|
+
content_id=entity.get("content_id", None),
|
|
827
|
+
embedder=self.embedder,
|
|
828
|
+
embedding=entity.get("dense_vector", None),
|
|
829
|
+
usage=usage, # Now a dictionary or None
|
|
830
|
+
)
|
|
831
|
+
)
|
|
832
|
+
|
|
833
|
+
# Apply additional reranking if custom reranker is provided
|
|
834
|
+
if self.reranker and search_results:
|
|
835
|
+
search_results = self.reranker.rerank(query=query, documents=search_results)
|
|
836
|
+
|
|
837
|
+
log_info(f"Found {len(search_results)} documents")
|
|
838
|
+
return search_results
|
|
839
|
+
|
|
840
|
+
except Exception as e:
|
|
841
|
+
log_error(f"Error during hybrid search: {e}")
|
|
842
|
+
return []
|
|
843
|
+
|
|
844
|
+
async def async_hybrid_search(
|
|
845
|
+
self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
|
|
846
|
+
) -> List[Document]:
|
|
847
|
+
"""
|
|
848
|
+
Perform an asynchronous hybrid search combining dense and sparse vector similarity.
|
|
849
|
+
|
|
850
|
+
Args:
|
|
851
|
+
query (str): Query string to search for
|
|
852
|
+
limit (int): Maximum number of results to return
|
|
853
|
+
filters (Optional[Dict[str, Any]]): Filters to apply to the search
|
|
854
|
+
|
|
855
|
+
Returns:
|
|
856
|
+
List[Document]: List of matching documents
|
|
857
|
+
"""
|
|
858
|
+
from pymilvus import AnnSearchRequest, RRFRanker
|
|
859
|
+
|
|
860
|
+
# Get query embeddings
|
|
861
|
+
dense_vector = self.embedder.get_embedding(query)
|
|
862
|
+
sparse_vector = self._get_sparse_vector(query)
|
|
863
|
+
|
|
864
|
+
if dense_vector is None:
|
|
865
|
+
log_error(f"Error getting dense embedding for Query: {query}")
|
|
866
|
+
return []
|
|
867
|
+
|
|
868
|
+
try:
|
|
869
|
+
# Refer to docs for details- https://milvus.io/docs/multi-vector-search.md
|
|
870
|
+
|
|
871
|
+
# Create search request for dense vectors
|
|
872
|
+
dense_search_param = {
|
|
873
|
+
"data": [dense_vector],
|
|
874
|
+
"anns_field": "dense_vector",
|
|
875
|
+
"param": {"metric_type": self._get_metric_type(), "params": {"nprobe": 10}},
|
|
876
|
+
"limit": limit
|
|
877
|
+
* 2, # Fetch more candidates for better reranking quality - each vector search returns 2x results which are then merged and reranked
|
|
878
|
+
}
|
|
879
|
+
|
|
880
|
+
# Create search request for sparse vectors
|
|
881
|
+
sparse_search_param = {
|
|
882
|
+
"data": [sparse_vector],
|
|
883
|
+
"anns_field": "sparse_vector",
|
|
884
|
+
"param": {"metric_type": "IP", "params": {"drop_ratio_build": 0.2}},
|
|
885
|
+
"limit": limit * 2, # Match dense search limit to ensure balanced candidate pool for reranking
|
|
886
|
+
}
|
|
887
|
+
|
|
888
|
+
# Create search requests
|
|
889
|
+
dense_request = AnnSearchRequest(**dense_search_param)
|
|
890
|
+
sparse_request = AnnSearchRequest(**sparse_search_param)
|
|
891
|
+
reqs = [dense_request, sparse_request]
|
|
892
|
+
|
|
893
|
+
# Use RRFRanker for balanced importance between vectors
|
|
894
|
+
ranker = RRFRanker(60) # Default k=60
|
|
895
|
+
|
|
896
|
+
log_info("Performing async hybrid search")
|
|
897
|
+
results = await self.async_client.hybrid_search(
|
|
898
|
+
collection_name=self.collection, reqs=reqs, ranker=ranker, limit=limit, output_fields=["*"]
|
|
899
|
+
)
|
|
900
|
+
|
|
901
|
+
# Build search results
|
|
902
|
+
search_results: List[Document] = []
|
|
903
|
+
for hits in results:
|
|
904
|
+
for hit in hits:
|
|
905
|
+
entity = hit.get("entity", {})
|
|
906
|
+
meta_data = json.loads(entity.get("meta_data", "{}")) if entity.get("meta_data") else {}
|
|
907
|
+
usage = json.loads(entity.get("usage", "{}")) if entity.get("usage") else None
|
|
908
|
+
|
|
909
|
+
search_results.append(
|
|
910
|
+
Document(
|
|
911
|
+
id=hit.get("id"),
|
|
912
|
+
name=entity.get("name", None),
|
|
913
|
+
meta_data=meta_data, # Now a dictionary
|
|
914
|
+
content=entity.get("content", ""),
|
|
915
|
+
embedder=self.embedder,
|
|
916
|
+
embedding=entity.get("dense_vector", None),
|
|
917
|
+
usage=usage, # Now a dictionary or None
|
|
918
|
+
)
|
|
919
|
+
)
|
|
920
|
+
|
|
921
|
+
# Apply additional reranking if custom reranker is provided
|
|
922
|
+
if self.reranker and search_results:
|
|
923
|
+
search_results = self.reranker.rerank(query=query, documents=search_results)
|
|
924
|
+
|
|
925
|
+
log_info(f"Found {len(search_results)} documents")
|
|
926
|
+
return search_results
|
|
927
|
+
|
|
928
|
+
except Exception as e:
|
|
929
|
+
log_error(f"Error during async hybrid search: {e}")
|
|
930
|
+
return []
|
|
931
|
+
|
|
237
932
|
def drop(self) -> None:
|
|
238
933
|
if self.exists():
|
|
239
|
-
|
|
934
|
+
log_debug(f"Deleting collection: {self.collection}")
|
|
240
935
|
self.client.drop_collection(self.collection)
|
|
241
936
|
|
|
937
|
+
async def async_drop(self) -> None:
|
|
938
|
+
"""
|
|
939
|
+
Drop collection asynchronously.
|
|
940
|
+
AsyncMilvusClient supports drop_collection().
|
|
941
|
+
"""
|
|
942
|
+
# Check using synchronous client
|
|
943
|
+
if self.client.has_collection(self.collection):
|
|
944
|
+
log_debug(f"Deleting collection asynchronously: {self.collection}")
|
|
945
|
+
await self.async_client.drop_collection(self.collection)
|
|
946
|
+
|
|
242
947
|
def exists(self) -> bool:
|
|
243
948
|
if self.client:
|
|
244
949
|
if self.client.has_collection(self.collection):
|
|
245
950
|
return True
|
|
246
951
|
return False
|
|
247
952
|
|
|
953
|
+
async def async_exists(self) -> bool:
|
|
954
|
+
"""
|
|
955
|
+
Check if collection exists asynchronously.
|
|
956
|
+
|
|
957
|
+
has_collection() is not supported by AsyncMilvusClient,
|
|
958
|
+
so we use the synchronous client.
|
|
959
|
+
"""
|
|
960
|
+
return self.client.has_collection(self.collection)
|
|
961
|
+
|
|
248
962
|
def get_count(self) -> int:
|
|
249
963
|
return self.client.get_collection_stats(collection_name="test_collection")["row_count"]
|
|
250
964
|
|
|
@@ -254,15 +968,189 @@ class Milvus(VectorDb):
|
|
|
254
968
|
return True
|
|
255
969
|
return False
|
|
256
970
|
|
|
257
|
-
def
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
971
|
+
def delete_by_id(self, id: str) -> bool:
|
|
972
|
+
"""
|
|
973
|
+
Delete a document by its ID.
|
|
974
|
+
|
|
975
|
+
Args:
|
|
976
|
+
id (str): The document ID to delete
|
|
977
|
+
|
|
978
|
+
Returns:
|
|
979
|
+
bool: True if document was deleted, False otherwise
|
|
980
|
+
"""
|
|
981
|
+
try:
|
|
982
|
+
log_debug(f"Milvus VectorDB : Deleting document with ID {id}")
|
|
983
|
+
if not self.id_exists(id):
|
|
984
|
+
return False
|
|
985
|
+
|
|
986
|
+
# Delete by ID using Milvus delete operation
|
|
987
|
+
self.client.delete(collection_name=self.collection, ids=[id])
|
|
988
|
+
log_info(f"Deleted document with ID '{id}' from collection '{self.collection}'.")
|
|
989
|
+
return True
|
|
990
|
+
except Exception as e:
|
|
991
|
+
log_info(f"Error deleting document with ID {id}: {e}")
|
|
992
|
+
return False
|
|
993
|
+
|
|
994
|
+
def delete_by_name(self, name: str) -> bool:
|
|
995
|
+
"""
|
|
996
|
+
Delete documents by name.
|
|
997
|
+
|
|
998
|
+
Args:
|
|
999
|
+
name (str): The document name to delete
|
|
1000
|
+
|
|
1001
|
+
Returns:
|
|
1002
|
+
bool: True if documents were deleted, False otherwise
|
|
1003
|
+
"""
|
|
1004
|
+
try:
|
|
1005
|
+
log_debug(f"Milvus VectorDB : Deleting documents with name {name}")
|
|
1006
|
+
if not self.name_exists(name):
|
|
1007
|
+
return False
|
|
1008
|
+
|
|
1009
|
+
# Delete by name using Milvus delete operation with filter
|
|
1010
|
+
expr = f'name == "{name}"'
|
|
1011
|
+
self.client.delete(collection_name=self.collection, filter=expr)
|
|
1012
|
+
log_info(f"Deleted documents with name '{name}' from collection '{self.collection}'.")
|
|
1013
|
+
return True
|
|
1014
|
+
except Exception as e:
|
|
1015
|
+
log_info(f"Error deleting documents with name {name}: {e}")
|
|
1016
|
+
return False
|
|
1017
|
+
|
|
1018
|
+
def delete_by_metadata(self, metadata: Dict[str, Any]) -> bool:
|
|
1019
|
+
"""
|
|
1020
|
+
Delete documents by metadata.
|
|
1021
|
+
|
|
1022
|
+
Args:
|
|
1023
|
+
metadata (Dict[str, Any]): The metadata to match for deletion
|
|
1024
|
+
|
|
1025
|
+
Returns:
|
|
1026
|
+
bool: True if documents were deleted, False otherwise
|
|
1027
|
+
"""
|
|
1028
|
+
try:
|
|
1029
|
+
log_debug(f"Milvus VectorDB : Deleting documents with metadata {metadata}")
|
|
1030
|
+
|
|
1031
|
+
# Build filter expression for metadata matching
|
|
1032
|
+
expr = self._build_expr(metadata)
|
|
1033
|
+
if not expr:
|
|
1034
|
+
return False
|
|
1035
|
+
|
|
1036
|
+
# Delete by metadata using Milvus delete operation with filter
|
|
1037
|
+
self.client.delete(collection_name=self.collection, filter=expr)
|
|
1038
|
+
log_info(f"Deleted documents with metadata '{metadata}' from collection '{self.collection}'.")
|
|
1039
|
+
return True
|
|
1040
|
+
except Exception as e:
|
|
1041
|
+
log_info(f"Error deleting documents with metadata {metadata}: {e}")
|
|
1042
|
+
return False
|
|
1043
|
+
|
|
1044
|
+
def delete_by_content_id(self, content_id: str) -> bool:
|
|
1045
|
+
"""
|
|
1046
|
+
Delete documents by content ID.
|
|
1047
|
+
|
|
1048
|
+
Args:
|
|
1049
|
+
content_id (str): The content ID to delete
|
|
1050
|
+
|
|
1051
|
+
Returns:
|
|
1052
|
+
bool: True if documents were deleted, False otherwise
|
|
1053
|
+
"""
|
|
1054
|
+
try:
|
|
1055
|
+
log_debug(f"Milvus VectorDB : Deleting documents with content_id {content_id}")
|
|
1056
|
+
|
|
1057
|
+
# Delete by content_id using Milvus delete operation with filter
|
|
1058
|
+
expr = f'content_id == "{content_id}"'
|
|
1059
|
+
self.client.delete(collection_name=self.collection, filter=expr)
|
|
1060
|
+
log_info(f"Deleted documents with content_id '{content_id}' from collection '{self.collection}'.")
|
|
1061
|
+
return True
|
|
1062
|
+
except Exception as e:
|
|
1063
|
+
log_info(f"Error deleting documents with content_id {content_id}: {e}")
|
|
1064
|
+
return False
|
|
1065
|
+
|
|
1066
|
+
def _build_expr(self, filters: Optional[Dict[str, Any]]) -> Optional[str]:
|
|
1067
|
+
"""Build Milvus expression from filters."""
|
|
1068
|
+
if not filters:
|
|
1069
|
+
return None
|
|
1070
|
+
|
|
1071
|
+
expressions = []
|
|
1072
|
+
for k, v in filters.items():
|
|
1073
|
+
if isinstance(v, (list, tuple)):
|
|
1074
|
+
# For array values, use json_contains_any
|
|
1075
|
+
values_str = json.dumps(v)
|
|
1076
|
+
expr = f'json_contains_any(meta_data, {values_str}, "{k}")'
|
|
1077
|
+
elif isinstance(v, str):
|
|
1078
|
+
# For string values
|
|
1079
|
+
expr = f'meta_data["{k}"] == "{v}"'
|
|
1080
|
+
elif isinstance(v, bool):
|
|
1081
|
+
# For boolean values
|
|
1082
|
+
expr = f'meta_data["{k}"] == {str(v).lower()}'
|
|
1083
|
+
elif isinstance(v, (int, float)):
|
|
1084
|
+
# For numeric values
|
|
1085
|
+
expr = f'meta_data["{k}"] == {v}'
|
|
1086
|
+
elif v is None:
|
|
1087
|
+
# For null values
|
|
1088
|
+
expr = f'meta_data["{k}"] is null'
|
|
1089
|
+
else:
|
|
1090
|
+
# For other types, convert to string
|
|
1091
|
+
expr = f'meta_data["{k}"] == "{str(v)}"'
|
|
1092
|
+
|
|
1093
|
+
expressions.append(expr)
|
|
1094
|
+
|
|
1095
|
+
if expressions:
|
|
1096
|
+
return " and ".join(expressions)
|
|
1097
|
+
return None
|
|
1098
|
+
|
|
1099
|
+
def async_name_exists(self, name: str) -> bool:
|
|
1100
|
+
raise NotImplementedError(f"Async not supported on {self.__class__.__name__}.")
|
|
1101
|
+
|
|
1102
|
+
    def update_metadata(self, content_id: str, metadata: Dict[str, Any]) -> None:
        """
        Update the metadata for documents with the given content_id.

        Queries all rows matching content_id, merges the new metadata into both
        the `meta_data` and `filters` fields of each row, and upserts the result.
        Re-raises on any failure after logging.

        Args:
            content_id (str): The content ID to update
            metadata (Dict[str, Any]): The metadata to update
        """
        try:
            # Search for documents with the given content_id
            # NOTE(review): content_id is interpolated unescaped into the filter — confirm IDs cannot contain quotes.
            search_expr = f'content_id == "{content_id}"'
            results = self.client.query(
                collection_name=self.collection, filter=search_expr, output_fields=["id", "meta_data", "filters"]
            )

            if not results:
                log_debug(f"No documents found with content_id: {content_id}")
                return

            # Update each document
            updated_count = 0
            for result in results:
                doc_id = result["id"]
                # NOTE(review): search paths json.loads meta_data from a string; here it is read/written
                # as a dict without (de)serialization — confirm this matches the collection schema.
                current_metadata = result.get("meta_data", {})
                current_filters = result.get("filters", {})

                # Merge existing metadata with new metadata
                if isinstance(current_metadata, dict):
                    updated_metadata = current_metadata.copy()
                    updated_metadata.update(metadata)
                else:
                    # Non-dict stored value: replace wholesale with the new metadata
                    updated_metadata = metadata

                # Mirror the same merge into the filters field
                if isinstance(current_filters, dict):
                    updated_filters = current_filters.copy()
                    updated_filters.update(metadata)
                else:
                    updated_filters = metadata

                # Update the document
                # NOTE(review): upsert sends only id/meta_data/filters; verify Milvus upsert semantics do not
                # clobber the row's other fields (vectors, content) when fields are omitted.
                self.client.upsert(
                    collection_name=self.collection,
                    data=[{"id": doc_id, "meta_data": updated_metadata, "filters": updated_filters}],
                )
                updated_count += 1

            log_debug(f"Updated metadata for {updated_count} documents with content_id: {content_id}")

        except Exception as e:
            # Log, then propagate so callers can handle the failure
            log_error(f"Error updating metadata for content_id '{content_id}': {e}")
            raise
|
|
1153
|
+
|
|
1154
|
+
def get_supported_search_types(self) -> List[str]:
|
|
1155
|
+
"""Get the supported search types for this vector database."""
|
|
1156
|
+
return [SearchType.vector, SearchType.hybrid]
|