agno 0.1.2__py3-none-any.whl → 2.3.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/__init__.py +8 -0
- agno/agent/__init__.py +44 -5
- agno/agent/agent.py +10531 -2975
- agno/api/agent.py +14 -53
- agno/api/api.py +7 -46
- agno/api/evals.py +22 -0
- agno/api/os.py +17 -0
- agno/api/routes.py +6 -25
- agno/api/schemas/__init__.py +9 -0
- agno/api/schemas/agent.py +6 -9
- agno/api/schemas/evals.py +16 -0
- agno/api/schemas/os.py +14 -0
- agno/api/schemas/team.py +10 -10
- agno/api/schemas/utils.py +21 -0
- agno/api/schemas/workflows.py +16 -0
- agno/api/settings.py +53 -0
- agno/api/team.py +22 -26
- agno/api/workflow.py +28 -0
- agno/cloud/aws/base.py +214 -0
- agno/cloud/aws/s3/__init__.py +2 -0
- agno/cloud/aws/s3/api_client.py +43 -0
- agno/cloud/aws/s3/bucket.py +195 -0
- agno/cloud/aws/s3/object.py +57 -0
- agno/compression/__init__.py +3 -0
- agno/compression/manager.py +247 -0
- agno/culture/__init__.py +3 -0
- agno/culture/manager.py +956 -0
- agno/db/__init__.py +24 -0
- agno/db/async_postgres/__init__.py +3 -0
- agno/db/base.py +946 -0
- agno/db/dynamo/__init__.py +3 -0
- agno/db/dynamo/dynamo.py +2781 -0
- agno/db/dynamo/schemas.py +442 -0
- agno/db/dynamo/utils.py +743 -0
- agno/db/firestore/__init__.py +3 -0
- agno/db/firestore/firestore.py +2379 -0
- agno/db/firestore/schemas.py +181 -0
- agno/db/firestore/utils.py +376 -0
- agno/db/gcs_json/__init__.py +3 -0
- agno/db/gcs_json/gcs_json_db.py +1791 -0
- agno/db/gcs_json/utils.py +228 -0
- agno/db/in_memory/__init__.py +3 -0
- agno/db/in_memory/in_memory_db.py +1312 -0
- agno/db/in_memory/utils.py +230 -0
- agno/db/json/__init__.py +3 -0
- agno/db/json/json_db.py +1777 -0
- agno/db/json/utils.py +230 -0
- agno/db/migrations/manager.py +199 -0
- agno/db/migrations/v1_to_v2.py +635 -0
- agno/db/migrations/versions/v2_3_0.py +938 -0
- agno/db/mongo/__init__.py +17 -0
- agno/db/mongo/async_mongo.py +2760 -0
- agno/db/mongo/mongo.py +2597 -0
- agno/db/mongo/schemas.py +119 -0
- agno/db/mongo/utils.py +276 -0
- agno/db/mysql/__init__.py +4 -0
- agno/db/mysql/async_mysql.py +2912 -0
- agno/db/mysql/mysql.py +2923 -0
- agno/db/mysql/schemas.py +186 -0
- agno/db/mysql/utils.py +488 -0
- agno/db/postgres/__init__.py +4 -0
- agno/db/postgres/async_postgres.py +2579 -0
- agno/db/postgres/postgres.py +2870 -0
- agno/db/postgres/schemas.py +187 -0
- agno/db/postgres/utils.py +442 -0
- agno/db/redis/__init__.py +3 -0
- agno/db/redis/redis.py +2141 -0
- agno/db/redis/schemas.py +159 -0
- agno/db/redis/utils.py +346 -0
- agno/db/schemas/__init__.py +4 -0
- agno/db/schemas/culture.py +120 -0
- agno/db/schemas/evals.py +34 -0
- agno/db/schemas/knowledge.py +40 -0
- agno/db/schemas/memory.py +61 -0
- agno/db/singlestore/__init__.py +3 -0
- agno/db/singlestore/schemas.py +179 -0
- agno/db/singlestore/singlestore.py +2877 -0
- agno/db/singlestore/utils.py +384 -0
- agno/db/sqlite/__init__.py +4 -0
- agno/db/sqlite/async_sqlite.py +2911 -0
- agno/db/sqlite/schemas.py +181 -0
- agno/db/sqlite/sqlite.py +2908 -0
- agno/db/sqlite/utils.py +429 -0
- agno/db/surrealdb/__init__.py +3 -0
- agno/db/surrealdb/metrics.py +292 -0
- agno/db/surrealdb/models.py +334 -0
- agno/db/surrealdb/queries.py +71 -0
- agno/db/surrealdb/surrealdb.py +1908 -0
- agno/db/surrealdb/utils.py +147 -0
- agno/db/utils.py +118 -0
- agno/eval/__init__.py +24 -0
- agno/eval/accuracy.py +666 -276
- agno/eval/agent_as_judge.py +861 -0
- agno/eval/base.py +29 -0
- agno/eval/performance.py +779 -0
- agno/eval/reliability.py +241 -62
- agno/eval/utils.py +120 -0
- agno/exceptions.py +143 -1
- agno/filters.py +354 -0
- agno/guardrails/__init__.py +6 -0
- agno/guardrails/base.py +19 -0
- agno/guardrails/openai.py +144 -0
- agno/guardrails/pii.py +94 -0
- agno/guardrails/prompt_injection.py +52 -0
- agno/hooks/__init__.py +3 -0
- agno/hooks/decorator.py +164 -0
- agno/integrations/discord/__init__.py +3 -0
- agno/integrations/discord/client.py +203 -0
- agno/knowledge/__init__.py +5 -1
- agno/{document → knowledge}/chunking/agentic.py +22 -14
- agno/{document → knowledge}/chunking/document.py +2 -2
- agno/{document → knowledge}/chunking/fixed.py +7 -6
- agno/knowledge/chunking/markdown.py +151 -0
- agno/{document → knowledge}/chunking/recursive.py +15 -3
- agno/knowledge/chunking/row.py +39 -0
- agno/knowledge/chunking/semantic.py +91 -0
- agno/knowledge/chunking/strategy.py +165 -0
- agno/knowledge/content.py +74 -0
- agno/knowledge/document/__init__.py +5 -0
- agno/{document → knowledge/document}/base.py +12 -2
- agno/knowledge/embedder/__init__.py +5 -0
- agno/knowledge/embedder/aws_bedrock.py +343 -0
- agno/knowledge/embedder/azure_openai.py +210 -0
- agno/{embedder → knowledge/embedder}/base.py +8 -0
- agno/knowledge/embedder/cohere.py +323 -0
- agno/knowledge/embedder/fastembed.py +62 -0
- agno/{embedder → knowledge/embedder}/fireworks.py +1 -1
- agno/knowledge/embedder/google.py +258 -0
- agno/knowledge/embedder/huggingface.py +94 -0
- agno/knowledge/embedder/jina.py +182 -0
- agno/knowledge/embedder/langdb.py +22 -0
- agno/knowledge/embedder/mistral.py +206 -0
- agno/knowledge/embedder/nebius.py +13 -0
- agno/knowledge/embedder/ollama.py +154 -0
- agno/knowledge/embedder/openai.py +195 -0
- agno/knowledge/embedder/sentence_transformer.py +63 -0
- agno/{embedder → knowledge/embedder}/together.py +1 -1
- agno/knowledge/embedder/vllm.py +262 -0
- agno/knowledge/embedder/voyageai.py +165 -0
- agno/knowledge/knowledge.py +3006 -0
- agno/knowledge/reader/__init__.py +7 -0
- agno/knowledge/reader/arxiv_reader.py +81 -0
- agno/knowledge/reader/base.py +95 -0
- agno/knowledge/reader/csv_reader.py +164 -0
- agno/knowledge/reader/docx_reader.py +82 -0
- agno/knowledge/reader/field_labeled_csv_reader.py +290 -0
- agno/knowledge/reader/firecrawl_reader.py +201 -0
- agno/knowledge/reader/json_reader.py +88 -0
- agno/knowledge/reader/markdown_reader.py +137 -0
- agno/knowledge/reader/pdf_reader.py +431 -0
- agno/knowledge/reader/pptx_reader.py +101 -0
- agno/knowledge/reader/reader_factory.py +313 -0
- agno/knowledge/reader/s3_reader.py +89 -0
- agno/knowledge/reader/tavily_reader.py +193 -0
- agno/knowledge/reader/text_reader.py +127 -0
- agno/knowledge/reader/web_search_reader.py +325 -0
- agno/knowledge/reader/website_reader.py +455 -0
- agno/knowledge/reader/wikipedia_reader.py +91 -0
- agno/knowledge/reader/youtube_reader.py +78 -0
- agno/knowledge/remote_content/remote_content.py +88 -0
- agno/knowledge/reranker/__init__.py +3 -0
- agno/{reranker → knowledge/reranker}/base.py +1 -1
- agno/{reranker → knowledge/reranker}/cohere.py +2 -2
- agno/knowledge/reranker/infinity.py +195 -0
- agno/knowledge/reranker/sentence_transformer.py +54 -0
- agno/knowledge/types.py +39 -0
- agno/knowledge/utils.py +234 -0
- agno/media.py +439 -95
- agno/memory/__init__.py +16 -3
- agno/memory/manager.py +1474 -123
- agno/memory/strategies/__init__.py +15 -0
- agno/memory/strategies/base.py +66 -0
- agno/memory/strategies/summarize.py +196 -0
- agno/memory/strategies/types.py +37 -0
- agno/models/aimlapi/__init__.py +5 -0
- agno/models/aimlapi/aimlapi.py +62 -0
- agno/models/anthropic/__init__.py +4 -0
- agno/models/anthropic/claude.py +960 -496
- agno/models/aws/__init__.py +15 -0
- agno/models/aws/bedrock.py +686 -451
- agno/models/aws/claude.py +190 -183
- agno/models/azure/__init__.py +18 -1
- agno/models/azure/ai_foundry.py +489 -0
- agno/models/azure/openai_chat.py +89 -40
- agno/models/base.py +2477 -550
- agno/models/cerebras/__init__.py +12 -0
- agno/models/cerebras/cerebras.py +565 -0
- agno/models/cerebras/cerebras_openai.py +131 -0
- agno/models/cohere/__init__.py +4 -0
- agno/models/cohere/chat.py +306 -492
- agno/models/cometapi/__init__.py +5 -0
- agno/models/cometapi/cometapi.py +74 -0
- agno/models/dashscope/__init__.py +5 -0
- agno/models/dashscope/dashscope.py +90 -0
- agno/models/deepinfra/__init__.py +5 -0
- agno/models/deepinfra/deepinfra.py +45 -0
- agno/models/deepseek/__init__.py +4 -0
- agno/models/deepseek/deepseek.py +110 -9
- agno/models/fireworks/__init__.py +4 -0
- agno/models/fireworks/fireworks.py +19 -22
- agno/models/google/__init__.py +3 -7
- agno/models/google/gemini.py +1717 -662
- agno/models/google/utils.py +22 -0
- agno/models/groq/__init__.py +4 -0
- agno/models/groq/groq.py +391 -666
- agno/models/huggingface/__init__.py +4 -0
- agno/models/huggingface/huggingface.py +266 -538
- agno/models/ibm/__init__.py +5 -0
- agno/models/ibm/watsonx.py +432 -0
- agno/models/internlm/__init__.py +3 -0
- agno/models/internlm/internlm.py +20 -3
- agno/models/langdb/__init__.py +1 -0
- agno/models/langdb/langdb.py +60 -0
- agno/models/litellm/__init__.py +14 -0
- agno/models/litellm/chat.py +503 -0
- agno/models/litellm/litellm_openai.py +42 -0
- agno/models/llama_cpp/__init__.py +5 -0
- agno/models/llama_cpp/llama_cpp.py +22 -0
- agno/models/lmstudio/__init__.py +5 -0
- agno/models/lmstudio/lmstudio.py +25 -0
- agno/models/message.py +361 -39
- agno/models/meta/__init__.py +12 -0
- agno/models/meta/llama.py +502 -0
- agno/models/meta/llama_openai.py +79 -0
- agno/models/metrics.py +120 -0
- agno/models/mistral/__init__.py +4 -0
- agno/models/mistral/mistral.py +293 -393
- agno/models/nebius/__init__.py +3 -0
- agno/models/nebius/nebius.py +53 -0
- agno/models/nexus/__init__.py +3 -0
- agno/models/nexus/nexus.py +22 -0
- agno/models/nvidia/__init__.py +4 -0
- agno/models/nvidia/nvidia.py +22 -3
- agno/models/ollama/__init__.py +4 -2
- agno/models/ollama/chat.py +257 -492
- agno/models/openai/__init__.py +7 -0
- agno/models/openai/chat.py +725 -770
- agno/models/openai/like.py +16 -2
- agno/models/openai/responses.py +1121 -0
- agno/models/openrouter/__init__.py +4 -0
- agno/models/openrouter/openrouter.py +62 -5
- agno/models/perplexity/__init__.py +5 -0
- agno/models/perplexity/perplexity.py +203 -0
- agno/models/portkey/__init__.py +3 -0
- agno/models/portkey/portkey.py +82 -0
- agno/models/requesty/__init__.py +5 -0
- agno/models/requesty/requesty.py +69 -0
- agno/models/response.py +177 -7
- agno/models/sambanova/__init__.py +4 -0
- agno/models/sambanova/sambanova.py +23 -4
- agno/models/siliconflow/__init__.py +5 -0
- agno/models/siliconflow/siliconflow.py +42 -0
- agno/models/together/__init__.py +4 -0
- agno/models/together/together.py +21 -164
- agno/models/utils.py +266 -0
- agno/models/vercel/__init__.py +3 -0
- agno/models/vercel/v0.py +43 -0
- agno/models/vertexai/__init__.py +0 -1
- agno/models/vertexai/claude.py +190 -0
- agno/models/vllm/__init__.py +3 -0
- agno/models/vllm/vllm.py +83 -0
- agno/models/xai/__init__.py +2 -0
- agno/models/xai/xai.py +111 -7
- agno/os/__init__.py +3 -0
- agno/os/app.py +1027 -0
- agno/os/auth.py +244 -0
- agno/os/config.py +126 -0
- agno/os/interfaces/__init__.py +1 -0
- agno/os/interfaces/a2a/__init__.py +3 -0
- agno/os/interfaces/a2a/a2a.py +42 -0
- agno/os/interfaces/a2a/router.py +249 -0
- agno/os/interfaces/a2a/utils.py +924 -0
- agno/os/interfaces/agui/__init__.py +3 -0
- agno/os/interfaces/agui/agui.py +47 -0
- agno/os/interfaces/agui/router.py +147 -0
- agno/os/interfaces/agui/utils.py +574 -0
- agno/os/interfaces/base.py +25 -0
- agno/os/interfaces/slack/__init__.py +3 -0
- agno/os/interfaces/slack/router.py +148 -0
- agno/os/interfaces/slack/security.py +30 -0
- agno/os/interfaces/slack/slack.py +47 -0
- agno/os/interfaces/whatsapp/__init__.py +3 -0
- agno/os/interfaces/whatsapp/router.py +210 -0
- agno/os/interfaces/whatsapp/security.py +55 -0
- agno/os/interfaces/whatsapp/whatsapp.py +36 -0
- agno/os/mcp.py +293 -0
- agno/os/middleware/__init__.py +9 -0
- agno/os/middleware/jwt.py +797 -0
- agno/os/router.py +258 -0
- agno/os/routers/__init__.py +3 -0
- agno/os/routers/agents/__init__.py +3 -0
- agno/os/routers/agents/router.py +599 -0
- agno/os/routers/agents/schema.py +261 -0
- agno/os/routers/evals/__init__.py +3 -0
- agno/os/routers/evals/evals.py +450 -0
- agno/os/routers/evals/schemas.py +174 -0
- agno/os/routers/evals/utils.py +231 -0
- agno/os/routers/health.py +31 -0
- agno/os/routers/home.py +52 -0
- agno/os/routers/knowledge/__init__.py +3 -0
- agno/os/routers/knowledge/knowledge.py +1008 -0
- agno/os/routers/knowledge/schemas.py +178 -0
- agno/os/routers/memory/__init__.py +3 -0
- agno/os/routers/memory/memory.py +661 -0
- agno/os/routers/memory/schemas.py +88 -0
- agno/os/routers/metrics/__init__.py +3 -0
- agno/os/routers/metrics/metrics.py +190 -0
- agno/os/routers/metrics/schemas.py +47 -0
- agno/os/routers/session/__init__.py +3 -0
- agno/os/routers/session/session.py +997 -0
- agno/os/routers/teams/__init__.py +3 -0
- agno/os/routers/teams/router.py +512 -0
- agno/os/routers/teams/schema.py +257 -0
- agno/os/routers/traces/__init__.py +3 -0
- agno/os/routers/traces/schemas.py +414 -0
- agno/os/routers/traces/traces.py +499 -0
- agno/os/routers/workflows/__init__.py +3 -0
- agno/os/routers/workflows/router.py +624 -0
- agno/os/routers/workflows/schema.py +75 -0
- agno/os/schema.py +534 -0
- agno/os/scopes.py +469 -0
- agno/{playground → os}/settings.py +7 -15
- agno/os/utils.py +973 -0
- agno/reasoning/anthropic.py +80 -0
- agno/reasoning/azure_ai_foundry.py +67 -0
- agno/reasoning/deepseek.py +63 -0
- agno/reasoning/default.py +97 -0
- agno/reasoning/gemini.py +73 -0
- agno/reasoning/groq.py +71 -0
- agno/reasoning/helpers.py +24 -1
- agno/reasoning/ollama.py +67 -0
- agno/reasoning/openai.py +86 -0
- agno/reasoning/step.py +2 -1
- agno/reasoning/vertexai.py +76 -0
- agno/run/__init__.py +6 -0
- agno/run/agent.py +822 -0
- agno/run/base.py +247 -0
- agno/run/cancel.py +81 -0
- agno/run/requirement.py +181 -0
- agno/run/team.py +767 -0
- agno/run/workflow.py +708 -0
- agno/session/__init__.py +10 -0
- agno/session/agent.py +260 -0
- agno/session/summary.py +265 -0
- agno/session/team.py +342 -0
- agno/session/workflow.py +501 -0
- agno/table.py +10 -0
- agno/team/__init__.py +37 -0
- agno/team/team.py +9536 -0
- agno/tools/__init__.py +7 -0
- agno/tools/agentql.py +120 -0
- agno/tools/airflow.py +22 -12
- agno/tools/api.py +122 -0
- agno/tools/apify.py +276 -83
- agno/tools/{arxiv_toolkit.py → arxiv.py} +20 -12
- agno/tools/aws_lambda.py +28 -7
- agno/tools/aws_ses.py +66 -0
- agno/tools/baidusearch.py +11 -4
- agno/tools/bitbucket.py +292 -0
- agno/tools/brandfetch.py +213 -0
- agno/tools/bravesearch.py +106 -0
- agno/tools/brightdata.py +367 -0
- agno/tools/browserbase.py +209 -0
- agno/tools/calcom.py +32 -23
- agno/tools/calculator.py +24 -37
- agno/tools/cartesia.py +187 -0
- agno/tools/{clickup_tool.py → clickup.py} +17 -28
- agno/tools/confluence.py +91 -26
- agno/tools/crawl4ai.py +139 -43
- agno/tools/csv_toolkit.py +28 -22
- agno/tools/dalle.py +36 -22
- agno/tools/daytona.py +475 -0
- agno/tools/decorator.py +169 -14
- agno/tools/desi_vocal.py +23 -11
- agno/tools/discord.py +32 -29
- agno/tools/docker.py +716 -0
- agno/tools/duckdb.py +76 -81
- agno/tools/duckduckgo.py +43 -40
- agno/tools/e2b.py +703 -0
- agno/tools/eleven_labs.py +65 -54
- agno/tools/email.py +13 -5
- agno/tools/evm.py +129 -0
- agno/tools/exa.py +324 -42
- agno/tools/fal.py +39 -35
- agno/tools/file.py +196 -30
- agno/tools/file_generation.py +356 -0
- agno/tools/financial_datasets.py +288 -0
- agno/tools/firecrawl.py +108 -33
- agno/tools/function.py +960 -122
- agno/tools/giphy.py +34 -12
- agno/tools/github.py +1294 -97
- agno/tools/gmail.py +922 -0
- agno/tools/google_bigquery.py +117 -0
- agno/tools/google_drive.py +271 -0
- agno/tools/google_maps.py +253 -0
- agno/tools/googlecalendar.py +607 -107
- agno/tools/googlesheets.py +377 -0
- agno/tools/hackernews.py +20 -12
- agno/tools/jina.py +24 -14
- agno/tools/jira.py +48 -19
- agno/tools/knowledge.py +218 -0
- agno/tools/linear.py +82 -43
- agno/tools/linkup.py +58 -0
- agno/tools/local_file_system.py +15 -7
- agno/tools/lumalab.py +41 -26
- agno/tools/mcp/__init__.py +10 -0
- agno/tools/mcp/mcp.py +331 -0
- agno/tools/mcp/multi_mcp.py +347 -0
- agno/tools/mcp/params.py +24 -0
- agno/tools/mcp_toolbox.py +284 -0
- agno/tools/mem0.py +193 -0
- agno/tools/memory.py +419 -0
- agno/tools/mlx_transcribe.py +11 -9
- agno/tools/models/azure_openai.py +190 -0
- agno/tools/models/gemini.py +203 -0
- agno/tools/models/groq.py +158 -0
- agno/tools/models/morph.py +186 -0
- agno/tools/models/nebius.py +124 -0
- agno/tools/models_labs.py +163 -82
- agno/tools/moviepy_video.py +18 -13
- agno/tools/nano_banana.py +151 -0
- agno/tools/neo4j.py +134 -0
- agno/tools/newspaper.py +15 -4
- agno/tools/newspaper4k.py +19 -6
- agno/tools/notion.py +204 -0
- agno/tools/openai.py +181 -17
- agno/tools/openbb.py +27 -20
- agno/tools/opencv.py +321 -0
- agno/tools/openweather.py +233 -0
- agno/tools/oxylabs.py +385 -0
- agno/tools/pandas.py +25 -15
- agno/tools/parallel.py +314 -0
- agno/tools/postgres.py +238 -185
- agno/tools/pubmed.py +125 -13
- agno/tools/python.py +48 -35
- agno/tools/reasoning.py +283 -0
- agno/tools/reddit.py +207 -29
- agno/tools/redshift.py +406 -0
- agno/tools/replicate.py +69 -26
- agno/tools/resend.py +11 -6
- agno/tools/scrapegraph.py +179 -19
- agno/tools/searxng.py +23 -31
- agno/tools/serpapi.py +15 -10
- agno/tools/serper.py +255 -0
- agno/tools/shell.py +23 -12
- agno/tools/shopify.py +1519 -0
- agno/tools/slack.py +56 -14
- agno/tools/sleep.py +8 -6
- agno/tools/spider.py +35 -11
- agno/tools/spotify.py +919 -0
- agno/tools/sql.py +34 -19
- agno/tools/tavily.py +158 -8
- agno/tools/telegram.py +18 -8
- agno/tools/todoist.py +218 -0
- agno/tools/toolkit.py +134 -9
- agno/tools/trafilatura.py +388 -0
- agno/tools/trello.py +25 -28
- agno/tools/twilio.py +18 -9
- agno/tools/user_control_flow.py +78 -0
- agno/tools/valyu.py +228 -0
- agno/tools/visualization.py +467 -0
- agno/tools/webbrowser.py +28 -0
- agno/tools/webex.py +76 -0
- agno/tools/website.py +23 -19
- agno/tools/webtools.py +45 -0
- agno/tools/whatsapp.py +286 -0
- agno/tools/wikipedia.py +28 -19
- agno/tools/workflow.py +285 -0
- agno/tools/{twitter.py → x.py} +142 -46
- agno/tools/yfinance.py +41 -39
- agno/tools/youtube.py +34 -17
- agno/tools/zendesk.py +15 -5
- agno/tools/zep.py +454 -0
- agno/tools/zoom.py +86 -37
- agno/tracing/__init__.py +12 -0
- agno/tracing/exporter.py +157 -0
- agno/tracing/schemas.py +276 -0
- agno/tracing/setup.py +111 -0
- agno/utils/agent.py +938 -0
- agno/utils/audio.py +37 -1
- agno/utils/certs.py +27 -0
- agno/utils/code_execution.py +11 -0
- agno/utils/common.py +103 -20
- agno/utils/cryptography.py +22 -0
- agno/utils/dttm.py +33 -0
- agno/utils/events.py +700 -0
- agno/utils/functions.py +107 -37
- agno/utils/gemini.py +426 -0
- agno/utils/hooks.py +171 -0
- agno/utils/http.py +185 -0
- agno/utils/json_schema.py +159 -37
- agno/utils/knowledge.py +36 -0
- agno/utils/location.py +19 -0
- agno/utils/log.py +221 -8
- agno/utils/mcp.py +214 -0
- agno/utils/media.py +335 -14
- agno/utils/merge_dict.py +22 -1
- agno/utils/message.py +77 -2
- agno/utils/models/ai_foundry.py +50 -0
- agno/utils/models/claude.py +373 -0
- agno/utils/models/cohere.py +94 -0
- agno/utils/models/llama.py +85 -0
- agno/utils/models/mistral.py +100 -0
- agno/utils/models/openai_responses.py +140 -0
- agno/utils/models/schema_utils.py +153 -0
- agno/utils/models/watsonx.py +41 -0
- agno/utils/openai.py +257 -0
- agno/utils/pickle.py +1 -1
- agno/utils/pprint.py +124 -8
- agno/utils/print_response/agent.py +930 -0
- agno/utils/print_response/team.py +1914 -0
- agno/utils/print_response/workflow.py +1668 -0
- agno/utils/prompts.py +111 -0
- agno/utils/reasoning.py +108 -0
- agno/utils/response.py +163 -0
- agno/utils/serialize.py +32 -0
- agno/utils/shell.py +4 -4
- agno/utils/streamlit.py +487 -0
- agno/utils/string.py +204 -51
- agno/utils/team.py +139 -0
- agno/utils/timer.py +9 -2
- agno/utils/tokens.py +657 -0
- agno/utils/tools.py +19 -1
- agno/utils/whatsapp.py +305 -0
- agno/utils/yaml_io.py +3 -3
- agno/vectordb/__init__.py +2 -0
- agno/vectordb/base.py +87 -9
- agno/vectordb/cassandra/__init__.py +5 -1
- agno/vectordb/cassandra/cassandra.py +383 -27
- agno/vectordb/chroma/__init__.py +4 -0
- agno/vectordb/chroma/chromadb.py +748 -83
- agno/vectordb/clickhouse/__init__.py +7 -1
- agno/vectordb/clickhouse/clickhousedb.py +554 -53
- agno/vectordb/couchbase/__init__.py +3 -0
- agno/vectordb/couchbase/couchbase.py +1446 -0
- agno/vectordb/lancedb/__init__.py +5 -0
- agno/vectordb/lancedb/lance_db.py +730 -98
- agno/vectordb/langchaindb/__init__.py +5 -0
- agno/vectordb/langchaindb/langchaindb.py +163 -0
- agno/vectordb/lightrag/__init__.py +5 -0
- agno/vectordb/lightrag/lightrag.py +388 -0
- agno/vectordb/llamaindex/__init__.py +3 -0
- agno/vectordb/llamaindex/llamaindexdb.py +166 -0
- agno/vectordb/milvus/__init__.py +3 -0
- agno/vectordb/milvus/milvus.py +966 -78
- agno/vectordb/mongodb/__init__.py +9 -1
- agno/vectordb/mongodb/mongodb.py +1175 -172
- agno/vectordb/pgvector/__init__.py +8 -0
- agno/vectordb/pgvector/pgvector.py +599 -115
- agno/vectordb/pineconedb/__init__.py +5 -1
- agno/vectordb/pineconedb/pineconedb.py +406 -43
- agno/vectordb/qdrant/__init__.py +4 -0
- agno/vectordb/qdrant/qdrant.py +914 -61
- agno/vectordb/redis/__init__.py +9 -0
- agno/vectordb/redis/redisdb.py +682 -0
- agno/vectordb/singlestore/__init__.py +8 -1
- agno/vectordb/singlestore/singlestore.py +771 -0
- agno/vectordb/surrealdb/__init__.py +3 -0
- agno/vectordb/surrealdb/surrealdb.py +663 -0
- agno/vectordb/upstashdb/__init__.py +5 -0
- agno/vectordb/upstashdb/upstashdb.py +718 -0
- agno/vectordb/weaviate/__init__.py +8 -0
- agno/vectordb/weaviate/index.py +15 -0
- agno/vectordb/weaviate/weaviate.py +1009 -0
- agno/workflow/__init__.py +23 -1
- agno/workflow/agent.py +299 -0
- agno/workflow/condition.py +759 -0
- agno/workflow/loop.py +756 -0
- agno/workflow/parallel.py +853 -0
- agno/workflow/router.py +723 -0
- agno/workflow/step.py +1564 -0
- agno/workflow/steps.py +613 -0
- agno/workflow/types.py +556 -0
- agno/workflow/workflow.py +4327 -514
- agno-2.3.13.dist-info/METADATA +639 -0
- agno-2.3.13.dist-info/RECORD +613 -0
- {agno-0.1.2.dist-info → agno-2.3.13.dist-info}/WHEEL +1 -1
- agno-2.3.13.dist-info/licenses/LICENSE +201 -0
- agno/api/playground.py +0 -91
- agno/api/schemas/playground.py +0 -22
- agno/api/schemas/user.py +0 -22
- agno/api/schemas/workspace.py +0 -46
- agno/api/user.py +0 -160
- agno/api/workspace.py +0 -151
- agno/cli/auth_server.py +0 -118
- agno/cli/config.py +0 -275
- agno/cli/console.py +0 -88
- agno/cli/credentials.py +0 -23
- agno/cli/entrypoint.py +0 -571
- agno/cli/operator.py +0 -355
- agno/cli/settings.py +0 -85
- agno/cli/ws/ws_cli.py +0 -817
- agno/constants.py +0 -13
- agno/document/__init__.py +0 -1
- agno/document/chunking/semantic.py +0 -47
- agno/document/chunking/strategy.py +0 -31
- agno/document/reader/__init__.py +0 -1
- agno/document/reader/arxiv_reader.py +0 -41
- agno/document/reader/base.py +0 -22
- agno/document/reader/csv_reader.py +0 -84
- agno/document/reader/docx_reader.py +0 -46
- agno/document/reader/firecrawl_reader.py +0 -99
- agno/document/reader/json_reader.py +0 -43
- agno/document/reader/pdf_reader.py +0 -219
- agno/document/reader/s3/pdf_reader.py +0 -46
- agno/document/reader/s3/text_reader.py +0 -51
- agno/document/reader/text_reader.py +0 -41
- agno/document/reader/website_reader.py +0 -175
- agno/document/reader/youtube_reader.py +0 -50
- agno/embedder/__init__.py +0 -1
- agno/embedder/azure_openai.py +0 -86
- agno/embedder/cohere.py +0 -72
- agno/embedder/fastembed.py +0 -37
- agno/embedder/google.py +0 -73
- agno/embedder/huggingface.py +0 -54
- agno/embedder/mistral.py +0 -80
- agno/embedder/ollama.py +0 -57
- agno/embedder/openai.py +0 -74
- agno/embedder/sentence_transformer.py +0 -38
- agno/embedder/voyageai.py +0 -64
- agno/eval/perf.py +0 -201
- agno/file/__init__.py +0 -1
- agno/file/file.py +0 -16
- agno/file/local/csv.py +0 -32
- agno/file/local/txt.py +0 -19
- agno/infra/app.py +0 -240
- agno/infra/base.py +0 -144
- agno/infra/context.py +0 -20
- agno/infra/db_app.py +0 -52
- agno/infra/resource.py +0 -205
- agno/infra/resources.py +0 -55
- agno/knowledge/agent.py +0 -230
- agno/knowledge/arxiv.py +0 -22
- agno/knowledge/combined.py +0 -22
- agno/knowledge/csv.py +0 -28
- agno/knowledge/csv_url.py +0 -19
- agno/knowledge/document.py +0 -20
- agno/knowledge/docx.py +0 -30
- agno/knowledge/json.py +0 -28
- agno/knowledge/langchain.py +0 -71
- agno/knowledge/llamaindex.py +0 -66
- agno/knowledge/pdf.py +0 -28
- agno/knowledge/pdf_url.py +0 -26
- agno/knowledge/s3/base.py +0 -60
- agno/knowledge/s3/pdf.py +0 -21
- agno/knowledge/s3/text.py +0 -23
- agno/knowledge/text.py +0 -30
- agno/knowledge/website.py +0 -88
- agno/knowledge/wikipedia.py +0 -31
- agno/knowledge/youtube.py +0 -22
- agno/memory/agent.py +0 -392
- agno/memory/classifier.py +0 -104
- agno/memory/db/__init__.py +0 -1
- agno/memory/db/base.py +0 -42
- agno/memory/db/mongodb.py +0 -189
- agno/memory/db/postgres.py +0 -203
- agno/memory/db/sqlite.py +0 -193
- agno/memory/memory.py +0 -15
- agno/memory/row.py +0 -36
- agno/memory/summarizer.py +0 -192
- agno/memory/summary.py +0 -19
- agno/memory/workflow.py +0 -38
- agno/models/google/gemini_openai.py +0 -26
- agno/models/ollama/hermes.py +0 -221
- agno/models/ollama/tools.py +0 -362
- agno/models/vertexai/gemini.py +0 -595
- agno/playground/__init__.py +0 -3
- agno/playground/async_router.py +0 -421
- agno/playground/deploy.py +0 -249
- agno/playground/operator.py +0 -92
- agno/playground/playground.py +0 -91
- agno/playground/schemas.py +0 -76
- agno/playground/serve.py +0 -55
- agno/playground/sync_router.py +0 -405
- agno/reasoning/agent.py +0 -68
- agno/run/response.py +0 -112
- agno/storage/agent/__init__.py +0 -0
- agno/storage/agent/base.py +0 -38
- agno/storage/agent/dynamodb.py +0 -350
- agno/storage/agent/json.py +0 -92
- agno/storage/agent/mongodb.py +0 -228
- agno/storage/agent/postgres.py +0 -367
- agno/storage/agent/session.py +0 -79
- agno/storage/agent/singlestore.py +0 -303
- agno/storage/agent/sqlite.py +0 -357
- agno/storage/agent/yaml.py +0 -93
- agno/storage/workflow/__init__.py +0 -0
- agno/storage/workflow/base.py +0 -40
- agno/storage/workflow/mongodb.py +0 -233
- agno/storage/workflow/postgres.py +0 -366
- agno/storage/workflow/session.py +0 -60
- agno/storage/workflow/sqlite.py +0 -359
- agno/tools/googlesearch.py +0 -88
- agno/utils/defaults.py +0 -57
- agno/utils/filesystem.py +0 -39
- agno/utils/git.py +0 -52
- agno/utils/json_io.py +0 -30
- agno/utils/load_env.py +0 -19
- agno/utils/py_io.py +0 -19
- agno/utils/pyproject.py +0 -18
- agno/utils/resource_filter.py +0 -31
- agno/vectordb/singlestore/s2vectordb.py +0 -390
- agno/vectordb/singlestore/s2vectordb2.py +0 -355
- agno/workspace/__init__.py +0 -0
- agno/workspace/config.py +0 -325
- agno/workspace/enums.py +0 -6
- agno/workspace/helpers.py +0 -48
- agno/workspace/operator.py +0 -758
- agno/workspace/settings.py +0 -63
- agno-0.1.2.dist-info/LICENSE +0 -375
- agno-0.1.2.dist-info/METADATA +0 -502
- agno-0.1.2.dist-info/RECORD +0 -352
- agno-0.1.2.dist-info/entry_points.txt +0 -3
- /agno/{cli → db/migrations}/__init__.py +0 -0
- /agno/{cli/ws → db/migrations/versions}/__init__.py +0 -0
- /agno/{document/chunking/__init__.py → db/schemas/metrics.py} +0 -0
- /agno/{document/reader/s3 → integrations}/__init__.py +0 -0
- /agno/{file/local → knowledge/chunking}/__init__.py +0 -0
- /agno/{infra → knowledge/remote_content}/__init__.py +0 -0
- /agno/{knowledge/s3 → tools/models}/__init__.py +0 -0
- /agno/{reranker → utils/models}/__init__.py +0 -0
- /agno/{storage → utils/print_response}/__init__.py +0 -0
- {agno-0.1.2.dist-info → agno-2.3.13.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,1009 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import json
|
|
3
|
+
import uuid
|
|
4
|
+
from hashlib import md5
|
|
5
|
+
from os import getenv
|
|
6
|
+
from typing import Any, Dict, List, Optional, Tuple, Union
|
|
7
|
+
|
|
8
|
+
try:
|
|
9
|
+
from warnings import filterwarnings
|
|
10
|
+
|
|
11
|
+
import weaviate
|
|
12
|
+
from weaviate import WeaviateAsyncClient
|
|
13
|
+
from weaviate.classes.config import Configure, DataType, Property, Tokenization, VectorDistances
|
|
14
|
+
from weaviate.classes.init import Auth
|
|
15
|
+
from weaviate.classes.query import Filter
|
|
16
|
+
|
|
17
|
+
filterwarnings("ignore", category=ResourceWarning)
|
|
18
|
+
except ImportError:
|
|
19
|
+
raise ImportError("Weaviate is not installed. Install using 'pip install weaviate-client'.")
|
|
20
|
+
|
|
21
|
+
from agno.filters import FilterExpr
|
|
22
|
+
from agno.knowledge.document import Document
|
|
23
|
+
from agno.knowledge.embedder import Embedder
|
|
24
|
+
from agno.knowledge.reranker.base import Reranker
|
|
25
|
+
from agno.utils.log import log_debug, log_info, log_warning, logger
|
|
26
|
+
from agno.vectordb.base import VectorDb
|
|
27
|
+
from agno.vectordb.search import SearchType
|
|
28
|
+
from agno.vectordb.weaviate.index import Distance, VectorIndex
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class Weaviate(VectorDb):
|
|
32
|
+
"""
|
|
33
|
+
Weaviate class for managing vector operations with Weaviate vector database (v4 client).
|
|
34
|
+
"""
|
|
35
|
+
|
|
36
|
+
def __init__(
    self,
    # Connection/Client params
    wcd_url: Optional[str] = None,
    wcd_api_key: Optional[str] = None,
    client: Optional[weaviate.WeaviateClient] = None,
    local: bool = False,
    # Collection params
    collection: str = "default",
    name: Optional[str] = None,
    description: Optional[str] = None,
    id: Optional[str] = None,
    vector_index: VectorIndex = VectorIndex.HNSW,
    distance: Distance = Distance.COSINE,
    # Search/Embedding params
    embedder: Optional[Embedder] = None,
    search_type: SearchType = SearchType.vector,
    reranker: Optional[Reranker] = None,
    hybrid_search_alpha: float = 0.5,
):
    """Configure a Weaviate-backed vector store.

    No connection is opened here; clients are created lazily by the client getters.

    Args:
        wcd_url: Weaviate Cloud cluster URL. Falls back to the ``WCD_URL`` environment variable.
        wcd_api_key: Weaviate Cloud API key. Falls back to the ``WCD_API_KEY`` environment variable.
        client: Pre-built synchronous client to reuse instead of creating one.
        local: When True, a local connection is used even if cloud credentials are set.
        collection: Name of the Weaviate collection to operate on.
        name: Optional name forwarded to the base class.
        description: Optional description forwarded to the base class.
        id: Optional explicit identifier. When omitted, one is generated from the
            connection identifier and the collection name.
        vector_index: Vector index type stored on the instance.
        distance: Distance metric stored on the instance.
        embedder: Embedder to use. Defaults to ``OpenAIEmbedder()`` when omitted.
        search_type: Search mode stored on the instance.
        reranker: Optional reranker stored on the instance.
        hybrid_search_alpha: Stored on the instance; presumably the vector/keyword
            weighting for hybrid search (usage is outside this constructor).
    """
    # Dynamic ID generation based on unique identifiers
    if id is None:
        from agno.utils.string import generate_id

        # The conditional expression binds looser than ``or``, so the fallback must be
        # parenthesised. Unparenthesised, ``wcd_url`` was ignored whenever ``local`` was
        # False and every cloud instance with the same collection shared one ID.
        connection_identifier = wcd_url or ("local" if local else "default")
        seed = f"{connection_identifier}#{collection}"
        id = generate_id(seed)

    # Initialize base class with name, description, and generated ID
    super().__init__(id=id, name=name, description=description)

    # Connection setup: explicit arguments win over environment variables
    self.wcd_url = wcd_url or getenv("WCD_URL")
    self.wcd_api_key = wcd_api_key or getenv("WCD_API_KEY")
    self.local = local
    self.client = client
    # The async client is always created on demand
    self.async_client = None

    # Collection setup
    self.collection = collection
    self.vector_index = vector_index
    self.distance = distance

    # Embedder setup
    if embedder is None:
        from agno.knowledge.embedder.openai import OpenAIEmbedder

        embedder = OpenAIEmbedder()
        log_info("Embedder not provided, using OpenAIEmbedder as default.")
    self.embedder: Embedder = embedder

    # Search setup
    self.search_type: SearchType = search_type
    self.reranker: Optional[Reranker] = reranker
    self.hybrid_search_alpha = hybrid_search_alpha
|
|
91
|
+
|
|
92
|
+
@staticmethod
|
|
93
|
+
def _get_doc_uuid(document: Document) -> Tuple[uuid.UUID, str]:
|
|
94
|
+
cleaned_content = document.content.replace("\x00", "\ufffd")
|
|
95
|
+
content_hash = md5(cleaned_content.encode()).hexdigest()
|
|
96
|
+
doc_uuid = uuid.UUID(hex=content_hash[:32])
|
|
97
|
+
return doc_uuid, cleaned_content
|
|
98
|
+
|
|
99
|
+
def get_client(self) -> weaviate.WeaviateClient:
    """Return the synchronous Weaviate client, creating it on first use.

    A cloud connection is opened when both ``wcd_url`` and ``wcd_api_key`` are
    set and ``local`` is falsy; otherwise a local connection is opened. An
    already stored client is reused and reconnected if it reports that it is
    not connected.

    Returns:
        weaviate.WeaviateClient: The connected, ready client.

    Raises:
        Exception: If the client reports that it is not ready.
    """
    if self.client is None:
        cloud_configured = self.wcd_url and self.wcd_api_key and not self.local
        if not cloud_configured:
            log_info("Initializing local Weaviate client")
            self.client = weaviate.connect_to_local()
        else:
            log_info("Initializing Weaviate Cloud client")
            self.client = weaviate.connect_to_weaviate_cloud(
                cluster_url=self.wcd_url, auth_credentials=Auth.api_key(self.wcd_api_key)
            )

    # A previously closed client is stored but disconnected; bring it back up.
    if not self.client.is_connected():  # type: ignore
        self.client.connect()  # type: ignore

    if self.client.is_ready():  # type: ignore
        return self.client

    raise Exception("Weaviate client is not ready")
|
|
126
|
+
|
|
127
|
+
async def get_async_client(self) -> WeaviateAsyncClient:
    """Return the asynchronous Weaviate client, creating it on first use.

    Uses the cloud constructor when both ``wcd_url`` and ``wcd_api_key`` are
    set and ``local`` is falsy; otherwise uses the local constructor. The
    stored client is (re)connected if it reports that it is not connected.

    Returns:
        WeaviateAsyncClient: The connected, ready async client.

    Raises:
        ConnectionError: If the client reports that it is not ready.
    """
    if self.async_client is None:
        cloud_configured = self.wcd_url and self.wcd_api_key and not self.local
        if not cloud_configured:
            log_info("Initializing local Weaviate async client")
            self.async_client = weaviate.use_async_with_local()  # type: ignore
        else:
            log_info("Initializing Weaviate Cloud async client")
            self.async_client = weaviate.use_async_with_weaviate_cloud(
                cluster_url=self.wcd_url,
                auth_credentials=Auth.api_key(self.wcd_api_key),  # type: ignore
            )

    # Callers close the client after each operation, so reconnect when needed.
    if not self.async_client.is_connected():  # type: ignore
        await self.async_client.connect()  # type: ignore

    ready = await self.async_client.is_ready()  # type: ignore
    if not ready:
        raise ConnectionError("Weaviate async client is not ready")

    return self.async_client  # type: ignore
|
|
147
|
+
|
|
148
|
+
def create(self) -> None:
    """Create the collection in Weaviate unless it already exists.

    The collection is created with five text properties (``name``,
    ``content``, ``meta_data``, ``content_id``, ``content_hash``), no
    vectorizer, and the vector index configuration derived from
    ``self.vector_index`` and ``self.distance``.
    """
    if self.exists():
        return

    log_debug(f"Creating collection '{self.collection}' in Weaviate.")
    client = self.get_client()
    schema = [
        Property(name="name", data_type=DataType.TEXT),
        Property(name="content", data_type=DataType.TEXT, tokenization=Tokenization.LOWERCASE),
        Property(name="meta_data", data_type=DataType.TEXT),
        Property(name="content_id", data_type=DataType.TEXT),
        Property(name="content_hash", data_type=DataType.TEXT),
    ]
    client.collections.create(
        name=self.collection,
        properties=schema,
        vectorizer_config=Configure.Vectorizer.none(),
        vector_index_config=self.get_vector_index_config(self.vector_index, self.distance),
    )
    log_debug(f"Collection '{self.collection}' created in Weaviate.")
|
|
165
|
+
|
|
166
|
+
async def async_create(self) -> None:
    """Create the collection in Weaviate asynchronously if it doesn't exist.

    Mirrors ``create``: the collection is only created when it is not already
    present, so calling this repeatedly does not attempt a duplicate creation.
    The async client is always closed before returning.
    """
    client = await self.get_async_client()
    try:
        # Same guard as the sync path; without it a second call would try to
        # create a collection that already exists.
        if await client.collections.exists(self.collection):
            return

        await client.collections.create(
            name=self.collection,
            properties=[
                Property(name="name", data_type=DataType.TEXT),
                Property(name="content", data_type=DataType.TEXT, tokenization=Tokenization.LOWERCASE),
                Property(name="meta_data", data_type=DataType.TEXT),
                Property(name="content_id", data_type=DataType.TEXT),
                Property(name="content_hash", data_type=DataType.TEXT),
            ],
            vectorizer_config=Configure.Vectorizer.none(),
            vector_index_config=self.get_vector_index_config(self.vector_index, self.distance),
        )
        log_debug(f"Collection '{self.collection}' created in Weaviate asynchronously.")
    finally:
        await client.close()
|
|
184
|
+
|
|
185
|
+
def content_hash_exists(self, content_hash: str) -> bool:
    """Report whether any object in the collection has the given content hash.

    Args:
        content_hash (str): Value to match against the ``content_hash`` property.

    Returns:
        bool: True if at least one matching object is found, False otherwise.
    """
    hash_filter = Filter.by_property("content_hash").equal(content_hash)
    weaviate_collection = self.get_client().collections.get(self.collection)
    # One hit is enough to answer the question, so cap the fetch at a single object.
    matches = weaviate_collection.query.fetch_objects(limit=1, filters=hash_filter)
    return len(matches.objects) > 0
|
|
193
|
+
|
|
194
|
+
def name_exists(self, name: str) -> bool:
    """Report whether any object in the collection has the given name.

    Args:
        name (str): Value to match against the ``name`` property.

    Returns:
        bool: True if at least one matching object is found, False otherwise.
    """
    name_filter = Filter.by_property("name").equal(name)
    weaviate_collection = self.get_client().collections.get(self.collection)
    # One hit is enough to answer the question, so cap the fetch at a single object.
    matches = weaviate_collection.query.fetch_objects(limit=1, filters=name_filter)
    return len(matches.objects) > 0
|
|
210
|
+
|
|
211
|
+
async def async_name_exists(self, name: str) -> bool:
    """Asynchronously report whether any object in the collection has the given name.

    The async client is closed before returning, whether or not the query succeeds.

    Args:
        name (str): Value to match against the ``name`` property.

    Returns:
        bool: True if at least one matching object is found, False otherwise.
    """
    client = await self.get_async_client()
    try:
        weaviate_collection = client.collections.get(self.collection)
        name_filter = Filter.by_property("name").equal(name)
        matches = await weaviate_collection.query.fetch_objects(limit=1, filters=name_filter)
        found = len(matches.objects) > 0
        return found
    finally:
        await client.close()
|
|
231
|
+
|
|
232
|
+
def insert(self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
    """Embed and insert documents into the Weaviate collection.

    Documents whose embedding is still None after embedding are logged and
    skipped. Each object's UUID is derived from the document id (or the MD5 of
    its sanitized content) combined with ``content_hash``.

    Args:
        content_hash (str): Hash stored on every object and mixed into its UUID.
        documents (List[Document]): List of documents to insert.
        filters (Optional[Dict[str, Any]]): Extra key/values merged into each
            document's metadata before it is stored.
    """
    log_debug(f"Inserting {len(documents)} documents into Weaviate.")
    target = self.get_client().collections.get(self.collection)

    for doc in documents:
        doc.embed(embedder=self.embedder)
        if doc.embedding is None:
            logger.error(f"Document embedding is None: {doc.name}")
            continue

        # NUL characters are swapped for U+FFFD before the content is stored or hashed.
        sanitized = doc.content.replace("\x00", "\ufffd")

        # Fold content_hash into the id so the same document under a different hash gets its own object.
        seed_id = doc.id or md5(sanitized.encode()).hexdigest()
        combined_digest = md5(f"{seed_id}_{content_hash}".encode()).hexdigest()
        object_uuid = uuid.UUID(hex=combined_digest[:32])

        # Filters are merged into the metadata (in place when the document already has metadata).
        meta_data = doc.meta_data or {}
        if filters:
            meta_data.update(filters)

        # meta_data is stored as a JSON string; empty metadata is stored as None.
        serialized_meta = json.dumps(meta_data) if meta_data else None

        payload = {
            "name": doc.name,
            "content": sanitized,
            "meta_data": serialized_meta,
            "content_id": doc.content_id,
            "content_hash": content_hash,
        }
        target.data.insert(properties=payload, vector=doc.embedding, uuid=object_uuid)
        log_debug(f"Inserted document: {doc.name} ({meta_data})")
|
|
275
|
+
|
|
276
|
+
async def async_insert(
    self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
) -> None:
    """Embed and insert documents into the Weaviate collection asynchronously.

    Embeddings are computed in one batch when the embedder enables and supports
    it; otherwise (or when the batch call fails for a reason other than rate
    limiting) each document is embedded individually. Documents without an
    embedding, and documents whose insert raises, are logged and skipped.

    Args:
        content_hash (str): Hash stored on every object and mixed into its UUID.
        documents (List[Document]): List of documents to insert.
        filters (Optional[Dict[str, Any]]): Extra key/values merged into each
            document's stored metadata, matching the behaviour of ``insert``.

    Raises:
        Exception: Re-raised from the batch embedding call when the error
            message looks like a rate-limit error.
    """
    log_debug(f"Inserting {len(documents)} documents into Weaviate asynchronously.")
    if not documents:
        return

    if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
        # Use batch embedding when enabled and supported
        try:
            doc_contents = [doc.content for doc in documents]
            embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)

            # Assign the pre-computed embeddings positionally
            for j, doc in enumerate(documents):
                try:
                    if j < len(embeddings):
                        doc.embedding = embeddings[j]
                        doc.usage = usages[j] if j < len(usages) else None
                except Exception as e:
                    logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")

        except Exception as e:
            # On a rate limit, falling back to per-document calls would only make things worse
            error_str = str(e).lower()
            is_rate_limit = any(
                phrase in error_str
                for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
            )

            if is_rate_limit:
                logger.error(f"Rate limit detected during batch embedding. {e}")
                raise
            logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
            embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
            await asyncio.gather(*embed_tasks, return_exceptions=True)
    else:
        # Use individual embedding
        embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
        await asyncio.gather(*embed_tasks, return_exceptions=True)

    client = await self.get_async_client()
    try:
        collection = client.collections.get(self.collection)

        for document in documents:
            try:
                if document.embedding is None:
                    logger.error(f"Document embedding is None: {document.name}")
                    continue

                cleaned_content = document.content.replace("\x00", "\ufffd")
                # Include content_hash in ID to ensure uniqueness across different content hashes
                base_id = document.id or md5(cleaned_content.encode()).hexdigest()
                record_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
                doc_uuid = uuid.UUID(hex=record_id[:32])

                # Merge filters into a copy of the metadata so they are stored
                # with the object (previously the filters argument was ignored).
                meta_data = dict(document.meta_data or {})
                if filters:
                    meta_data.update(filters)
                meta_data_str = json.dumps(meta_data) if meta_data else None

                properties = {
                    "name": document.name,
                    "content": cleaned_content,
                    "meta_data": meta_data_str,
                    "content_id": document.content_id,
                    "content_hash": content_hash,
                }

                await collection.data.insert(properties=properties, vector=document.embedding, uuid=doc_uuid)

                log_debug(f"Inserted document asynchronously: {document.name}")

            except Exception as e:
                logger.error(f"Error inserting document {document.name}: {str(e)}")
    finally:
        await client.close()
|
|
369
|
+
|
|
370
|
+
def upsert(self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
    """Replace any objects stored under ``content_hash`` with the given documents.

    Existing objects with this content hash are deleted first, then the
    documents are inserted.

    Args:
        content_hash (str): Hash identifying the content being replaced.
        documents (List[Document]): List of documents to upsert.
        filters (Optional[Dict[str, Any]]): Passed through to ``insert``.
    """
    log_debug(f"Upserting {len(documents)} documents into Weaviate.")
    already_stored = self.content_hash_exists(content_hash)
    if already_stored:
        self._delete_by_content_hash(content_hash)
    self.insert(content_hash=content_hash, documents=documents, filters=filters)
|
|
382
|
+
|
|
383
|
+
async def async_upsert(
    self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
) -> None:
    """Replace any objects stored under ``content_hash``, inserting asynchronously.

    The existence check and the delete use the synchronous client; only the
    insert is awaited.

    Args:
        content_hash (str): Hash identifying the content being replaced.
        documents (List[Document]): List of documents to upsert.
        filters (Optional[Dict[str, Any]]): Passed through to ``async_insert``.
    """
    already_stored = self.content_hash_exists(content_hash)
    if already_stored:
        self._delete_by_content_hash(content_hash)
    await self.async_insert(content_hash=content_hash, documents=documents, filters=filters)
|
|
399
|
+
|
|
400
|
+
def search(
    self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
) -> List[Document]:
    """Dispatch a search to the method matching ``self.search_type``.

    Args:
        query (str): The search query.
        limit (int): Maximum number of results to return.
        filters (Optional[Union[Dict[str, Any], List[FilterExpr]]]): Filters to
            apply. A list of filter expressions is dropped with a warning.

    Returns:
        List[Document]: Matching documents, or an empty list when the
        configured search type is not recognised.
    """
    if isinstance(filters, List):
        log_warning("Filters Expressions are not supported in Weaviate. No filters will be applied.")
        filters = None

    mode = self.search_type
    if mode == SearchType.vector:
        return self.vector_search(query, limit, filters)
    if mode == SearchType.keyword:
        return self.keyword_search(query, limit, filters)
    if mode == SearchType.hybrid:
        return self.hybrid_search(query, limit, filters)

    logger.error(f"Invalid search type '{self.search_type}'.")
    return []
|
|
426
|
+
|
|
427
|
+
async def async_search(
    self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
) -> List[Document]:
    """Asynchronously dispatch a search to the method matching ``self.search_type``.

    Args:
        query (str): The search query.
        limit (int): Maximum number of results to return.
        filters (Optional[Union[Dict[str, Any], List[FilterExpr]]]): Filters to
            apply. A list of filter expressions is dropped with a warning.

    Returns:
        List[Document]: Matching documents, or an empty list when the
        configured search type is not recognised.
    """
    if isinstance(filters, List):
        log_warning("Filters Expressions are not supported in Weaviate. No filters will be applied.")
        filters = None

    mode = self.search_type
    if mode == SearchType.vector:
        return await self.async_vector_search(query, limit, filters)
    if mode == SearchType.keyword:
        return await self.async_keyword_search(query, limit, filters)
    if mode == SearchType.hybrid:
        return await self.async_hybrid_search(query, limit, filters)

    logger.error(f"Invalid search type '{self.search_type}'.")
    return []
|
|
453
|
+
|
|
454
|
+
def vector_search(
    self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
) -> List[Document]:
    """Perform a vector (near-vector) search in Weaviate.

    The query is embedded with ``self.embedder`` and results are optionally
    reranked with ``self.reranker``. Any error is logged and yields an empty
    list. The sync client is closed afterwards.

    Args:
        query (str): The search query.
        limit (int): Maximum number of results to return.
        filters (Optional[Union[Dict[str, Any], List[FilterExpr]]]): Filters
            passed to ``_build_filter_expression``.

    Returns:
        List[Document]: List of matching documents (empty on error).
    """
    try:
        query_embedding = self.embedder.get_embedding(query)
        if query_embedding is None:
            logger.error(f"Error getting embedding for query: {query}")
            return []

        collection = self.get_client().collections.get(self.collection)
        filter_expr = self._build_filter_expression(filters)

        response = collection.query.near_vector(
            near_vector=query_embedding,
            limit=limit,
            return_properties=["name", "content", "meta_data", "content_id"],
            include_vector=True,
            filters=filter_expr,
        )

        search_results: List[Document] = self.get_search_results(response)

        if self.reranker:
            search_results = self.reranker.rerank(query=query, documents=search_results)

        log_info(f"Found {len(search_results)} documents")

        return search_results

    except Exception as e:
        logger.error(f"Error searching for documents: {e}")
        return []

    finally:
        # Close the stored client directly. Calling get_client() here could
        # raise and replace the `return []` above, and would open a connection
        # just to close it when no client had been created yet.
        if self.client is not None:
            self.client.close()
|
|
489
|
+
|
|
490
|
+
async def async_vector_search(
    self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
) -> List[Document]:
    """Perform a vector search in Weaviate asynchronously.

    The query embedding is computed before the client is obtained, so errors
    from the embedder or from client creation propagate to the caller. Errors
    during the query itself are logged and yield an empty list. The async
    client is closed on every path once it has been obtained.

    Args:
        query (str): The search query.
        limit (int): Maximum number of results to return.
        filters (Optional[Union[Dict[str, Any], List[FilterExpr]]]): Filters
            passed to ``_build_filter_expression``.

    Returns:
        List[Document]: List of matching documents (empty on query error).
    """
    query_embedding = self.embedder.get_embedding(query)
    if query_embedding is None:
        logger.error(f"Error getting embedding for query: {query}")
        return []

    client = await self.get_async_client()
    try:
        collection = client.collections.get(self.collection)
        filter_expr = self._build_filter_expression(filters)

        response = await collection.query.near_vector(
            near_vector=query_embedding,
            limit=limit,
            return_properties=["name", "content", "meta_data", "content_id"],
            include_vector=True,
            filters=filter_expr,
        )

        search_results = self.get_search_results(response)

        if self.reranker:
            search_results = self.reranker.rerank(query=query, documents=search_results)

        log_info(f"Found {len(search_results)} documents")

        return search_results

    except Exception as e:
        logger.error(f"Error searching for documents: {e}")
        return []

    finally:
        # Previously the client was only closed on success and leaked on error.
        await client.close()
|
|
535
|
+
|
|
536
|
+
def keyword_search(
    self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
) -> List[Document]:
    """Perform a keyword (BM25) search over the ``content`` property.

    Results are optionally reranked with ``self.reranker``. Any error is logged
    and yields an empty list. The sync client is closed afterwards.

    Args:
        query (str): The search query.
        limit (int): Maximum number of results to return.
        filters (Optional[Union[Dict[str, Any], List[FilterExpr]]]): Filters
            passed to ``_build_filter_expression``.

    Returns:
        List[Document]: List of matching documents (empty on error).
    """
    try:
        collection = self.get_client().collections.get(self.collection)
        filter_expr = self._build_filter_expression(filters)

        response = collection.query.bm25(
            query=query,
            query_properties=["content"],
            limit=limit,
            return_properties=["name", "content", "meta_data", "content_id"],
            include_vector=True,
            filters=filter_expr,
        )

        search_results: List[Document] = self.get_search_results(response)

        if self.reranker:
            search_results = self.reranker.rerank(query=query, documents=search_results)

        log_info(f"Found {len(search_results)} documents")

        return search_results

    except Exception as e:
        logger.error(f"Error searching for documents: {e}")
        return []

    finally:
        # Close the stored client directly. Calling get_client() here could
        # raise again after a connection failure and replace the `return []`.
        if self.client is not None:
            self.client.close()
|
|
567
|
+
|
|
568
|
+
async def async_keyword_search(
    self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
) -> List[Document]:
    """Perform a keyword (BM25) search over ``content`` asynchronously.

    Errors during the query are logged and yield an empty list. The async
    client is closed on every path once it has been obtained.

    Args:
        query (str): The search query.
        limit (int): Maximum number of results to return.
        filters (Optional[Union[Dict[str, Any], List[FilterExpr]]]): Filters
            passed to ``_build_filter_expression``.

    Returns:
        List[Document]: List of matching documents (empty on query error).
    """
    client = await self.get_async_client()
    try:
        collection = client.collections.get(self.collection)

        filter_expr = self._build_filter_expression(filters)
        response = await collection.query.bm25(
            query=query,
            query_properties=["content"],
            limit=limit,
            return_properties=["name", "content", "meta_data", "content_id"],
            include_vector=True,
            filters=filter_expr,
        )

        search_results = self.get_search_results(response)

        if self.reranker:
            search_results = self.reranker.rerank(query=query, documents=search_results)

        log_info(f"Found {len(search_results)} documents")

        return search_results

    except Exception as e:
        logger.error(f"Error searching for documents: {e}")
        return []

    finally:
        # Previously the client was only closed on success and leaked on error.
        await client.close()
|
|
609
|
+
|
|
610
|
+
def hybrid_search(
    self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
) -> List[Document]:
    """Perform a hybrid search combining vector and keyword search.

    The query text and its embedding are both sent to Weaviate, with
    ``self.hybrid_search_alpha`` passed as ``alpha``. Results are optionally
    reranked. Any error is logged and yields an empty list. The sync client is
    closed afterwards.

    Args:
        query (str): The search query.
        limit (int): Maximum number of results to return.
        filters (Optional[Union[Dict[str, Any], List[FilterExpr]]]): Filters
            passed to ``_build_filter_expression``.

    Returns:
        List[Document]: List of matching documents (empty on error).
    """
    try:
        query_embedding = self.embedder.get_embedding(query)
        if query_embedding is None:
            logger.error(f"Error getting embedding for query: {query}")
            return []

        collection = self.get_client().collections.get(self.collection)
        filter_expr = self._build_filter_expression(filters)

        response = collection.query.hybrid(
            query=query,
            vector=query_embedding,
            limit=limit,
            return_properties=["name", "content", "meta_data", "content_id"],
            include_vector=True,
            query_properties=["content"],
            alpha=self.hybrid_search_alpha,
            filters=filter_expr,
        )

        search_results: List[Document] = self.get_search_results(response)

        if self.reranker:
            search_results = self.reranker.rerank(query=query, documents=search_results)

        log_info(f"Found {len(search_results)} documents")

        return search_results

    except Exception as e:
        logger.error(f"Error searching for documents: {e}")
        return []

    finally:
        # Close the stored client directly. Calling get_client() here could
        # raise and replace the `return []` above, and would open a connection
        # just to close it when no client had been created yet.
        if self.client is not None:
            self.client.close()
|
|
648
|
+
|
|
649
|
+
async def async_hybrid_search(
    self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
) -> List[Document]:
    """Perform a hybrid search combining vector and keyword search asynchronously.

    The query embedding is computed before the client is obtained, so errors
    from the embedder or from client creation propagate to the caller. Errors
    during the query itself are logged and yield an empty list. The async
    client is closed on every path once it has been obtained.

    Args:
        query (str): The keyword query.
        limit (int): Maximum number of results to return.
        filters (Optional[Union[Dict[str, Any], List[FilterExpr]]]): Filters
            passed to ``_build_filter_expression``.

    Returns:
        List[Document]: List of matching documents (empty on query error).
    """
    query_embedding = self.embedder.get_embedding(query)
    if query_embedding is None:
        logger.error(f"Error getting embedding for query: {query}")
        return []

    client = await self.get_async_client()
    try:
        collection = client.collections.get(self.collection)

        filter_expr = self._build_filter_expression(filters)
        response = await collection.query.hybrid(
            query=query,
            vector=query_embedding,
            limit=limit,
            return_properties=["name", "content", "meta_data", "content_id"],
            include_vector=True,
            query_properties=["content"],
            alpha=self.hybrid_search_alpha,
            filters=filter_expr,
        )

        search_results = self.get_search_results(response)

        if self.reranker:
            search_results = self.reranker.rerank(query=query, documents=search_results)

        log_info(f"Found {len(search_results)} documents")

        return search_results

    except Exception as e:
        logger.error(f"Error searching for documents: {e}")
        return []

    finally:
        # Previously the client was only closed on success and leaked on error.
        await client.close()
|
|
697
|
+
|
|
698
|
+
def exists(self) -> bool:
    """Report whether ``self.collection`` exists in Weaviate.

    Returns:
        bool: The result of the client's collection existence check.
    """
    client = self.get_client()
    return client.collections.exists(self.collection)
|
|
701
|
+
|
|
702
|
+
async def async_exists(self) -> bool:
    """Asynchronously report whether ``self.collection`` exists in Weaviate.

    The async client is closed before returning, whether or not the check succeeds.

    Returns:
        bool: The result of the client's collection existence check.
    """
    client = await self.get_async_client()
    try:
        found = await client.collections.exists(self.collection)
        return found
    finally:
        await client.close()
|
|
709
|
+
|
|
710
|
+
def drop(self) -> None:
    """Delete the Weaviate collection if it exists; otherwise do nothing."""
    if not self.exists():
        return
    log_debug(f"Deleting collection '{self.collection}' from Weaviate.")
    self.get_client().collections.delete(self.collection)
|
|
715
|
+
|
|
716
|
+
async def async_drop(self) -> None:
    """Delete the Weaviate collection asynchronously if it exists.

    The existence check obtains and closes the async client; a client is then
    obtained again for the delete and closed afterwards.
    """
    collection_present = await self.async_exists()
    if not collection_present:
        return

    log_debug(f"Deleting collection '{self.collection}' from Weaviate asynchronously.")
    client = await self.get_async_client()
    try:
        await client.collections.delete(self.collection)
    finally:
        await client.close()
|
|
725
|
+
|
|
726
|
+
def optimize(self) -> None:
    """Optimize the vector database (e.g., rebuild indexes).

    Currently a no-op: nothing is done for Weaviate here.
    """
    return None
|
|
729
|
+
|
|
730
|
+
def delete(self) -> bool:
    """Delete all records by dropping the whole collection.

    Returns:
        bool: Always True once ``drop`` has returned.
    """
    self.drop()
    succeeded = True
    return succeeded
|
|
734
|
+
|
|
735
|
+
def delete_by_id(self, id: str) -> bool:
    """Delete a single object by its ID.

    An ID that cannot be parsed as a UUID, or that does not exist in the
    collection, is treated as already deleted.

    Args:
        id (str): A UUID string, or a 32-character hex string.

    Returns:
        bool: True if the object was deleted or did not exist, False if an
        unexpected error occurred.
    """
    try:
        try:
            if len(id) == 32:
                doc_uuid = uuid.UUID(hex=id[:32])
            else:
                doc_uuid = uuid.UUID(id)
        except ValueError:
            log_info(f"Invalid UUID format for ID '{id}' - treating as non-existent")
            return True

        target = self.get_client().collections.get(self.collection)

        if target.data.exists(doc_uuid):
            target.data.delete_by_id(doc_uuid)
            log_info(f"Deleted document with ID '{id}' from collection '{self.collection}'.")
        else:
            log_info(f"Document with ID {id} does not exist")
        return True
    except Exception as e:
        logger.error(f"Error deleting document by ID '{id}': {e}")
        return False
|
|
756
|
+
|
|
757
|
+
def delete_by_name(self, name: str) -> bool:
    """Delete every object whose ``name`` property equals the given name.

    Args:
        name (str): Name to match.

    Returns:
        bool: True if the delete call completed, False if it raised.
    """
    try:
        name_filter = Filter.by_property("name").equal(name)
        target = self.get_client().collections.get(self.collection)
        target.data.delete_many(where=name_filter)
    except Exception as e:
        logger.error(f"Error deleting documents by name '{name}': {e}")
        return False
    else:
        log_info(f"Deleted documents with name '{name}' from collection '{self.collection}'.")
        return True
|
|
770
|
+
|
|
771
|
+
def delete_by_metadata(self, metadata: Dict[str, Any]) -> bool:
    """Delete every object matching the filter built from the given metadata.

    Args:
        metadata (Dict[str, Any]): Key/values handed to ``_build_filter_expression``.

    Returns:
        bool: True if the delete call completed; False if no filter could be
        built or an error occurred.
    """
    try:
        target = self.get_client().collections.get(self.collection)

        where_clause = self._build_filter_expression(metadata)
        if where_clause is not None:
            target.data.delete_many(where=where_clause)
            log_info(f"Deleted documents with metadata '{metadata}' from collection '{self.collection}'.")
            return True

        # Without a filter there is nothing safe to delete.
        log_info(f"No valid filter could be built for metadata: {metadata}")
        return False
    except Exception as e:
        logger.error(f"Error deleting documents by metadata '{metadata}': {e}")
        return False
|
|
790
|
+
|
|
791
|
+
def delete_by_content_id(self, content_id: str) -> bool:
    """Delete every object whose ``content_id`` property equals the given ID.

    Args:
        content_id (str): Content ID to match.

    Returns:
        bool: True if the delete call completed, False if it raised.
    """
    try:
        id_filter = Filter.by_property("content_id").equal(content_id)
        target = self.get_client().collections.get(self.collection)
        target.data.delete_many(where=id_filter)
    except Exception as e:
        logger.error(f"Error deleting documents by content_id '{content_id}': {e}")
        return False
    else:
        log_info(f"Deleted documents with content_id '{content_id}' from collection '{self.collection}'.")
        return True
|
|
804
|
+
|
|
805
|
+
def delete_by_content_hash(self, content_hash: str) -> bool:
    """Delete every object whose ``content_hash`` property equals the given hash.

    Args:
        content_hash (str): Content hash to match.

    Returns:
        bool: True if the delete call completed, False if it raised.
    """
    try:
        target = self.get_client().collections.get(self.collection)
        hash_filter = Filter.by_property("content_hash").equal(content_hash)
        target.data.delete_many(where=hash_filter)
    except Exception as e:
        logger.error(f"Error deleting documents by content_hash '{content_hash}': {e}")
        return False
    return True
|
|
814
|
+
|
|
815
|
+
def get_vector_index_config(self, index_type: VectorIndex, distance_metric: Distance):
    """Return the vector index configuration for the given index type and distance.

    Only the configuration for the requested index type is constructed; the
    other index types are not built.

    Args:
        index_type (VectorIndex): Type of vector index (HNSW, FLAT, DYNAMIC).
        distance_metric (Distance): Distance metric (COSINE, DOT, etc). Its
            ``name`` is looked up on ``VectorDistances``.

    Returns:
        The object returned by the matching ``Configure.VectorIndex`` factory.

    Raises:
        AttributeError: If ``VectorDistances`` has no member named after the metric.
        KeyError: If ``index_type`` is not one of the supported index types.
    """
    # Translate the project-level metric into the Weaviate enum member of the same name
    distance = getattr(VectorDistances, distance_metric.name)

    # Map each index type to its factory so that only the selected one is invoked
    factories = {
        VectorIndex.HNSW: Configure.VectorIndex.hnsw,
        VectorIndex.FLAT: Configure.VectorIndex.flat,
        VectorIndex.DYNAMIC: Configure.VectorIndex.dynamic,
    }

    return factories[index_type](distance_metric=distance)
|
|
837
|
+
|
|
838
|
+
def get_search_results(self, response: Any) -> List[Document]:
    """
    Create search results from the Weaviate response.

    Args:
        response (Any): The Weaviate response object; its ``objects`` attribute is iterated.

    Returns:
        List[Document]: List of matching documents.
    """
    search_results: List[Document] = []
    for obj in response.objects:
        properties = obj.properties

        # meta_data is stored as a JSON string; a missing or empty value becomes {}
        raw_meta_data = properties.get("meta_data")
        meta_data = json.loads(raw_meta_data) if raw_meta_data else {}

        # The vector may be a dict of named vectors. Use .get() so a dict without a
        # "default" entry yields no embedding instead of raising KeyError.
        # NOTE(review): presumably the client returns an empty dict when vectors
        # were not requested in the query - confirm against the client docs.
        vector = obj.vector
        embedding = vector.get("default") if isinstance(vector, dict) else vector

        search_results.append(
            Document(
                name=properties.get("name"),
                meta_data=meta_data,
                content=properties.get("content", ""),
                embedder=self.embedder,
                embedding=embedding,
                content_id=properties.get("content_id"),
            )
        )

    return search_results
|
|
866
|
+
|
|
867
|
+
def upsert_available(self) -> bool:
    """Report whether upsert is supported.

    Returns:
        bool: Always True.
    """
    supports_upsert = True
    return supports_upsert
|
|
870
|
+
|
|
871
|
+
def _build_filter_expression(self, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]]):
    """
    Build a filter expression for Weaviate queries.

    Every key/value pair becomes a wildcard ``like`` condition on the ``meta_data``
    property, and all conditions are combined with ``&``.

    Args:
        filters (Optional[Union[Dict[str, Any], List[FilterExpr]]]): Dictionary of
            metadata filters to apply. A list of filter expressions is not supported:
            a warning is logged and no filter is applied.

    Returns:
        Optional[Filter]: The constructed filter expression, or None if no filters were
        provided, the filters were a list, or building the expression failed.
    """
    if not filters:
        return None
    if isinstance(filters, list):
        log_warning("Filters Expressions are not supported in Weaviate. No filters will be applied.")
        return None

    try:
        filter_expr = None
        for key, value in filters.items():
            # meta_data is matched as JSON text, so the value must be written the way
            # JSON writes it: 2020 (not "2020"), true (not "True"), null, [..].
            # NOTE(review): assumes meta_data is written with json.dumps and its
            # default separators - confirm against the insert path.
            if value is None or isinstance(value, (str, bool, int, float, list, tuple)):
                encoded_value = json.dumps(value)
            else:
                # Values JSON cannot encode keep the previous quoted str() form
                encoded_value = f'"{value}"'
            pattern = f'"{key}": {encoded_value}'

            # Wildcards on both sides turn this into a substring match, so a bare
            # number also matches longer numbers that start with the same digits.
            condition = Filter.by_property("meta_data").like(f"*{pattern}*")
            filter_expr = condition if filter_expr is None else filter_expr & condition

        return filter_expr

    except Exception as e:
        logger.error(f"Error building filter expression: {e}")
        return None
|
|
916
|
+
|
|
917
|
+
def id_exists(self, id: str) -> bool:
    """Tell whether an object with the given ID is present in the collection.

    Args:
        id (str): The document ID to look up. A 32-character value is parsed as a
            hex string; anything else is handed to ``uuid.UUID`` as-is.

    Returns:
        bool: True if the document exists. False if it does not, if the ID cannot be
        parsed as a UUID, or if the lookup raised.
    """
    try:
        if len(id) == 32:
            doc_uuid = uuid.UUID(hex=id[:32])
        else:
            doc_uuid = uuid.UUID(id)
        target = self.get_client().collections.get(self.collection)
        found = target.data.exists(doc_uuid)
    except ValueError:
        # An unparseable ID cannot belong to any stored object
        log_info(f"Invalid UUID format for ID '{id}' - treating as non-existent")
        return False
    except Exception as e:
        logger.error(f"Error checking if ID '{id}' exists: {e}")
        return False
    return found
|
|
936
|
+
|
|
937
|
+
def update_metadata(self, content_id: str, metadata: Dict[str, Any]) -> None:
    """
    Update the metadata for documents with the given content_id.

    The ``meta_data`` property is handled as a JSON string, matching how search
    results decode it: the existing value is decoded, merged with ``metadata``
    (new keys win), and written back JSON-encoded.

    Args:
        content_id (str): The content ID to update
        metadata (Dict[str, Any]): The metadata to update

    Raises:
        Exception: Any error raised while querying or updating is logged and re-raised.
    """
    try:
        weaviate_client = self.get_client()
        collection = weaviate_client.collections.get(self.collection)

        # Query for objects with the given content_id.
        # A single page is fetched, so at most 1000 objects are updated per call.
        query_result = collection.query.fetch_objects(  # type: ignore
            where=Filter.by_property("content_id").equal(content_id),
            limit=1000,
        )

        if not query_result.objects:
            logger.debug(f"No documents found with content_id: {content_id}")
            return

        updated_count = 0
        for obj in query_result.objects:
            # Work on a copy so the fetched object is left untouched
            updated_properties = dict(obj.properties or {})

            # Decode the stored metadata; anything that is not a JSON object
            # (missing, empty, malformed) is treated as "no existing metadata".
            existing_meta = updated_properties.get("meta_data")
            if isinstance(existing_meta, str) and existing_meta:
                try:
                    existing_meta = json.loads(existing_meta)
                except ValueError:
                    existing_meta = None
            if isinstance(existing_meta, dict):
                merged_meta = {**existing_meta, **metadata}
            else:
                merged_meta = dict(metadata)

            # Write back as a JSON string so readers that json.loads() it keep working
            updated_properties["meta_data"] = json.dumps(merged_meta)

            # NOTE(review): no other visible code reads or writes a "filters"
            # property - confirm it exists in the collection schema.
            existing_filters = updated_properties.get("filters")
            if isinstance(existing_filters, dict):
                updated_properties["filters"] = {**existing_filters, **metadata}
            else:
                updated_properties["filters"] = metadata

            # Update the object
            collection.data.update(uuid=obj.uuid, properties=updated_properties)
            updated_count += 1

        logger.debug(f"Updated metadata for {updated_count} documents with content_id: {content_id}")

    except Exception as e:
        logger.error(f"Error updating metadata for content_id '{content_id}': {e}")
        raise
|
|
989
|
+
|
|
990
|
+
def _delete_by_content_hash(self, content_hash: str) -> bool:
    """Remove all objects whose ``content_hash`` property equals the given hash.

    Args:
        content_hash (str): Hash value to match against the ``content_hash`` property.

    Returns:
        bool: True when the delete completed and was logged, False if anything raised.
    """
    try:
        target = self.get_client().collections.get(self.collection)
        # A single filtered delete removes every match in one call
        target.data.delete_many(where=Filter.by_property("content_hash").equal(content_hash))
        log_info(f"Deleted documents with content_hash '{content_hash}' from collection '{self.collection}'.")
    except Exception as e:
        logger.error(f"Error deleting documents by content_hash '{content_hash}': {e}")
        return False
    return True
|
|
1006
|
+
|
|
1007
|
+
def get_supported_search_types(self) -> List[str]:
    """List the search modes this vector database can serve.

    Returns:
        List[str]: The vector, keyword and hybrid ``SearchType`` members, in that order.
    """
    supported = [SearchType.vector, SearchType.keyword, SearchType.hybrid]
    return supported
|