agno 2.2.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/__init__.py +8 -0
- agno/agent/__init__.py +51 -0
- agno/agent/agent.py +10405 -0
- agno/api/__init__.py +0 -0
- agno/api/agent.py +28 -0
- agno/api/api.py +40 -0
- agno/api/evals.py +22 -0
- agno/api/os.py +17 -0
- agno/api/routes.py +13 -0
- agno/api/schemas/__init__.py +9 -0
- agno/api/schemas/agent.py +16 -0
- agno/api/schemas/evals.py +16 -0
- agno/api/schemas/os.py +14 -0
- agno/api/schemas/response.py +6 -0
- agno/api/schemas/team.py +16 -0
- agno/api/schemas/utils.py +21 -0
- agno/api/schemas/workflows.py +16 -0
- agno/api/settings.py +53 -0
- agno/api/team.py +30 -0
- agno/api/workflow.py +28 -0
- agno/cloud/aws/base.py +214 -0
- agno/cloud/aws/s3/__init__.py +2 -0
- agno/cloud/aws/s3/api_client.py +43 -0
- agno/cloud/aws/s3/bucket.py +195 -0
- agno/cloud/aws/s3/object.py +57 -0
- agno/culture/__init__.py +3 -0
- agno/culture/manager.py +956 -0
- agno/db/__init__.py +24 -0
- agno/db/async_postgres/__init__.py +3 -0
- agno/db/base.py +598 -0
- agno/db/dynamo/__init__.py +3 -0
- agno/db/dynamo/dynamo.py +2042 -0
- agno/db/dynamo/schemas.py +314 -0
- agno/db/dynamo/utils.py +743 -0
- agno/db/firestore/__init__.py +3 -0
- agno/db/firestore/firestore.py +1795 -0
- agno/db/firestore/schemas.py +140 -0
- agno/db/firestore/utils.py +376 -0
- agno/db/gcs_json/__init__.py +3 -0
- agno/db/gcs_json/gcs_json_db.py +1335 -0
- agno/db/gcs_json/utils.py +228 -0
- agno/db/in_memory/__init__.py +3 -0
- agno/db/in_memory/in_memory_db.py +1160 -0
- agno/db/in_memory/utils.py +230 -0
- agno/db/json/__init__.py +3 -0
- agno/db/json/json_db.py +1328 -0
- agno/db/json/utils.py +230 -0
- agno/db/migrations/__init__.py +0 -0
- agno/db/migrations/v1_to_v2.py +635 -0
- agno/db/mongo/__init__.py +17 -0
- agno/db/mongo/async_mongo.py +2026 -0
- agno/db/mongo/mongo.py +1982 -0
- agno/db/mongo/schemas.py +87 -0
- agno/db/mongo/utils.py +259 -0
- agno/db/mysql/__init__.py +3 -0
- agno/db/mysql/mysql.py +2308 -0
- agno/db/mysql/schemas.py +138 -0
- agno/db/mysql/utils.py +355 -0
- agno/db/postgres/__init__.py +4 -0
- agno/db/postgres/async_postgres.py +1927 -0
- agno/db/postgres/postgres.py +2260 -0
- agno/db/postgres/schemas.py +139 -0
- agno/db/postgres/utils.py +442 -0
- agno/db/redis/__init__.py +3 -0
- agno/db/redis/redis.py +1660 -0
- agno/db/redis/schemas.py +123 -0
- agno/db/redis/utils.py +346 -0
- agno/db/schemas/__init__.py +4 -0
- agno/db/schemas/culture.py +120 -0
- agno/db/schemas/evals.py +33 -0
- agno/db/schemas/knowledge.py +40 -0
- agno/db/schemas/memory.py +46 -0
- agno/db/schemas/metrics.py +0 -0
- agno/db/singlestore/__init__.py +3 -0
- agno/db/singlestore/schemas.py +130 -0
- agno/db/singlestore/singlestore.py +2272 -0
- agno/db/singlestore/utils.py +384 -0
- agno/db/sqlite/__init__.py +4 -0
- agno/db/sqlite/async_sqlite.py +2293 -0
- agno/db/sqlite/schemas.py +133 -0
- agno/db/sqlite/sqlite.py +2288 -0
- agno/db/sqlite/utils.py +431 -0
- agno/db/surrealdb/__init__.py +3 -0
- agno/db/surrealdb/metrics.py +292 -0
- agno/db/surrealdb/models.py +309 -0
- agno/db/surrealdb/queries.py +71 -0
- agno/db/surrealdb/surrealdb.py +1353 -0
- agno/db/surrealdb/utils.py +147 -0
- agno/db/utils.py +116 -0
- agno/debug.py +18 -0
- agno/eval/__init__.py +14 -0
- agno/eval/accuracy.py +834 -0
- agno/eval/performance.py +773 -0
- agno/eval/reliability.py +306 -0
- agno/eval/utils.py +119 -0
- agno/exceptions.py +161 -0
- agno/filters.py +354 -0
- agno/guardrails/__init__.py +6 -0
- agno/guardrails/base.py +19 -0
- agno/guardrails/openai.py +144 -0
- agno/guardrails/pii.py +94 -0
- agno/guardrails/prompt_injection.py +52 -0
- agno/integrations/__init__.py +0 -0
- agno/integrations/discord/__init__.py +3 -0
- agno/integrations/discord/client.py +203 -0
- agno/knowledge/__init__.py +5 -0
- agno/knowledge/chunking/__init__.py +0 -0
- agno/knowledge/chunking/agentic.py +79 -0
- agno/knowledge/chunking/document.py +91 -0
- agno/knowledge/chunking/fixed.py +57 -0
- agno/knowledge/chunking/markdown.py +151 -0
- agno/knowledge/chunking/recursive.py +63 -0
- agno/knowledge/chunking/row.py +39 -0
- agno/knowledge/chunking/semantic.py +86 -0
- agno/knowledge/chunking/strategy.py +165 -0
- agno/knowledge/content.py +74 -0
- agno/knowledge/document/__init__.py +5 -0
- agno/knowledge/document/base.py +58 -0
- agno/knowledge/embedder/__init__.py +5 -0
- agno/knowledge/embedder/aws_bedrock.py +343 -0
- agno/knowledge/embedder/azure_openai.py +210 -0
- agno/knowledge/embedder/base.py +23 -0
- agno/knowledge/embedder/cohere.py +323 -0
- agno/knowledge/embedder/fastembed.py +62 -0
- agno/knowledge/embedder/fireworks.py +13 -0
- agno/knowledge/embedder/google.py +258 -0
- agno/knowledge/embedder/huggingface.py +94 -0
- agno/knowledge/embedder/jina.py +182 -0
- agno/knowledge/embedder/langdb.py +22 -0
- agno/knowledge/embedder/mistral.py +206 -0
- agno/knowledge/embedder/nebius.py +13 -0
- agno/knowledge/embedder/ollama.py +154 -0
- agno/knowledge/embedder/openai.py +195 -0
- agno/knowledge/embedder/sentence_transformer.py +63 -0
- agno/knowledge/embedder/together.py +13 -0
- agno/knowledge/embedder/vllm.py +262 -0
- agno/knowledge/embedder/voyageai.py +165 -0
- agno/knowledge/knowledge.py +1988 -0
- agno/knowledge/reader/__init__.py +7 -0
- agno/knowledge/reader/arxiv_reader.py +81 -0
- agno/knowledge/reader/base.py +95 -0
- agno/knowledge/reader/csv_reader.py +166 -0
- agno/knowledge/reader/docx_reader.py +82 -0
- agno/knowledge/reader/field_labeled_csv_reader.py +292 -0
- agno/knowledge/reader/firecrawl_reader.py +201 -0
- agno/knowledge/reader/json_reader.py +87 -0
- agno/knowledge/reader/markdown_reader.py +137 -0
- agno/knowledge/reader/pdf_reader.py +431 -0
- agno/knowledge/reader/pptx_reader.py +101 -0
- agno/knowledge/reader/reader_factory.py +313 -0
- agno/knowledge/reader/s3_reader.py +89 -0
- agno/knowledge/reader/tavily_reader.py +194 -0
- agno/knowledge/reader/text_reader.py +115 -0
- agno/knowledge/reader/web_search_reader.py +372 -0
- agno/knowledge/reader/website_reader.py +455 -0
- agno/knowledge/reader/wikipedia_reader.py +59 -0
- agno/knowledge/reader/youtube_reader.py +78 -0
- agno/knowledge/remote_content/__init__.py +0 -0
- agno/knowledge/remote_content/remote_content.py +88 -0
- agno/knowledge/reranker/__init__.py +3 -0
- agno/knowledge/reranker/base.py +14 -0
- agno/knowledge/reranker/cohere.py +64 -0
- agno/knowledge/reranker/infinity.py +195 -0
- agno/knowledge/reranker/sentence_transformer.py +54 -0
- agno/knowledge/types.py +39 -0
- agno/knowledge/utils.py +189 -0
- agno/media.py +462 -0
- agno/memory/__init__.py +3 -0
- agno/memory/manager.py +1327 -0
- agno/models/__init__.py +0 -0
- agno/models/aimlapi/__init__.py +5 -0
- agno/models/aimlapi/aimlapi.py +45 -0
- agno/models/anthropic/__init__.py +5 -0
- agno/models/anthropic/claude.py +757 -0
- agno/models/aws/__init__.py +15 -0
- agno/models/aws/bedrock.py +701 -0
- agno/models/aws/claude.py +378 -0
- agno/models/azure/__init__.py +18 -0
- agno/models/azure/ai_foundry.py +485 -0
- agno/models/azure/openai_chat.py +131 -0
- agno/models/base.py +2175 -0
- agno/models/cerebras/__init__.py +12 -0
- agno/models/cerebras/cerebras.py +501 -0
- agno/models/cerebras/cerebras_openai.py +112 -0
- agno/models/cohere/__init__.py +5 -0
- agno/models/cohere/chat.py +389 -0
- agno/models/cometapi/__init__.py +5 -0
- agno/models/cometapi/cometapi.py +57 -0
- agno/models/dashscope/__init__.py +5 -0
- agno/models/dashscope/dashscope.py +91 -0
- agno/models/deepinfra/__init__.py +5 -0
- agno/models/deepinfra/deepinfra.py +28 -0
- agno/models/deepseek/__init__.py +5 -0
- agno/models/deepseek/deepseek.py +61 -0
- agno/models/defaults.py +1 -0
- agno/models/fireworks/__init__.py +5 -0
- agno/models/fireworks/fireworks.py +26 -0
- agno/models/google/__init__.py +5 -0
- agno/models/google/gemini.py +1085 -0
- agno/models/groq/__init__.py +5 -0
- agno/models/groq/groq.py +556 -0
- agno/models/huggingface/__init__.py +5 -0
- agno/models/huggingface/huggingface.py +491 -0
- agno/models/ibm/__init__.py +5 -0
- agno/models/ibm/watsonx.py +422 -0
- agno/models/internlm/__init__.py +3 -0
- agno/models/internlm/internlm.py +26 -0
- agno/models/langdb/__init__.py +1 -0
- agno/models/langdb/langdb.py +48 -0
- agno/models/litellm/__init__.py +14 -0
- agno/models/litellm/chat.py +468 -0
- agno/models/litellm/litellm_openai.py +25 -0
- agno/models/llama_cpp/__init__.py +5 -0
- agno/models/llama_cpp/llama_cpp.py +22 -0
- agno/models/lmstudio/__init__.py +5 -0
- agno/models/lmstudio/lmstudio.py +25 -0
- agno/models/message.py +434 -0
- agno/models/meta/__init__.py +12 -0
- agno/models/meta/llama.py +475 -0
- agno/models/meta/llama_openai.py +78 -0
- agno/models/metrics.py +120 -0
- agno/models/mistral/__init__.py +5 -0
- agno/models/mistral/mistral.py +432 -0
- agno/models/nebius/__init__.py +3 -0
- agno/models/nebius/nebius.py +54 -0
- agno/models/nexus/__init__.py +3 -0
- agno/models/nexus/nexus.py +22 -0
- agno/models/nvidia/__init__.py +5 -0
- agno/models/nvidia/nvidia.py +28 -0
- agno/models/ollama/__init__.py +5 -0
- agno/models/ollama/chat.py +441 -0
- agno/models/openai/__init__.py +9 -0
- agno/models/openai/chat.py +883 -0
- agno/models/openai/like.py +27 -0
- agno/models/openai/responses.py +1050 -0
- agno/models/openrouter/__init__.py +5 -0
- agno/models/openrouter/openrouter.py +66 -0
- agno/models/perplexity/__init__.py +5 -0
- agno/models/perplexity/perplexity.py +187 -0
- agno/models/portkey/__init__.py +3 -0
- agno/models/portkey/portkey.py +81 -0
- agno/models/requesty/__init__.py +5 -0
- agno/models/requesty/requesty.py +52 -0
- agno/models/response.py +199 -0
- agno/models/sambanova/__init__.py +5 -0
- agno/models/sambanova/sambanova.py +28 -0
- agno/models/siliconflow/__init__.py +5 -0
- agno/models/siliconflow/siliconflow.py +25 -0
- agno/models/together/__init__.py +5 -0
- agno/models/together/together.py +25 -0
- agno/models/utils.py +266 -0
- agno/models/vercel/__init__.py +3 -0
- agno/models/vercel/v0.py +26 -0
- agno/models/vertexai/__init__.py +0 -0
- agno/models/vertexai/claude.py +70 -0
- agno/models/vllm/__init__.py +3 -0
- agno/models/vllm/vllm.py +78 -0
- agno/models/xai/__init__.py +3 -0
- agno/models/xai/xai.py +113 -0
- agno/os/__init__.py +3 -0
- agno/os/app.py +876 -0
- agno/os/auth.py +57 -0
- agno/os/config.py +104 -0
- agno/os/interfaces/__init__.py +1 -0
- agno/os/interfaces/a2a/__init__.py +3 -0
- agno/os/interfaces/a2a/a2a.py +42 -0
- agno/os/interfaces/a2a/router.py +250 -0
- agno/os/interfaces/a2a/utils.py +924 -0
- agno/os/interfaces/agui/__init__.py +3 -0
- agno/os/interfaces/agui/agui.py +47 -0
- agno/os/interfaces/agui/router.py +144 -0
- agno/os/interfaces/agui/utils.py +534 -0
- agno/os/interfaces/base.py +25 -0
- agno/os/interfaces/slack/__init__.py +3 -0
- agno/os/interfaces/slack/router.py +148 -0
- agno/os/interfaces/slack/security.py +30 -0
- agno/os/interfaces/slack/slack.py +47 -0
- agno/os/interfaces/whatsapp/__init__.py +3 -0
- agno/os/interfaces/whatsapp/router.py +211 -0
- agno/os/interfaces/whatsapp/security.py +53 -0
- agno/os/interfaces/whatsapp/whatsapp.py +36 -0
- agno/os/mcp.py +292 -0
- agno/os/middleware/__init__.py +7 -0
- agno/os/middleware/jwt.py +233 -0
- agno/os/router.py +1763 -0
- agno/os/routers/__init__.py +3 -0
- agno/os/routers/evals/__init__.py +3 -0
- agno/os/routers/evals/evals.py +430 -0
- agno/os/routers/evals/schemas.py +142 -0
- agno/os/routers/evals/utils.py +162 -0
- agno/os/routers/health.py +31 -0
- agno/os/routers/home.py +52 -0
- agno/os/routers/knowledge/__init__.py +3 -0
- agno/os/routers/knowledge/knowledge.py +997 -0
- agno/os/routers/knowledge/schemas.py +178 -0
- agno/os/routers/memory/__init__.py +3 -0
- agno/os/routers/memory/memory.py +515 -0
- agno/os/routers/memory/schemas.py +62 -0
- agno/os/routers/metrics/__init__.py +3 -0
- agno/os/routers/metrics/metrics.py +190 -0
- agno/os/routers/metrics/schemas.py +47 -0
- agno/os/routers/session/__init__.py +3 -0
- agno/os/routers/session/session.py +997 -0
- agno/os/schema.py +1055 -0
- agno/os/settings.py +43 -0
- agno/os/utils.py +630 -0
- agno/py.typed +0 -0
- agno/reasoning/__init__.py +0 -0
- agno/reasoning/anthropic.py +80 -0
- agno/reasoning/azure_ai_foundry.py +67 -0
- agno/reasoning/deepseek.py +63 -0
- agno/reasoning/default.py +97 -0
- agno/reasoning/gemini.py +73 -0
- agno/reasoning/groq.py +71 -0
- agno/reasoning/helpers.py +63 -0
- agno/reasoning/ollama.py +67 -0
- agno/reasoning/openai.py +86 -0
- agno/reasoning/step.py +31 -0
- agno/reasoning/vertexai.py +76 -0
- agno/run/__init__.py +6 -0
- agno/run/agent.py +787 -0
- agno/run/base.py +229 -0
- agno/run/cancel.py +81 -0
- agno/run/messages.py +32 -0
- agno/run/team.py +753 -0
- agno/run/workflow.py +708 -0
- agno/session/__init__.py +10 -0
- agno/session/agent.py +295 -0
- agno/session/summary.py +265 -0
- agno/session/team.py +392 -0
- agno/session/workflow.py +205 -0
- agno/team/__init__.py +37 -0
- agno/team/team.py +8793 -0
- agno/tools/__init__.py +10 -0
- agno/tools/agentql.py +120 -0
- agno/tools/airflow.py +69 -0
- agno/tools/api.py +122 -0
- agno/tools/apify.py +314 -0
- agno/tools/arxiv.py +127 -0
- agno/tools/aws_lambda.py +53 -0
- agno/tools/aws_ses.py +66 -0
- agno/tools/baidusearch.py +89 -0
- agno/tools/bitbucket.py +292 -0
- agno/tools/brandfetch.py +213 -0
- agno/tools/bravesearch.py +106 -0
- agno/tools/brightdata.py +367 -0
- agno/tools/browserbase.py +209 -0
- agno/tools/calcom.py +255 -0
- agno/tools/calculator.py +151 -0
- agno/tools/cartesia.py +187 -0
- agno/tools/clickup.py +244 -0
- agno/tools/confluence.py +240 -0
- agno/tools/crawl4ai.py +158 -0
- agno/tools/csv_toolkit.py +185 -0
- agno/tools/dalle.py +110 -0
- agno/tools/daytona.py +475 -0
- agno/tools/decorator.py +262 -0
- agno/tools/desi_vocal.py +108 -0
- agno/tools/discord.py +161 -0
- agno/tools/docker.py +716 -0
- agno/tools/duckdb.py +379 -0
- agno/tools/duckduckgo.py +91 -0
- agno/tools/e2b.py +703 -0
- agno/tools/eleven_labs.py +196 -0
- agno/tools/email.py +67 -0
- agno/tools/evm.py +129 -0
- agno/tools/exa.py +396 -0
- agno/tools/fal.py +127 -0
- agno/tools/file.py +240 -0
- agno/tools/file_generation.py +350 -0
- agno/tools/financial_datasets.py +288 -0
- agno/tools/firecrawl.py +143 -0
- agno/tools/function.py +1187 -0
- agno/tools/giphy.py +93 -0
- agno/tools/github.py +1760 -0
- agno/tools/gmail.py +922 -0
- agno/tools/google_bigquery.py +117 -0
- agno/tools/google_drive.py +270 -0
- agno/tools/google_maps.py +253 -0
- agno/tools/googlecalendar.py +674 -0
- agno/tools/googlesearch.py +98 -0
- agno/tools/googlesheets.py +377 -0
- agno/tools/hackernews.py +77 -0
- agno/tools/jina.py +101 -0
- agno/tools/jira.py +170 -0
- agno/tools/knowledge.py +218 -0
- agno/tools/linear.py +426 -0
- agno/tools/linkup.py +58 -0
- agno/tools/local_file_system.py +90 -0
- agno/tools/lumalab.py +183 -0
- agno/tools/mcp/__init__.py +10 -0
- agno/tools/mcp/mcp.py +331 -0
- agno/tools/mcp/multi_mcp.py +347 -0
- agno/tools/mcp/params.py +24 -0
- agno/tools/mcp_toolbox.py +284 -0
- agno/tools/mem0.py +193 -0
- agno/tools/memori.py +339 -0
- agno/tools/memory.py +419 -0
- agno/tools/mlx_transcribe.py +139 -0
- agno/tools/models/__init__.py +0 -0
- agno/tools/models/azure_openai.py +190 -0
- agno/tools/models/gemini.py +203 -0
- agno/tools/models/groq.py +158 -0
- agno/tools/models/morph.py +186 -0
- agno/tools/models/nebius.py +124 -0
- agno/tools/models_labs.py +195 -0
- agno/tools/moviepy_video.py +349 -0
- agno/tools/neo4j.py +134 -0
- agno/tools/newspaper.py +46 -0
- agno/tools/newspaper4k.py +93 -0
- agno/tools/notion.py +204 -0
- agno/tools/openai.py +202 -0
- agno/tools/openbb.py +160 -0
- agno/tools/opencv.py +321 -0
- agno/tools/openweather.py +233 -0
- agno/tools/oxylabs.py +385 -0
- agno/tools/pandas.py +102 -0
- agno/tools/parallel.py +314 -0
- agno/tools/postgres.py +257 -0
- agno/tools/pubmed.py +188 -0
- agno/tools/python.py +205 -0
- agno/tools/reasoning.py +283 -0
- agno/tools/reddit.py +467 -0
- agno/tools/replicate.py +117 -0
- agno/tools/resend.py +62 -0
- agno/tools/scrapegraph.py +222 -0
- agno/tools/searxng.py +152 -0
- agno/tools/serpapi.py +116 -0
- agno/tools/serper.py +255 -0
- agno/tools/shell.py +53 -0
- agno/tools/slack.py +136 -0
- agno/tools/sleep.py +20 -0
- agno/tools/spider.py +116 -0
- agno/tools/sql.py +154 -0
- agno/tools/streamlit/__init__.py +0 -0
- agno/tools/streamlit/components.py +113 -0
- agno/tools/tavily.py +254 -0
- agno/tools/telegram.py +48 -0
- agno/tools/todoist.py +218 -0
- agno/tools/tool_registry.py +1 -0
- agno/tools/toolkit.py +146 -0
- agno/tools/trafilatura.py +388 -0
- agno/tools/trello.py +274 -0
- agno/tools/twilio.py +186 -0
- agno/tools/user_control_flow.py +78 -0
- agno/tools/valyu.py +228 -0
- agno/tools/visualization.py +467 -0
- agno/tools/webbrowser.py +28 -0
- agno/tools/webex.py +76 -0
- agno/tools/website.py +54 -0
- agno/tools/webtools.py +45 -0
- agno/tools/whatsapp.py +286 -0
- agno/tools/wikipedia.py +63 -0
- agno/tools/workflow.py +278 -0
- agno/tools/x.py +335 -0
- agno/tools/yfinance.py +257 -0
- agno/tools/youtube.py +184 -0
- agno/tools/zendesk.py +82 -0
- agno/tools/zep.py +454 -0
- agno/tools/zoom.py +382 -0
- agno/utils/__init__.py +0 -0
- agno/utils/agent.py +820 -0
- agno/utils/audio.py +49 -0
- agno/utils/certs.py +27 -0
- agno/utils/code_execution.py +11 -0
- agno/utils/common.py +132 -0
- agno/utils/dttm.py +13 -0
- agno/utils/enum.py +22 -0
- agno/utils/env.py +11 -0
- agno/utils/events.py +696 -0
- agno/utils/format_str.py +16 -0
- agno/utils/functions.py +166 -0
- agno/utils/gemini.py +426 -0
- agno/utils/hooks.py +57 -0
- agno/utils/http.py +74 -0
- agno/utils/json_schema.py +234 -0
- agno/utils/knowledge.py +36 -0
- agno/utils/location.py +19 -0
- agno/utils/log.py +255 -0
- agno/utils/mcp.py +214 -0
- agno/utils/media.py +352 -0
- agno/utils/merge_dict.py +41 -0
- agno/utils/message.py +118 -0
- agno/utils/models/__init__.py +0 -0
- agno/utils/models/ai_foundry.py +43 -0
- agno/utils/models/claude.py +358 -0
- agno/utils/models/cohere.py +87 -0
- agno/utils/models/llama.py +78 -0
- agno/utils/models/mistral.py +98 -0
- agno/utils/models/openai_responses.py +140 -0
- agno/utils/models/schema_utils.py +153 -0
- agno/utils/models/watsonx.py +41 -0
- agno/utils/openai.py +257 -0
- agno/utils/pickle.py +32 -0
- agno/utils/pprint.py +178 -0
- agno/utils/print_response/__init__.py +0 -0
- agno/utils/print_response/agent.py +842 -0
- agno/utils/print_response/team.py +1724 -0
- agno/utils/print_response/workflow.py +1668 -0
- agno/utils/prompts.py +111 -0
- agno/utils/reasoning.py +108 -0
- agno/utils/response.py +163 -0
- agno/utils/response_iterator.py +17 -0
- agno/utils/safe_formatter.py +24 -0
- agno/utils/serialize.py +32 -0
- agno/utils/shell.py +22 -0
- agno/utils/streamlit.py +487 -0
- agno/utils/string.py +231 -0
- agno/utils/team.py +139 -0
- agno/utils/timer.py +41 -0
- agno/utils/tools.py +102 -0
- agno/utils/web.py +23 -0
- agno/utils/whatsapp.py +305 -0
- agno/utils/yaml_io.py +25 -0
- agno/vectordb/__init__.py +3 -0
- agno/vectordb/base.py +127 -0
- agno/vectordb/cassandra/__init__.py +5 -0
- agno/vectordb/cassandra/cassandra.py +501 -0
- agno/vectordb/cassandra/extra_param_mixin.py +11 -0
- agno/vectordb/cassandra/index.py +13 -0
- agno/vectordb/chroma/__init__.py +5 -0
- agno/vectordb/chroma/chromadb.py +929 -0
- agno/vectordb/clickhouse/__init__.py +9 -0
- agno/vectordb/clickhouse/clickhousedb.py +835 -0
- agno/vectordb/clickhouse/index.py +9 -0
- agno/vectordb/couchbase/__init__.py +3 -0
- agno/vectordb/couchbase/couchbase.py +1442 -0
- agno/vectordb/distance.py +7 -0
- agno/vectordb/lancedb/__init__.py +6 -0
- agno/vectordb/lancedb/lance_db.py +995 -0
- agno/vectordb/langchaindb/__init__.py +5 -0
- agno/vectordb/langchaindb/langchaindb.py +163 -0
- agno/vectordb/lightrag/__init__.py +5 -0
- agno/vectordb/lightrag/lightrag.py +388 -0
- agno/vectordb/llamaindex/__init__.py +3 -0
- agno/vectordb/llamaindex/llamaindexdb.py +166 -0
- agno/vectordb/milvus/__init__.py +4 -0
- agno/vectordb/milvus/milvus.py +1182 -0
- agno/vectordb/mongodb/__init__.py +9 -0
- agno/vectordb/mongodb/mongodb.py +1417 -0
- agno/vectordb/pgvector/__init__.py +12 -0
- agno/vectordb/pgvector/index.py +23 -0
- agno/vectordb/pgvector/pgvector.py +1462 -0
- agno/vectordb/pineconedb/__init__.py +5 -0
- agno/vectordb/pineconedb/pineconedb.py +747 -0
- agno/vectordb/qdrant/__init__.py +5 -0
- agno/vectordb/qdrant/qdrant.py +1134 -0
- agno/vectordb/redis/__init__.py +9 -0
- agno/vectordb/redis/redisdb.py +694 -0
- agno/vectordb/search.py +7 -0
- agno/vectordb/singlestore/__init__.py +10 -0
- agno/vectordb/singlestore/index.py +41 -0
- agno/vectordb/singlestore/singlestore.py +763 -0
- agno/vectordb/surrealdb/__init__.py +3 -0
- agno/vectordb/surrealdb/surrealdb.py +699 -0
- agno/vectordb/upstashdb/__init__.py +5 -0
- agno/vectordb/upstashdb/upstashdb.py +718 -0
- agno/vectordb/weaviate/__init__.py +8 -0
- agno/vectordb/weaviate/index.py +15 -0
- agno/vectordb/weaviate/weaviate.py +1005 -0
- agno/workflow/__init__.py +23 -0
- agno/workflow/agent.py +299 -0
- agno/workflow/condition.py +738 -0
- agno/workflow/loop.py +735 -0
- agno/workflow/parallel.py +824 -0
- agno/workflow/router.py +702 -0
- agno/workflow/step.py +1432 -0
- agno/workflow/steps.py +592 -0
- agno/workflow/types.py +520 -0
- agno/workflow/workflow.py +4321 -0
- agno-2.2.13.dist-info/METADATA +614 -0
- agno-2.2.13.dist-info/RECORD +575 -0
- agno-2.2.13.dist-info/WHEEL +5 -0
- agno-2.2.13.dist-info/licenses/LICENSE +201 -0
- agno-2.2.13.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,995 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import json
|
|
3
|
+
from hashlib import md5
|
|
4
|
+
from os import getenv
|
|
5
|
+
from typing import Any, Dict, List, Optional, Union
|
|
6
|
+
|
|
7
|
+
try:
|
|
8
|
+
import lancedb
|
|
9
|
+
import pyarrow as pa
|
|
10
|
+
except ImportError:
|
|
11
|
+
raise ImportError("`lancedb` not installed. Please install using `pip install lancedb`")
|
|
12
|
+
|
|
13
|
+
from agno.filters import FilterExpr
|
|
14
|
+
from agno.knowledge.document import Document
|
|
15
|
+
from agno.knowledge.embedder import Embedder
|
|
16
|
+
from agno.knowledge.reranker.base import Reranker
|
|
17
|
+
from agno.utils.log import log_debug, log_info, log_warning, logger
|
|
18
|
+
from agno.vectordb.base import VectorDb
|
|
19
|
+
from agno.vectordb.distance import Distance
|
|
20
|
+
from agno.vectordb.search import SearchType
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class LanceDb(VectorDb):
|
|
24
|
+
"""
|
|
25
|
+
LanceDb class for managing vector operations with LanceDb
|
|
26
|
+
|
|
27
|
+
Args:
|
|
28
|
+
uri: The URI of the LanceDB database.
|
|
29
|
+
name: Name of the vector database.
|
|
30
|
+
description: Description of the vector database.
|
|
31
|
+
connection: The LanceDB connection to use.
|
|
32
|
+
table: The LanceDB table instance to use.
|
|
33
|
+
async_connection: The LanceDB async connection to use.
|
|
34
|
+
async_table: The LanceDB async table instance to use.
|
|
35
|
+
table_name: The name of the LanceDB table to use.
|
|
36
|
+
api_key: The API key to use for the LanceDB connection.
|
|
37
|
+
embedder: The embedder to use when embedding the document contents.
|
|
38
|
+
search_type: The search type to use when searching for documents.
|
|
39
|
+
distance: The distance metric to use when searching for documents.
|
|
40
|
+
nprobes: The number of probes to use when searching for documents.
|
|
41
|
+
reranker: The reranker to use when reranking documents.
|
|
42
|
+
use_tantivy: Whether to use Tantivy for full text search.
|
|
43
|
+
on_bad_vectors: What to do if the vector is bad. One of "error", "drop", "fill", "null".
|
|
44
|
+
fill_value: The value to fill the vector with if on_bad_vectors is "fill".
|
|
45
|
+
"""
|
|
46
|
+
|
|
47
|
+
    def __init__(
        self,
        uri: lancedb.URI = "/tmp/lancedb",
        name: Optional[str] = None,
        description: Optional[str] = None,
        id: Optional[str] = None,
        connection: Optional[lancedb.LanceDBConnection] = None,
        table: Optional[lancedb.db.LanceTable] = None,
        async_connection: Optional[lancedb.AsyncConnection] = None,
        async_table: Optional[lancedb.db.AsyncTable] = None,
        table_name: Optional[str] = None,
        api_key: Optional[str] = None,
        embedder: Optional[Embedder] = None,
        search_type: SearchType = SearchType.vector,
        distance: Distance = Distance.cosine,
        nprobes: Optional[int] = None,
        reranker: Optional[Reranker] = None,
        use_tantivy: bool = True,
        on_bad_vectors: Optional[str] = None,  # One of "error", "drop", "fill", "null".
        fill_value: Optional[float] = None,  # Only used if on_bad_vectors is "fill"
    ):
        # Dynamic ID generation based on unique identifiers:
        # derive a deterministic id from the uri + table name so the same
        # database/table pair always maps to the same vector-db id.
        if id is None:
            from agno.utils.string import generate_id

            table_identifier = table_name or "default_table"
            seed = f"{uri}#{table_identifier}"
            id = generate_id(seed)

        # Initialize base class with name, description, and generated ID
        super().__init__(id=id, name=name, description=description)

        # Embedder for embedding the document contents; defaults to OpenAI.
        if embedder is None:
            from agno.knowledge.embedder.openai import OpenAIEmbedder

            embedder = OpenAIEmbedder()
            log_info("Embedder not provided, using OpenAIEmbedder as default.")
        self.embedder: Embedder = embedder
        self.dimensions: Optional[int] = self.embedder.dimensions

        # The table schema needs a fixed vector width, so dimensions are required.
        if self.dimensions is None:
            raise ValueError("Embedder.dimensions must be set.")

        # Search type
        self.search_type: SearchType = search_type
        # Distance metric
        self.distance: Distance = distance

        # Remote LanceDB connection details
        self.api_key: Optional[str] = api_key

        # LanceDB connection details; fall back to a new sync connection when
        # one is not supplied by the caller.
        self.uri: lancedb.URI = uri
        self.connection: lancedb.DBConnection = connection or lancedb.connect(uri=self.uri, api_key=api_key)
        self.table: Optional[lancedb.db.LanceTable] = table

        self.async_connection: Optional[lancedb.AsyncConnection] = async_connection
        self.async_table: Optional[lancedb.db.AsyncTable] = async_table

        if table_name and table_name in self.connection.table_names():
            # Open the table if it exists
            try:
                self.table = self.connection.open_table(name=table_name)
                self.table_name = self.table.name
                # Assumes the vector column is first and the id column second
                # in the stored schema (matches _base_schema) — TODO confirm for
                # externally created tables.
                self._vector_col = self.table.schema.names[0]
                self._id = self.table.schema.names[1]  # type: ignore
            except ValueError as e:
                # Table might have been dropped by async operations but sync connection hasn't updated
                if "was not found" in str(e):
                    log_debug(f"Table {table_name} listed but not accessible, will create if needed")
                    self.table = None
                else:
                    raise

        # LanceDB table details
        if self.table is None:
            # LanceDB table details
            # NOTE(review): when `table` was provided, self.table was already set
            # above, so this branch is reachable only if open_table() failed and
            # reset self.table to None — verify this interplay is intended.
            if table:
                if not isinstance(table, lancedb.db.LanceTable):
                    # NOTE(review): passing two args here makes the ValueError
                    # message a tuple, not a single string — likely meant to be
                    # one concatenated message.
                    raise ValueError(
                        "table should be an instance of lancedb.db.LanceTable, ",
                        f"got {type(table)}",
                    )
                self.table = table
                self.table_name = self.table.name
                self._vector_col = self.table.schema.names[0]
                self._id = self.table.schema.names[1]  # type: ignore
            else:
                if not table_name:
                    raise ValueError("Either table or table_name should be provided.")
                self.table_name = table_name
                # Default column names used by _base_schema for new tables.
                self._id = "id"
                self._vector_col = "vector"
                self.table = self._init_table()

        self.reranker: Optional[Reranker] = reranker
        self.nprobes: Optional[int] = nprobes
        self.on_bad_vectors: Optional[str] = on_bad_vectors
        self.fill_value: Optional[float] = fill_value
        # Tracks whether a full-text-search index has been built on the table.
        self.fts_index_exists = False
        self.use_tantivy = use_tantivy

        # Tantivy is only required when full-text search will actually be used.
        if self.use_tantivy and (self.search_type in [SearchType.keyword, SearchType.hybrid]):
            try:
                import tantivy  # noqa: F401
            except ImportError:
                raise ImportError(
                    "Please install tantivy-py `pip install tantivy` to use the full text search feature."  # noqa: E501
                )

        log_debug(f"Initialized LanceDb with table: '{self.table_name}'")
|
|
160
|
+
def _prepare_vector(self, embedding) -> List[float]:
|
|
161
|
+
"""Prepare vector embedding for insertion, ensuring correct dimensions and type."""
|
|
162
|
+
if embedding is not None and len(embedding) > 0:
|
|
163
|
+
# Convert to list of floats
|
|
164
|
+
vector = [float(x) for x in embedding]
|
|
165
|
+
|
|
166
|
+
# Ensure vector has correct dimensions if specified
|
|
167
|
+
if self.dimensions:
|
|
168
|
+
if len(vector) != self.dimensions:
|
|
169
|
+
if len(vector) > self.dimensions:
|
|
170
|
+
# Truncate if too long
|
|
171
|
+
vector = vector[: self.dimensions]
|
|
172
|
+
log_debug(f"Truncated vector from {len(embedding)} to {self.dimensions} dimensions")
|
|
173
|
+
else:
|
|
174
|
+
# Pad with zeros if too short
|
|
175
|
+
vector.extend([0.0] * (self.dimensions - len(vector)))
|
|
176
|
+
log_debug(f"Padded vector from {len(embedding)} to {self.dimensions} dimensions")
|
|
177
|
+
|
|
178
|
+
return vector
|
|
179
|
+
else:
|
|
180
|
+
# Fallback if embedding is None or empty
|
|
181
|
+
return [0.0] * (self.dimensions or 1536)
|
|
182
|
+
|
|
183
|
+
    async def _get_async_connection(self) -> lancedb.AsyncConnection:
        """Get or create an async connection to LanceDB.

        Lazily connects on first use and, on that first connect, tries to open
        the configured table if it already exists. Returns the cached
        connection thereafter.
        """
        if self.async_connection is None:
            self.async_connection = await lancedb.connect_async(self.uri)
            # Only try to open table if it exists and we don't have it already
            if self.async_table is None:
                table_names = await self.async_connection.table_names()
                if self.table_name in table_names:
                    try:
                        self.async_table = await self.async_connection.open_table(self.table_name)
                    except ValueError:
                        # Table might have been dropped by another operation
                        # between listing and opening; treat as "not present".
                        pass
        return self.async_connection
|
198
|
+
def _refresh_sync_connection(self) -> None:
|
|
199
|
+
"""Refresh the sync connection to see changes made by async operations."""
|
|
200
|
+
try:
|
|
201
|
+
# Re-establish sync connection to see async changes
|
|
202
|
+
if self.connection and self.table_name in self.connection.table_names():
|
|
203
|
+
self.table = self.connection.open_table(self.table_name)
|
|
204
|
+
except Exception as e:
|
|
205
|
+
log_debug(f"Could not refresh sync connection: {e}")
|
|
206
|
+
# If refresh fails, we can still function but sync methods might not see async changes
|
|
207
|
+
|
|
208
|
+
def create(self) -> None:
|
|
209
|
+
"""Create the table if it does not exist."""
|
|
210
|
+
if not self.exists():
|
|
211
|
+
self.table = self._init_table()
|
|
212
|
+
|
|
213
|
+
async def async_create(self) -> None:
    """Create the table asynchronously if absent, falling back to sync creation on failure."""
    if await self.async_exists():
        return

    try:
        conn = await self._get_async_connection()
        schema = self._base_schema()

        log_debug(f"Creating table asynchronously: {self.table_name}")
        self.async_table = await conn.create_table(self.table_name, schema=schema, mode="overwrite", exist_ok=True)
        log_debug(f"Successfully created async table: {self.table_name}")
    except Exception as e:
        logger.error(f"Error creating async table: {e}")
        # Async creation failed; attempt the sync path before giving up
        try:
            log_debug("Falling back to sync table creation")
            self.table = self._init_table()
            log_debug("Sync table created successfully")
        except Exception as sync_e:
            logger.error(f"Sync table creation also failed: {sync_e}")
            raise
def _base_schema(self) -> pa.Schema:
    """Arrow schema for the table: vector column, string id, JSON-string payload."""
    # LanceDB wants a fixed-size list for the vector when the dimension is known
    if self.dimensions:
        vec_type = pa.list_(pa.float32(), self.dimensions)
    else:
        # Dimension unknown (rare): fall back to a variable-length float list
        vec_type = pa.list_(pa.float32())

    fields = [
        pa.field(self._vector_col, vec_type),
        pa.field(self._id, pa.string()),
        pa.field("payload", pa.string()),
    ]
    return pa.schema(fields)
def _init_table(self) -> lancedb.db.LanceTable:
    """Create (overwriting any existing) table on the sync connection and return its handle."""
    schema = self._base_schema()

    log_info(f"Creating table: {self.table_name}")
    # Remote LanceDB (API key present) does not accept exist_ok; local/OSS does
    if self.api_key or getenv("LANCEDB_API_KEY"):
        log_info("API key found, creating table in remote LanceDB")
        tbl = self.connection.create_table(name=self.table_name, schema=schema, mode="overwrite")  # type: ignore
    else:
        tbl = self.connection.create_table(name=self.table_name, schema=schema, mode="overwrite", exist_ok=True)  # type: ignore
    return tbl  # type: ignore
def doc_exists(self, document: Document) -> bool:
    """
    Return True when a row keyed by this document's content hash is already stored.

    Args:
        document (Document): Document to check for
    """
    if self.table is None:
        return False
    try:
        sanitized = document.content.replace("\x00", "\ufffd")
        row_id = md5(sanitized.encode()).hexdigest()
        matches = self.table.search().where(f"{self._id}='{row_id}'").to_arrow()
        return len(matches) > 0
    except Exception:
        # Search can fail on stale cache data; treat that as "not present"
        return False
async def async_doc_exists(self, document: Document) -> bool:
    """
    Asynchronously check whether the document is already stored.

    Refreshes the sync table handle first, then delegates to the sync check.

    Args:
        document (Document): Document to validate

    Returns:
        bool: True if document exists, False otherwise
    """
    if self.connection:
        self.table = self.connection.open_table(name=self.table_name)
    return self.doc_exists(document)
def insert(self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
    """
    Insert documents into the database, skipping ones whose content is already stored.

    Args:
        content_hash (str): Hash of the source content, stored in each row's payload
        documents (List[Document]): List of documents to insert
        filters (Optional[Dict[str, Any]]): Filters to add as metadata to documents
    """
    if len(documents) <= 0:
        log_info("No documents to insert")
        return

    log_debug(f"Inserting {len(documents)} documents")
    rows = []

    for document in documents:
        # Skip documents that are already present (keyed by content hash)
        if self.doc_exists(document):
            continue

        # Fold the provided filters into the document's metadata
        if filters:
            merged_meta = document.meta_data.copy() if document.meta_data else {}
            merged_meta.update(filters)
            document.meta_data = merged_meta

        document.embed(embedder=self.embedder)
        cleaned_content = document.content.replace("\x00", "\ufffd")
        doc_id = str(md5(cleaned_content.encode()).hexdigest())
        payload = {
            "name": document.name,
            "meta_data": document.meta_data,
            "content": cleaned_content,
            "usage": document.usage,
            "content_id": document.content_id,
            "content_hash": content_hash,
        }
        rows.append(
            {
                "id": doc_id,
                "vector": self._prepare_vector(document.embedding),
                "payload": json.dumps(payload),
            }
        )
        log_debug(f"Parsed document: {document.name} ({document.meta_data})")

    if self.table is None:
        logger.error("Table not initialized. Please create the table first")
        return

    if not rows:
        log_debug("No new data to insert")
        return

    # Honor the configured bad-vector policy when set
    if self.on_bad_vectors is not None:
        self.table.add(rows, on_bad_vectors=self.on_bad_vectors, fill_value=self.fill_value)
    else:
        self.table.add(rows)

    log_debug(f"Inserted {len(rows)} documents")
async def async_insert(
    self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
) -> None:
    """
    Asynchronously insert documents into the database.

    Embedding runs asynchronously (batched when the embedder supports it); the write
    itself goes through the sync `insert`, because LanceDB's async tables have
    sync/async synchronization issues that produced empty vectors.

    Args:
        content_hash (str): Hash of the source content
        documents (List[Document]): List of documents to insert
        filters (Optional[Dict[str, Any]]): Filters to apply while inserting documents
    """
    if len(documents) <= 0:
        log_debug("No documents to insert")
        return

    log_debug(f"Inserting {len(documents)} documents")

    batch_capable = self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage")
    if batch_capable:
        try:
            contents = [doc.content for doc in documents]
            embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(contents)

            for idx, doc in enumerate(documents):
                if idx < len(embeddings):
                    doc.embedding = embeddings[idx]
                    doc.usage = usages[idx] if idx < len(usages) else None
        except Exception as e:
            error_str = str(e).lower()
            rate_limited = any(
                phrase in error_str
                for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
            )
            if rate_limited:
                # Rate limits won't be fixed by retrying per-document; surface them
                logger.error(f"Rate limit detected during batch embedding. {e}")
                raise e
            logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
            await asyncio.gather(
                *(doc.async_embed(embedder=self.embedder) for doc in documents), return_exceptions=True
            )
    else:
        await asyncio.gather(
            *(doc.async_embed(embedder=self.embedder) for doc in documents), return_exceptions=True
        )

    # Sync insert avoids the sync/async table synchronization issues
    self.insert(content_hash, documents, filters)
def upsert_available(self) -> bool:
    """This backend supports upsert (implemented as delete-by-hash then insert)."""
    return True
def upsert(self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
    """
    Upsert documents: drop any rows carrying this content hash, then insert fresh ones.

    Args:
        content_hash (str): Hash identifying the content being replaced
        documents (List[Document]): List of documents to upsert
        filters (Optional[Dict[str, Any]]): Filters to apply while upserting
    """
    if self.content_hash_exists(content_hash):
        self._delete_by_content_hash(content_hash)
    self.insert(content_hash=content_hash, documents=documents, filters=filters)
async def async_upsert(
    self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
) -> None:
    """
    Asynchronously upsert documents into the database.

    Embedding runs asynchronously for speed; the write is delegated to the sync
    `upsert` for reliability.
    """
    if len(documents) > 0:
        # Embed up-front, batched when the embedder supports it
        if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
            try:
                contents = [doc.content for doc in documents]
                embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(contents)
                for idx, doc in enumerate(documents):
                    if idx < len(embeddings):
                        doc.embedding = embeddings[idx]
                        doc.usage = usages[idx] if idx < len(usages) else None
            except Exception as e:
                error_str = str(e).lower()
                rate_limited = any(
                    phrase in error_str
                    for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
                )
                if rate_limited:
                    raise e
                # Non-rate-limit failure: embed each document individually
                await asyncio.gather(
                    *(doc.async_embed(embedder=self.embedder) for doc in documents), return_exceptions=True
                )
        else:
            await asyncio.gather(
                *(doc.async_embed(embedder=self.embedder) for doc in documents), return_exceptions=True
            )

    # Sync upsert for reliability
    self.upsert(content_hash=content_hash, documents=documents, filters=filters)
def search(
    self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
) -> List[Document]:
    """
    Search for documents matching the query using the configured search type.

    Args:
        query (str): Query string to search for
        limit (int): Maximum number of results to return
        filters (Optional[Dict[str, Any]]): Metadata key/value filters applied to results

    Returns:
        List[Document]: List of matching documents
    """
    # Re-open the table so this read sees the latest writes
    if self.connection:
        self.table = self.connection.open_table(name=self.table_name)

    if isinstance(filters, list):
        log_warning("Filter Expressions are not yet supported in LanceDB. No filters will be applied.")
        filters = None

    # Dispatch on the configured search mode
    if self.search_type == SearchType.vector:
        raw_results = self.vector_search(query, limit)
    elif self.search_type == SearchType.keyword:
        raw_results = self.keyword_search(query, limit)
    elif self.search_type == SearchType.hybrid:
        raw_results = self.hybrid_search(query, limit)
    else:
        logger.error(f"Invalid search type '{self.search_type}'.")
        return []

    if raw_results is None:
        return []

    search_results = self._build_search_results(raw_results)

    # Keep only documents whose metadata satisfies every filter key/value pair
    if filters and search_results:
        search_results = [
            doc
            for doc in search_results
            if doc.meta_data is not None
            and all(key in doc.meta_data and doc.meta_data[key] == value for key, value in filters.items())
        ]

    if self.reranker and search_results:
        search_results = self.reranker.rerank(query=query, documents=search_results)

    log_info(f"Found {len(search_results)} documents")
    return search_results
async def async_search(
    self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
) -> List[Document]:
    """
    Asynchronously search for documents matching the query.

    Delegates to the sync `search`, since LanceDB's async search currently has
    sync/async table synchronization issues; the overhead is negligible for reads.

    Args:
        query (str): Query string to search for
        limit (int): Maximum number of results to return
        filters (Optional[Dict[str, Any]]): Filters to apply to the search

    Returns:
        List[Document]: List of matching documents
    """
    return self.search(query=query, limit=limit, filters=filters)
def vector_search(
    self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
) -> List[Document]:
    """
    Run a pure vector (ANN) search.

    Returns the raw LanceDB result frame; `search()` converts it to Documents.
    Returns None on embedding failure or when the table is missing — the caller
    treats None as "no results".
    """
    query_embedding = self.embedder.get_embedding(query)
    if query_embedding is None:
        logger.error(f"Error getting embedding for Query: {query}")
        return None  # type: ignore

    if self.table is None:
        logger.error("Table not initialized. Please create the table first")
        return None  # type: ignore

    request = self.table.search(
        query=query_embedding,
        vector_column_name=self._vector_col,
    ).limit(limit)

    if self.nprobes:
        request.nprobes(self.nprobes)

    return request.to_pandas()
def hybrid_search(
    self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
) -> List[Document]:
    """
    Combine vector and full-text search over the payload column.

    Returns the raw LanceDB result frame; `search()` converts it to Documents.
    """
    query_embedding = self.embedder.get_embedding(query)
    if query_embedding is None:
        logger.error(f"Error getting embedding for Query: {query}")
        return []

    if self.table is None:
        logger.error("Table not initialized. Please create the table first")
        return []

    # Lazily build the full-text index the first time a text query is needed
    if not self.fts_index_exists:
        self.table.create_fts_index("payload", use_tantivy=self.use_tantivy, replace=True)
        self.fts_index_exists = True

    request = (
        self.table.search(
            vector_column_name=self._vector_col,
            query_type="hybrid",
        )
        .vector(query_embedding)
        .text(query)
        .limit(limit)
    )

    if self.nprobes:
        request.nprobes(self.nprobes)

    return request.to_pandas()
def keyword_search(
    self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
) -> List[Document]:
    """
    Full-text (FTS) search over the JSON payload column.

    The `filters` parameter is accepted for interface parity but not used here;
    filtering happens in `search()`. Returns the raw LanceDB result frame.
    """
    if self.table is None:
        logger.error("Table not initialized. Please create the table first")
        return []

    # Lazily build the full-text index on first use
    if not self.fts_index_exists:
        self.table.create_fts_index("payload", use_tantivy=self.use_tantivy, replace=True)
        self.fts_index_exists = True

    request = self.table.search(
        query=query,
        query_type="fts",
    ).limit(limit)

    return request.to_pandas()
def _build_search_results(self, results) -> List[Document]:  # TODO: typehint pandas?
    """Convert a frame of raw LanceDB hits into Document objects (payload is JSON-decoded)."""
    documents: List[Document] = []
    try:
        for _, row in results.iterrows():
            payload = json.loads(row["payload"])
            doc = Document(
                name=payload["name"],
                meta_data=payload["meta_data"],
                content=payload["content"],
                embedder=self.embedder,
                embedding=row["vector"],
                usage=payload["usage"],
                content_id=payload.get("content_id"),
            )
            documents.append(doc)
    except Exception as e:
        # Return whatever was built before the failure
        logger.error(f"Error building search results: {e}")
    return documents
def drop(self) -> None:
    """Drop the table (if it exists) and reset the cached sync handle."""
    if not self.exists():
        return
    log_debug(f"Deleting collection: {self.table_name}")
    self.connection.drop_table(self.table_name)  # type: ignore
    # Clear the stale handle so later calls re-create/re-open cleanly
    self.table = None
async def async_drop(self) -> None:
    """Drop the table asynchronously and clear the cached async handle."""
    if not await self.async_exists():
        return
    log_debug(f"Deleting collection: {self.table_name}")
    conn = await self._get_async_connection()
    await conn.drop_table(self.table_name)
    # Clear the stale handle so later calls re-create/re-open cleanly
    self.async_table = None
def exists(self) -> bool:
    """True when the table is known to exist (open async handle, or listed on the sync connection)."""
    # An open async table handle is proof of existence
    if self.async_table is not None:
        return True
    if not self.connection:
        return False
    return self.table_name in self.connection.table_names()
async def async_exists(self) -> bool:
    """Check if the table exists asynchronously."""
    # An open async table handle is proof of existence
    if self.async_table is not None:
        return True
    # Otherwise consult the database listing without opening the table
    if self.async_connection is None:
        self.async_connection = await lancedb.connect_async(self.uri)
    names = await self.async_connection.table_names()
    return self.table_name in names
async def async_get_count(self) -> int:
    """Row count via the async table handle; 0 when no async table could be opened."""
    await self._get_async_connection()
    if self.async_table is None:
        return 0
    return await self.async_table.count_rows()
def get_count(self) -> int:
    """Return the row count, preferring the async table handle when one is open.

    Falls back to the sync table (or 0) when the async count cannot be driven
    to completion — e.g. when already inside a running event loop.
    """
    # If we have data in the async table but sync table isn't available, try to get count from async table
    if self.async_table is not None:
        try:
            import asyncio

            # Check if we're already in an event loop
            try:
                asyncio.get_running_loop()
                # We're in an async context, can't use asyncio.run
                log_debug("Already in async context, falling back to sync table for count")
            except RuntimeError:
                # No event loop running, safe to use asyncio.run
                try:
                    return asyncio.run(self.async_get_count())
                except Exception as e:
                    log_debug(f"Failed to get async count: {e}")
        except Exception as e:
            log_debug(f"Error in async count logic: {e}")

    # Sync fallback: count via the sync table when it exists
    if self.exists() and self.table:
        return self.table.count_rows()
    return 0
def optimize(self) -> None:
    """No-op for this backend."""
    pass
def delete(self) -> bool:
    """Whole-collection delete is not supported here; always returns False."""
    return False
def name_exists(self, name: str) -> bool:
    """
    Check if a document with the given name exists in the database.

    The name lives inside the JSON payload, so all payloads are fetched and
    scanned in Python.

    Args:
        name (str): Document name to look for

    Returns:
        bool: True when any stored payload carries this name
    """
    if self.table is None:
        return False

    try:
        # BUGFIX: explicitly lift LanceDB's default search limit — without
        # .limit(total_count) only the first few rows were scanned, so names
        # beyond the default limit were wrongly reported as absent.
        # (Sibling methods like delete_by_name already do this.)
        total_count = self.table.count_rows()
        result = self.table.search().select(["payload"]).limit(total_count).to_pandas()
        # Convert the JSON strings in payload column to dictionaries
        payloads = result["payload"].apply(json.loads)

        # Check if the name exists in any of the payloads
        return any(payload.get("name") == name for payload in payloads)
    except Exception as e:
        logger.error(f"Error checking name existence: {e}")
        return False
async def async_name_exists(self, name: str) -> bool:
    """Async name lookup is not implemented for this backend; always raises."""
    cls_name = self.__class__.__name__
    raise NotImplementedError(f"Async not supported on {cls_name}.")
def id_exists(self, id: str) -> bool:
    """Check if a document with the given ID exists in the database."""
    if self.table is None:
        logger.error("Table not initialized")
        return False

    try:
        # Fetch rows whose id column matches exactly
        matches = self.table.search().where(f"{self._id} = '{id}'").to_pandas()
        return len(matches) > 0
    except Exception as e:
        logger.error(f"Error checking id existence: {e}")
        return False
def delete_by_id(self, id: str) -> bool:
    """Delete rows whose id column matches; returns True on success."""
    if self.table is None:
        logger.error("Table not initialized")
        return False

    try:
        self.table.delete(f"{self._id} = '{id}'")
        log_info(f"Deleted records with id '{id}' from table '{self.table_name}'.")
        return True
    except Exception as e:
        logger.error(f"Error deleting rows by id '{id}': {e}")
        return False
def delete_by_name(self, name: str) -> bool:
    """
    Delete rows whose payload name matches.

    The name is inside the JSON payload, so every payload is fetched and
    scanned in Python before deleting by id.
    """
    if self.table is None:
        logger.error("Table not initialized")
        return False

    try:
        total_count = self.table.count_rows()
        frame = self.table.search().select(["id", "payload"]).limit(total_count).to_pandas()

        # Collect the ids of rows whose payload carries this name
        targets = [row["id"] for _, row in frame.iterrows() if json.loads(row["payload"]).get("name") == name]

        if not targets:
            log_info(f"No records found with name '{name}' to delete.")
            return False

        for doc_id in targets:
            self.table.delete(f"{self._id} = '{doc_id}'")
        log_info(f"Deleted {len(targets)} records with name '{name}' from table '{self.table_name}'.")
        return True

    except Exception as e:
        logger.error(f"Error deleting rows by name '{name}': {e}")
        return False
def delete_by_metadata(self, metadata: Dict[str, Any]) -> bool:
    """
    Delete rows whose payload metadata contains every given key/value pair.

    Metadata lives inside the JSON payload, so all payloads are scanned in Python.
    """
    if self.table is None:
        logger.error("Table not initialized")
        return False

    try:
        total_count = self.table.count_rows()
        frame = self.table.search().select(["id", "payload"]).limit(total_count).to_pandas()

        # Collect ids of rows where every requested key/value pair matches
        targets = []
        for _, row in frame.iterrows():
            doc_metadata = json.loads(row["payload"]).get("meta_data", {})
            if all(key in doc_metadata and doc_metadata[key] == value for key, value in metadata.items()):
                targets.append(row["id"])

        if not targets:
            log_info(f"No records found with metadata '{metadata}' to delete.")
            return False

        for doc_id in targets:
            self.table.delete(f"{self._id} = '{doc_id}'")
        log_info(
            f"Deleted {len(targets)} records with metadata '{metadata}' from table '{self.table_name}'."
        )
        return True

    except Exception as e:
        logger.error(f"Error deleting rows by metadata '{metadata}': {e}")
        return False
def delete_by_content_id(self, content_id: str) -> bool:
    """
    Delete rows whose payload content_id matches.

    content_id is inside the JSON payload, so all payloads are scanned in Python.
    """
    if self.table is None:
        logger.error("Table not initialized")
        return False

    try:
        total_count = self.table.count_rows()
        frame = self.table.search().select(["id", "payload"]).limit(total_count).to_pandas()

        targets = [
            row["id"] for _, row in frame.iterrows() if json.loads(row["payload"]).get("content_id") == content_id
        ]

        if not targets:
            log_info(f"No records found with content_id '{content_id}' to delete.")
            return False

        for doc_id in targets:
            self.table.delete(f"{self._id} = '{doc_id}'")
        log_info(
            f"Deleted {len(targets)} records with content_id '{content_id}' from table '{self.table_name}'."
        )
        return True

    except Exception as e:
        logger.error(f"Error deleting rows by content_id '{content_id}': {e}")
        return False
def _delete_by_content_hash(self, content_hash: str) -> bool:
    """
    Delete rows whose payload content_hash matches (used by `upsert`).

    content_hash is inside the JSON payload, so all payloads are scanned in Python.
    """
    if self.table is None:
        logger.error("Table not initialized")
        return False

    try:
        total_count = self.table.count_rows()
        frame = self.table.search().select(["id", "payload"]).limit(total_count).to_pandas()

        targets = [
            row["id"]
            for _, row in frame.iterrows()
            if json.loads(row["payload"]).get("content_hash") == content_hash
        ]

        if not targets:
            log_info(f"No records found with content_hash '{content_hash}' to delete.")
            return False

        for doc_id in targets:
            self.table.delete(f"{self._id} = '{doc_id}'")
        log_info(
            f"Deleted {len(targets)} records with content_hash '{content_hash}' from table '{self.table_name}'."
        )
        return True

    except Exception as e:
        logger.error(f"Error deleting rows by content_hash '{content_hash}': {e}")
        return False
def content_hash_exists(self, content_hash: str) -> bool:
    """Check whether any stored payload carries the given content hash."""
    if self.table is None:
        logger.error("Table not initialized")
        return False

    try:
        total_count = self.table.count_rows()
        frame = self.table.search().select(["id", "payload"]).limit(total_count).to_pandas()
        # Scan payloads in Python; the hash lives inside the JSON payload
        return any(
            json.loads(row["payload"]).get("content_hash") == content_hash for _, row in frame.iterrows()
        )
    except Exception as e:
        logger.error(f"Error checking content_hash existence '{content_hash}': {e}")
        return False
def update_metadata(self, content_id: str, metadata: Dict[str, Any]) -> None:
    """
    Update the metadata for documents with the given content_id.

    LanceDB has no in-place update, so matching rows are deleted and re-inserted
    with the merged payload.

    Args:
        content_id (str): The content ID to update
        metadata (Dict[str, Any]): The metadata to merge into matching payloads

    Raises:
        Exception: Re-raises any failure after logging it.
    """
    import json

    try:
        if self.table is None:
            logger.error("Table not initialized")
            return

        # Get all documents and filter in Python (LanceDB doesn't support JSON operators)
        total_count = self.table.count_rows()
        # BUGFIX: also select the vector column. Previously only ["id", "payload"]
        # were fetched, so `row["vector"]` below was never present and the
        # delete-and-reinsert silently dropped each updated row's embedding.
        results = (
            self.table.search().select(["id", "payload", self._vector_col]).limit(total_count).to_pandas()
        )

        if results.empty:
            logger.debug("No documents found")
            return

        # Find matching documents with the given content_id
        matching_rows = []
        for _, row in results.iterrows():
            payload = json.loads(row["payload"])
            if payload.get("content_id") == content_id:
                matching_rows.append(row)

        if not matching_rows:
            logger.debug(f"No documents found with content_id: {content_id}")
            return

        # Update each matching document
        updated_count = 0
        for row in matching_rows:
            row_id = row["id"]
            current_payload = json.loads(row["payload"])

            # Merge existing metadata with new metadata
            if "meta_data" in current_payload:
                current_payload["meta_data"].update(metadata)
            else:
                current_payload["meta_data"] = metadata

            # Mirror the merge into the "filters" payload field
            if "filters" in current_payload and isinstance(current_payload["filters"], dict):
                current_payload["filters"].update(metadata)
            else:
                current_payload["filters"] = metadata

            update_data = {"id": row_id, "payload": json.dumps(current_payload)}

            # Carry over the vector (and text, if present) so the re-inserted row is complete
            vector_data = row[self._vector_col] if self._vector_col in row else None
            text_data = row["text"] if "text" in row else None
            if vector_data is not None:
                update_data["vector"] = vector_data
            if text_data is not None:
                update_data["text"] = text_data

            # Delete old record and insert updated one (no in-place update in LanceDB)
            self.table.delete(f"id = '{row_id}'")
            self.table.add([update_data])
            updated_count += 1

        logger.debug(f"Updated metadata for {updated_count} documents with content_id: {content_id}")

    except Exception as e:
        logger.error(f"Error updating metadata for content_id '{content_id}': {e}")
        raise
def get_supported_search_types(self) -> List[str]:
    """Get the supported search types for this vector database."""
    # All three modes are implemented: vector_search, keyword_search, hybrid_search
    return [SearchType.vector, SearchType.keyword, SearchType.hybrid]