agno 0.1.2__py3-none-any.whl → 2.3.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/__init__.py +8 -0
- agno/agent/__init__.py +44 -5
- agno/agent/agent.py +10531 -2975
- agno/api/agent.py +14 -53
- agno/api/api.py +7 -46
- agno/api/evals.py +22 -0
- agno/api/os.py +17 -0
- agno/api/routes.py +6 -25
- agno/api/schemas/__init__.py +9 -0
- agno/api/schemas/agent.py +6 -9
- agno/api/schemas/evals.py +16 -0
- agno/api/schemas/os.py +14 -0
- agno/api/schemas/team.py +10 -10
- agno/api/schemas/utils.py +21 -0
- agno/api/schemas/workflows.py +16 -0
- agno/api/settings.py +53 -0
- agno/api/team.py +22 -26
- agno/api/workflow.py +28 -0
- agno/cloud/aws/base.py +214 -0
- agno/cloud/aws/s3/__init__.py +2 -0
- agno/cloud/aws/s3/api_client.py +43 -0
- agno/cloud/aws/s3/bucket.py +195 -0
- agno/cloud/aws/s3/object.py +57 -0
- agno/compression/__init__.py +3 -0
- agno/compression/manager.py +247 -0
- agno/culture/__init__.py +3 -0
- agno/culture/manager.py +956 -0
- agno/db/__init__.py +24 -0
- agno/db/async_postgres/__init__.py +3 -0
- agno/db/base.py +946 -0
- agno/db/dynamo/__init__.py +3 -0
- agno/db/dynamo/dynamo.py +2781 -0
- agno/db/dynamo/schemas.py +442 -0
- agno/db/dynamo/utils.py +743 -0
- agno/db/firestore/__init__.py +3 -0
- agno/db/firestore/firestore.py +2379 -0
- agno/db/firestore/schemas.py +181 -0
- agno/db/firestore/utils.py +376 -0
- agno/db/gcs_json/__init__.py +3 -0
- agno/db/gcs_json/gcs_json_db.py +1791 -0
- agno/db/gcs_json/utils.py +228 -0
- agno/db/in_memory/__init__.py +3 -0
- agno/db/in_memory/in_memory_db.py +1312 -0
- agno/db/in_memory/utils.py +230 -0
- agno/db/json/__init__.py +3 -0
- agno/db/json/json_db.py +1777 -0
- agno/db/json/utils.py +230 -0
- agno/db/migrations/manager.py +199 -0
- agno/db/migrations/v1_to_v2.py +635 -0
- agno/db/migrations/versions/v2_3_0.py +938 -0
- agno/db/mongo/__init__.py +17 -0
- agno/db/mongo/async_mongo.py +2760 -0
- agno/db/mongo/mongo.py +2597 -0
- agno/db/mongo/schemas.py +119 -0
- agno/db/mongo/utils.py +276 -0
- agno/db/mysql/__init__.py +4 -0
- agno/db/mysql/async_mysql.py +2912 -0
- agno/db/mysql/mysql.py +2923 -0
- agno/db/mysql/schemas.py +186 -0
- agno/db/mysql/utils.py +488 -0
- agno/db/postgres/__init__.py +4 -0
- agno/db/postgres/async_postgres.py +2579 -0
- agno/db/postgres/postgres.py +2870 -0
- agno/db/postgres/schemas.py +187 -0
- agno/db/postgres/utils.py +442 -0
- agno/db/redis/__init__.py +3 -0
- agno/db/redis/redis.py +2141 -0
- agno/db/redis/schemas.py +159 -0
- agno/db/redis/utils.py +346 -0
- agno/db/schemas/__init__.py +4 -0
- agno/db/schemas/culture.py +120 -0
- agno/db/schemas/evals.py +34 -0
- agno/db/schemas/knowledge.py +40 -0
- agno/db/schemas/memory.py +61 -0
- agno/db/singlestore/__init__.py +3 -0
- agno/db/singlestore/schemas.py +179 -0
- agno/db/singlestore/singlestore.py +2877 -0
- agno/db/singlestore/utils.py +384 -0
- agno/db/sqlite/__init__.py +4 -0
- agno/db/sqlite/async_sqlite.py +2911 -0
- agno/db/sqlite/schemas.py +181 -0
- agno/db/sqlite/sqlite.py +2908 -0
- agno/db/sqlite/utils.py +429 -0
- agno/db/surrealdb/__init__.py +3 -0
- agno/db/surrealdb/metrics.py +292 -0
- agno/db/surrealdb/models.py +334 -0
- agno/db/surrealdb/queries.py +71 -0
- agno/db/surrealdb/surrealdb.py +1908 -0
- agno/db/surrealdb/utils.py +147 -0
- agno/db/utils.py +118 -0
- agno/eval/__init__.py +24 -0
- agno/eval/accuracy.py +666 -276
- agno/eval/agent_as_judge.py +861 -0
- agno/eval/base.py +29 -0
- agno/eval/performance.py +779 -0
- agno/eval/reliability.py +241 -62
- agno/eval/utils.py +120 -0
- agno/exceptions.py +143 -1
- agno/filters.py +354 -0
- agno/guardrails/__init__.py +6 -0
- agno/guardrails/base.py +19 -0
- agno/guardrails/openai.py +144 -0
- agno/guardrails/pii.py +94 -0
- agno/guardrails/prompt_injection.py +52 -0
- agno/hooks/__init__.py +3 -0
- agno/hooks/decorator.py +164 -0
- agno/integrations/discord/__init__.py +3 -0
- agno/integrations/discord/client.py +203 -0
- agno/knowledge/__init__.py +5 -1
- agno/{document → knowledge}/chunking/agentic.py +22 -14
- agno/{document → knowledge}/chunking/document.py +2 -2
- agno/{document → knowledge}/chunking/fixed.py +7 -6
- agno/knowledge/chunking/markdown.py +151 -0
- agno/{document → knowledge}/chunking/recursive.py +15 -3
- agno/knowledge/chunking/row.py +39 -0
- agno/knowledge/chunking/semantic.py +91 -0
- agno/knowledge/chunking/strategy.py +165 -0
- agno/knowledge/content.py +74 -0
- agno/knowledge/document/__init__.py +5 -0
- agno/{document → knowledge/document}/base.py +12 -2
- agno/knowledge/embedder/__init__.py +5 -0
- agno/knowledge/embedder/aws_bedrock.py +343 -0
- agno/knowledge/embedder/azure_openai.py +210 -0
- agno/{embedder → knowledge/embedder}/base.py +8 -0
- agno/knowledge/embedder/cohere.py +323 -0
- agno/knowledge/embedder/fastembed.py +62 -0
- agno/{embedder → knowledge/embedder}/fireworks.py +1 -1
- agno/knowledge/embedder/google.py +258 -0
- agno/knowledge/embedder/huggingface.py +94 -0
- agno/knowledge/embedder/jina.py +182 -0
- agno/knowledge/embedder/langdb.py +22 -0
- agno/knowledge/embedder/mistral.py +206 -0
- agno/knowledge/embedder/nebius.py +13 -0
- agno/knowledge/embedder/ollama.py +154 -0
- agno/knowledge/embedder/openai.py +195 -0
- agno/knowledge/embedder/sentence_transformer.py +63 -0
- agno/{embedder → knowledge/embedder}/together.py +1 -1
- agno/knowledge/embedder/vllm.py +262 -0
- agno/knowledge/embedder/voyageai.py +165 -0
- agno/knowledge/knowledge.py +3006 -0
- agno/knowledge/reader/__init__.py +7 -0
- agno/knowledge/reader/arxiv_reader.py +81 -0
- agno/knowledge/reader/base.py +95 -0
- agno/knowledge/reader/csv_reader.py +164 -0
- agno/knowledge/reader/docx_reader.py +82 -0
- agno/knowledge/reader/field_labeled_csv_reader.py +290 -0
- agno/knowledge/reader/firecrawl_reader.py +201 -0
- agno/knowledge/reader/json_reader.py +88 -0
- agno/knowledge/reader/markdown_reader.py +137 -0
- agno/knowledge/reader/pdf_reader.py +431 -0
- agno/knowledge/reader/pptx_reader.py +101 -0
- agno/knowledge/reader/reader_factory.py +313 -0
- agno/knowledge/reader/s3_reader.py +89 -0
- agno/knowledge/reader/tavily_reader.py +193 -0
- agno/knowledge/reader/text_reader.py +127 -0
- agno/knowledge/reader/web_search_reader.py +325 -0
- agno/knowledge/reader/website_reader.py +455 -0
- agno/knowledge/reader/wikipedia_reader.py +91 -0
- agno/knowledge/reader/youtube_reader.py +78 -0
- agno/knowledge/remote_content/remote_content.py +88 -0
- agno/knowledge/reranker/__init__.py +3 -0
- agno/{reranker → knowledge/reranker}/base.py +1 -1
- agno/{reranker → knowledge/reranker}/cohere.py +2 -2
- agno/knowledge/reranker/infinity.py +195 -0
- agno/knowledge/reranker/sentence_transformer.py +54 -0
- agno/knowledge/types.py +39 -0
- agno/knowledge/utils.py +234 -0
- agno/media.py +439 -95
- agno/memory/__init__.py +16 -3
- agno/memory/manager.py +1474 -123
- agno/memory/strategies/__init__.py +15 -0
- agno/memory/strategies/base.py +66 -0
- agno/memory/strategies/summarize.py +196 -0
- agno/memory/strategies/types.py +37 -0
- agno/models/aimlapi/__init__.py +5 -0
- agno/models/aimlapi/aimlapi.py +62 -0
- agno/models/anthropic/__init__.py +4 -0
- agno/models/anthropic/claude.py +960 -496
- agno/models/aws/__init__.py +15 -0
- agno/models/aws/bedrock.py +686 -451
- agno/models/aws/claude.py +190 -183
- agno/models/azure/__init__.py +18 -1
- agno/models/azure/ai_foundry.py +489 -0
- agno/models/azure/openai_chat.py +89 -40
- agno/models/base.py +2477 -550
- agno/models/cerebras/__init__.py +12 -0
- agno/models/cerebras/cerebras.py +565 -0
- agno/models/cerebras/cerebras_openai.py +131 -0
- agno/models/cohere/__init__.py +4 -0
- agno/models/cohere/chat.py +306 -492
- agno/models/cometapi/__init__.py +5 -0
- agno/models/cometapi/cometapi.py +74 -0
- agno/models/dashscope/__init__.py +5 -0
- agno/models/dashscope/dashscope.py +90 -0
- agno/models/deepinfra/__init__.py +5 -0
- agno/models/deepinfra/deepinfra.py +45 -0
- agno/models/deepseek/__init__.py +4 -0
- agno/models/deepseek/deepseek.py +110 -9
- agno/models/fireworks/__init__.py +4 -0
- agno/models/fireworks/fireworks.py +19 -22
- agno/models/google/__init__.py +3 -7
- agno/models/google/gemini.py +1717 -662
- agno/models/google/utils.py +22 -0
- agno/models/groq/__init__.py +4 -0
- agno/models/groq/groq.py +391 -666
- agno/models/huggingface/__init__.py +4 -0
- agno/models/huggingface/huggingface.py +266 -538
- agno/models/ibm/__init__.py +5 -0
- agno/models/ibm/watsonx.py +432 -0
- agno/models/internlm/__init__.py +3 -0
- agno/models/internlm/internlm.py +20 -3
- agno/models/langdb/__init__.py +1 -0
- agno/models/langdb/langdb.py +60 -0
- agno/models/litellm/__init__.py +14 -0
- agno/models/litellm/chat.py +503 -0
- agno/models/litellm/litellm_openai.py +42 -0
- agno/models/llama_cpp/__init__.py +5 -0
- agno/models/llama_cpp/llama_cpp.py +22 -0
- agno/models/lmstudio/__init__.py +5 -0
- agno/models/lmstudio/lmstudio.py +25 -0
- agno/models/message.py +361 -39
- agno/models/meta/__init__.py +12 -0
- agno/models/meta/llama.py +502 -0
- agno/models/meta/llama_openai.py +79 -0
- agno/models/metrics.py +120 -0
- agno/models/mistral/__init__.py +4 -0
- agno/models/mistral/mistral.py +293 -393
- agno/models/nebius/__init__.py +3 -0
- agno/models/nebius/nebius.py +53 -0
- agno/models/nexus/__init__.py +3 -0
- agno/models/nexus/nexus.py +22 -0
- agno/models/nvidia/__init__.py +4 -0
- agno/models/nvidia/nvidia.py +22 -3
- agno/models/ollama/__init__.py +4 -2
- agno/models/ollama/chat.py +257 -492
- agno/models/openai/__init__.py +7 -0
- agno/models/openai/chat.py +725 -770
- agno/models/openai/like.py +16 -2
- agno/models/openai/responses.py +1121 -0
- agno/models/openrouter/__init__.py +4 -0
- agno/models/openrouter/openrouter.py +62 -5
- agno/models/perplexity/__init__.py +5 -0
- agno/models/perplexity/perplexity.py +203 -0
- agno/models/portkey/__init__.py +3 -0
- agno/models/portkey/portkey.py +82 -0
- agno/models/requesty/__init__.py +5 -0
- agno/models/requesty/requesty.py +69 -0
- agno/models/response.py +177 -7
- agno/models/sambanova/__init__.py +4 -0
- agno/models/sambanova/sambanova.py +23 -4
- agno/models/siliconflow/__init__.py +5 -0
- agno/models/siliconflow/siliconflow.py +42 -0
- agno/models/together/__init__.py +4 -0
- agno/models/together/together.py +21 -164
- agno/models/utils.py +266 -0
- agno/models/vercel/__init__.py +3 -0
- agno/models/vercel/v0.py +43 -0
- agno/models/vertexai/__init__.py +0 -1
- agno/models/vertexai/claude.py +190 -0
- agno/models/vllm/__init__.py +3 -0
- agno/models/vllm/vllm.py +83 -0
- agno/models/xai/__init__.py +2 -0
- agno/models/xai/xai.py +111 -7
- agno/os/__init__.py +3 -0
- agno/os/app.py +1027 -0
- agno/os/auth.py +244 -0
- agno/os/config.py +126 -0
- agno/os/interfaces/__init__.py +1 -0
- agno/os/interfaces/a2a/__init__.py +3 -0
- agno/os/interfaces/a2a/a2a.py +42 -0
- agno/os/interfaces/a2a/router.py +249 -0
- agno/os/interfaces/a2a/utils.py +924 -0
- agno/os/interfaces/agui/__init__.py +3 -0
- agno/os/interfaces/agui/agui.py +47 -0
- agno/os/interfaces/agui/router.py +147 -0
- agno/os/interfaces/agui/utils.py +574 -0
- agno/os/interfaces/base.py +25 -0
- agno/os/interfaces/slack/__init__.py +3 -0
- agno/os/interfaces/slack/router.py +148 -0
- agno/os/interfaces/slack/security.py +30 -0
- agno/os/interfaces/slack/slack.py +47 -0
- agno/os/interfaces/whatsapp/__init__.py +3 -0
- agno/os/interfaces/whatsapp/router.py +210 -0
- agno/os/interfaces/whatsapp/security.py +55 -0
- agno/os/interfaces/whatsapp/whatsapp.py +36 -0
- agno/os/mcp.py +293 -0
- agno/os/middleware/__init__.py +9 -0
- agno/os/middleware/jwt.py +797 -0
- agno/os/router.py +258 -0
- agno/os/routers/__init__.py +3 -0
- agno/os/routers/agents/__init__.py +3 -0
- agno/os/routers/agents/router.py +599 -0
- agno/os/routers/agents/schema.py +261 -0
- agno/os/routers/evals/__init__.py +3 -0
- agno/os/routers/evals/evals.py +450 -0
- agno/os/routers/evals/schemas.py +174 -0
- agno/os/routers/evals/utils.py +231 -0
- agno/os/routers/health.py +31 -0
- agno/os/routers/home.py +52 -0
- agno/os/routers/knowledge/__init__.py +3 -0
- agno/os/routers/knowledge/knowledge.py +1008 -0
- agno/os/routers/knowledge/schemas.py +178 -0
- agno/os/routers/memory/__init__.py +3 -0
- agno/os/routers/memory/memory.py +661 -0
- agno/os/routers/memory/schemas.py +88 -0
- agno/os/routers/metrics/__init__.py +3 -0
- agno/os/routers/metrics/metrics.py +190 -0
- agno/os/routers/metrics/schemas.py +47 -0
- agno/os/routers/session/__init__.py +3 -0
- agno/os/routers/session/session.py +997 -0
- agno/os/routers/teams/__init__.py +3 -0
- agno/os/routers/teams/router.py +512 -0
- agno/os/routers/teams/schema.py +257 -0
- agno/os/routers/traces/__init__.py +3 -0
- agno/os/routers/traces/schemas.py +414 -0
- agno/os/routers/traces/traces.py +499 -0
- agno/os/routers/workflows/__init__.py +3 -0
- agno/os/routers/workflows/router.py +624 -0
- agno/os/routers/workflows/schema.py +75 -0
- agno/os/schema.py +534 -0
- agno/os/scopes.py +469 -0
- agno/{playground → os}/settings.py +7 -15
- agno/os/utils.py +973 -0
- agno/reasoning/anthropic.py +80 -0
- agno/reasoning/azure_ai_foundry.py +67 -0
- agno/reasoning/deepseek.py +63 -0
- agno/reasoning/default.py +97 -0
- agno/reasoning/gemini.py +73 -0
- agno/reasoning/groq.py +71 -0
- agno/reasoning/helpers.py +24 -1
- agno/reasoning/ollama.py +67 -0
- agno/reasoning/openai.py +86 -0
- agno/reasoning/step.py +2 -1
- agno/reasoning/vertexai.py +76 -0
- agno/run/__init__.py +6 -0
- agno/run/agent.py +822 -0
- agno/run/base.py +247 -0
- agno/run/cancel.py +81 -0
- agno/run/requirement.py +181 -0
- agno/run/team.py +767 -0
- agno/run/workflow.py +708 -0
- agno/session/__init__.py +10 -0
- agno/session/agent.py +260 -0
- agno/session/summary.py +265 -0
- agno/session/team.py +342 -0
- agno/session/workflow.py +501 -0
- agno/table.py +10 -0
- agno/team/__init__.py +37 -0
- agno/team/team.py +9536 -0
- agno/tools/__init__.py +7 -0
- agno/tools/agentql.py +120 -0
- agno/tools/airflow.py +22 -12
- agno/tools/api.py +122 -0
- agno/tools/apify.py +276 -83
- agno/tools/{arxiv_toolkit.py → arxiv.py} +20 -12
- agno/tools/aws_lambda.py +28 -7
- agno/tools/aws_ses.py +66 -0
- agno/tools/baidusearch.py +11 -4
- agno/tools/bitbucket.py +292 -0
- agno/tools/brandfetch.py +213 -0
- agno/tools/bravesearch.py +106 -0
- agno/tools/brightdata.py +367 -0
- agno/tools/browserbase.py +209 -0
- agno/tools/calcom.py +32 -23
- agno/tools/calculator.py +24 -37
- agno/tools/cartesia.py +187 -0
- agno/tools/{clickup_tool.py → clickup.py} +17 -28
- agno/tools/confluence.py +91 -26
- agno/tools/crawl4ai.py +139 -43
- agno/tools/csv_toolkit.py +28 -22
- agno/tools/dalle.py +36 -22
- agno/tools/daytona.py +475 -0
- agno/tools/decorator.py +169 -14
- agno/tools/desi_vocal.py +23 -11
- agno/tools/discord.py +32 -29
- agno/tools/docker.py +716 -0
- agno/tools/duckdb.py +76 -81
- agno/tools/duckduckgo.py +43 -40
- agno/tools/e2b.py +703 -0
- agno/tools/eleven_labs.py +65 -54
- agno/tools/email.py +13 -5
- agno/tools/evm.py +129 -0
- agno/tools/exa.py +324 -42
- agno/tools/fal.py +39 -35
- agno/tools/file.py +196 -30
- agno/tools/file_generation.py +356 -0
- agno/tools/financial_datasets.py +288 -0
- agno/tools/firecrawl.py +108 -33
- agno/tools/function.py +960 -122
- agno/tools/giphy.py +34 -12
- agno/tools/github.py +1294 -97
- agno/tools/gmail.py +922 -0
- agno/tools/google_bigquery.py +117 -0
- agno/tools/google_drive.py +271 -0
- agno/tools/google_maps.py +253 -0
- agno/tools/googlecalendar.py +607 -107
- agno/tools/googlesheets.py +377 -0
- agno/tools/hackernews.py +20 -12
- agno/tools/jina.py +24 -14
- agno/tools/jira.py +48 -19
- agno/tools/knowledge.py +218 -0
- agno/tools/linear.py +82 -43
- agno/tools/linkup.py +58 -0
- agno/tools/local_file_system.py +15 -7
- agno/tools/lumalab.py +41 -26
- agno/tools/mcp/__init__.py +10 -0
- agno/tools/mcp/mcp.py +331 -0
- agno/tools/mcp/multi_mcp.py +347 -0
- agno/tools/mcp/params.py +24 -0
- agno/tools/mcp_toolbox.py +284 -0
- agno/tools/mem0.py +193 -0
- agno/tools/memory.py +419 -0
- agno/tools/mlx_transcribe.py +11 -9
- agno/tools/models/azure_openai.py +190 -0
- agno/tools/models/gemini.py +203 -0
- agno/tools/models/groq.py +158 -0
- agno/tools/models/morph.py +186 -0
- agno/tools/models/nebius.py +124 -0
- agno/tools/models_labs.py +163 -82
- agno/tools/moviepy_video.py +18 -13
- agno/tools/nano_banana.py +151 -0
- agno/tools/neo4j.py +134 -0
- agno/tools/newspaper.py +15 -4
- agno/tools/newspaper4k.py +19 -6
- agno/tools/notion.py +204 -0
- agno/tools/openai.py +181 -17
- agno/tools/openbb.py +27 -20
- agno/tools/opencv.py +321 -0
- agno/tools/openweather.py +233 -0
- agno/tools/oxylabs.py +385 -0
- agno/tools/pandas.py +25 -15
- agno/tools/parallel.py +314 -0
- agno/tools/postgres.py +238 -185
- agno/tools/pubmed.py +125 -13
- agno/tools/python.py +48 -35
- agno/tools/reasoning.py +283 -0
- agno/tools/reddit.py +207 -29
- agno/tools/redshift.py +406 -0
- agno/tools/replicate.py +69 -26
- agno/tools/resend.py +11 -6
- agno/tools/scrapegraph.py +179 -19
- agno/tools/searxng.py +23 -31
- agno/tools/serpapi.py +15 -10
- agno/tools/serper.py +255 -0
- agno/tools/shell.py +23 -12
- agno/tools/shopify.py +1519 -0
- agno/tools/slack.py +56 -14
- agno/tools/sleep.py +8 -6
- agno/tools/spider.py +35 -11
- agno/tools/spotify.py +919 -0
- agno/tools/sql.py +34 -19
- agno/tools/tavily.py +158 -8
- agno/tools/telegram.py +18 -8
- agno/tools/todoist.py +218 -0
- agno/tools/toolkit.py +134 -9
- agno/tools/trafilatura.py +388 -0
- agno/tools/trello.py +25 -28
- agno/tools/twilio.py +18 -9
- agno/tools/user_control_flow.py +78 -0
- agno/tools/valyu.py +228 -0
- agno/tools/visualization.py +467 -0
- agno/tools/webbrowser.py +28 -0
- agno/tools/webex.py +76 -0
- agno/tools/website.py +23 -19
- agno/tools/webtools.py +45 -0
- agno/tools/whatsapp.py +286 -0
- agno/tools/wikipedia.py +28 -19
- agno/tools/workflow.py +285 -0
- agno/tools/{twitter.py → x.py} +142 -46
- agno/tools/yfinance.py +41 -39
- agno/tools/youtube.py +34 -17
- agno/tools/zendesk.py +15 -5
- agno/tools/zep.py +454 -0
- agno/tools/zoom.py +86 -37
- agno/tracing/__init__.py +12 -0
- agno/tracing/exporter.py +157 -0
- agno/tracing/schemas.py +276 -0
- agno/tracing/setup.py +111 -0
- agno/utils/agent.py +938 -0
- agno/utils/audio.py +37 -1
- agno/utils/certs.py +27 -0
- agno/utils/code_execution.py +11 -0
- agno/utils/common.py +103 -20
- agno/utils/cryptography.py +22 -0
- agno/utils/dttm.py +33 -0
- agno/utils/events.py +700 -0
- agno/utils/functions.py +107 -37
- agno/utils/gemini.py +426 -0
- agno/utils/hooks.py +171 -0
- agno/utils/http.py +185 -0
- agno/utils/json_schema.py +159 -37
- agno/utils/knowledge.py +36 -0
- agno/utils/location.py +19 -0
- agno/utils/log.py +221 -8
- agno/utils/mcp.py +214 -0
- agno/utils/media.py +335 -14
- agno/utils/merge_dict.py +22 -1
- agno/utils/message.py +77 -2
- agno/utils/models/ai_foundry.py +50 -0
- agno/utils/models/claude.py +373 -0
- agno/utils/models/cohere.py +94 -0
- agno/utils/models/llama.py +85 -0
- agno/utils/models/mistral.py +100 -0
- agno/utils/models/openai_responses.py +140 -0
- agno/utils/models/schema_utils.py +153 -0
- agno/utils/models/watsonx.py +41 -0
- agno/utils/openai.py +257 -0
- agno/utils/pickle.py +1 -1
- agno/utils/pprint.py +124 -8
- agno/utils/print_response/agent.py +930 -0
- agno/utils/print_response/team.py +1914 -0
- agno/utils/print_response/workflow.py +1668 -0
- agno/utils/prompts.py +111 -0
- agno/utils/reasoning.py +108 -0
- agno/utils/response.py +163 -0
- agno/utils/serialize.py +32 -0
- agno/utils/shell.py +4 -4
- agno/utils/streamlit.py +487 -0
- agno/utils/string.py +204 -51
- agno/utils/team.py +139 -0
- agno/utils/timer.py +9 -2
- agno/utils/tokens.py +657 -0
- agno/utils/tools.py +19 -1
- agno/utils/whatsapp.py +305 -0
- agno/utils/yaml_io.py +3 -3
- agno/vectordb/__init__.py +2 -0
- agno/vectordb/base.py +87 -9
- agno/vectordb/cassandra/__init__.py +5 -1
- agno/vectordb/cassandra/cassandra.py +383 -27
- agno/vectordb/chroma/__init__.py +4 -0
- agno/vectordb/chroma/chromadb.py +748 -83
- agno/vectordb/clickhouse/__init__.py +7 -1
- agno/vectordb/clickhouse/clickhousedb.py +554 -53
- agno/vectordb/couchbase/__init__.py +3 -0
- agno/vectordb/couchbase/couchbase.py +1446 -0
- agno/vectordb/lancedb/__init__.py +5 -0
- agno/vectordb/lancedb/lance_db.py +730 -98
- agno/vectordb/langchaindb/__init__.py +5 -0
- agno/vectordb/langchaindb/langchaindb.py +163 -0
- agno/vectordb/lightrag/__init__.py +5 -0
- agno/vectordb/lightrag/lightrag.py +388 -0
- agno/vectordb/llamaindex/__init__.py +3 -0
- agno/vectordb/llamaindex/llamaindexdb.py +166 -0
- agno/vectordb/milvus/__init__.py +3 -0
- agno/vectordb/milvus/milvus.py +966 -78
- agno/vectordb/mongodb/__init__.py +9 -1
- agno/vectordb/mongodb/mongodb.py +1175 -172
- agno/vectordb/pgvector/__init__.py +8 -0
- agno/vectordb/pgvector/pgvector.py +599 -115
- agno/vectordb/pineconedb/__init__.py +5 -1
- agno/vectordb/pineconedb/pineconedb.py +406 -43
- agno/vectordb/qdrant/__init__.py +4 -0
- agno/vectordb/qdrant/qdrant.py +914 -61
- agno/vectordb/redis/__init__.py +9 -0
- agno/vectordb/redis/redisdb.py +682 -0
- agno/vectordb/singlestore/__init__.py +8 -1
- agno/vectordb/singlestore/singlestore.py +771 -0
- agno/vectordb/surrealdb/__init__.py +3 -0
- agno/vectordb/surrealdb/surrealdb.py +663 -0
- agno/vectordb/upstashdb/__init__.py +5 -0
- agno/vectordb/upstashdb/upstashdb.py +718 -0
- agno/vectordb/weaviate/__init__.py +8 -0
- agno/vectordb/weaviate/index.py +15 -0
- agno/vectordb/weaviate/weaviate.py +1009 -0
- agno/workflow/__init__.py +23 -1
- agno/workflow/agent.py +299 -0
- agno/workflow/condition.py +759 -0
- agno/workflow/loop.py +756 -0
- agno/workflow/parallel.py +853 -0
- agno/workflow/router.py +723 -0
- agno/workflow/step.py +1564 -0
- agno/workflow/steps.py +613 -0
- agno/workflow/types.py +556 -0
- agno/workflow/workflow.py +4327 -514
- agno-2.3.13.dist-info/METADATA +639 -0
- agno-2.3.13.dist-info/RECORD +613 -0
- {agno-0.1.2.dist-info → agno-2.3.13.dist-info}/WHEEL +1 -1
- agno-2.3.13.dist-info/licenses/LICENSE +201 -0
- agno/api/playground.py +0 -91
- agno/api/schemas/playground.py +0 -22
- agno/api/schemas/user.py +0 -22
- agno/api/schemas/workspace.py +0 -46
- agno/api/user.py +0 -160
- agno/api/workspace.py +0 -151
- agno/cli/auth_server.py +0 -118
- agno/cli/config.py +0 -275
- agno/cli/console.py +0 -88
- agno/cli/credentials.py +0 -23
- agno/cli/entrypoint.py +0 -571
- agno/cli/operator.py +0 -355
- agno/cli/settings.py +0 -85
- agno/cli/ws/ws_cli.py +0 -817
- agno/constants.py +0 -13
- agno/document/__init__.py +0 -1
- agno/document/chunking/semantic.py +0 -47
- agno/document/chunking/strategy.py +0 -31
- agno/document/reader/__init__.py +0 -1
- agno/document/reader/arxiv_reader.py +0 -41
- agno/document/reader/base.py +0 -22
- agno/document/reader/csv_reader.py +0 -84
- agno/document/reader/docx_reader.py +0 -46
- agno/document/reader/firecrawl_reader.py +0 -99
- agno/document/reader/json_reader.py +0 -43
- agno/document/reader/pdf_reader.py +0 -219
- agno/document/reader/s3/pdf_reader.py +0 -46
- agno/document/reader/s3/text_reader.py +0 -51
- agno/document/reader/text_reader.py +0 -41
- agno/document/reader/website_reader.py +0 -175
- agno/document/reader/youtube_reader.py +0 -50
- agno/embedder/__init__.py +0 -1
- agno/embedder/azure_openai.py +0 -86
- agno/embedder/cohere.py +0 -72
- agno/embedder/fastembed.py +0 -37
- agno/embedder/google.py +0 -73
- agno/embedder/huggingface.py +0 -54
- agno/embedder/mistral.py +0 -80
- agno/embedder/ollama.py +0 -57
- agno/embedder/openai.py +0 -74
- agno/embedder/sentence_transformer.py +0 -38
- agno/embedder/voyageai.py +0 -64
- agno/eval/perf.py +0 -201
- agno/file/__init__.py +0 -1
- agno/file/file.py +0 -16
- agno/file/local/csv.py +0 -32
- agno/file/local/txt.py +0 -19
- agno/infra/app.py +0 -240
- agno/infra/base.py +0 -144
- agno/infra/context.py +0 -20
- agno/infra/db_app.py +0 -52
- agno/infra/resource.py +0 -205
- agno/infra/resources.py +0 -55
- agno/knowledge/agent.py +0 -230
- agno/knowledge/arxiv.py +0 -22
- agno/knowledge/combined.py +0 -22
- agno/knowledge/csv.py +0 -28
- agno/knowledge/csv_url.py +0 -19
- agno/knowledge/document.py +0 -20
- agno/knowledge/docx.py +0 -30
- agno/knowledge/json.py +0 -28
- agno/knowledge/langchain.py +0 -71
- agno/knowledge/llamaindex.py +0 -66
- agno/knowledge/pdf.py +0 -28
- agno/knowledge/pdf_url.py +0 -26
- agno/knowledge/s3/base.py +0 -60
- agno/knowledge/s3/pdf.py +0 -21
- agno/knowledge/s3/text.py +0 -23
- agno/knowledge/text.py +0 -30
- agno/knowledge/website.py +0 -88
- agno/knowledge/wikipedia.py +0 -31
- agno/knowledge/youtube.py +0 -22
- agno/memory/agent.py +0 -392
- agno/memory/classifier.py +0 -104
- agno/memory/db/__init__.py +0 -1
- agno/memory/db/base.py +0 -42
- agno/memory/db/mongodb.py +0 -189
- agno/memory/db/postgres.py +0 -203
- agno/memory/db/sqlite.py +0 -193
- agno/memory/memory.py +0 -15
- agno/memory/row.py +0 -36
- agno/memory/summarizer.py +0 -192
- agno/memory/summary.py +0 -19
- agno/memory/workflow.py +0 -38
- agno/models/google/gemini_openai.py +0 -26
- agno/models/ollama/hermes.py +0 -221
- agno/models/ollama/tools.py +0 -362
- agno/models/vertexai/gemini.py +0 -595
- agno/playground/__init__.py +0 -3
- agno/playground/async_router.py +0 -421
- agno/playground/deploy.py +0 -249
- agno/playground/operator.py +0 -92
- agno/playground/playground.py +0 -91
- agno/playground/schemas.py +0 -76
- agno/playground/serve.py +0 -55
- agno/playground/sync_router.py +0 -405
- agno/reasoning/agent.py +0 -68
- agno/run/response.py +0 -112
- agno/storage/agent/__init__.py +0 -0
- agno/storage/agent/base.py +0 -38
- agno/storage/agent/dynamodb.py +0 -350
- agno/storage/agent/json.py +0 -92
- agno/storage/agent/mongodb.py +0 -228
- agno/storage/agent/postgres.py +0 -367
- agno/storage/agent/session.py +0 -79
- agno/storage/agent/singlestore.py +0 -303
- agno/storage/agent/sqlite.py +0 -357
- agno/storage/agent/yaml.py +0 -93
- agno/storage/workflow/__init__.py +0 -0
- agno/storage/workflow/base.py +0 -40
- agno/storage/workflow/mongodb.py +0 -233
- agno/storage/workflow/postgres.py +0 -366
- agno/storage/workflow/session.py +0 -60
- agno/storage/workflow/sqlite.py +0 -359
- agno/tools/googlesearch.py +0 -88
- agno/utils/defaults.py +0 -57
- agno/utils/filesystem.py +0 -39
- agno/utils/git.py +0 -52
- agno/utils/json_io.py +0 -30
- agno/utils/load_env.py +0 -19
- agno/utils/py_io.py +0 -19
- agno/utils/pyproject.py +0 -18
- agno/utils/resource_filter.py +0 -31
- agno/vectordb/singlestore/s2vectordb.py +0 -390
- agno/vectordb/singlestore/s2vectordb2.py +0 -355
- agno/workspace/__init__.py +0 -0
- agno/workspace/config.py +0 -325
- agno/workspace/enums.py +0 -6
- agno/workspace/helpers.py +0 -48
- agno/workspace/operator.py +0 -758
- agno/workspace/settings.py +0 -63
- agno-0.1.2.dist-info/LICENSE +0 -375
- agno-0.1.2.dist-info/METADATA +0 -502
- agno-0.1.2.dist-info/RECORD +0 -352
- agno-0.1.2.dist-info/entry_points.txt +0 -3
- /agno/{cli → db/migrations}/__init__.py +0 -0
- /agno/{cli/ws → db/migrations/versions}/__init__.py +0 -0
- /agno/{document/chunking/__init__.py → db/schemas/metrics.py} +0 -0
- /agno/{document/reader/s3 → integrations}/__init__.py +0 -0
- /agno/{file/local → knowledge/chunking}/__init__.py +0 -0
- /agno/{infra → knowledge/remote_content}/__init__.py +0 -0
- /agno/{knowledge/s3 → tools/models}/__init__.py +0 -0
- /agno/{reranker → utils/models}/__init__.py +0 -0
- /agno/{storage → utils/print_response}/__init__.py +0 -0
- {agno-0.1.2.dist-info → agno-2.3.13.dist-info}/top_level.txt +0 -0
|
@@ -1,29 +1,60 @@
|
|
|
1
|
+
import asyncio
|
|
1
2
|
import json
|
|
2
3
|
from hashlib import md5
|
|
3
|
-
from
|
|
4
|
+
from os import getenv
|
|
5
|
+
from typing import Any, Dict, List, Optional, Union
|
|
4
6
|
|
|
5
7
|
try:
|
|
6
8
|
import lancedb
|
|
7
9
|
import pyarrow as pa
|
|
8
10
|
except ImportError:
|
|
9
|
-
raise ImportError("`lancedb` not installed.")
|
|
11
|
+
raise ImportError("`lancedb` not installed. Please install using `pip install lancedb`")
|
|
10
12
|
|
|
11
|
-
from agno.
|
|
12
|
-
from agno.
|
|
13
|
-
from agno.
|
|
14
|
-
from agno.
|
|
13
|
+
from agno.filters import FilterExpr
|
|
14
|
+
from agno.knowledge.document import Document
|
|
15
|
+
from agno.knowledge.embedder import Embedder
|
|
16
|
+
from agno.knowledge.reranker.base import Reranker
|
|
17
|
+
from agno.utils.log import log_debug, log_info, log_warning, logger
|
|
15
18
|
from agno.vectordb.base import VectorDb
|
|
16
19
|
from agno.vectordb.distance import Distance
|
|
17
20
|
from agno.vectordb.search import SearchType
|
|
18
21
|
|
|
19
22
|
|
|
20
23
|
class LanceDb(VectorDb):
|
|
24
|
+
"""
|
|
25
|
+
LanceDb class for managing vector operations with LanceDb
|
|
26
|
+
|
|
27
|
+
Args:
|
|
28
|
+
uri: The URI of the LanceDB database.
|
|
29
|
+
name: Name of the vector database.
|
|
30
|
+
description: Description of the vector database.
|
|
31
|
+
connection: The LanceDB connection to use.
|
|
32
|
+
table: The LanceDB table instance to use.
|
|
33
|
+
async_connection: The LanceDB async connection to use.
|
|
34
|
+
async_table: The LanceDB async table instance to use.
|
|
35
|
+
table_name: The name of the LanceDB table to use.
|
|
36
|
+
api_key: The API key to use for the LanceDB connection.
|
|
37
|
+
embedder: The embedder to use when embedding the document contents.
|
|
38
|
+
search_type: The search type to use when searching for documents.
|
|
39
|
+
distance: The distance metric to use when searching for documents.
|
|
40
|
+
nprobes: The number of probes to use when searching for documents.
|
|
41
|
+
reranker: The reranker to use when reranking documents.
|
|
42
|
+
use_tantivy: Whether to use Tantivy for full text search.
|
|
43
|
+
on_bad_vectors: What to do if the vector is bad. One of "error", "drop", "fill", "null".
|
|
44
|
+
fill_value: The value to fill the vector with if on_bad_vectors is "fill".
|
|
45
|
+
"""
|
|
46
|
+
|
|
21
47
|
def __init__(
|
|
22
48
|
self,
|
|
23
49
|
uri: lancedb.URI = "/tmp/lancedb",
|
|
50
|
+
name: Optional[str] = None,
|
|
51
|
+
description: Optional[str] = None,
|
|
52
|
+
id: Optional[str] = None,
|
|
53
|
+
connection: Optional[lancedb.LanceDBConnection] = None,
|
|
24
54
|
table: Optional[lancedb.db.LanceTable] = None,
|
|
55
|
+
async_connection: Optional[lancedb.AsyncConnection] = None,
|
|
56
|
+
async_table: Optional[lancedb.db.AsyncTable] = None,
|
|
25
57
|
table_name: Optional[str] = None,
|
|
26
|
-
connection: Optional[lancedb.LanceDBConnection] = None,
|
|
27
58
|
api_key: Optional[str] = None,
|
|
28
59
|
embedder: Optional[Embedder] = None,
|
|
29
60
|
search_type: SearchType = SearchType.vector,
|
|
@@ -31,12 +62,26 @@ class LanceDb(VectorDb):
|
|
|
31
62
|
nprobes: Optional[int] = None,
|
|
32
63
|
reranker: Optional[Reranker] = None,
|
|
33
64
|
use_tantivy: bool = True,
|
|
65
|
+
on_bad_vectors: Optional[str] = None, # One of "error", "drop", "fill", "null".
|
|
66
|
+
fill_value: Optional[float] = None, # Only used if on_bad_vectors is "fill"
|
|
34
67
|
):
|
|
68
|
+
# Dynamic ID generation based on unique identifiers
|
|
69
|
+
if id is None:
|
|
70
|
+
from agno.utils.string import generate_id
|
|
71
|
+
|
|
72
|
+
table_identifier = table_name or "default_table"
|
|
73
|
+
seed = f"{uri}#{table_identifier}"
|
|
74
|
+
id = generate_id(seed)
|
|
75
|
+
|
|
76
|
+
# Initialize base class with name, description, and generated ID
|
|
77
|
+
super().__init__(id=id, name=name, description=description)
|
|
78
|
+
|
|
35
79
|
# Embedder for embedding the document contents
|
|
36
80
|
if embedder is None:
|
|
37
|
-
from agno.embedder.openai import OpenAIEmbedder
|
|
81
|
+
from agno.knowledge.embedder.openai import OpenAIEmbedder
|
|
38
82
|
|
|
39
83
|
embedder = OpenAIEmbedder()
|
|
84
|
+
log_info("Embedder not provided, using OpenAIEmbedder as default.")
|
|
40
85
|
self.embedder: Embedder = embedder
|
|
41
86
|
self.dimensions: Optional[int] = self.embedder.dimensions
|
|
42
87
|
|
|
@@ -48,20 +93,33 @@ class LanceDb(VectorDb):
|
|
|
48
93
|
# Distance metric
|
|
49
94
|
self.distance: Distance = distance
|
|
50
95
|
|
|
96
|
+
# Remote LanceDB connection details
|
|
97
|
+
self.api_key: Optional[str] = api_key
|
|
98
|
+
|
|
51
99
|
# LanceDB connection details
|
|
52
100
|
self.uri: lancedb.URI = uri
|
|
53
|
-
self.connection: lancedb.
|
|
54
|
-
|
|
101
|
+
self.connection: lancedb.DBConnection = connection or lancedb.connect(uri=self.uri, api_key=api_key)
|
|
55
102
|
self.table: Optional[lancedb.db.LanceTable] = table
|
|
56
|
-
|
|
103
|
+
|
|
104
|
+
self.async_connection: Optional[lancedb.AsyncConnection] = async_connection
|
|
105
|
+
self.async_table: Optional[lancedb.db.AsyncTable] = async_table
|
|
57
106
|
|
|
58
107
|
if table_name and table_name in self.connection.table_names():
|
|
59
108
|
# Open the table if it exists
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
109
|
+
try:
|
|
110
|
+
self.table = self.connection.open_table(name=table_name)
|
|
111
|
+
self.table_name = self.table.name
|
|
112
|
+
self._vector_col = self.table.schema.names[0]
|
|
113
|
+
self._id = self.table.schema.names[1] # type: ignore
|
|
114
|
+
except ValueError as e:
|
|
115
|
+
# Table might have been dropped by async operations but sync connection hasn't updated
|
|
116
|
+
if "was not found" in str(e):
|
|
117
|
+
log_debug(f"Table {table_name} listed but not accessible, will create if needed")
|
|
118
|
+
self.table = None
|
|
119
|
+
else:
|
|
120
|
+
raise
|
|
121
|
+
|
|
122
|
+
# LanceDB table details
|
|
65
123
|
if self.table is None:
|
|
66
124
|
# LanceDB table details
|
|
67
125
|
if table:
|
|
@@ -73,7 +131,7 @@ class LanceDb(VectorDb):
|
|
|
73
131
|
self.table = table
|
|
74
132
|
self.table_name = self.table.name
|
|
75
133
|
self._vector_col = self.table.schema.names[0]
|
|
76
|
-
self._id = self.
|
|
134
|
+
self._id = self.table.schema.names[1] # type: ignore
|
|
77
135
|
else:
|
|
78
136
|
if not table_name:
|
|
79
137
|
raise ValueError("Either table or table_name should be provided.")
|
|
@@ -84,6 +142,8 @@ class LanceDb(VectorDb):
|
|
|
84
142
|
|
|
85
143
|
self.reranker: Optional[Reranker] = reranker
|
|
86
144
|
self.nprobes: Optional[int] = nprobes
|
|
145
|
+
self.on_bad_vectors: Optional[str] = on_bad_vectors
|
|
146
|
+
self.fill_value: Optional[float] = fill_value
|
|
87
147
|
self.fts_index_exists = False
|
|
88
148
|
self.use_tantivy = use_tantivy
|
|
89
149
|
|
|
@@ -95,91 +155,224 @@ class LanceDb(VectorDb):
|
|
|
95
155
|
"Please install tantivy-py `pip install tantivy` to use the full text search feature." # noqa: E501
|
|
96
156
|
)
|
|
97
157
|
|
|
98
|
-
|
|
158
|
+
log_debug(f"Initialized LanceDb with table: '{self.table_name}'")
|
|
159
|
+
|
|
160
|
+
def _prepare_vector(self, embedding) -> List[float]:
|
|
161
|
+
"""Prepare vector embedding for insertion, ensuring correct dimensions and type."""
|
|
162
|
+
if embedding is not None and len(embedding) > 0:
|
|
163
|
+
# Convert to list of floats
|
|
164
|
+
vector = [float(x) for x in embedding]
|
|
165
|
+
|
|
166
|
+
# Ensure vector has correct dimensions if specified
|
|
167
|
+
if self.dimensions:
|
|
168
|
+
if len(vector) != self.dimensions:
|
|
169
|
+
if len(vector) > self.dimensions:
|
|
170
|
+
# Truncate if too long
|
|
171
|
+
vector = vector[: self.dimensions]
|
|
172
|
+
log_debug(f"Truncated vector from {len(embedding)} to {self.dimensions} dimensions")
|
|
173
|
+
else:
|
|
174
|
+
# Pad with zeros if too short
|
|
175
|
+
vector.extend([0.0] * (self.dimensions - len(vector)))
|
|
176
|
+
log_debug(f"Padded vector from {len(embedding)} to {self.dimensions} dimensions")
|
|
177
|
+
|
|
178
|
+
return vector
|
|
179
|
+
else:
|
|
180
|
+
# Fallback if embedding is None or empty
|
|
181
|
+
return [0.0] * (self.dimensions or 1536)
|
|
182
|
+
|
|
183
|
+
async def _get_async_connection(self) -> lancedb.AsyncConnection:
|
|
184
|
+
"""Get or create an async connection to LanceDB."""
|
|
185
|
+
if self.async_connection is None:
|
|
186
|
+
self.async_connection = await lancedb.connect_async(self.uri)
|
|
187
|
+
# Only try to open table if it exists and we don't have it already
|
|
188
|
+
if self.async_table is None:
|
|
189
|
+
table_names = await self.async_connection.table_names()
|
|
190
|
+
if self.table_name in table_names:
|
|
191
|
+
try:
|
|
192
|
+
self.async_table = await self.async_connection.open_table(self.table_name)
|
|
193
|
+
except ValueError:
|
|
194
|
+
# Table might have been dropped by another operation
|
|
195
|
+
pass
|
|
196
|
+
return self.async_connection
|
|
197
|
+
|
|
198
|
+
def _refresh_sync_connection(self) -> None:
|
|
199
|
+
"""Refresh the sync connection to see changes made by async operations."""
|
|
200
|
+
try:
|
|
201
|
+
# Re-establish sync connection to see async changes
|
|
202
|
+
if self.connection and self.table_name in self.connection.table_names():
|
|
203
|
+
self.table = self.connection.open_table(self.table_name)
|
|
204
|
+
except Exception as e:
|
|
205
|
+
log_debug(f"Could not refresh sync connection: {e}")
|
|
206
|
+
# If refresh fails, we can still function but sync methods might not see async changes
|
|
99
207
|
|
|
100
208
|
def create(self) -> None:
|
|
101
209
|
"""Create the table if it does not exist."""
|
|
102
210
|
if not self.exists():
|
|
103
|
-
self.
|
|
211
|
+
self.table = self._init_table()
|
|
104
212
|
|
|
105
|
-
def
|
|
106
|
-
|
|
213
|
+
async def async_create(self) -> None:
|
|
214
|
+
"""Create the table asynchronously if it does not exist."""
|
|
215
|
+
if not await self.async_exists():
|
|
216
|
+
try:
|
|
217
|
+
conn = await self._get_async_connection()
|
|
218
|
+
schema = self._base_schema()
|
|
219
|
+
|
|
220
|
+
log_debug(f"Creating table asynchronously: {self.table_name}")
|
|
221
|
+
self.async_table = await conn.create_table(
|
|
222
|
+
self.table_name, schema=schema, mode="overwrite", exist_ok=True
|
|
223
|
+
)
|
|
224
|
+
log_debug(f"Successfully created async table: {self.table_name}")
|
|
225
|
+
except Exception as e:
|
|
226
|
+
logger.error(f"Error creating async table: {e}")
|
|
227
|
+
# Try to fall back to sync table creation
|
|
228
|
+
try:
|
|
229
|
+
log_debug("Falling back to sync table creation")
|
|
230
|
+
self.table = self._init_table()
|
|
231
|
+
log_debug("Sync table created successfully")
|
|
232
|
+
except Exception as sync_e:
|
|
233
|
+
logger.error(f"Sync table creation also failed: {sync_e}")
|
|
234
|
+
raise
|
|
235
|
+
|
|
236
|
+
def _base_schema(self) -> pa.Schema:
|
|
237
|
+
# Use fixed-size list for vector field as required by LanceDB
|
|
238
|
+
if self.dimensions:
|
|
239
|
+
vector_field = pa.field(self._vector_col, pa.list_(pa.float32(), self.dimensions))
|
|
240
|
+
else:
|
|
241
|
+
# Fallback to dynamic list if dimensions not known (should be rare)
|
|
242
|
+
vector_field = pa.field(self._vector_col, pa.list_(pa.float32()))
|
|
243
|
+
|
|
244
|
+
return pa.schema(
|
|
107
245
|
[
|
|
108
|
-
|
|
109
|
-
self._vector_col,
|
|
110
|
-
pa.list_(
|
|
111
|
-
pa.float32(),
|
|
112
|
-
len(self.embedder.get_embedding("test")), # type: ignore
|
|
113
|
-
),
|
|
114
|
-
),
|
|
246
|
+
vector_field,
|
|
115
247
|
pa.field(self._id, pa.string()),
|
|
116
248
|
pa.field("payload", pa.string()),
|
|
117
249
|
]
|
|
118
250
|
)
|
|
119
251
|
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
return tbl # type: ignore
|
|
123
|
-
|
|
124
|
-
def doc_exists(self, document: Document) -> bool:
|
|
125
|
-
"""
|
|
126
|
-
Validating if the document exists or not
|
|
252
|
+
def _init_table(self) -> lancedb.db.LanceTable:
|
|
253
|
+
schema = self._base_schema()
|
|
127
254
|
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
return len(result) > 0
|
|
136
|
-
return False
|
|
255
|
+
log_info(f"Creating table: {self.table_name}")
|
|
256
|
+
if self.api_key or getenv("LANCEDB_API_KEY"):
|
|
257
|
+
log_info("API key found, creating table in remote LanceDB")
|
|
258
|
+
tbl = self.connection.create_table(name=self.table_name, schema=schema, mode="overwrite") # type: ignore
|
|
259
|
+
else:
|
|
260
|
+
tbl = self.connection.create_table(name=self.table_name, schema=schema, mode="overwrite", exist_ok=True) # type: ignore
|
|
261
|
+
return tbl # type: ignore
|
|
137
262
|
|
|
138
|
-
def insert(self, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
|
|
263
|
+
def insert(self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
|
|
139
264
|
"""
|
|
140
265
|
Insert documents into the database.
|
|
141
266
|
|
|
142
267
|
Args:
|
|
143
268
|
documents (List[Document]): List of documents to insert
|
|
144
|
-
filters (Optional[Dict[str, Any]]): Filters to
|
|
269
|
+
filters (Optional[Dict[str, Any]]): Filters to add as metadata to documents
|
|
145
270
|
"""
|
|
146
|
-
logger.debug(f"Inserting {len(documents)} documents")
|
|
147
|
-
data = []
|
|
148
271
|
if len(documents) <= 0:
|
|
149
|
-
|
|
272
|
+
log_info("No documents to insert")
|
|
150
273
|
return
|
|
151
274
|
|
|
275
|
+
log_debug(f"Inserting {len(documents)} documents")
|
|
276
|
+
data = []
|
|
277
|
+
|
|
152
278
|
for document in documents:
|
|
279
|
+
# Add filters to document metadata if provided
|
|
280
|
+
if filters:
|
|
281
|
+
meta_data = document.meta_data.copy() if document.meta_data else {}
|
|
282
|
+
meta_data.update(filters)
|
|
283
|
+
document.meta_data = meta_data
|
|
284
|
+
|
|
153
285
|
document.embed(embedder=self.embedder)
|
|
154
286
|
cleaned_content = document.content.replace("\x00", "\ufffd")
|
|
155
|
-
|
|
287
|
+
# Include content_hash in ID to ensure uniqueness across different content hashes
|
|
288
|
+
base_id = document.id or md5(cleaned_content.encode()).hexdigest()
|
|
289
|
+
doc_id = str(md5(f"{base_id}_{content_hash}".encode()).hexdigest())
|
|
156
290
|
payload = {
|
|
157
291
|
"name": document.name,
|
|
158
292
|
"meta_data": document.meta_data,
|
|
159
293
|
"content": cleaned_content,
|
|
160
294
|
"usage": document.usage,
|
|
295
|
+
"content_id": document.content_id,
|
|
296
|
+
"content_hash": content_hash,
|
|
161
297
|
}
|
|
162
298
|
data.append(
|
|
163
299
|
{
|
|
164
300
|
"id": doc_id,
|
|
165
|
-
"vector": document.embedding,
|
|
301
|
+
"vector": self._prepare_vector(document.embedding),
|
|
166
302
|
"payload": json.dumps(payload),
|
|
167
303
|
}
|
|
168
304
|
)
|
|
169
|
-
|
|
305
|
+
log_debug(f"Parsed document: {document.name} ({document.meta_data})")
|
|
170
306
|
|
|
171
307
|
if self.table is None:
|
|
172
308
|
logger.error("Table not initialized. Please create the table first")
|
|
173
309
|
return
|
|
174
310
|
|
|
175
311
|
if not data:
|
|
176
|
-
|
|
312
|
+
log_debug("No new data to insert")
|
|
177
313
|
return
|
|
178
314
|
|
|
179
|
-
self.
|
|
180
|
-
|
|
315
|
+
if self.on_bad_vectors is not None:
|
|
316
|
+
self.table.add(data, on_bad_vectors=self.on_bad_vectors, fill_value=self.fill_value)
|
|
317
|
+
else:
|
|
318
|
+
self.table.add(data)
|
|
319
|
+
|
|
320
|
+
log_debug(f"Inserted {len(data)} documents")
|
|
321
|
+
|
|
322
|
+
async def async_insert(
|
|
323
|
+
self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
|
|
324
|
+
) -> None:
|
|
325
|
+
"""
|
|
326
|
+
Asynchronously insert documents into the database.
|
|
327
|
+
|
|
328
|
+
Note: Currently wraps sync insert method since LanceDB async insert has sync/async table
|
|
329
|
+
synchronization issues causing empty vectors. We still do async embedding for performance.
|
|
181
330
|
|
|
182
|
-
|
|
331
|
+
Args:
|
|
332
|
+
documents (List[Document]): List of documents to insert
|
|
333
|
+
filters (Optional[Dict[str, Any]]): Filters to apply while inserting documents
|
|
334
|
+
"""
|
|
335
|
+
if len(documents) <= 0:
|
|
336
|
+
log_debug("No documents to insert")
|
|
337
|
+
return
|
|
338
|
+
|
|
339
|
+
log_debug(f"Inserting {len(documents)} documents")
|
|
340
|
+
|
|
341
|
+
# Still do async embedding for performance
|
|
342
|
+
if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
|
|
343
|
+
try:
|
|
344
|
+
doc_contents = [doc.content for doc in documents]
|
|
345
|
+
embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
|
|
346
|
+
|
|
347
|
+
for j, doc in enumerate(documents):
|
|
348
|
+
if j < len(embeddings):
|
|
349
|
+
doc.embedding = embeddings[j]
|
|
350
|
+
doc.usage = usages[j] if j < len(usages) else None
|
|
351
|
+
except Exception as e:
|
|
352
|
+
error_str = str(e).lower()
|
|
353
|
+
is_rate_limit = any(
|
|
354
|
+
phrase in error_str
|
|
355
|
+
for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
|
|
356
|
+
)
|
|
357
|
+
if is_rate_limit:
|
|
358
|
+
logger.error(f"Rate limit detected during batch embedding. {e}")
|
|
359
|
+
raise e
|
|
360
|
+
else:
|
|
361
|
+
logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
|
|
362
|
+
embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
|
|
363
|
+
await asyncio.gather(*embed_tasks, return_exceptions=True)
|
|
364
|
+
else:
|
|
365
|
+
embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
|
|
366
|
+
await asyncio.gather(*embed_tasks, return_exceptions=True)
|
|
367
|
+
|
|
368
|
+
# Use sync insert to avoid sync/async table synchronization issues
|
|
369
|
+
self.insert(content_hash, documents, filters)
|
|
370
|
+
|
|
371
|
+
def upsert_available(self) -> bool:
|
|
372
|
+
"""Check if upsert is available in LanceDB."""
|
|
373
|
+
return True
|
|
374
|
+
|
|
375
|
+
def upsert(self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
|
|
183
376
|
"""
|
|
184
377
|
Upsert documents into the database.
|
|
185
378
|
|
|
@@ -187,28 +380,140 @@ class LanceDb(VectorDb):
|
|
|
187
380
|
documents (List[Document]): List of documents to upsert
|
|
188
381
|
filters (Optional[Dict[str, Any]]): Filters to apply while upserting
|
|
189
382
|
"""
|
|
190
|
-
self.
|
|
383
|
+
if self.content_hash_exists(content_hash):
|
|
384
|
+
self._delete_by_content_hash(content_hash)
|
|
385
|
+
self.insert(content_hash=content_hash, documents=documents, filters=filters)
|
|
386
|
+
|
|
387
|
+
async def async_upsert(
|
|
388
|
+
self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
|
|
389
|
+
) -> None:
|
|
390
|
+
"""
|
|
391
|
+
Asynchronously upsert documents into the database.
|
|
392
|
+
|
|
393
|
+
Note: Uses async embedding for performance, then sync upsert for reliability.
|
|
394
|
+
"""
|
|
395
|
+
if len(documents) > 0:
|
|
396
|
+
# Do async embedding for performance
|
|
397
|
+
if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
|
|
398
|
+
try:
|
|
399
|
+
doc_contents = [doc.content for doc in documents]
|
|
400
|
+
embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
|
|
401
|
+
for j, doc in enumerate(documents):
|
|
402
|
+
if j < len(embeddings):
|
|
403
|
+
doc.embedding = embeddings[j]
|
|
404
|
+
doc.usage = usages[j] if j < len(usages) else None
|
|
405
|
+
except Exception as e:
|
|
406
|
+
error_str = str(e).lower()
|
|
407
|
+
is_rate_limit = any(
|
|
408
|
+
phrase in error_str
|
|
409
|
+
for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
|
|
410
|
+
)
|
|
411
|
+
if is_rate_limit:
|
|
412
|
+
raise e
|
|
413
|
+
else:
|
|
414
|
+
embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
|
|
415
|
+
await asyncio.gather(*embed_tasks, return_exceptions=True)
|
|
416
|
+
else:
|
|
417
|
+
embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
|
|
418
|
+
await asyncio.gather(*embed_tasks, return_exceptions=True)
|
|
419
|
+
|
|
420
|
+
# Use sync upsert for reliability
|
|
421
|
+
self.upsert(content_hash=content_hash, documents=documents, filters=filters)
|
|
422
|
+
|
|
423
|
+
def search(
|
|
424
|
+
self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
|
|
425
|
+
) -> List[Document]:
|
|
426
|
+
"""
|
|
427
|
+
Search for documents matching the query.
|
|
428
|
+
|
|
429
|
+
Args:
|
|
430
|
+
query (str): Query string to search for
|
|
431
|
+
limit (int): Maximum number of results to return
|
|
432
|
+
filters (Optional[Dict[str, Any]]): Filters to apply to the search
|
|
433
|
+
|
|
434
|
+
Returns:
|
|
435
|
+
List[Document]: List of matching documents
|
|
436
|
+
"""
|
|
437
|
+
if self.connection:
|
|
438
|
+
self.table = self.connection.open_table(name=self.table_name)
|
|
439
|
+
|
|
440
|
+
results = None
|
|
441
|
+
|
|
442
|
+
if isinstance(filters, list):
|
|
443
|
+
log_warning("Filter Expressions are not yet supported in LanceDB. No filters will be applied.")
|
|
444
|
+
filters = None
|
|
191
445
|
|
|
192
|
-
def search(self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None) -> List[Document]:
|
|
193
446
|
if self.search_type == SearchType.vector:
|
|
194
|
-
|
|
447
|
+
results = self.vector_search(query, limit)
|
|
195
448
|
elif self.search_type == SearchType.keyword:
|
|
196
|
-
|
|
449
|
+
results = self.keyword_search(query, limit)
|
|
197
450
|
elif self.search_type == SearchType.hybrid:
|
|
198
|
-
|
|
451
|
+
results = self.hybrid_search(query, limit)
|
|
199
452
|
else:
|
|
200
453
|
logger.error(f"Invalid search type '{self.search_type}'.")
|
|
201
454
|
return []
|
|
202
455
|
|
|
203
|
-
|
|
456
|
+
if results is None:
|
|
457
|
+
return []
|
|
458
|
+
|
|
459
|
+
search_results = self._build_search_results(results)
|
|
460
|
+
|
|
461
|
+
# Filter results based on metadata if filters are provided
|
|
462
|
+
if filters and search_results:
|
|
463
|
+
filtered_results = []
|
|
464
|
+
for doc in search_results:
|
|
465
|
+
if doc.meta_data is None:
|
|
466
|
+
continue
|
|
467
|
+
|
|
468
|
+
# Check if all filter criteria match
|
|
469
|
+
match = True
|
|
470
|
+
for key, value in filters.items():
|
|
471
|
+
if key not in doc.meta_data or doc.meta_data[key] != value:
|
|
472
|
+
match = False
|
|
473
|
+
break
|
|
474
|
+
|
|
475
|
+
if match:
|
|
476
|
+
filtered_results.append(doc)
|
|
477
|
+
|
|
478
|
+
search_results = filtered_results
|
|
479
|
+
|
|
480
|
+
if self.reranker and search_results:
|
|
481
|
+
search_results = self.reranker.rerank(query=query, documents=search_results)
|
|
482
|
+
|
|
483
|
+
log_info(f"Found {len(search_results)} documents")
|
|
484
|
+
return search_results
|
|
485
|
+
|
|
486
|
+
async def async_search(
|
|
487
|
+
self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
|
|
488
|
+
) -> List[Document]:
|
|
489
|
+
"""
|
|
490
|
+
Asynchronously search for documents matching the query.
|
|
491
|
+
|
|
492
|
+
Note: Currently wraps sync search method since LanceDB async search has sync/async table
|
|
493
|
+
synchronization issues. Performance impact is minimal for search operations.
|
|
494
|
+
|
|
495
|
+
Args:
|
|
496
|
+
query (str): Query string to search for
|
|
497
|
+
limit (int): Maximum number of results to return
|
|
498
|
+
filters (Optional[Dict[str, Any]]): Filters to apply to the search
|
|
499
|
+
|
|
500
|
+
Returns:
|
|
501
|
+
List[Document]: List of matching documents
|
|
502
|
+
"""
|
|
503
|
+
# Wrap sync search method to avoid sync/async table synchronization issues
|
|
504
|
+
return self.search(query=query, limit=limit, filters=filters)
|
|
505
|
+
|
|
506
|
+
def vector_search(
|
|
507
|
+
self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
|
|
508
|
+
) -> List[Document]:
|
|
204
509
|
query_embedding = self.embedder.get_embedding(query)
|
|
205
510
|
if query_embedding is None:
|
|
206
511
|
logger.error(f"Error getting embedding for Query: {query}")
|
|
207
|
-
return
|
|
512
|
+
return None
|
|
208
513
|
|
|
209
514
|
if self.table is None:
|
|
210
515
|
logger.error("Table not initialized. Please create the table first")
|
|
211
|
-
return
|
|
516
|
+
return None # type: ignore
|
|
212
517
|
|
|
213
518
|
results = self.table.search(
|
|
214
519
|
query=query_embedding,
|
|
@@ -218,22 +523,20 @@ class LanceDb(VectorDb):
|
|
|
218
523
|
if self.nprobes:
|
|
219
524
|
results.nprobes(self.nprobes)
|
|
220
525
|
|
|
221
|
-
|
|
222
|
-
search_results = self._build_search_results(results)
|
|
223
|
-
|
|
224
|
-
if self.reranker:
|
|
225
|
-
search_results = self.reranker.rerank(query=query, documents=search_results)
|
|
526
|
+
return results.to_pandas()
|
|
226
527
|
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
528
|
+
def hybrid_search(
|
|
529
|
+
self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
|
|
530
|
+
) -> List[Document]:
|
|
230
531
|
query_embedding = self.embedder.get_embedding(query)
|
|
231
532
|
if query_embedding is None:
|
|
232
533
|
logger.error(f"Error getting embedding for Query: {query}")
|
|
233
534
|
return []
|
|
535
|
+
|
|
234
536
|
if self.table is None:
|
|
235
537
|
logger.error("Table not initialized. Please create the table first")
|
|
236
538
|
return []
|
|
539
|
+
|
|
237
540
|
if not self.fts_index_exists:
|
|
238
541
|
self.table.create_fts_index("payload", use_tantivy=self.use_tantivy, replace=True)
|
|
239
542
|
self.fts_index_exists = True
|
|
@@ -251,36 +554,25 @@ class LanceDb(VectorDb):
|
|
|
251
554
|
if self.nprobes:
|
|
252
555
|
results.nprobes(self.nprobes)
|
|
253
556
|
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
search_results = self._build_search_results(results)
|
|
257
|
-
|
|
258
|
-
if self.reranker:
|
|
259
|
-
search_results = self.reranker.rerank(query=query, documents=search_results)
|
|
260
|
-
|
|
261
|
-
return search_results
|
|
557
|
+
return results.to_pandas()
|
|
262
558
|
|
|
263
|
-
def keyword_search(
|
|
559
|
+
def keyword_search(
|
|
560
|
+
self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
|
|
561
|
+
) -> List[Document]:
|
|
264
562
|
if self.table is None:
|
|
265
563
|
logger.error("Table not initialized. Please create the table first")
|
|
266
564
|
return []
|
|
565
|
+
|
|
267
566
|
if not self.fts_index_exists:
|
|
268
567
|
self.table.create_fts_index("payload", use_tantivy=self.use_tantivy, replace=True)
|
|
269
568
|
self.fts_index_exists = True
|
|
270
569
|
|
|
271
|
-
results = (
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
)
|
|
276
|
-
.limit(limit)
|
|
277
|
-
.to_pandas()
|
|
278
|
-
)
|
|
279
|
-
search_results = self._build_search_results(results)
|
|
570
|
+
results = self.table.search(
|
|
571
|
+
query=query,
|
|
572
|
+
query_type="fts",
|
|
573
|
+
).limit(limit)
|
|
280
574
|
|
|
281
|
-
|
|
282
|
-
search_results = self.reranker.rerank(query=query, documents=search_results)
|
|
283
|
-
return search_results
|
|
575
|
+
return results.to_pandas()
|
|
284
576
|
|
|
285
577
|
def _build_search_results(self, results) -> List[Document]: # TODO: typehint pandas?
|
|
286
578
|
search_results: List[Document] = []
|
|
@@ -295,6 +587,7 @@ class LanceDb(VectorDb):
|
|
|
295
587
|
embedder=self.embedder,
|
|
296
588
|
embedding=item["vector"],
|
|
297
589
|
usage=payload["usage"],
|
|
590
|
+
content_id=payload.get("content_id"),
|
|
298
591
|
)
|
|
299
592
|
)
|
|
300
593
|
|
|
@@ -305,16 +598,66 @@ class LanceDb(VectorDb):
|
|
|
305
598
|
|
|
306
599
|
def drop(self) -> None:
|
|
307
600
|
if self.exists():
|
|
308
|
-
|
|
309
|
-
self.connection.drop_table(self.table_name)
|
|
601
|
+
log_debug(f"Deleting collection: {self.table_name}")
|
|
602
|
+
self.connection.drop_table(self.table_name) # type: ignore
|
|
603
|
+
# Clear the table reference after dropping
|
|
604
|
+
self.table = None
|
|
605
|
+
|
|
606
|
+
async def async_drop(self) -> None:
|
|
607
|
+
"""Drop the table asynchronously."""
|
|
608
|
+
if await self.async_exists():
|
|
609
|
+
log_debug(f"Deleting collection: {self.table_name}")
|
|
610
|
+
conn = await self._get_async_connection()
|
|
611
|
+
await conn.drop_table(self.table_name)
|
|
612
|
+
# Clear the async table reference after dropping
|
|
613
|
+
self.async_table = None
|
|
310
614
|
|
|
311
615
|
def exists(self) -> bool:
|
|
616
|
+
# If we have an async table that was created, the table exists
|
|
617
|
+
if self.async_table is not None:
|
|
618
|
+
return True
|
|
312
619
|
if self.connection:
|
|
313
|
-
|
|
314
|
-
return True
|
|
620
|
+
return self.table_name in self.connection.table_names()
|
|
315
621
|
return False
|
|
316
622
|
|
|
623
|
+
async def async_exists(self) -> bool:
|
|
624
|
+
"""Check if the table exists asynchronously."""
|
|
625
|
+
# If we have an async table that was created, the table exists
|
|
626
|
+
if self.async_table is not None:
|
|
627
|
+
return True
|
|
628
|
+
# Check if table exists in database without trying to open it
|
|
629
|
+
if self.async_connection is None:
|
|
630
|
+
self.async_connection = await lancedb.connect_async(self.uri)
|
|
631
|
+
table_names = await self.async_connection.table_names()
|
|
632
|
+
return self.table_name in table_names
|
|
633
|
+
|
|
634
|
+
async def async_get_count(self) -> int:
|
|
635
|
+
"""Get the number of rows in the table asynchronously."""
|
|
636
|
+
await self._get_async_connection()
|
|
637
|
+
if self.async_table is not None:
|
|
638
|
+
return await self.async_table.count_rows()
|
|
639
|
+
return 0
|
|
640
|
+
|
|
317
641
|
def get_count(self) -> int:
|
|
642
|
+
# If we have data in the async table but sync table isn't available, try to get count from async table
|
|
643
|
+
if self.async_table is not None:
|
|
644
|
+
try:
|
|
645
|
+
import asyncio
|
|
646
|
+
|
|
647
|
+
# Check if we're already in an event loop
|
|
648
|
+
try:
|
|
649
|
+
asyncio.get_running_loop()
|
|
650
|
+
# We're in an async context, can't use asyncio.run
|
|
651
|
+
log_debug("Already in async context, falling back to sync table for count")
|
|
652
|
+
except RuntimeError:
|
|
653
|
+
# No event loop running, safe to use asyncio.run
|
|
654
|
+
try:
|
|
655
|
+
return asyncio.run(self.async_get_count())
|
|
656
|
+
except Exception as e:
|
|
657
|
+
log_debug(f"Failed to get async count: {e}")
|
|
658
|
+
except Exception as e:
|
|
659
|
+
log_debug(f"Error in async count logic: {e}")
|
|
660
|
+
|
|
318
661
|
if self.exists() and self.table:
|
|
319
662
|
return self.table.count_rows()
|
|
320
663
|
return 0
|
|
@@ -326,4 +669,293 @@ class LanceDb(VectorDb):
|
|
|
326
669
|
return False
|
|
327
670
|
|
|
328
671
|
def name_exists(self, name: str) -> bool:
|
|
329
|
-
|
|
672
|
+
"""Check if a document with the given name exists in the database"""
|
|
673
|
+
if self.table is None:
|
|
674
|
+
return False
|
|
675
|
+
|
|
676
|
+
try:
|
|
677
|
+
result = self.table.search().select(["payload"]).to_pandas()
|
|
678
|
+
# Convert the JSON strings in payload column to dictionaries
|
|
679
|
+
payloads = result["payload"].apply(json.loads)
|
|
680
|
+
|
|
681
|
+
# Check if the name exists in any of the payloads
|
|
682
|
+
return any(payload.get("name") == name for payload in payloads)
|
|
683
|
+
except Exception as e:
|
|
684
|
+
logger.error(f"Error checking name existence: {e}")
|
|
685
|
+
return False
|
|
686
|
+
|
|
687
|
+
async def async_name_exists(self, name: str) -> bool:
|
|
688
|
+
raise NotImplementedError(f"Async not supported on {self.__class__.__name__}.")
|
|
689
|
+
|
|
690
|
+
def id_exists(self, id: str) -> bool:
|
|
691
|
+
"""Check if a document with the given ID exists in the database"""
|
|
692
|
+
if self.table is None:
|
|
693
|
+
logger.error("Table not initialized")
|
|
694
|
+
return False
|
|
695
|
+
|
|
696
|
+
try:
|
|
697
|
+
# Search for the document with the specific ID
|
|
698
|
+
result = self.table.search().where(f"{self._id} = '{id}'").to_pandas()
|
|
699
|
+
return len(result) > 0
|
|
700
|
+
except Exception as e:
|
|
701
|
+
logger.error(f"Error checking id existence: {e}")
|
|
702
|
+
return False
|
|
703
|
+
|
|
704
|
+
def delete_by_id(self, id: str) -> bool:
|
|
705
|
+
"""Delete content by ID."""
|
|
706
|
+
if self.table is None:
|
|
707
|
+
logger.error("Table not initialized")
|
|
708
|
+
return False
|
|
709
|
+
|
|
710
|
+
try:
|
|
711
|
+
# Delete rows where the id matches
|
|
712
|
+
self.table.delete(f"{self._id} = '{id}'")
|
|
713
|
+
log_info(f"Deleted records with id '{id}' from table '{self.table_name}'.")
|
|
714
|
+
return True
|
|
715
|
+
except Exception as e:
|
|
716
|
+
logger.error(f"Error deleting rows by id '{id}': {e}")
|
|
717
|
+
return False
|
|
718
|
+
|
|
719
|
+
def delete_by_name(self, name: str) -> bool:
|
|
720
|
+
"""Delete content by name."""
|
|
721
|
+
if self.table is None:
|
|
722
|
+
logger.error("Table not initialized")
|
|
723
|
+
return False
|
|
724
|
+
|
|
725
|
+
try:
|
|
726
|
+
total_count = self.table.count_rows()
|
|
727
|
+
result = self.table.search().select(["id", "payload"]).limit(total_count).to_pandas()
|
|
728
|
+
|
|
729
|
+
# Find matching IDs
|
|
730
|
+
ids_to_delete = []
|
|
731
|
+
for _, row in result.iterrows():
|
|
732
|
+
payload = json.loads(row["payload"])
|
|
733
|
+
if payload.get("name") == name:
|
|
734
|
+
ids_to_delete.append(row["id"])
|
|
735
|
+
|
|
736
|
+
# Delete matching records
|
|
737
|
+
if ids_to_delete:
|
|
738
|
+
for doc_id in ids_to_delete:
|
|
739
|
+
self.table.delete(f"{self._id} = '{doc_id}'")
|
|
740
|
+
log_info(f"Deleted {len(ids_to_delete)} records with name '{name}' from table '{self.table_name}'.")
|
|
741
|
+
return True
|
|
742
|
+
else:
|
|
743
|
+
log_info(f"No records found with name '{name}' to delete.")
|
|
744
|
+
return False
|
|
745
|
+
|
|
746
|
+
except Exception as e:
|
|
747
|
+
logger.error(f"Error deleting rows by name '{name}': {e}")
|
|
748
|
+
return False
|
|
749
|
+
|
|
750
|
+
def delete_by_metadata(self, metadata: Dict[str, Any]) -> bool:
|
|
751
|
+
"""Delete content by metadata."""
|
|
752
|
+
if self.table is None:
|
|
753
|
+
logger.error("Table not initialized")
|
|
754
|
+
return False
|
|
755
|
+
|
|
756
|
+
try:
|
|
757
|
+
total_count = self.table.count_rows()
|
|
758
|
+
result = self.table.search().select(["id", "payload"]).limit(total_count).to_pandas()
|
|
759
|
+
|
|
760
|
+
# Find matching IDs
|
|
761
|
+
ids_to_delete = []
|
|
762
|
+
for _, row in result.iterrows():
|
|
763
|
+
payload = json.loads(row["payload"])
|
|
764
|
+
doc_metadata = payload.get("meta_data", {})
|
|
765
|
+
|
|
766
|
+
# Check if all metadata key-value pairs match
|
|
767
|
+
match = True
|
|
768
|
+
for key, value in metadata.items():
|
|
769
|
+
if key not in doc_metadata or doc_metadata[key] != value:
|
|
770
|
+
match = False
|
|
771
|
+
break
|
|
772
|
+
|
|
773
|
+
if match:
|
|
774
|
+
ids_to_delete.append(row["id"])
|
|
775
|
+
|
|
776
|
+
# Delete matching records
|
|
777
|
+
if ids_to_delete:
|
|
778
|
+
for doc_id in ids_to_delete:
|
|
779
|
+
self.table.delete(f"{self._id} = '{doc_id}'")
|
|
780
|
+
log_info(
|
|
781
|
+
f"Deleted {len(ids_to_delete)} records with metadata '{metadata}' from table '{self.table_name}'."
|
|
782
|
+
)
|
|
783
|
+
return True
|
|
784
|
+
else:
|
|
785
|
+
log_info(f"No records found with metadata '{metadata}' to delete.")
|
|
786
|
+
return False
|
|
787
|
+
|
|
788
|
+
except Exception as e:
|
|
789
|
+
logger.error(f"Error deleting rows by metadata '{metadata}': {e}")
|
|
790
|
+
return False
|
|
791
|
+
|
|
792
|
+
def delete_by_content_id(self, content_id: str) -> bool:
    """Remove all records whose payload carries the given ``content_id``.

    Every row is fetched and its JSON ``payload`` decoded in Python to find the
    matching row ids, which are then deleted one by one.

    Args:
        content_id: Value compared against the ``content_id`` key of each payload.

    Returns:
        True when at least one record was deleted. False when the table is not
        initialized, no record matched, or an error was logged.
    """
    if self.table is None:
        logger.error("Table not initialized")
        return False

    try:
        row_total = self.table.count_rows()
        query = self.table.search().select(["id", "payload"])
        frame = query.limit(row_total).to_pandas()

        # Collect the ids of rows whose decoded payload references this content id.
        matched_ids = [
            record["id"]
            for _, record in frame.iterrows()
            if json.loads(record["payload"]).get("content_id") == content_id
        ]

        if not matched_ids:
            log_info(f"No records found with content_id '{content_id}' to delete.")
            return False

        for matched_id in matched_ids:
            self.table.delete(f"{self._id} = '{matched_id}'")
        log_info(
            f"Deleted {len(matched_ids)} records with content_id '{content_id}' from table '{self.table_name}'."
        )
        return True

    except Exception as e:
        logger.error(f"Error deleting rows by content_id '{content_id}': {e}")
        return False
|
|
824
|
+
|
|
825
|
+
def _delete_by_content_hash(self, content_hash: str) -> bool:
    """Remove all records whose payload carries the given ``content_hash``.

    The full table is read and each JSON ``payload`` is decoded in Python;
    rows whose ``content_hash`` equals the argument are deleted individually.

    Args:
        content_hash: Value compared against the ``content_hash`` key of each payload.

    Returns:
        True when at least one record was deleted. False when the table is not
        initialized, no record matched, or an error was logged.
    """
    if self.table is None:
        logger.error("Table not initialized")
        return False

    try:
        row_total = self.table.count_rows()
        frame = self.table.search().select(["id", "payload"]).limit(row_total).to_pandas()

        # Gather ids first so deletion only starts once the scan has finished.
        stale_ids = []
        for _, record in frame.iterrows():
            decoded = json.loads(record["payload"])
            if decoded.get("content_hash") != content_hash:
                continue
            stale_ids.append(record["id"])

        if len(stale_ids) == 0:
            log_info(f"No records found with content_hash '{content_hash}' to delete.")
            return False

        for stale_id in stale_ids:
            self.table.delete(f"{self._id} = '{stale_id}'")
        log_info(
            f"Deleted {len(stale_ids)} records with content_hash '{content_hash}' from table '{self.table_name}'."
        )
        return True

    except Exception as e:
        logger.error(f"Error deleting rows by content_hash '{content_hash}': {e}")
        return False
|
|
857
|
+
|
|
858
|
+
def content_hash_exists(self, content_hash: str) -> bool:
    """Report whether any stored record has the given content hash.

    All rows are fetched and their JSON ``payload`` decoded in Python; the scan
    stops at the first payload whose ``content_hash`` equals the argument.

    Args:
        content_hash: Hash value to look for.

    Returns:
        True if a matching record is found. False if none is found, the table
        is not initialized, or an error was logged during the lookup.
    """
    if self.table is None:
        logger.error("Table not initialized")
        return False

    try:
        row_total = self.table.count_rows()
        frame = self.table.search().select(["id", "payload"]).limit(row_total).to_pandas()

        # any() short-circuits, so decoding stops at the first matching row.
        return any(
            json.loads(record["payload"]).get("content_hash") == content_hash
            for _, record in frame.iterrows()
        )

    except Exception as e:
        logger.error(f"Error checking content_hash existence '{content_hash}': {e}")
        return False
|
|
879
|
+
|
|
880
|
+
def update_metadata(self, content_id: str, metadata: Dict[str, Any]) -> None:
    """
    Update the metadata for documents with the given content_id.

    Every row is fetched and its JSON payload decoded in Python to find the
    documents whose ``content_id`` matches. For each match, ``metadata`` is
    merged into the payload's ``meta_data`` and ``filters`` mappings (a missing
    or non-mapping value is replaced), and only the ``payload`` column of that
    row is rewritten.

    Args:
        content_id (str): The content ID to update
        metadata (Dict[str, Any]): The metadata to update

    Raises:
        Exception: Any error raised while reading or updating the table is
            logged and then re-raised.
    """
    import json

    try:
        if self.table is None:
            logger.error("Table not initialized")
            return

        # Get all documents and filter in Python (LanceDB doesn't support JSON operators)
        total_count = self.table.count_rows()
        results = self.table.search().select(["id", "payload"]).limit(total_count).to_pandas()

        if results.empty:
            logger.debug("No documents found")
            return

        # Decode each payload once and keep (row id, payload) for the matches.
        matches = []
        for _, row in results.iterrows():
            payload = json.loads(row["payload"])
            if payload.get("content_id") == content_id:
                matches.append((row["id"], payload))

        if not matches:
            logger.debug(f"No documents found with content_id: {content_id}")
            return

        updated_count = 0
        for row_id, payload in matches:
            # Merge into both mappings; anything that is not a dict (missing key,
            # null, scalar) is replaced rather than crashing on `.update`.
            for field in ("meta_data", "filters"):
                existing = payload.get(field)
                if isinstance(existing, dict):
                    existing.update(metadata)
                else:
                    payload[field] = dict(metadata)

            # Rewrite only the payload column in place. Deleting the row and
            # re-inserting it from this id/payload-only query result would drop
            # its other columns (e.g. the embedding vector).
            self.table.update(
                where=f"{self._id} = '{row_id}'",
                values={"payload": json.dumps(payload)},
            )
            updated_count += 1

        logger.debug(f"Updated metadata for {updated_count} documents with content_id: {content_id}")

    except Exception as e:
        logger.error(f"Error updating metadata for content_id '{content_id}': {e}")
        raise
|
|
958
|
+
|
|
959
|
+
def get_supported_search_types(self) -> List[str]:
    """Return the search modes this vector database can serve: vector, keyword and hybrid."""
    supported = [SearchType.vector, SearchType.keyword, SearchType.hybrid]
    return supported
|