agno 0.1.2__py3-none-any.whl → 2.3.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/__init__.py +8 -0
- agno/agent/__init__.py +44 -5
- agno/agent/agent.py +10531 -2975
- agno/api/agent.py +14 -53
- agno/api/api.py +7 -46
- agno/api/evals.py +22 -0
- agno/api/os.py +17 -0
- agno/api/routes.py +6 -25
- agno/api/schemas/__init__.py +9 -0
- agno/api/schemas/agent.py +6 -9
- agno/api/schemas/evals.py +16 -0
- agno/api/schemas/os.py +14 -0
- agno/api/schemas/team.py +10 -10
- agno/api/schemas/utils.py +21 -0
- agno/api/schemas/workflows.py +16 -0
- agno/api/settings.py +53 -0
- agno/api/team.py +22 -26
- agno/api/workflow.py +28 -0
- agno/cloud/aws/base.py +214 -0
- agno/cloud/aws/s3/__init__.py +2 -0
- agno/cloud/aws/s3/api_client.py +43 -0
- agno/cloud/aws/s3/bucket.py +195 -0
- agno/cloud/aws/s3/object.py +57 -0
- agno/compression/__init__.py +3 -0
- agno/compression/manager.py +247 -0
- agno/culture/__init__.py +3 -0
- agno/culture/manager.py +956 -0
- agno/db/__init__.py +24 -0
- agno/db/async_postgres/__init__.py +3 -0
- agno/db/base.py +946 -0
- agno/db/dynamo/__init__.py +3 -0
- agno/db/dynamo/dynamo.py +2781 -0
- agno/db/dynamo/schemas.py +442 -0
- agno/db/dynamo/utils.py +743 -0
- agno/db/firestore/__init__.py +3 -0
- agno/db/firestore/firestore.py +2379 -0
- agno/db/firestore/schemas.py +181 -0
- agno/db/firestore/utils.py +376 -0
- agno/db/gcs_json/__init__.py +3 -0
- agno/db/gcs_json/gcs_json_db.py +1791 -0
- agno/db/gcs_json/utils.py +228 -0
- agno/db/in_memory/__init__.py +3 -0
- agno/db/in_memory/in_memory_db.py +1312 -0
- agno/db/in_memory/utils.py +230 -0
- agno/db/json/__init__.py +3 -0
- agno/db/json/json_db.py +1777 -0
- agno/db/json/utils.py +230 -0
- agno/db/migrations/manager.py +199 -0
- agno/db/migrations/v1_to_v2.py +635 -0
- agno/db/migrations/versions/v2_3_0.py +938 -0
- agno/db/mongo/__init__.py +17 -0
- agno/db/mongo/async_mongo.py +2760 -0
- agno/db/mongo/mongo.py +2597 -0
- agno/db/mongo/schemas.py +119 -0
- agno/db/mongo/utils.py +276 -0
- agno/db/mysql/__init__.py +4 -0
- agno/db/mysql/async_mysql.py +2912 -0
- agno/db/mysql/mysql.py +2923 -0
- agno/db/mysql/schemas.py +186 -0
- agno/db/mysql/utils.py +488 -0
- agno/db/postgres/__init__.py +4 -0
- agno/db/postgres/async_postgres.py +2579 -0
- agno/db/postgres/postgres.py +2870 -0
- agno/db/postgres/schemas.py +187 -0
- agno/db/postgres/utils.py +442 -0
- agno/db/redis/__init__.py +3 -0
- agno/db/redis/redis.py +2141 -0
- agno/db/redis/schemas.py +159 -0
- agno/db/redis/utils.py +346 -0
- agno/db/schemas/__init__.py +4 -0
- agno/db/schemas/culture.py +120 -0
- agno/db/schemas/evals.py +34 -0
- agno/db/schemas/knowledge.py +40 -0
- agno/db/schemas/memory.py +61 -0
- agno/db/singlestore/__init__.py +3 -0
- agno/db/singlestore/schemas.py +179 -0
- agno/db/singlestore/singlestore.py +2877 -0
- agno/db/singlestore/utils.py +384 -0
- agno/db/sqlite/__init__.py +4 -0
- agno/db/sqlite/async_sqlite.py +2911 -0
- agno/db/sqlite/schemas.py +181 -0
- agno/db/sqlite/sqlite.py +2908 -0
- agno/db/sqlite/utils.py +429 -0
- agno/db/surrealdb/__init__.py +3 -0
- agno/db/surrealdb/metrics.py +292 -0
- agno/db/surrealdb/models.py +334 -0
- agno/db/surrealdb/queries.py +71 -0
- agno/db/surrealdb/surrealdb.py +1908 -0
- agno/db/surrealdb/utils.py +147 -0
- agno/db/utils.py +118 -0
- agno/eval/__init__.py +24 -0
- agno/eval/accuracy.py +666 -276
- agno/eval/agent_as_judge.py +861 -0
- agno/eval/base.py +29 -0
- agno/eval/performance.py +779 -0
- agno/eval/reliability.py +241 -62
- agno/eval/utils.py +120 -0
- agno/exceptions.py +143 -1
- agno/filters.py +354 -0
- agno/guardrails/__init__.py +6 -0
- agno/guardrails/base.py +19 -0
- agno/guardrails/openai.py +144 -0
- agno/guardrails/pii.py +94 -0
- agno/guardrails/prompt_injection.py +52 -0
- agno/hooks/__init__.py +3 -0
- agno/hooks/decorator.py +164 -0
- agno/integrations/discord/__init__.py +3 -0
- agno/integrations/discord/client.py +203 -0
- agno/knowledge/__init__.py +5 -1
- agno/{document → knowledge}/chunking/agentic.py +22 -14
- agno/{document → knowledge}/chunking/document.py +2 -2
- agno/{document → knowledge}/chunking/fixed.py +7 -6
- agno/knowledge/chunking/markdown.py +151 -0
- agno/{document → knowledge}/chunking/recursive.py +15 -3
- agno/knowledge/chunking/row.py +39 -0
- agno/knowledge/chunking/semantic.py +91 -0
- agno/knowledge/chunking/strategy.py +165 -0
- agno/knowledge/content.py +74 -0
- agno/knowledge/document/__init__.py +5 -0
- agno/{document → knowledge/document}/base.py +12 -2
- agno/knowledge/embedder/__init__.py +5 -0
- agno/knowledge/embedder/aws_bedrock.py +343 -0
- agno/knowledge/embedder/azure_openai.py +210 -0
- agno/{embedder → knowledge/embedder}/base.py +8 -0
- agno/knowledge/embedder/cohere.py +323 -0
- agno/knowledge/embedder/fastembed.py +62 -0
- agno/{embedder → knowledge/embedder}/fireworks.py +1 -1
- agno/knowledge/embedder/google.py +258 -0
- agno/knowledge/embedder/huggingface.py +94 -0
- agno/knowledge/embedder/jina.py +182 -0
- agno/knowledge/embedder/langdb.py +22 -0
- agno/knowledge/embedder/mistral.py +206 -0
- agno/knowledge/embedder/nebius.py +13 -0
- agno/knowledge/embedder/ollama.py +154 -0
- agno/knowledge/embedder/openai.py +195 -0
- agno/knowledge/embedder/sentence_transformer.py +63 -0
- agno/{embedder → knowledge/embedder}/together.py +1 -1
- agno/knowledge/embedder/vllm.py +262 -0
- agno/knowledge/embedder/voyageai.py +165 -0
- agno/knowledge/knowledge.py +3006 -0
- agno/knowledge/reader/__init__.py +7 -0
- agno/knowledge/reader/arxiv_reader.py +81 -0
- agno/knowledge/reader/base.py +95 -0
- agno/knowledge/reader/csv_reader.py +164 -0
- agno/knowledge/reader/docx_reader.py +82 -0
- agno/knowledge/reader/field_labeled_csv_reader.py +290 -0
- agno/knowledge/reader/firecrawl_reader.py +201 -0
- agno/knowledge/reader/json_reader.py +88 -0
- agno/knowledge/reader/markdown_reader.py +137 -0
- agno/knowledge/reader/pdf_reader.py +431 -0
- agno/knowledge/reader/pptx_reader.py +101 -0
- agno/knowledge/reader/reader_factory.py +313 -0
- agno/knowledge/reader/s3_reader.py +89 -0
- agno/knowledge/reader/tavily_reader.py +193 -0
- agno/knowledge/reader/text_reader.py +127 -0
- agno/knowledge/reader/web_search_reader.py +325 -0
- agno/knowledge/reader/website_reader.py +455 -0
- agno/knowledge/reader/wikipedia_reader.py +91 -0
- agno/knowledge/reader/youtube_reader.py +78 -0
- agno/knowledge/remote_content/remote_content.py +88 -0
- agno/knowledge/reranker/__init__.py +3 -0
- agno/{reranker → knowledge/reranker}/base.py +1 -1
- agno/{reranker → knowledge/reranker}/cohere.py +2 -2
- agno/knowledge/reranker/infinity.py +195 -0
- agno/knowledge/reranker/sentence_transformer.py +54 -0
- agno/knowledge/types.py +39 -0
- agno/knowledge/utils.py +234 -0
- agno/media.py +439 -95
- agno/memory/__init__.py +16 -3
- agno/memory/manager.py +1474 -123
- agno/memory/strategies/__init__.py +15 -0
- agno/memory/strategies/base.py +66 -0
- agno/memory/strategies/summarize.py +196 -0
- agno/memory/strategies/types.py +37 -0
- agno/models/aimlapi/__init__.py +5 -0
- agno/models/aimlapi/aimlapi.py +62 -0
- agno/models/anthropic/__init__.py +4 -0
- agno/models/anthropic/claude.py +960 -496
- agno/models/aws/__init__.py +15 -0
- agno/models/aws/bedrock.py +686 -451
- agno/models/aws/claude.py +190 -183
- agno/models/azure/__init__.py +18 -1
- agno/models/azure/ai_foundry.py +489 -0
- agno/models/azure/openai_chat.py +89 -40
- agno/models/base.py +2477 -550
- agno/models/cerebras/__init__.py +12 -0
- agno/models/cerebras/cerebras.py +565 -0
- agno/models/cerebras/cerebras_openai.py +131 -0
- agno/models/cohere/__init__.py +4 -0
- agno/models/cohere/chat.py +306 -492
- agno/models/cometapi/__init__.py +5 -0
- agno/models/cometapi/cometapi.py +74 -0
- agno/models/dashscope/__init__.py +5 -0
- agno/models/dashscope/dashscope.py +90 -0
- agno/models/deepinfra/__init__.py +5 -0
- agno/models/deepinfra/deepinfra.py +45 -0
- agno/models/deepseek/__init__.py +4 -0
- agno/models/deepseek/deepseek.py +110 -9
- agno/models/fireworks/__init__.py +4 -0
- agno/models/fireworks/fireworks.py +19 -22
- agno/models/google/__init__.py +3 -7
- agno/models/google/gemini.py +1717 -662
- agno/models/google/utils.py +22 -0
- agno/models/groq/__init__.py +4 -0
- agno/models/groq/groq.py +391 -666
- agno/models/huggingface/__init__.py +4 -0
- agno/models/huggingface/huggingface.py +266 -538
- agno/models/ibm/__init__.py +5 -0
- agno/models/ibm/watsonx.py +432 -0
- agno/models/internlm/__init__.py +3 -0
- agno/models/internlm/internlm.py +20 -3
- agno/models/langdb/__init__.py +1 -0
- agno/models/langdb/langdb.py +60 -0
- agno/models/litellm/__init__.py +14 -0
- agno/models/litellm/chat.py +503 -0
- agno/models/litellm/litellm_openai.py +42 -0
- agno/models/llama_cpp/__init__.py +5 -0
- agno/models/llama_cpp/llama_cpp.py +22 -0
- agno/models/lmstudio/__init__.py +5 -0
- agno/models/lmstudio/lmstudio.py +25 -0
- agno/models/message.py +361 -39
- agno/models/meta/__init__.py +12 -0
- agno/models/meta/llama.py +502 -0
- agno/models/meta/llama_openai.py +79 -0
- agno/models/metrics.py +120 -0
- agno/models/mistral/__init__.py +4 -0
- agno/models/mistral/mistral.py +293 -393
- agno/models/nebius/__init__.py +3 -0
- agno/models/nebius/nebius.py +53 -0
- agno/models/nexus/__init__.py +3 -0
- agno/models/nexus/nexus.py +22 -0
- agno/models/nvidia/__init__.py +4 -0
- agno/models/nvidia/nvidia.py +22 -3
- agno/models/ollama/__init__.py +4 -2
- agno/models/ollama/chat.py +257 -492
- agno/models/openai/__init__.py +7 -0
- agno/models/openai/chat.py +725 -770
- agno/models/openai/like.py +16 -2
- agno/models/openai/responses.py +1121 -0
- agno/models/openrouter/__init__.py +4 -0
- agno/models/openrouter/openrouter.py +62 -5
- agno/models/perplexity/__init__.py +5 -0
- agno/models/perplexity/perplexity.py +203 -0
- agno/models/portkey/__init__.py +3 -0
- agno/models/portkey/portkey.py +82 -0
- agno/models/requesty/__init__.py +5 -0
- agno/models/requesty/requesty.py +69 -0
- agno/models/response.py +177 -7
- agno/models/sambanova/__init__.py +4 -0
- agno/models/sambanova/sambanova.py +23 -4
- agno/models/siliconflow/__init__.py +5 -0
- agno/models/siliconflow/siliconflow.py +42 -0
- agno/models/together/__init__.py +4 -0
- agno/models/together/together.py +21 -164
- agno/models/utils.py +266 -0
- agno/models/vercel/__init__.py +3 -0
- agno/models/vercel/v0.py +43 -0
- agno/models/vertexai/__init__.py +0 -1
- agno/models/vertexai/claude.py +190 -0
- agno/models/vllm/__init__.py +3 -0
- agno/models/vllm/vllm.py +83 -0
- agno/models/xai/__init__.py +2 -0
- agno/models/xai/xai.py +111 -7
- agno/os/__init__.py +3 -0
- agno/os/app.py +1027 -0
- agno/os/auth.py +244 -0
- agno/os/config.py +126 -0
- agno/os/interfaces/__init__.py +1 -0
- agno/os/interfaces/a2a/__init__.py +3 -0
- agno/os/interfaces/a2a/a2a.py +42 -0
- agno/os/interfaces/a2a/router.py +249 -0
- agno/os/interfaces/a2a/utils.py +924 -0
- agno/os/interfaces/agui/__init__.py +3 -0
- agno/os/interfaces/agui/agui.py +47 -0
- agno/os/interfaces/agui/router.py +147 -0
- agno/os/interfaces/agui/utils.py +574 -0
- agno/os/interfaces/base.py +25 -0
- agno/os/interfaces/slack/__init__.py +3 -0
- agno/os/interfaces/slack/router.py +148 -0
- agno/os/interfaces/slack/security.py +30 -0
- agno/os/interfaces/slack/slack.py +47 -0
- agno/os/interfaces/whatsapp/__init__.py +3 -0
- agno/os/interfaces/whatsapp/router.py +210 -0
- agno/os/interfaces/whatsapp/security.py +55 -0
- agno/os/interfaces/whatsapp/whatsapp.py +36 -0
- agno/os/mcp.py +293 -0
- agno/os/middleware/__init__.py +9 -0
- agno/os/middleware/jwt.py +797 -0
- agno/os/router.py +258 -0
- agno/os/routers/__init__.py +3 -0
- agno/os/routers/agents/__init__.py +3 -0
- agno/os/routers/agents/router.py +599 -0
- agno/os/routers/agents/schema.py +261 -0
- agno/os/routers/evals/__init__.py +3 -0
- agno/os/routers/evals/evals.py +450 -0
- agno/os/routers/evals/schemas.py +174 -0
- agno/os/routers/evals/utils.py +231 -0
- agno/os/routers/health.py +31 -0
- agno/os/routers/home.py +52 -0
- agno/os/routers/knowledge/__init__.py +3 -0
- agno/os/routers/knowledge/knowledge.py +1008 -0
- agno/os/routers/knowledge/schemas.py +178 -0
- agno/os/routers/memory/__init__.py +3 -0
- agno/os/routers/memory/memory.py +661 -0
- agno/os/routers/memory/schemas.py +88 -0
- agno/os/routers/metrics/__init__.py +3 -0
- agno/os/routers/metrics/metrics.py +190 -0
- agno/os/routers/metrics/schemas.py +47 -0
- agno/os/routers/session/__init__.py +3 -0
- agno/os/routers/session/session.py +997 -0
- agno/os/routers/teams/__init__.py +3 -0
- agno/os/routers/teams/router.py +512 -0
- agno/os/routers/teams/schema.py +257 -0
- agno/os/routers/traces/__init__.py +3 -0
- agno/os/routers/traces/schemas.py +414 -0
- agno/os/routers/traces/traces.py +499 -0
- agno/os/routers/workflows/__init__.py +3 -0
- agno/os/routers/workflows/router.py +624 -0
- agno/os/routers/workflows/schema.py +75 -0
- agno/os/schema.py +534 -0
- agno/os/scopes.py +469 -0
- agno/{playground → os}/settings.py +7 -15
- agno/os/utils.py +973 -0
- agno/reasoning/anthropic.py +80 -0
- agno/reasoning/azure_ai_foundry.py +67 -0
- agno/reasoning/deepseek.py +63 -0
- agno/reasoning/default.py +97 -0
- agno/reasoning/gemini.py +73 -0
- agno/reasoning/groq.py +71 -0
- agno/reasoning/helpers.py +24 -1
- agno/reasoning/ollama.py +67 -0
- agno/reasoning/openai.py +86 -0
- agno/reasoning/step.py +2 -1
- agno/reasoning/vertexai.py +76 -0
- agno/run/__init__.py +6 -0
- agno/run/agent.py +822 -0
- agno/run/base.py +247 -0
- agno/run/cancel.py +81 -0
- agno/run/requirement.py +181 -0
- agno/run/team.py +767 -0
- agno/run/workflow.py +708 -0
- agno/session/__init__.py +10 -0
- agno/session/agent.py +260 -0
- agno/session/summary.py +265 -0
- agno/session/team.py +342 -0
- agno/session/workflow.py +501 -0
- agno/table.py +10 -0
- agno/team/__init__.py +37 -0
- agno/team/team.py +9536 -0
- agno/tools/__init__.py +7 -0
- agno/tools/agentql.py +120 -0
- agno/tools/airflow.py +22 -12
- agno/tools/api.py +122 -0
- agno/tools/apify.py +276 -83
- agno/tools/{arxiv_toolkit.py → arxiv.py} +20 -12
- agno/tools/aws_lambda.py +28 -7
- agno/tools/aws_ses.py +66 -0
- agno/tools/baidusearch.py +11 -4
- agno/tools/bitbucket.py +292 -0
- agno/tools/brandfetch.py +213 -0
- agno/tools/bravesearch.py +106 -0
- agno/tools/brightdata.py +367 -0
- agno/tools/browserbase.py +209 -0
- agno/tools/calcom.py +32 -23
- agno/tools/calculator.py +24 -37
- agno/tools/cartesia.py +187 -0
- agno/tools/{clickup_tool.py → clickup.py} +17 -28
- agno/tools/confluence.py +91 -26
- agno/tools/crawl4ai.py +139 -43
- agno/tools/csv_toolkit.py +28 -22
- agno/tools/dalle.py +36 -22
- agno/tools/daytona.py +475 -0
- agno/tools/decorator.py +169 -14
- agno/tools/desi_vocal.py +23 -11
- agno/tools/discord.py +32 -29
- agno/tools/docker.py +716 -0
- agno/tools/duckdb.py +76 -81
- agno/tools/duckduckgo.py +43 -40
- agno/tools/e2b.py +703 -0
- agno/tools/eleven_labs.py +65 -54
- agno/tools/email.py +13 -5
- agno/tools/evm.py +129 -0
- agno/tools/exa.py +324 -42
- agno/tools/fal.py +39 -35
- agno/tools/file.py +196 -30
- agno/tools/file_generation.py +356 -0
- agno/tools/financial_datasets.py +288 -0
- agno/tools/firecrawl.py +108 -33
- agno/tools/function.py +960 -122
- agno/tools/giphy.py +34 -12
- agno/tools/github.py +1294 -97
- agno/tools/gmail.py +922 -0
- agno/tools/google_bigquery.py +117 -0
- agno/tools/google_drive.py +271 -0
- agno/tools/google_maps.py +253 -0
- agno/tools/googlecalendar.py +607 -107
- agno/tools/googlesheets.py +377 -0
- agno/tools/hackernews.py +20 -12
- agno/tools/jina.py +24 -14
- agno/tools/jira.py +48 -19
- agno/tools/knowledge.py +218 -0
- agno/tools/linear.py +82 -43
- agno/tools/linkup.py +58 -0
- agno/tools/local_file_system.py +15 -7
- agno/tools/lumalab.py +41 -26
- agno/tools/mcp/__init__.py +10 -0
- agno/tools/mcp/mcp.py +331 -0
- agno/tools/mcp/multi_mcp.py +347 -0
- agno/tools/mcp/params.py +24 -0
- agno/tools/mcp_toolbox.py +284 -0
- agno/tools/mem0.py +193 -0
- agno/tools/memory.py +419 -0
- agno/tools/mlx_transcribe.py +11 -9
- agno/tools/models/azure_openai.py +190 -0
- agno/tools/models/gemini.py +203 -0
- agno/tools/models/groq.py +158 -0
- agno/tools/models/morph.py +186 -0
- agno/tools/models/nebius.py +124 -0
- agno/tools/models_labs.py +163 -82
- agno/tools/moviepy_video.py +18 -13
- agno/tools/nano_banana.py +151 -0
- agno/tools/neo4j.py +134 -0
- agno/tools/newspaper.py +15 -4
- agno/tools/newspaper4k.py +19 -6
- agno/tools/notion.py +204 -0
- agno/tools/openai.py +181 -17
- agno/tools/openbb.py +27 -20
- agno/tools/opencv.py +321 -0
- agno/tools/openweather.py +233 -0
- agno/tools/oxylabs.py +385 -0
- agno/tools/pandas.py +25 -15
- agno/tools/parallel.py +314 -0
- agno/tools/postgres.py +238 -185
- agno/tools/pubmed.py +125 -13
- agno/tools/python.py +48 -35
- agno/tools/reasoning.py +283 -0
- agno/tools/reddit.py +207 -29
- agno/tools/redshift.py +406 -0
- agno/tools/replicate.py +69 -26
- agno/tools/resend.py +11 -6
- agno/tools/scrapegraph.py +179 -19
- agno/tools/searxng.py +23 -31
- agno/tools/serpapi.py +15 -10
- agno/tools/serper.py +255 -0
- agno/tools/shell.py +23 -12
- agno/tools/shopify.py +1519 -0
- agno/tools/slack.py +56 -14
- agno/tools/sleep.py +8 -6
- agno/tools/spider.py +35 -11
- agno/tools/spotify.py +919 -0
- agno/tools/sql.py +34 -19
- agno/tools/tavily.py +158 -8
- agno/tools/telegram.py +18 -8
- agno/tools/todoist.py +218 -0
- agno/tools/toolkit.py +134 -9
- agno/tools/trafilatura.py +388 -0
- agno/tools/trello.py +25 -28
- agno/tools/twilio.py +18 -9
- agno/tools/user_control_flow.py +78 -0
- agno/tools/valyu.py +228 -0
- agno/tools/visualization.py +467 -0
- agno/tools/webbrowser.py +28 -0
- agno/tools/webex.py +76 -0
- agno/tools/website.py +23 -19
- agno/tools/webtools.py +45 -0
- agno/tools/whatsapp.py +286 -0
- agno/tools/wikipedia.py +28 -19
- agno/tools/workflow.py +285 -0
- agno/tools/{twitter.py → x.py} +142 -46
- agno/tools/yfinance.py +41 -39
- agno/tools/youtube.py +34 -17
- agno/tools/zendesk.py +15 -5
- agno/tools/zep.py +454 -0
- agno/tools/zoom.py +86 -37
- agno/tracing/__init__.py +12 -0
- agno/tracing/exporter.py +157 -0
- agno/tracing/schemas.py +276 -0
- agno/tracing/setup.py +111 -0
- agno/utils/agent.py +938 -0
- agno/utils/audio.py +37 -1
- agno/utils/certs.py +27 -0
- agno/utils/code_execution.py +11 -0
- agno/utils/common.py +103 -20
- agno/utils/cryptography.py +22 -0
- agno/utils/dttm.py +33 -0
- agno/utils/events.py +700 -0
- agno/utils/functions.py +107 -37
- agno/utils/gemini.py +426 -0
- agno/utils/hooks.py +171 -0
- agno/utils/http.py +185 -0
- agno/utils/json_schema.py +159 -37
- agno/utils/knowledge.py +36 -0
- agno/utils/location.py +19 -0
- agno/utils/log.py +221 -8
- agno/utils/mcp.py +214 -0
- agno/utils/media.py +335 -14
- agno/utils/merge_dict.py +22 -1
- agno/utils/message.py +77 -2
- agno/utils/models/ai_foundry.py +50 -0
- agno/utils/models/claude.py +373 -0
- agno/utils/models/cohere.py +94 -0
- agno/utils/models/llama.py +85 -0
- agno/utils/models/mistral.py +100 -0
- agno/utils/models/openai_responses.py +140 -0
- agno/utils/models/schema_utils.py +153 -0
- agno/utils/models/watsonx.py +41 -0
- agno/utils/openai.py +257 -0
- agno/utils/pickle.py +1 -1
- agno/utils/pprint.py +124 -8
- agno/utils/print_response/agent.py +930 -0
- agno/utils/print_response/team.py +1914 -0
- agno/utils/print_response/workflow.py +1668 -0
- agno/utils/prompts.py +111 -0
- agno/utils/reasoning.py +108 -0
- agno/utils/response.py +163 -0
- agno/utils/serialize.py +32 -0
- agno/utils/shell.py +4 -4
- agno/utils/streamlit.py +487 -0
- agno/utils/string.py +204 -51
- agno/utils/team.py +139 -0
- agno/utils/timer.py +9 -2
- agno/utils/tokens.py +657 -0
- agno/utils/tools.py +19 -1
- agno/utils/whatsapp.py +305 -0
- agno/utils/yaml_io.py +3 -3
- agno/vectordb/__init__.py +2 -0
- agno/vectordb/base.py +87 -9
- agno/vectordb/cassandra/__init__.py +5 -1
- agno/vectordb/cassandra/cassandra.py +383 -27
- agno/vectordb/chroma/__init__.py +4 -0
- agno/vectordb/chroma/chromadb.py +748 -83
- agno/vectordb/clickhouse/__init__.py +7 -1
- agno/vectordb/clickhouse/clickhousedb.py +554 -53
- agno/vectordb/couchbase/__init__.py +3 -0
- agno/vectordb/couchbase/couchbase.py +1446 -0
- agno/vectordb/lancedb/__init__.py +5 -0
- agno/vectordb/lancedb/lance_db.py +730 -98
- agno/vectordb/langchaindb/__init__.py +5 -0
- agno/vectordb/langchaindb/langchaindb.py +163 -0
- agno/vectordb/lightrag/__init__.py +5 -0
- agno/vectordb/lightrag/lightrag.py +388 -0
- agno/vectordb/llamaindex/__init__.py +3 -0
- agno/vectordb/llamaindex/llamaindexdb.py +166 -0
- agno/vectordb/milvus/__init__.py +3 -0
- agno/vectordb/milvus/milvus.py +966 -78
- agno/vectordb/mongodb/__init__.py +9 -1
- agno/vectordb/mongodb/mongodb.py +1175 -172
- agno/vectordb/pgvector/__init__.py +8 -0
- agno/vectordb/pgvector/pgvector.py +599 -115
- agno/vectordb/pineconedb/__init__.py +5 -1
- agno/vectordb/pineconedb/pineconedb.py +406 -43
- agno/vectordb/qdrant/__init__.py +4 -0
- agno/vectordb/qdrant/qdrant.py +914 -61
- agno/vectordb/redis/__init__.py +9 -0
- agno/vectordb/redis/redisdb.py +682 -0
- agno/vectordb/singlestore/__init__.py +8 -1
- agno/vectordb/singlestore/singlestore.py +771 -0
- agno/vectordb/surrealdb/__init__.py +3 -0
- agno/vectordb/surrealdb/surrealdb.py +663 -0
- agno/vectordb/upstashdb/__init__.py +5 -0
- agno/vectordb/upstashdb/upstashdb.py +718 -0
- agno/vectordb/weaviate/__init__.py +8 -0
- agno/vectordb/weaviate/index.py +15 -0
- agno/vectordb/weaviate/weaviate.py +1009 -0
- agno/workflow/__init__.py +23 -1
- agno/workflow/agent.py +299 -0
- agno/workflow/condition.py +759 -0
- agno/workflow/loop.py +756 -0
- agno/workflow/parallel.py +853 -0
- agno/workflow/router.py +723 -0
- agno/workflow/step.py +1564 -0
- agno/workflow/steps.py +613 -0
- agno/workflow/types.py +556 -0
- agno/workflow/workflow.py +4327 -514
- agno-2.3.13.dist-info/METADATA +639 -0
- agno-2.3.13.dist-info/RECORD +613 -0
- {agno-0.1.2.dist-info → agno-2.3.13.dist-info}/WHEEL +1 -1
- agno-2.3.13.dist-info/licenses/LICENSE +201 -0
- agno/api/playground.py +0 -91
- agno/api/schemas/playground.py +0 -22
- agno/api/schemas/user.py +0 -22
- agno/api/schemas/workspace.py +0 -46
- agno/api/user.py +0 -160
- agno/api/workspace.py +0 -151
- agno/cli/auth_server.py +0 -118
- agno/cli/config.py +0 -275
- agno/cli/console.py +0 -88
- agno/cli/credentials.py +0 -23
- agno/cli/entrypoint.py +0 -571
- agno/cli/operator.py +0 -355
- agno/cli/settings.py +0 -85
- agno/cli/ws/ws_cli.py +0 -817
- agno/constants.py +0 -13
- agno/document/__init__.py +0 -1
- agno/document/chunking/semantic.py +0 -47
- agno/document/chunking/strategy.py +0 -31
- agno/document/reader/__init__.py +0 -1
- agno/document/reader/arxiv_reader.py +0 -41
- agno/document/reader/base.py +0 -22
- agno/document/reader/csv_reader.py +0 -84
- agno/document/reader/docx_reader.py +0 -46
- agno/document/reader/firecrawl_reader.py +0 -99
- agno/document/reader/json_reader.py +0 -43
- agno/document/reader/pdf_reader.py +0 -219
- agno/document/reader/s3/pdf_reader.py +0 -46
- agno/document/reader/s3/text_reader.py +0 -51
- agno/document/reader/text_reader.py +0 -41
- agno/document/reader/website_reader.py +0 -175
- agno/document/reader/youtube_reader.py +0 -50
- agno/embedder/__init__.py +0 -1
- agno/embedder/azure_openai.py +0 -86
- agno/embedder/cohere.py +0 -72
- agno/embedder/fastembed.py +0 -37
- agno/embedder/google.py +0 -73
- agno/embedder/huggingface.py +0 -54
- agno/embedder/mistral.py +0 -80
- agno/embedder/ollama.py +0 -57
- agno/embedder/openai.py +0 -74
- agno/embedder/sentence_transformer.py +0 -38
- agno/embedder/voyageai.py +0 -64
- agno/eval/perf.py +0 -201
- agno/file/__init__.py +0 -1
- agno/file/file.py +0 -16
- agno/file/local/csv.py +0 -32
- agno/file/local/txt.py +0 -19
- agno/infra/app.py +0 -240
- agno/infra/base.py +0 -144
- agno/infra/context.py +0 -20
- agno/infra/db_app.py +0 -52
- agno/infra/resource.py +0 -205
- agno/infra/resources.py +0 -55
- agno/knowledge/agent.py +0 -230
- agno/knowledge/arxiv.py +0 -22
- agno/knowledge/combined.py +0 -22
- agno/knowledge/csv.py +0 -28
- agno/knowledge/csv_url.py +0 -19
- agno/knowledge/document.py +0 -20
- agno/knowledge/docx.py +0 -30
- agno/knowledge/json.py +0 -28
- agno/knowledge/langchain.py +0 -71
- agno/knowledge/llamaindex.py +0 -66
- agno/knowledge/pdf.py +0 -28
- agno/knowledge/pdf_url.py +0 -26
- agno/knowledge/s3/base.py +0 -60
- agno/knowledge/s3/pdf.py +0 -21
- agno/knowledge/s3/text.py +0 -23
- agno/knowledge/text.py +0 -30
- agno/knowledge/website.py +0 -88
- agno/knowledge/wikipedia.py +0 -31
- agno/knowledge/youtube.py +0 -22
- agno/memory/agent.py +0 -392
- agno/memory/classifier.py +0 -104
- agno/memory/db/__init__.py +0 -1
- agno/memory/db/base.py +0 -42
- agno/memory/db/mongodb.py +0 -189
- agno/memory/db/postgres.py +0 -203
- agno/memory/db/sqlite.py +0 -193
- agno/memory/memory.py +0 -15
- agno/memory/row.py +0 -36
- agno/memory/summarizer.py +0 -192
- agno/memory/summary.py +0 -19
- agno/memory/workflow.py +0 -38
- agno/models/google/gemini_openai.py +0 -26
- agno/models/ollama/hermes.py +0 -221
- agno/models/ollama/tools.py +0 -362
- agno/models/vertexai/gemini.py +0 -595
- agno/playground/__init__.py +0 -3
- agno/playground/async_router.py +0 -421
- agno/playground/deploy.py +0 -249
- agno/playground/operator.py +0 -92
- agno/playground/playground.py +0 -91
- agno/playground/schemas.py +0 -76
- agno/playground/serve.py +0 -55
- agno/playground/sync_router.py +0 -405
- agno/reasoning/agent.py +0 -68
- agno/run/response.py +0 -112
- agno/storage/agent/__init__.py +0 -0
- agno/storage/agent/base.py +0 -38
- agno/storage/agent/dynamodb.py +0 -350
- agno/storage/agent/json.py +0 -92
- agno/storage/agent/mongodb.py +0 -228
- agno/storage/agent/postgres.py +0 -367
- agno/storage/agent/session.py +0 -79
- agno/storage/agent/singlestore.py +0 -303
- agno/storage/agent/sqlite.py +0 -357
- agno/storage/agent/yaml.py +0 -93
- agno/storage/workflow/__init__.py +0 -0
- agno/storage/workflow/base.py +0 -40
- agno/storage/workflow/mongodb.py +0 -233
- agno/storage/workflow/postgres.py +0 -366
- agno/storage/workflow/session.py +0 -60
- agno/storage/workflow/sqlite.py +0 -359
- agno/tools/googlesearch.py +0 -88
- agno/utils/defaults.py +0 -57
- agno/utils/filesystem.py +0 -39
- agno/utils/git.py +0 -52
- agno/utils/json_io.py +0 -30
- agno/utils/load_env.py +0 -19
- agno/utils/py_io.py +0 -19
- agno/utils/pyproject.py +0 -18
- agno/utils/resource_filter.py +0 -31
- agno/vectordb/singlestore/s2vectordb.py +0 -390
- agno/vectordb/singlestore/s2vectordb2.py +0 -355
- agno/workspace/__init__.py +0 -0
- agno/workspace/config.py +0 -325
- agno/workspace/enums.py +0 -6
- agno/workspace/helpers.py +0 -48
- agno/workspace/operator.py +0 -758
- agno/workspace/settings.py +0 -63
- agno-0.1.2.dist-info/LICENSE +0 -375
- agno-0.1.2.dist-info/METADATA +0 -502
- agno-0.1.2.dist-info/RECORD +0 -352
- agno-0.1.2.dist-info/entry_points.txt +0 -3
- /agno/{cli → db/migrations}/__init__.py +0 -0
- /agno/{cli/ws → db/migrations/versions}/__init__.py +0 -0
- /agno/{document/chunking/__init__.py → db/schemas/metrics.py} +0 -0
- /agno/{document/reader/s3 → integrations}/__init__.py +0 -0
- /agno/{file/local → knowledge/chunking}/__init__.py +0 -0
- /agno/{infra → knowledge/remote_content}/__init__.py +0 -0
- /agno/{knowledge/s3 → tools/models}/__init__.py +0 -0
- /agno/{reranker → utils/models}/__init__.py +0 -0
- /agno/{storage → utils/print_response}/__init__.py +0 -0
- {agno-0.1.2.dist-info → agno-2.3.13.dist-info}/top_level.txt +0 -0
|
@@ -1,15 +1,21 @@
|
|
|
1
|
+
import asyncio
|
|
1
2
|
from hashlib import md5
|
|
2
3
|
from math import sqrt
|
|
3
4
|
from typing import Any, Dict, List, Optional, Union, cast
|
|
4
5
|
|
|
6
|
+
from agno.utils.string import generate_id
|
|
7
|
+
|
|
5
8
|
try:
|
|
9
|
+
from sqlalchemy import and_, not_, or_, update
|
|
6
10
|
from sqlalchemy.dialects import postgresql
|
|
7
11
|
from sqlalchemy.engine import Engine, create_engine
|
|
8
12
|
from sqlalchemy.inspection import inspect
|
|
9
13
|
from sqlalchemy.orm import Session, scoped_session, sessionmaker
|
|
10
14
|
from sqlalchemy.schema import Column, Index, MetaData, Table
|
|
15
|
+
from sqlalchemy.sql.elements import ColumnElement
|
|
11
16
|
from sqlalchemy.sql.expression import bindparam, desc, func, select, text
|
|
12
|
-
from sqlalchemy.types import DateTime, String
|
|
17
|
+
from sqlalchemy.types import DateTime, Integer, String
|
|
18
|
+
|
|
13
19
|
except ImportError:
|
|
14
20
|
raise ImportError("`sqlalchemy` not installed. Please install using `pip install sqlalchemy psycopg`")
|
|
15
21
|
|
|
@@ -18,10 +24,11 @@ try:
|
|
|
18
24
|
except ImportError:
|
|
19
25
|
raise ImportError("`pgvector` not installed. Please install using `pip install pgvector`")
|
|
20
26
|
|
|
21
|
-
from agno.
|
|
22
|
-
from agno.
|
|
23
|
-
from agno.
|
|
24
|
-
from agno.
|
|
27
|
+
from agno.filters import FilterExpr
|
|
28
|
+
from agno.knowledge.document import Document
|
|
29
|
+
from agno.knowledge.embedder import Embedder
|
|
30
|
+
from agno.knowledge.reranker.base import Reranker
|
|
31
|
+
from agno.utils.log import log_debug, log_error, log_info, log_warning
|
|
25
32
|
from agno.vectordb.base import VectorDb
|
|
26
33
|
from agno.vectordb.distance import Distance
|
|
27
34
|
from agno.vectordb.pgvector.index import HNSW, Ivfflat
|
|
@@ -40,6 +47,9 @@ class PgVector(VectorDb):
|
|
|
40
47
|
self,
|
|
41
48
|
table_name: str,
|
|
42
49
|
schema: str = "ai",
|
|
50
|
+
name: Optional[str] = None,
|
|
51
|
+
description: Optional[str] = None,
|
|
52
|
+
id: Optional[str] = None,
|
|
43
53
|
db_url: Optional[str] = None,
|
|
44
54
|
db_engine: Optional[Engine] = None,
|
|
45
55
|
embedder: Optional[Embedder] = None,
|
|
@@ -52,6 +62,7 @@ class PgVector(VectorDb):
|
|
|
52
62
|
schema_version: int = 1,
|
|
53
63
|
auto_upgrade_schema: bool = False,
|
|
54
64
|
reranker: Optional[Reranker] = None,
|
|
65
|
+
create_schema: bool = True,
|
|
55
66
|
):
|
|
56
67
|
"""
|
|
57
68
|
Initialize the PgVector instance.
|
|
@@ -59,6 +70,8 @@ class PgVector(VectorDb):
|
|
|
59
70
|
Args:
|
|
60
71
|
table_name (str): Name of the table to store vector data.
|
|
61
72
|
schema (str): Database schema name.
|
|
73
|
+
name (Optional[str]): Name of the vector database.
|
|
74
|
+
description (Optional[str]): Description of the vector database.
|
|
62
75
|
db_url (Optional[str]): Database connection URL.
|
|
63
76
|
db_engine (Optional[Engine]): SQLAlchemy database engine.
|
|
64
77
|
embedder (Optional[Embedder]): Embedder instance for creating embeddings.
|
|
@@ -70,6 +83,8 @@ class PgVector(VectorDb):
|
|
|
70
83
|
content_language (str): Language for full-text search.
|
|
71
84
|
schema_version (int): Version of the database schema.
|
|
72
85
|
auto_upgrade_schema (bool): Automatically upgrade schema if True.
|
|
86
|
+
create_schema (bool): Whether to automatically create the database schema if it doesn't exist.
|
|
87
|
+
Set to False if schema is managed externally (e.g., via migrations). Defaults to True.
|
|
73
88
|
"""
|
|
74
89
|
if not table_name:
|
|
75
90
|
raise ValueError("Table name must be provided.")
|
|
@@ -77,13 +92,22 @@ class PgVector(VectorDb):
|
|
|
77
92
|
if db_engine is None and db_url is None:
|
|
78
93
|
raise ValueError("Either 'db_url' or 'db_engine' must be provided.")
|
|
79
94
|
|
|
95
|
+
if id is None:
|
|
96
|
+
base_seed = db_url or str(db_engine.url) # type: ignore
|
|
97
|
+
schema_suffix = table_name if table_name is not None else "ai"
|
|
98
|
+
seed = f"{base_seed}#{schema_suffix}"
|
|
99
|
+
id = generate_id(seed)
|
|
100
|
+
|
|
101
|
+
# Initialize base class with name and description
|
|
102
|
+
super().__init__(id=id, name=name, description=description)
|
|
103
|
+
|
|
80
104
|
if db_engine is None:
|
|
81
105
|
if db_url is None:
|
|
82
106
|
raise ValueError("Must provide 'db_url' if 'db_engine' is None.")
|
|
83
107
|
try:
|
|
84
108
|
db_engine = create_engine(db_url)
|
|
85
109
|
except Exception as e:
|
|
86
|
-
|
|
110
|
+
log_error(f"Failed to create engine from 'db_url': {e}")
|
|
87
111
|
raise
|
|
88
112
|
|
|
89
113
|
# Database settings
|
|
@@ -95,9 +119,10 @@ class PgVector(VectorDb):
|
|
|
95
119
|
|
|
96
120
|
# Embedder for embedding the document contents
|
|
97
121
|
if embedder is None:
|
|
98
|
-
from agno.embedder.openai import OpenAIEmbedder
|
|
122
|
+
from agno.knowledge.embedder.openai import OpenAIEmbedder
|
|
99
123
|
|
|
100
124
|
embedder = OpenAIEmbedder()
|
|
125
|
+
log_info("Embedder not provided, using OpenAIEmbedder as default.")
|
|
101
126
|
self.embedder: Embedder = embedder
|
|
102
127
|
self.dimensions: Optional[int] = self.embedder.dimensions
|
|
103
128
|
|
|
@@ -125,11 +150,14 @@ class PgVector(VectorDb):
|
|
|
125
150
|
# Reranker instance
|
|
126
151
|
self.reranker: Optional[Reranker] = reranker
|
|
127
152
|
|
|
153
|
+
# Schema creation flag
|
|
154
|
+
self.create_schema: bool = create_schema
|
|
155
|
+
|
|
128
156
|
# Database session
|
|
129
157
|
self.Session: scoped_session = scoped_session(sessionmaker(bind=self.db_engine))
|
|
130
158
|
# Database table
|
|
131
159
|
self.table: Table = self.get_table()
|
|
132
|
-
|
|
160
|
+
log_debug(f"Initialized PgVector with table '{self.schema}.{self.table_name}'")
|
|
133
161
|
|
|
134
162
|
def get_table_v1(self) -> Table:
|
|
135
163
|
"""
|
|
@@ -153,6 +181,7 @@ class PgVector(VectorDb):
|
|
|
153
181
|
Column("created_at", DateTime(timezone=True), server_default=func.now()),
|
|
154
182
|
Column("updated_at", DateTime(timezone=True), onupdate=func.now()),
|
|
155
183
|
Column("content_hash", String),
|
|
184
|
+
Column("content_id", String),
|
|
156
185
|
extend_existing=True,
|
|
157
186
|
)
|
|
158
187
|
|
|
@@ -160,7 +189,7 @@ class PgVector(VectorDb):
|
|
|
160
189
|
Index(f"idx_{self.table_name}_id", table.c.id)
|
|
161
190
|
Index(f"idx_{self.table_name}_name", table.c.name)
|
|
162
191
|
Index(f"idx_{self.table_name}_content_hash", table.c.content_hash)
|
|
163
|
-
|
|
192
|
+
Index(f"idx_{self.table_name}_content_id", table.c.content_id)
|
|
164
193
|
return table
|
|
165
194
|
|
|
166
195
|
def get_table(self) -> Table:
|
|
@@ -182,11 +211,11 @@ class PgVector(VectorDb):
|
|
|
182
211
|
Returns:
|
|
183
212
|
bool: True if the table exists, False otherwise.
|
|
184
213
|
"""
|
|
185
|
-
|
|
214
|
+
log_debug(f"Checking if table '{self.table.fullname}' exists.")
|
|
186
215
|
try:
|
|
187
216
|
return inspect(self.db_engine).has_table(self.table_name, schema=self.schema)
|
|
188
217
|
except Exception as e:
|
|
189
|
-
|
|
218
|
+
log_error(f"Error checking if table exists: {e}")
|
|
190
219
|
return False
|
|
191
220
|
|
|
192
221
|
def create(self) -> None:
|
|
@@ -195,14 +224,18 @@ class PgVector(VectorDb):
|
|
|
195
224
|
"""
|
|
196
225
|
if not self.table_exists():
|
|
197
226
|
with self.Session() as sess, sess.begin():
|
|
198
|
-
|
|
227
|
+
log_debug("Creating extension: vector")
|
|
199
228
|
sess.execute(text("CREATE EXTENSION IF NOT EXISTS vector;"))
|
|
200
|
-
if self.schema is not None:
|
|
201
|
-
|
|
229
|
+
if self.create_schema and self.schema is not None:
|
|
230
|
+
log_debug(f"Creating schema: {self.schema}")
|
|
202
231
|
sess.execute(text(f"CREATE SCHEMA IF NOT EXISTS {self.schema};"))
|
|
203
|
-
|
|
232
|
+
log_debug(f"Creating table: {self.table_name}")
|
|
204
233
|
self.table.create(self.db_engine)
|
|
205
234
|
|
|
235
|
+
async def async_create(self) -> None:
|
|
236
|
+
"""Create the table asynchronously by running in a thread."""
|
|
237
|
+
await asyncio.to_thread(self.create)
|
|
238
|
+
|
|
206
239
|
def _record_exists(self, column, value) -> bool:
|
|
207
240
|
"""
|
|
208
241
|
Check if a record with the given column value exists in the table.
|
|
@@ -220,23 +253,9 @@ class PgVector(VectorDb):
|
|
|
220
253
|
result = sess.execute(stmt).first()
|
|
221
254
|
return result is not None
|
|
222
255
|
except Exception as e:
|
|
223
|
-
|
|
256
|
+
log_error(f"Error checking if record exists: {e}")
|
|
224
257
|
return False
|
|
225
258
|
|
|
226
|
-
def doc_exists(self, document: Document) -> bool:
|
|
227
|
-
"""
|
|
228
|
-
Check if a document with the same content hash exists in the table.
|
|
229
|
-
|
|
230
|
-
Args:
|
|
231
|
-
document (Document): The document to check.
|
|
232
|
-
|
|
233
|
-
Returns:
|
|
234
|
-
bool: True if the document exists, False otherwise.
|
|
235
|
-
"""
|
|
236
|
-
cleaned_content = document.content.replace("\x00", "\ufffd")
|
|
237
|
-
content_hash = md5(cleaned_content.encode()).hexdigest()
|
|
238
|
-
return self._record_exists(self.table.c.content_hash, content_hash)
|
|
239
|
-
|
|
240
259
|
def name_exists(self, name: str) -> bool:
|
|
241
260
|
"""
|
|
242
261
|
Check if a document with the given name exists in the table.
|
|
@@ -249,6 +268,10 @@ class PgVector(VectorDb):
|
|
|
249
268
|
"""
|
|
250
269
|
return self._record_exists(self.table.c.name, name)
|
|
251
270
|
|
|
271
|
+
async def async_name_exists(self, name: str) -> bool:
|
|
272
|
+
"""Check if name exists asynchronously by running in a thread."""
|
|
273
|
+
return await asyncio.to_thread(self.name_exists, name)
|
|
274
|
+
|
|
252
275
|
def id_exists(self, id: str) -> bool:
|
|
253
276
|
"""
|
|
254
277
|
Check if a document with the given ID exists in the table.
|
|
@@ -261,6 +284,12 @@ class PgVector(VectorDb):
|
|
|
261
284
|
"""
|
|
262
285
|
return self._record_exists(self.table.c.id, id)
|
|
263
286
|
|
|
287
|
+
def content_hash_exists(self, content_hash: str) -> bool:
|
|
288
|
+
"""
|
|
289
|
+
Check if a document with the given content hash exists in the table.
|
|
290
|
+
"""
|
|
291
|
+
return self._record_exists(self.table.c.content_hash, content_hash)
|
|
292
|
+
|
|
264
293
|
def _clean_content(self, content: str) -> str:
|
|
265
294
|
"""
|
|
266
295
|
Clean the content by replacing null characters.
|
|
@@ -275,6 +304,7 @@ class PgVector(VectorDb):
|
|
|
275
304
|
|
|
276
305
|
def insert(
|
|
277
306
|
self,
|
|
307
|
+
content_hash: str,
|
|
278
308
|
documents: List[Document],
|
|
279
309
|
filters: Optional[Dict[str, Any]] = None,
|
|
280
310
|
batch_size: int = 100,
|
|
@@ -283,6 +313,7 @@ class PgVector(VectorDb):
|
|
|
283
313
|
Insert documents into the database.
|
|
284
314
|
|
|
285
315
|
Args:
|
|
316
|
+
content_hash (str): The content hash to insert.
|
|
286
317
|
documents (List[Document]): List of documents to insert.
|
|
287
318
|
filters (Optional[Dict[str, Any]]): Filters to apply to the documents.
|
|
288
319
|
batch_size (int): Number of documents to insert in each batch.
|
|
@@ -291,18 +322,62 @@ class PgVector(VectorDb):
|
|
|
291
322
|
with self.Session() as sess:
|
|
292
323
|
for i in range(0, len(documents), batch_size):
|
|
293
324
|
batch_docs = documents[i : i + batch_size]
|
|
294
|
-
|
|
325
|
+
log_debug(f"Processing batch starting at index {i}, size: {len(batch_docs)}")
|
|
295
326
|
try:
|
|
296
327
|
# Prepare documents for insertion
|
|
297
328
|
batch_records = []
|
|
298
329
|
for doc in batch_docs:
|
|
299
330
|
try:
|
|
300
|
-
|
|
331
|
+
batch_records.append(self._get_document_record(doc, filters, content_hash))
|
|
332
|
+
except Exception as e:
|
|
333
|
+
log_error(f"Error processing document '{doc.name}': {e}")
|
|
334
|
+
|
|
335
|
+
# Insert the batch of records
|
|
336
|
+
insert_stmt = postgresql.insert(self.table)
|
|
337
|
+
sess.execute(insert_stmt, batch_records)
|
|
338
|
+
sess.commit() # Commit batch independently
|
|
339
|
+
log_info(f"Inserted batch of {len(batch_records)} documents.")
|
|
340
|
+
except Exception as e:
|
|
341
|
+
log_error(f"Error with batch starting at index {i}: {e}")
|
|
342
|
+
sess.rollback() # Rollback the current batch if there's an error
|
|
343
|
+
raise
|
|
344
|
+
except Exception as e:
|
|
345
|
+
log_error(f"Error inserting documents: {e}")
|
|
346
|
+
raise
|
|
347
|
+
|
|
348
|
+
async def async_insert(
|
|
349
|
+
self,
|
|
350
|
+
content_hash: str,
|
|
351
|
+
documents: List[Document],
|
|
352
|
+
filters: Optional[Dict[str, Any]] = None,
|
|
353
|
+
batch_size: int = 100,
|
|
354
|
+
) -> None:
|
|
355
|
+
"""Insert documents asynchronously with parallel embedding."""
|
|
356
|
+
try:
|
|
357
|
+
with self.Session() as sess:
|
|
358
|
+
for i in range(0, len(documents), batch_size):
|
|
359
|
+
batch_docs = documents[i : i + batch_size]
|
|
360
|
+
log_debug(f"Processing batch starting at index {i}, size: {len(batch_docs)}")
|
|
361
|
+
try:
|
|
362
|
+
# Embed all documents in the batch
|
|
363
|
+
await self._async_embed_documents(batch_docs)
|
|
364
|
+
|
|
365
|
+
# Prepare documents for insertion
|
|
366
|
+
batch_records = []
|
|
367
|
+
for doc in batch_docs:
|
|
368
|
+
try:
|
|
301
369
|
cleaned_content = self._clean_content(doc.content)
|
|
302
|
-
content_hash
|
|
303
|
-
|
|
370
|
+
# Include content_hash in ID to ensure uniqueness across different content hashes
|
|
371
|
+
# This allows the same URL/content to be inserted with different descriptions
|
|
372
|
+
base_id = doc.id or md5(cleaned_content.encode()).hexdigest()
|
|
373
|
+
record_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
|
|
374
|
+
|
|
375
|
+
meta_data = doc.meta_data or {}
|
|
376
|
+
if filters:
|
|
377
|
+
meta_data.update(filters)
|
|
378
|
+
|
|
304
379
|
record = {
|
|
305
|
-
"id":
|
|
380
|
+
"id": record_id,
|
|
306
381
|
"name": doc.name,
|
|
307
382
|
"meta_data": doc.meta_data,
|
|
308
383
|
"filters": filters,
|
|
@@ -310,22 +385,24 @@ class PgVector(VectorDb):
|
|
|
310
385
|
"embedding": doc.embedding,
|
|
311
386
|
"usage": doc.usage,
|
|
312
387
|
"content_hash": content_hash,
|
|
388
|
+
"content_id": doc.content_id,
|
|
313
389
|
}
|
|
314
390
|
batch_records.append(record)
|
|
315
391
|
except Exception as e:
|
|
316
|
-
|
|
392
|
+
log_error(f"Error processing document '{doc.name}': {e}")
|
|
317
393
|
|
|
318
394
|
# Insert the batch of records
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
395
|
+
if batch_records:
|
|
396
|
+
insert_stmt = postgresql.insert(self.table)
|
|
397
|
+
sess.execute(insert_stmt, batch_records)
|
|
398
|
+
sess.commit() # Commit batch independently
|
|
399
|
+
log_info(f"Inserted batch of {len(batch_records)} documents.")
|
|
323
400
|
except Exception as e:
|
|
324
|
-
|
|
401
|
+
log_error(f"Error with batch starting at index {i}: {e}")
|
|
325
402
|
sess.rollback() # Rollback the current batch if there's an error
|
|
326
403
|
raise
|
|
327
404
|
except Exception as e:
|
|
328
|
-
|
|
405
|
+
log_error(f"Error inserting documents: {e}")
|
|
329
406
|
raise
|
|
330
407
|
|
|
331
408
|
def upsert_available(self) -> bool:
|
|
@@ -339,6 +416,27 @@ class PgVector(VectorDb):
|
|
|
339
416
|
|
|
340
417
|
def upsert(
|
|
341
418
|
self,
|
|
419
|
+
content_hash: str,
|
|
420
|
+
documents: List[Document],
|
|
421
|
+
filters: Optional[Dict[str, Any]] = None,
|
|
422
|
+
batch_size: int = 100,
|
|
423
|
+
) -> None:
|
|
424
|
+
"""
|
|
425
|
+
Upsert documents by content hash.
|
|
426
|
+
First delete all documents with the same content hash.
|
|
427
|
+
Then upsert the new documents.
|
|
428
|
+
"""
|
|
429
|
+
try:
|
|
430
|
+
if self.content_hash_exists(content_hash):
|
|
431
|
+
self._delete_by_content_hash(content_hash)
|
|
432
|
+
self._upsert(content_hash, documents, filters, batch_size)
|
|
433
|
+
except Exception as e:
|
|
434
|
+
log_error(f"Error upserting documents by content hash: {e}")
|
|
435
|
+
raise
|
|
436
|
+
|
|
437
|
+
def _upsert(
|
|
438
|
+
self,
|
|
439
|
+
content_hash: str,
|
|
342
440
|
documents: List[Document],
|
|
343
441
|
filters: Optional[Dict[str, Any]] = None,
|
|
344
442
|
batch_size: int = 100,
|
|
@@ -355,18 +453,209 @@ class PgVector(VectorDb):
|
|
|
355
453
|
with self.Session() as sess:
|
|
356
454
|
for i in range(0, len(documents), batch_size):
|
|
357
455
|
batch_docs = documents[i : i + batch_size]
|
|
358
|
-
|
|
456
|
+
log_info(f"Processing batch starting at index {i}, size: {len(batch_docs)}")
|
|
359
457
|
try:
|
|
360
458
|
# Prepare documents for upserting
|
|
361
|
-
|
|
459
|
+
batch_records_dict: Dict[str, Dict[str, Any]] = {} # Use dict to deduplicate by ID
|
|
362
460
|
for doc in batch_docs:
|
|
363
461
|
try:
|
|
364
|
-
|
|
462
|
+
record = self._get_document_record(doc, filters, content_hash)
|
|
463
|
+
# Use the generated record ID (which includes content_hash) for deduplication
|
|
464
|
+
batch_records_dict[record["id"]] = record
|
|
465
|
+
except Exception as e:
|
|
466
|
+
log_error(f"Error processing document '{doc.name}': {e}")
|
|
467
|
+
|
|
468
|
+
# Convert dict to list for upsert
|
|
469
|
+
batch_records = list(batch_records_dict.values())
|
|
470
|
+
if not batch_records:
|
|
471
|
+
log_info("No valid records to upsert in this batch.")
|
|
472
|
+
continue
|
|
473
|
+
|
|
474
|
+
# Upsert the batch of records
|
|
475
|
+
insert_stmt = postgresql.insert(self.table).values(batch_records)
|
|
476
|
+
upsert_stmt = insert_stmt.on_conflict_do_update(
|
|
477
|
+
index_elements=["id"],
|
|
478
|
+
set_={
|
|
479
|
+
"name": insert_stmt.excluded.name,
|
|
480
|
+
"meta_data": insert_stmt.excluded.meta_data,
|
|
481
|
+
"filters": insert_stmt.excluded.filters,
|
|
482
|
+
"content": insert_stmt.excluded.content,
|
|
483
|
+
"embedding": insert_stmt.excluded.embedding,
|
|
484
|
+
"usage": insert_stmt.excluded.usage,
|
|
485
|
+
"content_hash": insert_stmt.excluded.content_hash,
|
|
486
|
+
"content_id": insert_stmt.excluded.content_id,
|
|
487
|
+
},
|
|
488
|
+
)
|
|
489
|
+
sess.execute(upsert_stmt)
|
|
490
|
+
sess.commit() # Commit batch independently
|
|
491
|
+
log_info(f"Upserted batch of {len(batch_records)} documents.")
|
|
492
|
+
except Exception as e:
|
|
493
|
+
log_error(f"Error with batch starting at index {i}: {e}")
|
|
494
|
+
sess.rollback() # Rollback the current batch if there's an error
|
|
495
|
+
raise
|
|
496
|
+
except Exception as e:
|
|
497
|
+
log_error(f"Error upserting documents: {e}")
|
|
498
|
+
raise
|
|
499
|
+
|
|
500
|
+
def _get_document_record(
|
|
501
|
+
self, doc: Document, filters: Optional[Dict[str, Any]] = None, content_hash: str = ""
|
|
502
|
+
) -> Dict[str, Any]:
|
|
503
|
+
doc.embed(embedder=self.embedder)
|
|
504
|
+
cleaned_content = self._clean_content(doc.content)
|
|
505
|
+
# Include content_hash in ID to ensure uniqueness across different content hashes
|
|
506
|
+
# This allows the same URL/content to be inserted with different descriptions
|
|
507
|
+
base_id = doc.id or md5(cleaned_content.encode()).hexdigest()
|
|
508
|
+
record_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
|
|
509
|
+
|
|
510
|
+
meta_data = doc.meta_data or {}
|
|
511
|
+
if filters:
|
|
512
|
+
meta_data.update(filters)
|
|
513
|
+
|
|
514
|
+
return {
|
|
515
|
+
"id": record_id,
|
|
516
|
+
"name": doc.name,
|
|
517
|
+
"meta_data": doc.meta_data,
|
|
518
|
+
"filters": filters,
|
|
519
|
+
"content": cleaned_content,
|
|
520
|
+
"embedding": doc.embedding,
|
|
521
|
+
"usage": doc.usage,
|
|
522
|
+
"content_hash": content_hash,
|
|
523
|
+
"content_id": doc.content_id,
|
|
524
|
+
}
|
|
525
|
+
|
|
526
|
+
async def _async_embed_documents(self, batch_docs: List[Document]) -> None:
|
|
527
|
+
"""
|
|
528
|
+
Embed a batch of documents using either batch embedding or individual embedding.
|
|
529
|
+
|
|
530
|
+
Args:
|
|
531
|
+
batch_docs: List of documents to embed
|
|
532
|
+
"""
|
|
533
|
+
if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
|
|
534
|
+
# Use batch embedding when enabled and supported
|
|
535
|
+
try:
|
|
536
|
+
# Extract content from all documents
|
|
537
|
+
doc_contents = [doc.content for doc in batch_docs]
|
|
538
|
+
|
|
539
|
+
# Get batch embeddings and usage
|
|
540
|
+
embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
|
|
541
|
+
|
|
542
|
+
# Process documents with pre-computed embeddings
|
|
543
|
+
for j, doc in enumerate(batch_docs):
|
|
544
|
+
try:
|
|
545
|
+
if j < len(embeddings):
|
|
546
|
+
doc.embedding = embeddings[j]
|
|
547
|
+
doc.usage = usages[j] if j < len(usages) else None
|
|
548
|
+
except Exception as e:
|
|
549
|
+
log_error(f"Error assigning batch embedding to document '{doc.name}': {e}")
|
|
550
|
+
|
|
551
|
+
except Exception as e:
|
|
552
|
+
# Check if this is a rate limit error - don't fall back as it would make things worse
|
|
553
|
+
error_str = str(e).lower()
|
|
554
|
+
is_rate_limit = any(
|
|
555
|
+
phrase in error_str
|
|
556
|
+
for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
|
|
557
|
+
)
|
|
558
|
+
|
|
559
|
+
if is_rate_limit:
|
|
560
|
+
log_error(f"Rate limit detected during batch embedding. {e}")
|
|
561
|
+
raise e
|
|
562
|
+
else:
|
|
563
|
+
log_warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
|
|
564
|
+
# Fall back to individual embedding
|
|
565
|
+
embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in batch_docs]
|
|
566
|
+
results = await asyncio.gather(*embed_tasks, return_exceptions=True)
|
|
567
|
+
|
|
568
|
+
# Check for exceptions and handle them
|
|
569
|
+
for i, result in enumerate(results):
|
|
570
|
+
if isinstance(result, Exception):
|
|
571
|
+
error_msg = str(result)
|
|
572
|
+
# If it's an event loop closure error, log it but don't fail
|
|
573
|
+
if "Event loop is closed" in error_msg or "RuntimeError" in type(result).__name__:
|
|
574
|
+
log_warning(
|
|
575
|
+
f"Event loop closure during embedding for document {i}, but operation may have succeeded: {result}"
|
|
576
|
+
)
|
|
577
|
+
else:
|
|
578
|
+
log_error(f"Error embedding document {i}: {result}")
|
|
579
|
+
else:
|
|
580
|
+
# Use individual embedding
|
|
581
|
+
embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in batch_docs]
|
|
582
|
+
results = await asyncio.gather(*embed_tasks, return_exceptions=True)
|
|
583
|
+
|
|
584
|
+
# Check for exceptions and handle them
|
|
585
|
+
for i, result in enumerate(results):
|
|
586
|
+
if isinstance(result, Exception):
|
|
587
|
+
error_msg = str(result)
|
|
588
|
+
# If it's an event loop closure error, log it but don't fail
|
|
589
|
+
if "Event loop is closed" in error_msg or "RuntimeError" in type(result).__name__:
|
|
590
|
+
log_warning(
|
|
591
|
+
f"Event loop closure during embedding for document {i}, but operation may have succeeded: {result}"
|
|
592
|
+
)
|
|
593
|
+
else:
|
|
594
|
+
log_error(f"Error embedding document {i}: {result}")
|
|
595
|
+
|
|
596
|
+
async def async_upsert(
|
|
597
|
+
self,
|
|
598
|
+
content_hash: str,
|
|
599
|
+
documents: List[Document],
|
|
600
|
+
filters: Optional[Dict[str, Any]] = None,
|
|
601
|
+
batch_size: int = 100,
|
|
602
|
+
) -> None:
|
|
603
|
+
"""Upsert documents asynchronously by running in a thread."""
|
|
604
|
+
try:
|
|
605
|
+
if self.content_hash_exists(content_hash):
|
|
606
|
+
self._delete_by_content_hash(content_hash)
|
|
607
|
+
await self._async_upsert(content_hash, documents, filters, batch_size)
|
|
608
|
+
except Exception as e:
|
|
609
|
+
log_error(f"Error upserting documents by content hash: {e}")
|
|
610
|
+
raise
|
|
611
|
+
|
|
612
|
+
async def _async_upsert(
|
|
613
|
+
self,
|
|
614
|
+
content_hash: str,
|
|
615
|
+
documents: List[Document],
|
|
616
|
+
filters: Optional[Dict[str, Any]] = None,
|
|
617
|
+
batch_size: int = 100,
|
|
618
|
+
) -> None:
|
|
619
|
+
"""
|
|
620
|
+
Upsert (insert or update) documents in the database.
|
|
621
|
+
|
|
622
|
+
Args:
|
|
623
|
+
documents (List[Document]): List of documents to upsert.
|
|
624
|
+
filters (Optional[Dict[str, Any]]): Filters to apply to the documents.
|
|
625
|
+
batch_size (int): Number of documents to upsert in each batch.
|
|
626
|
+
"""
|
|
627
|
+
try:
|
|
628
|
+
with self.Session() as sess:
|
|
629
|
+
for i in range(0, len(documents), batch_size):
|
|
630
|
+
batch_docs = documents[i : i + batch_size]
|
|
631
|
+
log_info(f"Processing batch starting at index {i}, size: {len(batch_docs)}")
|
|
632
|
+
try:
|
|
633
|
+
# Embed all documents in the batch
|
|
634
|
+
await self._async_embed_documents(batch_docs)
|
|
635
|
+
|
|
636
|
+
# Prepare documents for upserting
|
|
637
|
+
batch_records_dict = {} # Use dict to deduplicate by ID
|
|
638
|
+
for idx, doc in enumerate(batch_docs):
|
|
639
|
+
try:
|
|
365
640
|
cleaned_content = self._clean_content(doc.content)
|
|
366
|
-
content_hash
|
|
367
|
-
|
|
641
|
+
# Include content_hash in ID to ensure uniqueness across different content hashes
|
|
642
|
+
# This allows the same URL/content to be inserted with different descriptions
|
|
643
|
+
base_id = doc.id or md5(cleaned_content.encode()).hexdigest()
|
|
644
|
+
record_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
|
|
645
|
+
|
|
646
|
+
if (
|
|
647
|
+
doc.embedding is not None
|
|
648
|
+
and isinstance(doc.embedding, list)
|
|
649
|
+
and len(doc.embedding) == 0
|
|
650
|
+
):
|
|
651
|
+
log_warning(f"Document {idx} '{doc.name}' has empty embedding (length 0)")
|
|
652
|
+
|
|
653
|
+
meta_data = doc.meta_data or {}
|
|
654
|
+
if filters:
|
|
655
|
+
meta_data.update(filters)
|
|
656
|
+
|
|
368
657
|
record = {
|
|
369
|
-
"id":
|
|
658
|
+
"id": record_id, # use record_id as a reproducible id to avoid duplicates while upsert
|
|
370
659
|
"name": doc.name,
|
|
371
660
|
"meta_data": doc.meta_data,
|
|
372
661
|
"filters": filters,
|
|
@@ -374,44 +663,81 @@ class PgVector(VectorDb):
|
|
|
374
663
|
"embedding": doc.embedding,
|
|
375
664
|
"usage": doc.usage,
|
|
376
665
|
"content_hash": content_hash,
|
|
666
|
+
"content_id": doc.content_id,
|
|
377
667
|
}
|
|
378
|
-
|
|
668
|
+
batch_records_dict[record_id] = record # This deduplicates by ID
|
|
379
669
|
except Exception as e:
|
|
380
|
-
|
|
670
|
+
log_error(f"Error processing document '{doc.name}': {e}")
|
|
671
|
+
|
|
672
|
+
# Convert dict to list for upsert
|
|
673
|
+
batch_records = list(batch_records_dict.values())
|
|
674
|
+
if not batch_records:
|
|
675
|
+
log_info("No valid records to upsert in this batch.")
|
|
676
|
+
continue
|
|
381
677
|
|
|
382
678
|
# Upsert the batch of records
|
|
383
679
|
insert_stmt = postgresql.insert(self.table).values(batch_records)
|
|
384
680
|
upsert_stmt = insert_stmt.on_conflict_do_update(
|
|
385
681
|
index_elements=["id"],
|
|
386
|
-
set_=
|
|
387
|
-
name
|
|
388
|
-
meta_data
|
|
389
|
-
filters
|
|
390
|
-
content
|
|
391
|
-
embedding
|
|
392
|
-
usage
|
|
393
|
-
content_hash
|
|
394
|
-
|
|
682
|
+
set_={
|
|
683
|
+
"name": insert_stmt.excluded.name,
|
|
684
|
+
"meta_data": insert_stmt.excluded.meta_data,
|
|
685
|
+
"filters": insert_stmt.excluded.filters,
|
|
686
|
+
"content": insert_stmt.excluded.content,
|
|
687
|
+
"embedding": insert_stmt.excluded.embedding,
|
|
688
|
+
"usage": insert_stmt.excluded.usage,
|
|
689
|
+
"content_hash": insert_stmt.excluded.content_hash,
|
|
690
|
+
"content_id": insert_stmt.excluded.content_id,
|
|
691
|
+
},
|
|
395
692
|
)
|
|
396
693
|
sess.execute(upsert_stmt)
|
|
397
694
|
sess.commit() # Commit batch independently
|
|
398
|
-
|
|
695
|
+
log_info(f"Upserted batch of {len(batch_records)} documents.")
|
|
399
696
|
except Exception as e:
|
|
400
|
-
|
|
697
|
+
log_error(f"Error with batch starting at index {i}: {e}")
|
|
401
698
|
sess.rollback() # Rollback the current batch if there's an error
|
|
402
699
|
raise
|
|
403
700
|
except Exception as e:
|
|
404
|
-
|
|
701
|
+
log_error(f"Error upserting documents: {e}")
|
|
702
|
+
raise
|
|
703
|
+
|
|
704
|
+
def update_metadata(self, content_id: str, metadata: Dict[str, Any]) -> None:
|
|
705
|
+
"""
|
|
706
|
+
Update the metadata for a document.
|
|
707
|
+
|
|
708
|
+
Args:
|
|
709
|
+
content_id (str): The ID of the document.
|
|
710
|
+
metadata (Dict[str, Any]): The metadata to update.
|
|
711
|
+
"""
|
|
712
|
+
try:
|
|
713
|
+
with self.Session() as sess:
|
|
714
|
+
# Merge JSONB for metadata, but replace filters entirely (absolute value)
|
|
715
|
+
stmt = (
|
|
716
|
+
update(self.table)
|
|
717
|
+
.where(self.table.c.content_id == content_id)
|
|
718
|
+
.values(
|
|
719
|
+
meta_data=func.coalesce(self.table.c.meta_data, text("'{}'::jsonb")).op("||")(
|
|
720
|
+
bindparam("md", type_=postgresql.JSONB)
|
|
721
|
+
),
|
|
722
|
+
filters=bindparam("ft", type_=postgresql.JSONB),
|
|
723
|
+
)
|
|
724
|
+
)
|
|
725
|
+
sess.execute(stmt, {"md": metadata, "ft": metadata})
|
|
726
|
+
sess.commit()
|
|
727
|
+
except Exception as e:
|
|
728
|
+
log_error(f"Error updating metadata for document {content_id}: {e}")
|
|
405
729
|
raise
|
|
406
730
|
|
|
407
|
-
def search(
|
|
731
|
+
def search(
|
|
732
|
+
self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
|
|
733
|
+
) -> List[Document]:
|
|
408
734
|
"""
|
|
409
735
|
Perform a search based on the configured search type.
|
|
410
736
|
|
|
411
737
|
Args:
|
|
412
738
|
query (str): The search query.
|
|
413
739
|
limit (int): Maximum number of results to return.
|
|
414
|
-
filters (Optional[Dict[str, Any]]): Filters to apply to the search.
|
|
740
|
+
filters (Optional[Union[Dict[str, Any], List[FilterExpr]]]): Filters to apply to the search.
|
|
415
741
|
|
|
416
742
|
Returns:
|
|
417
743
|
List[Document]: List of matching documents.
|
|
@@ -423,17 +749,46 @@ class PgVector(VectorDb):
|
|
|
423
749
|
elif self.search_type == SearchType.hybrid:
|
|
424
750
|
return self.hybrid_search(query=query, limit=limit, filters=filters)
|
|
425
751
|
else:
|
|
426
|
-
|
|
752
|
+
log_error(f"Invalid search type '{self.search_type}'.")
|
|
427
753
|
return []
|
|
428
754
|
|
|
429
|
-
def
|
|
755
|
+
async def async_search(
|
|
756
|
+
self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
|
|
757
|
+
) -> List[Document]:
|
|
758
|
+
"""Search asynchronously by running in a thread."""
|
|
759
|
+
return await asyncio.to_thread(self.search, query, limit, filters)
|
|
760
|
+
|
|
761
|
+
def _dsl_to_sqlalchemy(self, filter_expr, table) -> ColumnElement[bool]:
|
|
762
|
+
op = filter_expr["op"]
|
|
763
|
+
|
|
764
|
+
if op == "EQ":
|
|
765
|
+
return table.c.meta_data[filter_expr["key"]].astext == str(filter_expr["value"])
|
|
766
|
+
elif op == "IN":
|
|
767
|
+
# Postgres JSONB array containment
|
|
768
|
+
return table.c.meta_data[filter_expr["key"]].astext.in_([str(v) for v in filter_expr["values"]])
|
|
769
|
+
elif op == "GT":
|
|
770
|
+
return table.c.meta_data[filter_expr["key"]].astext.cast(Integer) > filter_expr["value"]
|
|
771
|
+
elif op == "LT":
|
|
772
|
+
return table.c.meta_data[filter_expr["key"]].astext.cast(Integer) < filter_expr["value"]
|
|
773
|
+
elif op == "NOT":
|
|
774
|
+
return not_(self._dsl_to_sqlalchemy(filter_expr["condition"], table))
|
|
775
|
+
elif op == "AND":
|
|
776
|
+
return and_(*[self._dsl_to_sqlalchemy(cond, table) for cond in filter_expr["conditions"]])
|
|
777
|
+
elif op == "OR":
|
|
778
|
+
return or_(*[self._dsl_to_sqlalchemy(cond, table) for cond in filter_expr["conditions"]])
|
|
779
|
+
else:
|
|
780
|
+
raise ValueError(f"Unknown filter operator: {op}")
|
|
781
|
+
|
|
782
|
+
def vector_search(
|
|
783
|
+
self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
|
|
784
|
+
) -> List[Document]:
|
|
430
785
|
"""
|
|
431
786
|
Perform a vector similarity search.
|
|
432
787
|
|
|
433
788
|
Args:
|
|
434
789
|
query (str): The search query.
|
|
435
790
|
limit (int): Maximum number of results to return.
|
|
436
|
-
filters (Optional[Dict[str, Any]]): Filters to apply to the search.
|
|
791
|
+
filters (Optional[Union[Dict[str, Any], List[FilterExpr]]]): Filters to apply to the search.
|
|
437
792
|
|
|
438
793
|
Returns:
|
|
439
794
|
List[Document]: List of matching documents.
|
|
@@ -442,7 +797,7 @@ class PgVector(VectorDb):
|
|
|
442
797
|
# Get the embedding for the query string
|
|
443
798
|
query_embedding = self.embedder.get_embedding(query)
|
|
444
799
|
if query_embedding is None:
|
|
445
|
-
|
|
800
|
+
log_error(f"Error getting embedding for Query: {query}")
|
|
446
801
|
return []
|
|
447
802
|
|
|
448
803
|
# Define the columns to select
|
|
@@ -460,7 +815,17 @@ class PgVector(VectorDb):
|
|
|
460
815
|
|
|
461
816
|
# Apply filters if provided
|
|
462
817
|
if filters is not None:
|
|
463
|
-
|
|
818
|
+
# Handle dict filters
|
|
819
|
+
if isinstance(filters, dict):
|
|
820
|
+
stmt = stmt.where(self.table.c.meta_data.contains(filters))
|
|
821
|
+
# Handle FilterExpr DSL
|
|
822
|
+
else:
|
|
823
|
+
# Convert each DSL expression to SQLAlchemy and AND them together
|
|
824
|
+
sqlalchemy_conditions = [
|
|
825
|
+
self._dsl_to_sqlalchemy(f.to_dict() if hasattr(f, "to_dict") else f, self.table)
|
|
826
|
+
for f in filters
|
|
827
|
+
]
|
|
828
|
+
stmt = stmt.where(and_(*sqlalchemy_conditions))
|
|
464
829
|
|
|
465
830
|
# Order the results based on the distance metric
|
|
466
831
|
if self.distance == Distance.l2:
|
|
@@ -470,14 +835,14 @@ class PgVector(VectorDb):
|
|
|
470
835
|
elif self.distance == Distance.max_inner_product:
|
|
471
836
|
stmt = stmt.order_by(self.table.c.embedding.max_inner_product(query_embedding))
|
|
472
837
|
else:
|
|
473
|
-
|
|
838
|
+
log_error(f"Unknown distance metric: {self.distance}")
|
|
474
839
|
return []
|
|
475
840
|
|
|
476
841
|
# Limit the number of results
|
|
477
842
|
stmt = stmt.limit(limit)
|
|
478
843
|
|
|
479
844
|
# Log the query for debugging
|
|
480
|
-
|
|
845
|
+
log_debug(f"Vector search query: {stmt}")
|
|
481
846
|
|
|
482
847
|
# Execute the query
|
|
483
848
|
try:
|
|
@@ -489,8 +854,8 @@ class PgVector(VectorDb):
|
|
|
489
854
|
sess.execute(text(f"SET LOCAL hnsw.ef_search = {self.vector_index.ef_search}"))
|
|
490
855
|
results = sess.execute(stmt).fetchall()
|
|
491
856
|
except Exception as e:
|
|
492
|
-
|
|
493
|
-
|
|
857
|
+
log_error(f"Error performing semantic search: {e}")
|
|
858
|
+
log_error("Table might not exist, creating for future use")
|
|
494
859
|
self.create()
|
|
495
860
|
return []
|
|
496
861
|
|
|
@@ -512,9 +877,10 @@ class PgVector(VectorDb):
|
|
|
512
877
|
if self.reranker:
|
|
513
878
|
search_results = self.reranker.rerank(query=query, documents=search_results)
|
|
514
879
|
|
|
880
|
+
log_info(f"Found {len(search_results)} documents")
|
|
515
881
|
return search_results
|
|
516
882
|
except Exception as e:
|
|
517
|
-
|
|
883
|
+
log_error(f"Error during vector search: {e}")
|
|
518
884
|
return []
|
|
519
885
|
|
|
520
886
|
def enable_prefix_matching(self, query: str) -> str:
|
|
@@ -532,14 +898,16 @@ class PgVector(VectorDb):
|
|
|
532
898
|
processed_words = [word + "*" for word in words]
|
|
533
899
|
return " ".join(processed_words)
|
|
534
900
|
|
|
535
|
-
def keyword_search(
|
|
901
|
+
def keyword_search(
|
|
902
|
+
self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
|
|
903
|
+
) -> List[Document]:
|
|
536
904
|
"""
|
|
537
905
|
Perform a keyword search on the 'content' column.
|
|
538
906
|
|
|
539
907
|
Args:
|
|
540
908
|
query (str): The search query.
|
|
541
909
|
limit (int): Maximum number of results to return.
|
|
542
|
-
filters (Optional[Dict[str, Any]]): Filters to apply to the search.
|
|
910
|
+
filters (Optional[Union[Dict[str, Any], List[FilterExpr]]]): Filters to apply to the search.
|
|
543
911
|
|
|
544
912
|
Returns:
|
|
545
913
|
List[Document]: List of matching documents.
|
|
@@ -568,8 +936,17 @@ class PgVector(VectorDb):
|
|
|
568
936
|
|
|
569
937
|
# Apply filters if provided
|
|
570
938
|
if filters is not None:
|
|
571
|
-
#
|
|
572
|
-
|
|
939
|
+
# Handle dict filters
|
|
940
|
+
if isinstance(filters, dict):
|
|
941
|
+
stmt = stmt.where(self.table.c.meta_data.contains(filters))
|
|
942
|
+
# Handle FilterExpr DSL
|
|
943
|
+
else:
|
|
944
|
+
# Convert each DSL expression to SQLAlchemy and AND them together
|
|
945
|
+
sqlalchemy_conditions = [
|
|
946
|
+
self._dsl_to_sqlalchemy(f.to_dict() if hasattr(f, "to_dict") else f, self.table)
|
|
947
|
+
for f in filters
|
|
948
|
+
]
|
|
949
|
+
stmt = stmt.where(and_(*sqlalchemy_conditions))
|
|
573
950
|
|
|
574
951
|
# Order by the relevance rank
|
|
575
952
|
stmt = stmt.order_by(text_rank.desc())
|
|
@@ -578,15 +955,15 @@ class PgVector(VectorDb):
|
|
|
578
955
|
stmt = stmt.limit(limit)
|
|
579
956
|
|
|
580
957
|
# Log the query for debugging
|
|
581
|
-
|
|
958
|
+
log_debug(f"Keyword search query: {stmt}")
|
|
582
959
|
|
|
583
960
|
# Execute the query
|
|
584
961
|
try:
|
|
585
962
|
with self.Session() as sess, sess.begin():
|
|
586
963
|
results = sess.execute(stmt).fetchall()
|
|
587
964
|
except Exception as e:
|
|
588
|
-
|
|
589
|
-
|
|
965
|
+
log_error(f"Error performing keyword search: {e}")
|
|
966
|
+
log_error("Table might not exist, creating for future use")
|
|
590
967
|
self.create()
|
|
591
968
|
return []
|
|
592
969
|
|
|
@@ -605,16 +982,17 @@ class PgVector(VectorDb):
|
|
|
605
982
|
)
|
|
606
983
|
)
|
|
607
984
|
|
|
985
|
+
log_info(f"Found {len(search_results)} documents")
|
|
608
986
|
return search_results
|
|
609
987
|
except Exception as e:
|
|
610
|
-
|
|
988
|
+
log_error(f"Error during keyword search: {e}")
|
|
611
989
|
return []
|
|
612
990
|
|
|
613
991
|
def hybrid_search(
|
|
614
992
|
self,
|
|
615
993
|
query: str,
|
|
616
994
|
limit: int = 5,
|
|
617
|
-
filters: Optional[Dict[str, Any]] = None,
|
|
995
|
+
filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None,
|
|
618
996
|
) -> List[Document]:
|
|
619
997
|
"""
|
|
620
998
|
Perform a hybrid search combining vector similarity and full-text search.
|
|
@@ -622,7 +1000,7 @@ class PgVector(VectorDb):
|
|
|
622
1000
|
Args:
|
|
623
1001
|
query (str): The search query.
|
|
624
1002
|
limit (int): Maximum number of results to return.
|
|
625
|
-
filters (Optional[Dict[str, Any]]): Filters to apply to the search.
|
|
1003
|
+
filters (Optional[Union[Dict[str, Any], List[FilterExpr]]]): Filters to apply to the search.
|
|
626
1004
|
|
|
627
1005
|
Returns:
|
|
628
1006
|
List[Document]: List of matching documents.
|
|
@@ -631,7 +1009,7 @@ class PgVector(VectorDb):
|
|
|
631
1009
|
# Get the embedding for the query string
|
|
632
1010
|
query_embedding = self.embedder.get_embedding(query)
|
|
633
1011
|
if query_embedding is None:
|
|
634
|
-
|
|
1012
|
+
log_error(f"Error getting embedding for Query: {query}")
|
|
635
1013
|
return []
|
|
636
1014
|
|
|
637
1015
|
# Define the columns to select
|
|
@@ -669,7 +1047,7 @@ class PgVector(VectorDb):
|
|
|
669
1047
|
# Normalize to range [0, 1]
|
|
670
1048
|
vector_score = (raw_vector_score + 1) / 2
|
|
671
1049
|
else:
|
|
672
|
-
|
|
1050
|
+
log_error(f"Unknown distance metric: {self.distance}")
|
|
673
1051
|
return []
|
|
674
1052
|
|
|
675
1053
|
# Apply weights to control the influence of each score
|
|
@@ -689,7 +1067,17 @@ class PgVector(VectorDb):
|
|
|
689
1067
|
|
|
690
1068
|
# Apply filters if provided
|
|
691
1069
|
if filters is not None:
|
|
692
|
-
|
|
1070
|
+
# Handle dict filters
|
|
1071
|
+
if isinstance(filters, dict):
|
|
1072
|
+
stmt = stmt.where(self.table.c.meta_data.contains(filters))
|
|
1073
|
+
# Handle FilterExpr DSL
|
|
1074
|
+
else:
|
|
1075
|
+
# Convert each DSL expression to SQLAlchemy and AND them together
|
|
1076
|
+
sqlalchemy_conditions = [
|
|
1077
|
+
self._dsl_to_sqlalchemy(f.to_dict() if hasattr(f, "to_dict") else f, self.table)
|
|
1078
|
+
for f in filters
|
|
1079
|
+
]
|
|
1080
|
+
stmt = stmt.where(and_(*sqlalchemy_conditions))
|
|
693
1081
|
|
|
694
1082
|
# Order the results by the hybrid score in descending order
|
|
695
1083
|
stmt = stmt.order_by(desc("hybrid_score"))
|
|
@@ -698,7 +1086,7 @@ class PgVector(VectorDb):
|
|
|
698
1086
|
stmt = stmt.limit(limit)
|
|
699
1087
|
|
|
700
1088
|
# Log the query for debugging
|
|
701
|
-
|
|
1089
|
+
log_debug(f"Hybrid search query: {stmt}")
|
|
702
1090
|
|
|
703
1091
|
# Execute the query
|
|
704
1092
|
try:
|
|
@@ -710,7 +1098,7 @@ class PgVector(VectorDb):
|
|
|
710
1098
|
sess.execute(text(f"SET LOCAL hnsw.ef_search = {self.vector_index.ef_search}"))
|
|
711
1099
|
results = sess.execute(stmt).fetchall()
|
|
712
1100
|
except Exception as e:
|
|
713
|
-
|
|
1101
|
+
log_error(f"Error performing hybrid search: {e}")
|
|
714
1102
|
return []
|
|
715
1103
|
|
|
716
1104
|
# Process the results and convert to Document objects
|
|
@@ -728,9 +1116,14 @@ class PgVector(VectorDb):
|
|
|
728
1116
|
)
|
|
729
1117
|
)
|
|
730
1118
|
|
|
1119
|
+
if self.reranker:
|
|
1120
|
+
search_results = self.reranker.rerank(query=query, documents=search_results)
|
|
1121
|
+
|
|
1122
|
+
log_info(f"Found {len(search_results)} documents")
|
|
1123
|
+
|
|
731
1124
|
return search_results
|
|
732
1125
|
except Exception as e:
|
|
733
|
-
|
|
1126
|
+
log_error(f"Error during hybrid search: {e}")
|
|
734
1127
|
return []
|
|
735
1128
|
|
|
736
1129
|
def drop(self) -> None:
|
|
@@ -739,14 +1132,18 @@ class PgVector(VectorDb):
|
|
|
739
1132
|
"""
|
|
740
1133
|
if self.table_exists():
|
|
741
1134
|
try:
|
|
742
|
-
|
|
1135
|
+
log_debug(f"Dropping table '{self.table.fullname}'.")
|
|
743
1136
|
self.table.drop(self.db_engine)
|
|
744
|
-
|
|
1137
|
+
log_info(f"Table '{self.table.fullname}' dropped successfully.")
|
|
745
1138
|
except Exception as e:
|
|
746
|
-
|
|
1139
|
+
log_error(f"Error dropping table '{self.table.fullname}': {e}")
|
|
747
1140
|
raise
|
|
748
1141
|
else:
|
|
749
|
-
|
|
1142
|
+
log_info(f"Table '{self.table.fullname}' does not exist.")
|
|
1143
|
+
|
|
1144
|
+
async def async_drop(self) -> None:
|
|
1145
|
+
"""Drop the table asynchronously by running in a thread."""
|
|
1146
|
+
await asyncio.to_thread(self.drop)
|
|
750
1147
|
|
|
751
1148
|
def exists(self) -> bool:
|
|
752
1149
|
"""
|
|
@@ -757,6 +1154,10 @@ class PgVector(VectorDb):
|
|
|
757
1154
|
"""
|
|
758
1155
|
return self.table_exists()
|
|
759
1156
|
|
|
1157
|
+
async def async_exists(self) -> bool:
|
|
1158
|
+
"""Check if table exists asynchronously by running in a thread."""
|
|
1159
|
+
return await asyncio.to_thread(self.exists)
|
|
1160
|
+
|
|
760
1161
|
def get_count(self) -> int:
|
|
761
1162
|
"""
|
|
762
1163
|
Get the number of records in the table.
|
|
@@ -770,7 +1171,7 @@ class PgVector(VectorDb):
|
|
|
770
1171
|
result = sess.execute(stmt).scalar()
|
|
771
1172
|
return int(result) if result is not None else 0
|
|
772
1173
|
except Exception as e:
|
|
773
|
-
|
|
1174
|
+
log_error(f"Error getting count from table '{self.table.fullname}': {e}")
|
|
774
1175
|
return 0
|
|
775
1176
|
|
|
776
1177
|
def optimize(self, force_recreate: bool = False) -> None:
|
|
@@ -780,10 +1181,10 @@ class PgVector(VectorDb):
|
|
|
780
1181
|
Args:
|
|
781
1182
|
force_recreate (bool): If True, existing indexes will be dropped and recreated.
|
|
782
1183
|
"""
|
|
783
|
-
|
|
1184
|
+
log_debug("==== Optimizing Vector DB ====")
|
|
784
1185
|
self._create_vector_index(force_recreate=force_recreate)
|
|
785
1186
|
self._create_gin_index(force_recreate=force_recreate)
|
|
786
|
-
|
|
1187
|
+
log_debug("==== Optimized Vector DB ====")
|
|
787
1188
|
|
|
788
1189
|
def _index_exists(self, index_name: str) -> bool:
|
|
789
1190
|
"""
|
|
@@ -811,7 +1212,7 @@ class PgVector(VectorDb):
|
|
|
811
1212
|
drop_index_sql = f'DROP INDEX IF EXISTS "{self.schema}"."{index_name}";'
|
|
812
1213
|
sess.execute(text(drop_index_sql))
|
|
813
1214
|
except Exception as e:
|
|
814
|
-
|
|
1215
|
+
log_error(f"Error dropping index '{index_name}': {e}")
|
|
815
1216
|
raise
|
|
816
1217
|
|
|
817
1218
|
def _create_vector_index(self, force_recreate: bool = False) -> None:
|
|
@@ -822,7 +1223,7 @@ class PgVector(VectorDb):
|
|
|
822
1223
|
force_recreate (bool): If True, existing index will be dropped and recreated.
|
|
823
1224
|
"""
|
|
824
1225
|
if self.vector_index is None:
|
|
825
|
-
|
|
1226
|
+
log_debug("No vector index specified, skipping vector index optimization.")
|
|
826
1227
|
return
|
|
827
1228
|
|
|
828
1229
|
# Generate index name if not provided
|
|
@@ -844,12 +1245,12 @@ class PgVector(VectorDb):
|
|
|
844
1245
|
vector_index_exists = self._index_exists(self.vector_index.name)
|
|
845
1246
|
|
|
846
1247
|
if vector_index_exists:
|
|
847
|
-
|
|
1248
|
+
log_info(f"Vector index '{self.vector_index.name}' already exists.")
|
|
848
1249
|
if force_recreate:
|
|
849
|
-
|
|
1250
|
+
log_info(f"Force recreating vector index '{self.vector_index.name}'. Dropping existing index.")
|
|
850
1251
|
self._drop_index(self.vector_index.name)
|
|
851
1252
|
else:
|
|
852
|
-
|
|
1253
|
+
log_info(f"Skipping vector index creation as index '{self.vector_index.name}' already exists.")
|
|
853
1254
|
return
|
|
854
1255
|
|
|
855
1256
|
# Proceed to create the vector index
|
|
@@ -857,7 +1258,7 @@ class PgVector(VectorDb):
|
|
|
857
1258
|
with self.Session() as sess, sess.begin():
|
|
858
1259
|
# Set configuration parameters
|
|
859
1260
|
if self.vector_index.configuration:
|
|
860
|
-
|
|
1261
|
+
log_debug(f"Setting configuration: {self.vector_index.configuration}")
|
|
861
1262
|
for key, value in self.vector_index.configuration.items():
|
|
862
1263
|
sess.execute(text(f"SET {key} = :value;"), {"value": value})
|
|
863
1264
|
|
|
@@ -866,10 +1267,10 @@ class PgVector(VectorDb):
|
|
|
866
1267
|
elif isinstance(self.vector_index, HNSW):
|
|
867
1268
|
self._create_hnsw_index(sess, table_fullname, index_distance)
|
|
868
1269
|
else:
|
|
869
|
-
|
|
1270
|
+
log_error(f"Unknown index type: {type(self.vector_index)}")
|
|
870
1271
|
return
|
|
871
1272
|
except Exception as e:
|
|
872
|
-
|
|
1273
|
+
log_error(f"Error creating vector index '{self.vector_index.name}': {e}")
|
|
873
1274
|
raise
|
|
874
1275
|
|
|
875
1276
|
def _create_ivfflat_index(self, sess: Session, table_fullname: str, index_distance: str) -> None:
|
|
@@ -888,7 +1289,7 @@ class PgVector(VectorDb):
|
|
|
888
1289
|
num_lists = self.vector_index.lists
|
|
889
1290
|
if self.vector_index.dynamic_lists:
|
|
890
1291
|
total_records = self.get_count()
|
|
891
|
-
|
|
1292
|
+
log_debug(f"Number of records: {total_records}")
|
|
892
1293
|
if total_records < 1000000:
|
|
893
1294
|
num_lists = max(int(total_records / 1000), 1) # Ensure at least one list
|
|
894
1295
|
else:
|
|
@@ -897,7 +1298,7 @@ class PgVector(VectorDb):
|
|
|
897
1298
|
# Set ivfflat.probes
|
|
898
1299
|
sess.execute(text("SET ivfflat.probes = :probes;"), {"probes": self.vector_index.probes})
|
|
899
1300
|
|
|
900
|
-
|
|
1301
|
+
log_debug(
|
|
901
1302
|
f"Creating Ivfflat index '{self.vector_index.name}' on table '{table_fullname}' with "
|
|
902
1303
|
f"lists: {num_lists}, probes: {self.vector_index.probes}, "
|
|
903
1304
|
f"and distance metric: {index_distance}"
|
|
@@ -923,7 +1324,7 @@ class PgVector(VectorDb):
|
|
|
923
1324
|
# Cast index to HNSW for type hinting
|
|
924
1325
|
self.vector_index = cast(HNSW, self.vector_index)
|
|
925
1326
|
|
|
926
|
-
|
|
1327
|
+
log_debug(
|
|
927
1328
|
f"Creating HNSW index '{self.vector_index.name}' on table '{table_fullname}' with "
|
|
928
1329
|
f"m: {self.vector_index.m}, ef_construction: {self.vector_index.ef_construction}, "
|
|
929
1330
|
f"and distance metric: {index_distance}"
|
|
@@ -949,18 +1350,18 @@ class PgVector(VectorDb):
|
|
|
949
1350
|
gin_index_exists = self._index_exists(gin_index_name)
|
|
950
1351
|
|
|
951
1352
|
if gin_index_exists:
|
|
952
|
-
|
|
1353
|
+
log_info(f"GIN index '{gin_index_name}' already exists.")
|
|
953
1354
|
if force_recreate:
|
|
954
|
-
|
|
1355
|
+
log_info(f"Force recreating GIN index '{gin_index_name}'. Dropping existing index.")
|
|
955
1356
|
self._drop_index(gin_index_name)
|
|
956
1357
|
else:
|
|
957
|
-
|
|
1358
|
+
log_info(f"Skipping GIN index creation as index '{gin_index_name}' already exists.")
|
|
958
1359
|
return
|
|
959
1360
|
|
|
960
1361
|
# Proceed to create GIN index
|
|
961
1362
|
try:
|
|
962
1363
|
with self.Session() as sess, sess.begin():
|
|
963
|
-
|
|
1364
|
+
log_debug(f"Creating GIN index '{gin_index_name}' on table '{self.table.fullname}'.")
|
|
964
1365
|
# Create index
|
|
965
1366
|
create_gin_index_sql = text(
|
|
966
1367
|
f'CREATE INDEX "{gin_index_name}" ON {self.table.fullname} '
|
|
@@ -968,7 +1369,7 @@ class PgVector(VectorDb):
|
|
|
968
1369
|
)
|
|
969
1370
|
sess.execute(create_gin_index_sql)
|
|
970
1371
|
except Exception as e:
|
|
971
|
-
|
|
1372
|
+
log_error(f"Error creating GIN index '{gin_index_name}': {e}")
|
|
972
1373
|
raise
|
|
973
1374
|
|
|
974
1375
|
def delete(self) -> bool:
|
|
@@ -984,10 +1385,90 @@ class PgVector(VectorDb):
|
|
|
984
1385
|
with self.Session() as sess:
|
|
985
1386
|
sess.execute(delete(self.table))
|
|
986
1387
|
sess.commit()
|
|
987
|
-
|
|
1388
|
+
log_info(f"Deleted all records from table '{self.table.fullname}'.")
|
|
1389
|
+
return True
|
|
1390
|
+
except Exception as e:
|
|
1391
|
+
log_error(f"Error deleting rows from table '{self.table.fullname}': {e}")
|
|
1392
|
+
sess.rollback()
|
|
1393
|
+
return False
|
|
1394
|
+
|
|
1395
|
+
def delete_by_id(self, id: str) -> bool:
|
|
1396
|
+
"""
|
|
1397
|
+
Delete content by ID.
|
|
1398
|
+
"""
|
|
1399
|
+
try:
|
|
1400
|
+
with self.Session() as sess, sess.begin():
|
|
1401
|
+
stmt = self.table.delete().where(self.table.c.id == id)
|
|
1402
|
+
sess.execute(stmt)
|
|
1403
|
+
sess.commit()
|
|
1404
|
+
log_info(f"Deleted records with id '{id}' from table '{self.table.fullname}'.")
|
|
1405
|
+
return True
|
|
1406
|
+
except Exception as e:
|
|
1407
|
+
log_error(f"Error deleting rows from table '{self.table.fullname}': {e}")
|
|
1408
|
+
sess.rollback()
|
|
1409
|
+
return False
|
|
1410
|
+
|
|
1411
|
+
def delete_by_name(self, name: str) -> bool:
|
|
1412
|
+
"""
|
|
1413
|
+
Delete content by name.
|
|
1414
|
+
"""
|
|
1415
|
+
try:
|
|
1416
|
+
with self.Session() as sess, sess.begin():
|
|
1417
|
+
stmt = self.table.delete().where(self.table.c.name == name)
|
|
1418
|
+
sess.execute(stmt)
|
|
1419
|
+
sess.commit()
|
|
1420
|
+
log_info(f"Deleted records with name '{name}' from table '{self.table.fullname}'.")
|
|
1421
|
+
return True
|
|
1422
|
+
except Exception as e:
|
|
1423
|
+
log_error(f"Error deleting rows from table '{self.table.fullname}': {e}")
|
|
1424
|
+
sess.rollback()
|
|
1425
|
+
return False
|
|
1426
|
+
|
|
1427
|
+
def delete_by_metadata(self, metadata: Dict[str, Any]) -> bool:
|
|
1428
|
+
"""
|
|
1429
|
+
Delete content by metadata.
|
|
1430
|
+
"""
|
|
1431
|
+
try:
|
|
1432
|
+
with self.Session() as sess, sess.begin():
|
|
1433
|
+
stmt = self.table.delete().where(self.table.c.meta_data.contains(metadata))
|
|
1434
|
+
sess.execute(stmt)
|
|
1435
|
+
sess.commit()
|
|
1436
|
+
log_info(f"Deleted records with metadata '{metadata}' from table '{self.table.fullname}'.")
|
|
1437
|
+
return True
|
|
1438
|
+
except Exception as e:
|
|
1439
|
+
log_error(f"Error deleting rows from table '{self.table.fullname}': {e}")
|
|
1440
|
+
sess.rollback()
|
|
1441
|
+
return False
|
|
1442
|
+
|
|
1443
|
+
def delete_by_content_id(self, content_id: str) -> bool:
|
|
1444
|
+
"""
|
|
1445
|
+
Delete content by content ID.
|
|
1446
|
+
"""
|
|
1447
|
+
try:
|
|
1448
|
+
with self.Session() as sess, sess.begin():
|
|
1449
|
+
stmt = self.table.delete().where(self.table.c.content_id == content_id)
|
|
1450
|
+
sess.execute(stmt)
|
|
1451
|
+
sess.commit()
|
|
1452
|
+
log_info(f"Deleted records with content ID '{content_id}' from table '{self.table.fullname}'.")
|
|
988
1453
|
return True
|
|
989
1454
|
except Exception as e:
|
|
990
|
-
|
|
1455
|
+
log_error(f"Error deleting rows from table '{self.table.fullname}': {e}")
|
|
1456
|
+
sess.rollback()
|
|
1457
|
+
return False
|
|
1458
|
+
|
|
1459
|
+
def _delete_by_content_hash(self, content_hash: str) -> bool:
|
|
1460
|
+
"""
|
|
1461
|
+
Delete content by content hash.
|
|
1462
|
+
"""
|
|
1463
|
+
try:
|
|
1464
|
+
with self.Session() as sess, sess.begin():
|
|
1465
|
+
stmt = self.table.delete().where(self.table.c.content_hash == content_hash)
|
|
1466
|
+
sess.execute(stmt)
|
|
1467
|
+
sess.commit()
|
|
1468
|
+
log_info(f"Deleted records with content hash '{content_hash}' from table '{self.table.fullname}'.")
|
|
1469
|
+
return True
|
|
1470
|
+
except Exception as e:
|
|
1471
|
+
log_error(f"Error deleting rows from table '{self.table.fullname}': {e}")
|
|
991
1472
|
sess.rollback()
|
|
992
1473
|
return False
|
|
993
1474
|
|
|
@@ -1023,3 +1504,6 @@ class PgVector(VectorDb):
|
|
|
1023
1504
|
copied_obj.table = copied_obj.get_table()
|
|
1024
1505
|
|
|
1025
1506
|
return copied_obj
|
|
1507
|
+
|
|
1508
|
+
def get_supported_search_types(self) -> List[str]:
|
|
1509
|
+
return [SearchType.vector, SearchType.keyword, SearchType.hybrid]
|