agno 0.1.2__py3-none-any.whl → 2.3.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/__init__.py +8 -0
- agno/agent/__init__.py +44 -5
- agno/agent/agent.py +10531 -2975
- agno/api/agent.py +14 -53
- agno/api/api.py +7 -46
- agno/api/evals.py +22 -0
- agno/api/os.py +17 -0
- agno/api/routes.py +6 -25
- agno/api/schemas/__init__.py +9 -0
- agno/api/schemas/agent.py +6 -9
- agno/api/schemas/evals.py +16 -0
- agno/api/schemas/os.py +14 -0
- agno/api/schemas/team.py +10 -10
- agno/api/schemas/utils.py +21 -0
- agno/api/schemas/workflows.py +16 -0
- agno/api/settings.py +53 -0
- agno/api/team.py +22 -26
- agno/api/workflow.py +28 -0
- agno/cloud/aws/base.py +214 -0
- agno/cloud/aws/s3/__init__.py +2 -0
- agno/cloud/aws/s3/api_client.py +43 -0
- agno/cloud/aws/s3/bucket.py +195 -0
- agno/cloud/aws/s3/object.py +57 -0
- agno/compression/__init__.py +3 -0
- agno/compression/manager.py +247 -0
- agno/culture/__init__.py +3 -0
- agno/culture/manager.py +956 -0
- agno/db/__init__.py +24 -0
- agno/db/async_postgres/__init__.py +3 -0
- agno/db/base.py +946 -0
- agno/db/dynamo/__init__.py +3 -0
- agno/db/dynamo/dynamo.py +2781 -0
- agno/db/dynamo/schemas.py +442 -0
- agno/db/dynamo/utils.py +743 -0
- agno/db/firestore/__init__.py +3 -0
- agno/db/firestore/firestore.py +2379 -0
- agno/db/firestore/schemas.py +181 -0
- agno/db/firestore/utils.py +376 -0
- agno/db/gcs_json/__init__.py +3 -0
- agno/db/gcs_json/gcs_json_db.py +1791 -0
- agno/db/gcs_json/utils.py +228 -0
- agno/db/in_memory/__init__.py +3 -0
- agno/db/in_memory/in_memory_db.py +1312 -0
- agno/db/in_memory/utils.py +230 -0
- agno/db/json/__init__.py +3 -0
- agno/db/json/json_db.py +1777 -0
- agno/db/json/utils.py +230 -0
- agno/db/migrations/manager.py +199 -0
- agno/db/migrations/v1_to_v2.py +635 -0
- agno/db/migrations/versions/v2_3_0.py +938 -0
- agno/db/mongo/__init__.py +17 -0
- agno/db/mongo/async_mongo.py +2760 -0
- agno/db/mongo/mongo.py +2597 -0
- agno/db/mongo/schemas.py +119 -0
- agno/db/mongo/utils.py +276 -0
- agno/db/mysql/__init__.py +4 -0
- agno/db/mysql/async_mysql.py +2912 -0
- agno/db/mysql/mysql.py +2923 -0
- agno/db/mysql/schemas.py +186 -0
- agno/db/mysql/utils.py +488 -0
- agno/db/postgres/__init__.py +4 -0
- agno/db/postgres/async_postgres.py +2579 -0
- agno/db/postgres/postgres.py +2870 -0
- agno/db/postgres/schemas.py +187 -0
- agno/db/postgres/utils.py +442 -0
- agno/db/redis/__init__.py +3 -0
- agno/db/redis/redis.py +2141 -0
- agno/db/redis/schemas.py +159 -0
- agno/db/redis/utils.py +346 -0
- agno/db/schemas/__init__.py +4 -0
- agno/db/schemas/culture.py +120 -0
- agno/db/schemas/evals.py +34 -0
- agno/db/schemas/knowledge.py +40 -0
- agno/db/schemas/memory.py +61 -0
- agno/db/singlestore/__init__.py +3 -0
- agno/db/singlestore/schemas.py +179 -0
- agno/db/singlestore/singlestore.py +2877 -0
- agno/db/singlestore/utils.py +384 -0
- agno/db/sqlite/__init__.py +4 -0
- agno/db/sqlite/async_sqlite.py +2911 -0
- agno/db/sqlite/schemas.py +181 -0
- agno/db/sqlite/sqlite.py +2908 -0
- agno/db/sqlite/utils.py +429 -0
- agno/db/surrealdb/__init__.py +3 -0
- agno/db/surrealdb/metrics.py +292 -0
- agno/db/surrealdb/models.py +334 -0
- agno/db/surrealdb/queries.py +71 -0
- agno/db/surrealdb/surrealdb.py +1908 -0
- agno/db/surrealdb/utils.py +147 -0
- agno/db/utils.py +118 -0
- agno/eval/__init__.py +24 -0
- agno/eval/accuracy.py +666 -276
- agno/eval/agent_as_judge.py +861 -0
- agno/eval/base.py +29 -0
- agno/eval/performance.py +779 -0
- agno/eval/reliability.py +241 -62
- agno/eval/utils.py +120 -0
- agno/exceptions.py +143 -1
- agno/filters.py +354 -0
- agno/guardrails/__init__.py +6 -0
- agno/guardrails/base.py +19 -0
- agno/guardrails/openai.py +144 -0
- agno/guardrails/pii.py +94 -0
- agno/guardrails/prompt_injection.py +52 -0
- agno/hooks/__init__.py +3 -0
- agno/hooks/decorator.py +164 -0
- agno/integrations/discord/__init__.py +3 -0
- agno/integrations/discord/client.py +203 -0
- agno/knowledge/__init__.py +5 -1
- agno/{document → knowledge}/chunking/agentic.py +22 -14
- agno/{document → knowledge}/chunking/document.py +2 -2
- agno/{document → knowledge}/chunking/fixed.py +7 -6
- agno/knowledge/chunking/markdown.py +151 -0
- agno/{document → knowledge}/chunking/recursive.py +15 -3
- agno/knowledge/chunking/row.py +39 -0
- agno/knowledge/chunking/semantic.py +91 -0
- agno/knowledge/chunking/strategy.py +165 -0
- agno/knowledge/content.py +74 -0
- agno/knowledge/document/__init__.py +5 -0
- agno/{document → knowledge/document}/base.py +12 -2
- agno/knowledge/embedder/__init__.py +5 -0
- agno/knowledge/embedder/aws_bedrock.py +343 -0
- agno/knowledge/embedder/azure_openai.py +210 -0
- agno/{embedder → knowledge/embedder}/base.py +8 -0
- agno/knowledge/embedder/cohere.py +323 -0
- agno/knowledge/embedder/fastembed.py +62 -0
- agno/{embedder → knowledge/embedder}/fireworks.py +1 -1
- agno/knowledge/embedder/google.py +258 -0
- agno/knowledge/embedder/huggingface.py +94 -0
- agno/knowledge/embedder/jina.py +182 -0
- agno/knowledge/embedder/langdb.py +22 -0
- agno/knowledge/embedder/mistral.py +206 -0
- agno/knowledge/embedder/nebius.py +13 -0
- agno/knowledge/embedder/ollama.py +154 -0
- agno/knowledge/embedder/openai.py +195 -0
- agno/knowledge/embedder/sentence_transformer.py +63 -0
- agno/{embedder → knowledge/embedder}/together.py +1 -1
- agno/knowledge/embedder/vllm.py +262 -0
- agno/knowledge/embedder/voyageai.py +165 -0
- agno/knowledge/knowledge.py +3006 -0
- agno/knowledge/reader/__init__.py +7 -0
- agno/knowledge/reader/arxiv_reader.py +81 -0
- agno/knowledge/reader/base.py +95 -0
- agno/knowledge/reader/csv_reader.py +164 -0
- agno/knowledge/reader/docx_reader.py +82 -0
- agno/knowledge/reader/field_labeled_csv_reader.py +290 -0
- agno/knowledge/reader/firecrawl_reader.py +201 -0
- agno/knowledge/reader/json_reader.py +88 -0
- agno/knowledge/reader/markdown_reader.py +137 -0
- agno/knowledge/reader/pdf_reader.py +431 -0
- agno/knowledge/reader/pptx_reader.py +101 -0
- agno/knowledge/reader/reader_factory.py +313 -0
- agno/knowledge/reader/s3_reader.py +89 -0
- agno/knowledge/reader/tavily_reader.py +193 -0
- agno/knowledge/reader/text_reader.py +127 -0
- agno/knowledge/reader/web_search_reader.py +325 -0
- agno/knowledge/reader/website_reader.py +455 -0
- agno/knowledge/reader/wikipedia_reader.py +91 -0
- agno/knowledge/reader/youtube_reader.py +78 -0
- agno/knowledge/remote_content/remote_content.py +88 -0
- agno/knowledge/reranker/__init__.py +3 -0
- agno/{reranker → knowledge/reranker}/base.py +1 -1
- agno/{reranker → knowledge/reranker}/cohere.py +2 -2
- agno/knowledge/reranker/infinity.py +195 -0
- agno/knowledge/reranker/sentence_transformer.py +54 -0
- agno/knowledge/types.py +39 -0
- agno/knowledge/utils.py +234 -0
- agno/media.py +439 -95
- agno/memory/__init__.py +16 -3
- agno/memory/manager.py +1474 -123
- agno/memory/strategies/__init__.py +15 -0
- agno/memory/strategies/base.py +66 -0
- agno/memory/strategies/summarize.py +196 -0
- agno/memory/strategies/types.py +37 -0
- agno/models/aimlapi/__init__.py +5 -0
- agno/models/aimlapi/aimlapi.py +62 -0
- agno/models/anthropic/__init__.py +4 -0
- agno/models/anthropic/claude.py +960 -496
- agno/models/aws/__init__.py +15 -0
- agno/models/aws/bedrock.py +686 -451
- agno/models/aws/claude.py +190 -183
- agno/models/azure/__init__.py +18 -1
- agno/models/azure/ai_foundry.py +489 -0
- agno/models/azure/openai_chat.py +89 -40
- agno/models/base.py +2477 -550
- agno/models/cerebras/__init__.py +12 -0
- agno/models/cerebras/cerebras.py +565 -0
- agno/models/cerebras/cerebras_openai.py +131 -0
- agno/models/cohere/__init__.py +4 -0
- agno/models/cohere/chat.py +306 -492
- agno/models/cometapi/__init__.py +5 -0
- agno/models/cometapi/cometapi.py +74 -0
- agno/models/dashscope/__init__.py +5 -0
- agno/models/dashscope/dashscope.py +90 -0
- agno/models/deepinfra/__init__.py +5 -0
- agno/models/deepinfra/deepinfra.py +45 -0
- agno/models/deepseek/__init__.py +4 -0
- agno/models/deepseek/deepseek.py +110 -9
- agno/models/fireworks/__init__.py +4 -0
- agno/models/fireworks/fireworks.py +19 -22
- agno/models/google/__init__.py +3 -7
- agno/models/google/gemini.py +1717 -662
- agno/models/google/utils.py +22 -0
- agno/models/groq/__init__.py +4 -0
- agno/models/groq/groq.py +391 -666
- agno/models/huggingface/__init__.py +4 -0
- agno/models/huggingface/huggingface.py +266 -538
- agno/models/ibm/__init__.py +5 -0
- agno/models/ibm/watsonx.py +432 -0
- agno/models/internlm/__init__.py +3 -0
- agno/models/internlm/internlm.py +20 -3
- agno/models/langdb/__init__.py +1 -0
- agno/models/langdb/langdb.py +60 -0
- agno/models/litellm/__init__.py +14 -0
- agno/models/litellm/chat.py +503 -0
- agno/models/litellm/litellm_openai.py +42 -0
- agno/models/llama_cpp/__init__.py +5 -0
- agno/models/llama_cpp/llama_cpp.py +22 -0
- agno/models/lmstudio/__init__.py +5 -0
- agno/models/lmstudio/lmstudio.py +25 -0
- agno/models/message.py +361 -39
- agno/models/meta/__init__.py +12 -0
- agno/models/meta/llama.py +502 -0
- agno/models/meta/llama_openai.py +79 -0
- agno/models/metrics.py +120 -0
- agno/models/mistral/__init__.py +4 -0
- agno/models/mistral/mistral.py +293 -393
- agno/models/nebius/__init__.py +3 -0
- agno/models/nebius/nebius.py +53 -0
- agno/models/nexus/__init__.py +3 -0
- agno/models/nexus/nexus.py +22 -0
- agno/models/nvidia/__init__.py +4 -0
- agno/models/nvidia/nvidia.py +22 -3
- agno/models/ollama/__init__.py +4 -2
- agno/models/ollama/chat.py +257 -492
- agno/models/openai/__init__.py +7 -0
- agno/models/openai/chat.py +725 -770
- agno/models/openai/like.py +16 -2
- agno/models/openai/responses.py +1121 -0
- agno/models/openrouter/__init__.py +4 -0
- agno/models/openrouter/openrouter.py +62 -5
- agno/models/perplexity/__init__.py +5 -0
- agno/models/perplexity/perplexity.py +203 -0
- agno/models/portkey/__init__.py +3 -0
- agno/models/portkey/portkey.py +82 -0
- agno/models/requesty/__init__.py +5 -0
- agno/models/requesty/requesty.py +69 -0
- agno/models/response.py +177 -7
- agno/models/sambanova/__init__.py +4 -0
- agno/models/sambanova/sambanova.py +23 -4
- agno/models/siliconflow/__init__.py +5 -0
- agno/models/siliconflow/siliconflow.py +42 -0
- agno/models/together/__init__.py +4 -0
- agno/models/together/together.py +21 -164
- agno/models/utils.py +266 -0
- agno/models/vercel/__init__.py +3 -0
- agno/models/vercel/v0.py +43 -0
- agno/models/vertexai/__init__.py +0 -1
- agno/models/vertexai/claude.py +190 -0
- agno/models/vllm/__init__.py +3 -0
- agno/models/vllm/vllm.py +83 -0
- agno/models/xai/__init__.py +2 -0
- agno/models/xai/xai.py +111 -7
- agno/os/__init__.py +3 -0
- agno/os/app.py +1027 -0
- agno/os/auth.py +244 -0
- agno/os/config.py +126 -0
- agno/os/interfaces/__init__.py +1 -0
- agno/os/interfaces/a2a/__init__.py +3 -0
- agno/os/interfaces/a2a/a2a.py +42 -0
- agno/os/interfaces/a2a/router.py +249 -0
- agno/os/interfaces/a2a/utils.py +924 -0
- agno/os/interfaces/agui/__init__.py +3 -0
- agno/os/interfaces/agui/agui.py +47 -0
- agno/os/interfaces/agui/router.py +147 -0
- agno/os/interfaces/agui/utils.py +574 -0
- agno/os/interfaces/base.py +25 -0
- agno/os/interfaces/slack/__init__.py +3 -0
- agno/os/interfaces/slack/router.py +148 -0
- agno/os/interfaces/slack/security.py +30 -0
- agno/os/interfaces/slack/slack.py +47 -0
- agno/os/interfaces/whatsapp/__init__.py +3 -0
- agno/os/interfaces/whatsapp/router.py +210 -0
- agno/os/interfaces/whatsapp/security.py +55 -0
- agno/os/interfaces/whatsapp/whatsapp.py +36 -0
- agno/os/mcp.py +293 -0
- agno/os/middleware/__init__.py +9 -0
- agno/os/middleware/jwt.py +797 -0
- agno/os/router.py +258 -0
- agno/os/routers/__init__.py +3 -0
- agno/os/routers/agents/__init__.py +3 -0
- agno/os/routers/agents/router.py +599 -0
- agno/os/routers/agents/schema.py +261 -0
- agno/os/routers/evals/__init__.py +3 -0
- agno/os/routers/evals/evals.py +450 -0
- agno/os/routers/evals/schemas.py +174 -0
- agno/os/routers/evals/utils.py +231 -0
- agno/os/routers/health.py +31 -0
- agno/os/routers/home.py +52 -0
- agno/os/routers/knowledge/__init__.py +3 -0
- agno/os/routers/knowledge/knowledge.py +1008 -0
- agno/os/routers/knowledge/schemas.py +178 -0
- agno/os/routers/memory/__init__.py +3 -0
- agno/os/routers/memory/memory.py +661 -0
- agno/os/routers/memory/schemas.py +88 -0
- agno/os/routers/metrics/__init__.py +3 -0
- agno/os/routers/metrics/metrics.py +190 -0
- agno/os/routers/metrics/schemas.py +47 -0
- agno/os/routers/session/__init__.py +3 -0
- agno/os/routers/session/session.py +997 -0
- agno/os/routers/teams/__init__.py +3 -0
- agno/os/routers/teams/router.py +512 -0
- agno/os/routers/teams/schema.py +257 -0
- agno/os/routers/traces/__init__.py +3 -0
- agno/os/routers/traces/schemas.py +414 -0
- agno/os/routers/traces/traces.py +499 -0
- agno/os/routers/workflows/__init__.py +3 -0
- agno/os/routers/workflows/router.py +624 -0
- agno/os/routers/workflows/schema.py +75 -0
- agno/os/schema.py +534 -0
- agno/os/scopes.py +469 -0
- agno/{playground → os}/settings.py +7 -15
- agno/os/utils.py +973 -0
- agno/reasoning/anthropic.py +80 -0
- agno/reasoning/azure_ai_foundry.py +67 -0
- agno/reasoning/deepseek.py +63 -0
- agno/reasoning/default.py +97 -0
- agno/reasoning/gemini.py +73 -0
- agno/reasoning/groq.py +71 -0
- agno/reasoning/helpers.py +24 -1
- agno/reasoning/ollama.py +67 -0
- agno/reasoning/openai.py +86 -0
- agno/reasoning/step.py +2 -1
- agno/reasoning/vertexai.py +76 -0
- agno/run/__init__.py +6 -0
- agno/run/agent.py +822 -0
- agno/run/base.py +247 -0
- agno/run/cancel.py +81 -0
- agno/run/requirement.py +181 -0
- agno/run/team.py +767 -0
- agno/run/workflow.py +708 -0
- agno/session/__init__.py +10 -0
- agno/session/agent.py +260 -0
- agno/session/summary.py +265 -0
- agno/session/team.py +342 -0
- agno/session/workflow.py +501 -0
- agno/table.py +10 -0
- agno/team/__init__.py +37 -0
- agno/team/team.py +9536 -0
- agno/tools/__init__.py +7 -0
- agno/tools/agentql.py +120 -0
- agno/tools/airflow.py +22 -12
- agno/tools/api.py +122 -0
- agno/tools/apify.py +276 -83
- agno/tools/{arxiv_toolkit.py → arxiv.py} +20 -12
- agno/tools/aws_lambda.py +28 -7
- agno/tools/aws_ses.py +66 -0
- agno/tools/baidusearch.py +11 -4
- agno/tools/bitbucket.py +292 -0
- agno/tools/brandfetch.py +213 -0
- agno/tools/bravesearch.py +106 -0
- agno/tools/brightdata.py +367 -0
- agno/tools/browserbase.py +209 -0
- agno/tools/calcom.py +32 -23
- agno/tools/calculator.py +24 -37
- agno/tools/cartesia.py +187 -0
- agno/tools/{clickup_tool.py → clickup.py} +17 -28
- agno/tools/confluence.py +91 -26
- agno/tools/crawl4ai.py +139 -43
- agno/tools/csv_toolkit.py +28 -22
- agno/tools/dalle.py +36 -22
- agno/tools/daytona.py +475 -0
- agno/tools/decorator.py +169 -14
- agno/tools/desi_vocal.py +23 -11
- agno/tools/discord.py +32 -29
- agno/tools/docker.py +716 -0
- agno/tools/duckdb.py +76 -81
- agno/tools/duckduckgo.py +43 -40
- agno/tools/e2b.py +703 -0
- agno/tools/eleven_labs.py +65 -54
- agno/tools/email.py +13 -5
- agno/tools/evm.py +129 -0
- agno/tools/exa.py +324 -42
- agno/tools/fal.py +39 -35
- agno/tools/file.py +196 -30
- agno/tools/file_generation.py +356 -0
- agno/tools/financial_datasets.py +288 -0
- agno/tools/firecrawl.py +108 -33
- agno/tools/function.py +960 -122
- agno/tools/giphy.py +34 -12
- agno/tools/github.py +1294 -97
- agno/tools/gmail.py +922 -0
- agno/tools/google_bigquery.py +117 -0
- agno/tools/google_drive.py +271 -0
- agno/tools/google_maps.py +253 -0
- agno/tools/googlecalendar.py +607 -107
- agno/tools/googlesheets.py +377 -0
- agno/tools/hackernews.py +20 -12
- agno/tools/jina.py +24 -14
- agno/tools/jira.py +48 -19
- agno/tools/knowledge.py +218 -0
- agno/tools/linear.py +82 -43
- agno/tools/linkup.py +58 -0
- agno/tools/local_file_system.py +15 -7
- agno/tools/lumalab.py +41 -26
- agno/tools/mcp/__init__.py +10 -0
- agno/tools/mcp/mcp.py +331 -0
- agno/tools/mcp/multi_mcp.py +347 -0
- agno/tools/mcp/params.py +24 -0
- agno/tools/mcp_toolbox.py +284 -0
- agno/tools/mem0.py +193 -0
- agno/tools/memory.py +419 -0
- agno/tools/mlx_transcribe.py +11 -9
- agno/tools/models/azure_openai.py +190 -0
- agno/tools/models/gemini.py +203 -0
- agno/tools/models/groq.py +158 -0
- agno/tools/models/morph.py +186 -0
- agno/tools/models/nebius.py +124 -0
- agno/tools/models_labs.py +163 -82
- agno/tools/moviepy_video.py +18 -13
- agno/tools/nano_banana.py +151 -0
- agno/tools/neo4j.py +134 -0
- agno/tools/newspaper.py +15 -4
- agno/tools/newspaper4k.py +19 -6
- agno/tools/notion.py +204 -0
- agno/tools/openai.py +181 -17
- agno/tools/openbb.py +27 -20
- agno/tools/opencv.py +321 -0
- agno/tools/openweather.py +233 -0
- agno/tools/oxylabs.py +385 -0
- agno/tools/pandas.py +25 -15
- agno/tools/parallel.py +314 -0
- agno/tools/postgres.py +238 -185
- agno/tools/pubmed.py +125 -13
- agno/tools/python.py +48 -35
- agno/tools/reasoning.py +283 -0
- agno/tools/reddit.py +207 -29
- agno/tools/redshift.py +406 -0
- agno/tools/replicate.py +69 -26
- agno/tools/resend.py +11 -6
- agno/tools/scrapegraph.py +179 -19
- agno/tools/searxng.py +23 -31
- agno/tools/serpapi.py +15 -10
- agno/tools/serper.py +255 -0
- agno/tools/shell.py +23 -12
- agno/tools/shopify.py +1519 -0
- agno/tools/slack.py +56 -14
- agno/tools/sleep.py +8 -6
- agno/tools/spider.py +35 -11
- agno/tools/spotify.py +919 -0
- agno/tools/sql.py +34 -19
- agno/tools/tavily.py +158 -8
- agno/tools/telegram.py +18 -8
- agno/tools/todoist.py +218 -0
- agno/tools/toolkit.py +134 -9
- agno/tools/trafilatura.py +388 -0
- agno/tools/trello.py +25 -28
- agno/tools/twilio.py +18 -9
- agno/tools/user_control_flow.py +78 -0
- agno/tools/valyu.py +228 -0
- agno/tools/visualization.py +467 -0
- agno/tools/webbrowser.py +28 -0
- agno/tools/webex.py +76 -0
- agno/tools/website.py +23 -19
- agno/tools/webtools.py +45 -0
- agno/tools/whatsapp.py +286 -0
- agno/tools/wikipedia.py +28 -19
- agno/tools/workflow.py +285 -0
- agno/tools/{twitter.py → x.py} +142 -46
- agno/tools/yfinance.py +41 -39
- agno/tools/youtube.py +34 -17
- agno/tools/zendesk.py +15 -5
- agno/tools/zep.py +454 -0
- agno/tools/zoom.py +86 -37
- agno/tracing/__init__.py +12 -0
- agno/tracing/exporter.py +157 -0
- agno/tracing/schemas.py +276 -0
- agno/tracing/setup.py +111 -0
- agno/utils/agent.py +938 -0
- agno/utils/audio.py +37 -1
- agno/utils/certs.py +27 -0
- agno/utils/code_execution.py +11 -0
- agno/utils/common.py +103 -20
- agno/utils/cryptography.py +22 -0
- agno/utils/dttm.py +33 -0
- agno/utils/events.py +700 -0
- agno/utils/functions.py +107 -37
- agno/utils/gemini.py +426 -0
- agno/utils/hooks.py +171 -0
- agno/utils/http.py +185 -0
- agno/utils/json_schema.py +159 -37
- agno/utils/knowledge.py +36 -0
- agno/utils/location.py +19 -0
- agno/utils/log.py +221 -8
- agno/utils/mcp.py +214 -0
- agno/utils/media.py +335 -14
- agno/utils/merge_dict.py +22 -1
- agno/utils/message.py +77 -2
- agno/utils/models/ai_foundry.py +50 -0
- agno/utils/models/claude.py +373 -0
- agno/utils/models/cohere.py +94 -0
- agno/utils/models/llama.py +85 -0
- agno/utils/models/mistral.py +100 -0
- agno/utils/models/openai_responses.py +140 -0
- agno/utils/models/schema_utils.py +153 -0
- agno/utils/models/watsonx.py +41 -0
- agno/utils/openai.py +257 -0
- agno/utils/pickle.py +1 -1
- agno/utils/pprint.py +124 -8
- agno/utils/print_response/agent.py +930 -0
- agno/utils/print_response/team.py +1914 -0
- agno/utils/print_response/workflow.py +1668 -0
- agno/utils/prompts.py +111 -0
- agno/utils/reasoning.py +108 -0
- agno/utils/response.py +163 -0
- agno/utils/serialize.py +32 -0
- agno/utils/shell.py +4 -4
- agno/utils/streamlit.py +487 -0
- agno/utils/string.py +204 -51
- agno/utils/team.py +139 -0
- agno/utils/timer.py +9 -2
- agno/utils/tokens.py +657 -0
- agno/utils/tools.py +19 -1
- agno/utils/whatsapp.py +305 -0
- agno/utils/yaml_io.py +3 -3
- agno/vectordb/__init__.py +2 -0
- agno/vectordb/base.py +87 -9
- agno/vectordb/cassandra/__init__.py +5 -1
- agno/vectordb/cassandra/cassandra.py +383 -27
- agno/vectordb/chroma/__init__.py +4 -0
- agno/vectordb/chroma/chromadb.py +748 -83
- agno/vectordb/clickhouse/__init__.py +7 -1
- agno/vectordb/clickhouse/clickhousedb.py +554 -53
- agno/vectordb/couchbase/__init__.py +3 -0
- agno/vectordb/couchbase/couchbase.py +1446 -0
- agno/vectordb/lancedb/__init__.py +5 -0
- agno/vectordb/lancedb/lance_db.py +730 -98
- agno/vectordb/langchaindb/__init__.py +5 -0
- agno/vectordb/langchaindb/langchaindb.py +163 -0
- agno/vectordb/lightrag/__init__.py +5 -0
- agno/vectordb/lightrag/lightrag.py +388 -0
- agno/vectordb/llamaindex/__init__.py +3 -0
- agno/vectordb/llamaindex/llamaindexdb.py +166 -0
- agno/vectordb/milvus/__init__.py +3 -0
- agno/vectordb/milvus/milvus.py +966 -78
- agno/vectordb/mongodb/__init__.py +9 -1
- agno/vectordb/mongodb/mongodb.py +1175 -172
- agno/vectordb/pgvector/__init__.py +8 -0
- agno/vectordb/pgvector/pgvector.py +599 -115
- agno/vectordb/pineconedb/__init__.py +5 -1
- agno/vectordb/pineconedb/pineconedb.py +406 -43
- agno/vectordb/qdrant/__init__.py +4 -0
- agno/vectordb/qdrant/qdrant.py +914 -61
- agno/vectordb/redis/__init__.py +9 -0
- agno/vectordb/redis/redisdb.py +682 -0
- agno/vectordb/singlestore/__init__.py +8 -1
- agno/vectordb/singlestore/singlestore.py +771 -0
- agno/vectordb/surrealdb/__init__.py +3 -0
- agno/vectordb/surrealdb/surrealdb.py +663 -0
- agno/vectordb/upstashdb/__init__.py +5 -0
- agno/vectordb/upstashdb/upstashdb.py +718 -0
- agno/vectordb/weaviate/__init__.py +8 -0
- agno/vectordb/weaviate/index.py +15 -0
- agno/vectordb/weaviate/weaviate.py +1009 -0
- agno/workflow/__init__.py +23 -1
- agno/workflow/agent.py +299 -0
- agno/workflow/condition.py +759 -0
- agno/workflow/loop.py +756 -0
- agno/workflow/parallel.py +853 -0
- agno/workflow/router.py +723 -0
- agno/workflow/step.py +1564 -0
- agno/workflow/steps.py +613 -0
- agno/workflow/types.py +556 -0
- agno/workflow/workflow.py +4327 -514
- agno-2.3.13.dist-info/METADATA +639 -0
- agno-2.3.13.dist-info/RECORD +613 -0
- {agno-0.1.2.dist-info → agno-2.3.13.dist-info}/WHEEL +1 -1
- agno-2.3.13.dist-info/licenses/LICENSE +201 -0
- agno/api/playground.py +0 -91
- agno/api/schemas/playground.py +0 -22
- agno/api/schemas/user.py +0 -22
- agno/api/schemas/workspace.py +0 -46
- agno/api/user.py +0 -160
- agno/api/workspace.py +0 -151
- agno/cli/auth_server.py +0 -118
- agno/cli/config.py +0 -275
- agno/cli/console.py +0 -88
- agno/cli/credentials.py +0 -23
- agno/cli/entrypoint.py +0 -571
- agno/cli/operator.py +0 -355
- agno/cli/settings.py +0 -85
- agno/cli/ws/ws_cli.py +0 -817
- agno/constants.py +0 -13
- agno/document/__init__.py +0 -1
- agno/document/chunking/semantic.py +0 -47
- agno/document/chunking/strategy.py +0 -31
- agno/document/reader/__init__.py +0 -1
- agno/document/reader/arxiv_reader.py +0 -41
- agno/document/reader/base.py +0 -22
- agno/document/reader/csv_reader.py +0 -84
- agno/document/reader/docx_reader.py +0 -46
- agno/document/reader/firecrawl_reader.py +0 -99
- agno/document/reader/json_reader.py +0 -43
- agno/document/reader/pdf_reader.py +0 -219
- agno/document/reader/s3/pdf_reader.py +0 -46
- agno/document/reader/s3/text_reader.py +0 -51
- agno/document/reader/text_reader.py +0 -41
- agno/document/reader/website_reader.py +0 -175
- agno/document/reader/youtube_reader.py +0 -50
- agno/embedder/__init__.py +0 -1
- agno/embedder/azure_openai.py +0 -86
- agno/embedder/cohere.py +0 -72
- agno/embedder/fastembed.py +0 -37
- agno/embedder/google.py +0 -73
- agno/embedder/huggingface.py +0 -54
- agno/embedder/mistral.py +0 -80
- agno/embedder/ollama.py +0 -57
- agno/embedder/openai.py +0 -74
- agno/embedder/sentence_transformer.py +0 -38
- agno/embedder/voyageai.py +0 -64
- agno/eval/perf.py +0 -201
- agno/file/__init__.py +0 -1
- agno/file/file.py +0 -16
- agno/file/local/csv.py +0 -32
- agno/file/local/txt.py +0 -19
- agno/infra/app.py +0 -240
- agno/infra/base.py +0 -144
- agno/infra/context.py +0 -20
- agno/infra/db_app.py +0 -52
- agno/infra/resource.py +0 -205
- agno/infra/resources.py +0 -55
- agno/knowledge/agent.py +0 -230
- agno/knowledge/arxiv.py +0 -22
- agno/knowledge/combined.py +0 -22
- agno/knowledge/csv.py +0 -28
- agno/knowledge/csv_url.py +0 -19
- agno/knowledge/document.py +0 -20
- agno/knowledge/docx.py +0 -30
- agno/knowledge/json.py +0 -28
- agno/knowledge/langchain.py +0 -71
- agno/knowledge/llamaindex.py +0 -66
- agno/knowledge/pdf.py +0 -28
- agno/knowledge/pdf_url.py +0 -26
- agno/knowledge/s3/base.py +0 -60
- agno/knowledge/s3/pdf.py +0 -21
- agno/knowledge/s3/text.py +0 -23
- agno/knowledge/text.py +0 -30
- agno/knowledge/website.py +0 -88
- agno/knowledge/wikipedia.py +0 -31
- agno/knowledge/youtube.py +0 -22
- agno/memory/agent.py +0 -392
- agno/memory/classifier.py +0 -104
- agno/memory/db/__init__.py +0 -1
- agno/memory/db/base.py +0 -42
- agno/memory/db/mongodb.py +0 -189
- agno/memory/db/postgres.py +0 -203
- agno/memory/db/sqlite.py +0 -193
- agno/memory/memory.py +0 -15
- agno/memory/row.py +0 -36
- agno/memory/summarizer.py +0 -192
- agno/memory/summary.py +0 -19
- agno/memory/workflow.py +0 -38
- agno/models/google/gemini_openai.py +0 -26
- agno/models/ollama/hermes.py +0 -221
- agno/models/ollama/tools.py +0 -362
- agno/models/vertexai/gemini.py +0 -595
- agno/playground/__init__.py +0 -3
- agno/playground/async_router.py +0 -421
- agno/playground/deploy.py +0 -249
- agno/playground/operator.py +0 -92
- agno/playground/playground.py +0 -91
- agno/playground/schemas.py +0 -76
- agno/playground/serve.py +0 -55
- agno/playground/sync_router.py +0 -405
- agno/reasoning/agent.py +0 -68
- agno/run/response.py +0 -112
- agno/storage/agent/__init__.py +0 -0
- agno/storage/agent/base.py +0 -38
- agno/storage/agent/dynamodb.py +0 -350
- agno/storage/agent/json.py +0 -92
- agno/storage/agent/mongodb.py +0 -228
- agno/storage/agent/postgres.py +0 -367
- agno/storage/agent/session.py +0 -79
- agno/storage/agent/singlestore.py +0 -303
- agno/storage/agent/sqlite.py +0 -357
- agno/storage/agent/yaml.py +0 -93
- agno/storage/workflow/__init__.py +0 -0
- agno/storage/workflow/base.py +0 -40
- agno/storage/workflow/mongodb.py +0 -233
- agno/storage/workflow/postgres.py +0 -366
- agno/storage/workflow/session.py +0 -60
- agno/storage/workflow/sqlite.py +0 -359
- agno/tools/googlesearch.py +0 -88
- agno/utils/defaults.py +0 -57
- agno/utils/filesystem.py +0 -39
- agno/utils/git.py +0 -52
- agno/utils/json_io.py +0 -30
- agno/utils/load_env.py +0 -19
- agno/utils/py_io.py +0 -19
- agno/utils/pyproject.py +0 -18
- agno/utils/resource_filter.py +0 -31
- agno/vectordb/singlestore/s2vectordb.py +0 -390
- agno/vectordb/singlestore/s2vectordb2.py +0 -355
- agno/workspace/__init__.py +0 -0
- agno/workspace/config.py +0 -325
- agno/workspace/enums.py +0 -6
- agno/workspace/helpers.py +0 -48
- agno/workspace/operator.py +0 -758
- agno/workspace/settings.py +0 -63
- agno-0.1.2.dist-info/LICENSE +0 -375
- agno-0.1.2.dist-info/METADATA +0 -502
- agno-0.1.2.dist-info/RECORD +0 -352
- agno-0.1.2.dist-info/entry_points.txt +0 -3
- /agno/{cli → db/migrations}/__init__.py +0 -0
- /agno/{cli/ws → db/migrations/versions}/__init__.py +0 -0
- /agno/{document/chunking/__init__.py → db/schemas/metrics.py} +0 -0
- /agno/{document/reader/s3 → integrations}/__init__.py +0 -0
- /agno/{file/local → knowledge/chunking}/__init__.py +0 -0
- /agno/{infra → knowledge/remote_content}/__init__.py +0 -0
- /agno/{knowledge/s3 → tools/models}/__init__.py +0 -0
- /agno/{reranker → utils/models}/__init__.py +0 -0
- /agno/{storage → utils/print_response}/__init__.py +0 -0
- {agno-0.1.2.dist-info → agno-2.3.13.dist-info}/top_level.txt +0 -0
|
@@ -1,26 +1,31 @@
|
|
|
1
|
+
import asyncio
|
|
1
2
|
from hashlib import md5
|
|
2
|
-
from typing import Any, Dict, List, Optional
|
|
3
|
+
from typing import Any, Dict, List, Optional, Union
|
|
3
4
|
|
|
4
5
|
from agno.vectordb.clickhouse.index import HNSW
|
|
5
6
|
|
|
6
7
|
try:
|
|
7
8
|
import clickhouse_connect
|
|
9
|
+
import clickhouse_connect.driver.asyncclient
|
|
8
10
|
import clickhouse_connect.driver.client
|
|
9
11
|
except ImportError:
|
|
10
12
|
raise ImportError("`clickhouse-connect` not installed. Use `pip install clickhouse-connect` to install it")
|
|
11
13
|
|
|
12
|
-
from agno.document import Document
|
|
13
|
-
from agno.embedder import Embedder
|
|
14
|
-
from agno.utils.log import logger
|
|
14
|
+
from agno.filters import FilterExpr
|
|
15
|
+
from agno.knowledge.document import Document
|
|
16
|
+
from agno.knowledge.embedder import Embedder
|
|
17
|
+
from agno.utils.log import log_debug, log_info, log_warning, logger
|
|
15
18
|
from agno.vectordb.base import VectorDb
|
|
16
19
|
from agno.vectordb.distance import Distance
|
|
17
20
|
|
|
18
21
|
|
|
19
|
-
class ClickhouseDb(VectorDb):
|
|
22
|
+
class Clickhouse(VectorDb):
|
|
20
23
|
def __init__(
|
|
21
24
|
self,
|
|
22
25
|
table_name: str,
|
|
23
26
|
host: str,
|
|
27
|
+
name: Optional[str] = None,
|
|
28
|
+
description: Optional[str] = None,
|
|
24
29
|
username: Optional[str] = None,
|
|
25
30
|
password: str = "",
|
|
26
31
|
port: int = 0,
|
|
@@ -28,32 +33,45 @@ class ClickhouseDb(VectorDb):
|
|
|
28
33
|
dsn: Optional[str] = None,
|
|
29
34
|
compress: str = "lz4",
|
|
30
35
|
client: Optional[clickhouse_connect.driver.client.Client] = None,
|
|
36
|
+
asyncclient: Optional[clickhouse_connect.driver.asyncclient.AsyncClient] = None,
|
|
31
37
|
embedder: Optional[Embedder] = None,
|
|
32
38
|
distance: Distance = Distance.cosine,
|
|
33
39
|
index: Optional[HNSW] = HNSW(),
|
|
34
40
|
):
|
|
41
|
+
# Store connection parameters as instance attributes
|
|
42
|
+
self.host = host
|
|
43
|
+
self.username = username
|
|
44
|
+
self.password = password
|
|
45
|
+
self.port = port
|
|
46
|
+
self.dsn = dsn
|
|
47
|
+
# Initialize base class with name and description
|
|
48
|
+
super().__init__(name=name, description=description)
|
|
49
|
+
|
|
50
|
+
self.compress = compress
|
|
51
|
+
self.database_name = database_name
|
|
35
52
|
if not client:
|
|
36
53
|
client = clickhouse_connect.get_client(
|
|
37
|
-
host=host,
|
|
38
|
-
username=username, # type: ignore
|
|
39
|
-
password=password,
|
|
40
|
-
database=database_name,
|
|
41
|
-
port=port,
|
|
42
|
-
dsn=dsn,
|
|
43
|
-
compress=compress,
|
|
54
|
+
host=self.host,
|
|
55
|
+
username=self.username, # type: ignore
|
|
56
|
+
password=self.password,
|
|
57
|
+
database=self.database_name,
|
|
58
|
+
port=self.port,
|
|
59
|
+
dsn=self.dsn,
|
|
60
|
+
compress=self.compress,
|
|
44
61
|
)
|
|
45
62
|
|
|
46
63
|
# Database attributes
|
|
47
64
|
self.client = client
|
|
48
|
-
self.database_name = database_name
|
|
65
|
+
self.async_client = asyncclient
|
|
49
66
|
self.table_name = table_name
|
|
50
67
|
|
|
51
68
|
# Embedder for embedding the document contents
|
|
52
69
|
_embedder = embedder
|
|
53
70
|
if _embedder is None:
|
|
54
|
-
from agno.embedder.openai import OpenAIEmbedder
|
|
71
|
+
from agno.knowledge.embedder.openai import OpenAIEmbedder
|
|
55
72
|
|
|
56
73
|
_embedder = OpenAIEmbedder()
|
|
74
|
+
log_info("Embedder not provided, using OpenAIEmbedder as default.")
|
|
57
75
|
self.embedder: Embedder = _embedder
|
|
58
76
|
self.dimensions: Optional[int] = self.embedder.dimensions
|
|
59
77
|
|
|
@@ -63,6 +81,21 @@ class ClickhouseDb(VectorDb):
|
|
|
63
81
|
# Index for the collection
|
|
64
82
|
self.index: Optional[HNSW] = index
|
|
65
83
|
|
|
84
|
+
async def _ensure_async_client(self):
|
|
85
|
+
"""Ensure we have an initialized async client."""
|
|
86
|
+
if self.async_client is None:
|
|
87
|
+
self.async_client = await clickhouse_connect.get_async_client(
|
|
88
|
+
host=self.host,
|
|
89
|
+
username=self.username, # type: ignore
|
|
90
|
+
password=self.password,
|
|
91
|
+
database=self.database_name,
|
|
92
|
+
port=self.port,
|
|
93
|
+
dsn=self.dsn,
|
|
94
|
+
compress=self.compress,
|
|
95
|
+
settings={"allow_experimental_vector_similarity_index": 1},
|
|
96
|
+
)
|
|
97
|
+
return self.async_client
|
|
98
|
+
|
|
66
99
|
def _get_base_parameters(self) -> Dict[str, Any]:
|
|
67
100
|
return {
|
|
68
101
|
"table_name": self.table_name,
|
|
@@ -70,7 +103,7 @@ class ClickhouseDb(VectorDb):
|
|
|
70
103
|
}
|
|
71
104
|
|
|
72
105
|
def table_exists(self) -> bool:
|
|
73
|
-
|
|
106
|
+
log_debug(f"Checking if table exists: {self.table_name}")
|
|
74
107
|
try:
|
|
75
108
|
parameters = self._get_base_parameters()
|
|
76
109
|
return bool(
|
|
@@ -83,22 +116,38 @@ class ClickhouseDb(VectorDb):
|
|
|
83
116
|
logger.error(e)
|
|
84
117
|
return False
|
|
85
118
|
|
|
119
|
+
async def async_table_exists(self) -> bool:
|
|
120
|
+
"""Check if a table exists asynchronously."""
|
|
121
|
+
log_debug(f"Async checking if table exists: {self.table_name}")
|
|
122
|
+
try:
|
|
123
|
+
async_client = await self._ensure_async_client()
|
|
124
|
+
|
|
125
|
+
parameters = self._get_base_parameters()
|
|
126
|
+
result = await async_client.command(
|
|
127
|
+
"EXISTS TABLE {database_name:Identifier}.{table_name:Identifier}",
|
|
128
|
+
parameters=parameters,
|
|
129
|
+
)
|
|
130
|
+
return bool(result)
|
|
131
|
+
except Exception as e:
|
|
132
|
+
logger.error(f"Async error checking if table exists: {e}")
|
|
133
|
+
return False
|
|
134
|
+
|
|
86
135
|
def create(self) -> None:
|
|
87
136
|
if not self.table_exists():
|
|
88
|
-
|
|
137
|
+
log_debug(f"Creating Database: {self.database_name}")
|
|
89
138
|
parameters = {"database_name": self.database_name}
|
|
90
139
|
self.client.command(
|
|
91
140
|
"CREATE DATABASE IF NOT EXISTS {database_name:Identifier}",
|
|
92
141
|
parameters=parameters,
|
|
93
142
|
)
|
|
94
143
|
|
|
95
|
-
|
|
144
|
+
log_debug(f"Creating table: {self.table_name}")
|
|
96
145
|
|
|
97
146
|
parameters = self._get_base_parameters()
|
|
98
147
|
|
|
99
148
|
if isinstance(self.index, HNSW):
|
|
100
149
|
index = (
|
|
101
|
-
f"INDEX embedding_index embedding TYPE vector_similarity('hnsw', 'L2Distance', {self.index.quantization}, "
|
|
150
|
+
f"INDEX embedding_index embedding TYPE vector_similarity('hnsw', 'L2Distance', {self.embedder.dimensions}, {self.index.quantization}, "
|
|
102
151
|
f"{self.index.hnsw_max_connections_per_layer}, {self.index.hnsw_candidate_list_size_for_construction})"
|
|
103
152
|
)
|
|
104
153
|
self.client.command("SET allow_experimental_vector_similarity_index = 1")
|
|
@@ -115,6 +164,7 @@ class ClickhouseDb(VectorDb):
|
|
|
115
164
|
meta_data JSON DEFAULT '{{}}',
|
|
116
165
|
filters JSON DEFAULT '{{}}',
|
|
117
166
|
content String,
|
|
167
|
+
content_id String,
|
|
118
168
|
embedding Array(Float32),
|
|
119
169
|
usage JSON,
|
|
120
170
|
created_at DateTime('UTC') DEFAULT now(),
|
|
@@ -125,22 +175,50 @@ class ClickhouseDb(VectorDb):
|
|
|
125
175
|
parameters=parameters,
|
|
126
176
|
)
|
|
127
177
|
|
|
128
|
-
def
|
|
129
|
-
"""
|
|
130
|
-
|
|
178
|
+
async def async_create(self) -> None:
|
|
179
|
+
"""Create database and table asynchronously."""
|
|
180
|
+
if not await self.async_table_exists():
|
|
181
|
+
log_debug(f"Async creating Database: {self.database_name}")
|
|
182
|
+
async_client = await self._ensure_async_client()
|
|
131
183
|
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
parameters["content_hash"] = md5(cleaned_content.encode()).hexdigest()
|
|
184
|
+
parameters = {"database_name": self.database_name}
|
|
185
|
+
await async_client.command(
|
|
186
|
+
"CREATE DATABASE IF NOT EXISTS {database_name:Identifier}",
|
|
187
|
+
parameters=parameters,
|
|
188
|
+
)
|
|
138
189
|
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
190
|
+
log_debug(f"Async creating table: {self.table_name}")
|
|
191
|
+
parameters = self._get_base_parameters()
|
|
192
|
+
|
|
193
|
+
if isinstance(self.index, HNSW):
|
|
194
|
+
index = (
|
|
195
|
+
f"INDEX embedding_index embedding TYPE vector_similarity('hnsw', 'L2Distance', {self.index.quantization}, "
|
|
196
|
+
f"{self.index.hnsw_max_connections_per_layer}, {self.index.hnsw_candidate_list_size_for_construction})"
|
|
197
|
+
)
|
|
198
|
+
await async_client.command("SET allow_experimental_vector_similarity_index = 1")
|
|
199
|
+
else:
|
|
200
|
+
raise NotImplementedError(f"Not implemented index {type(self.index)!r} is passed")
|
|
201
|
+
|
|
202
|
+
await self.async_client.command("SET enable_json_type = 1") # type: ignore
|
|
203
|
+
|
|
204
|
+
await self.async_client.command( # type: ignore
|
|
205
|
+
f"""CREATE TABLE IF NOT EXISTS {{database_name:Identifier}}.{{table_name:Identifier}}
|
|
206
|
+
(
|
|
207
|
+
id String,
|
|
208
|
+
name String,
|
|
209
|
+
meta_data JSON DEFAULT '{{}}',
|
|
210
|
+
filters JSON DEFAULT '{{}}',
|
|
211
|
+
content String,
|
|
212
|
+
content_id String,
|
|
213
|
+
embedding Array(Float32),
|
|
214
|
+
usage JSON,
|
|
215
|
+
created_at DateTime('UTC') DEFAULT now(),
|
|
216
|
+
content_hash String,
|
|
217
|
+
PRIMARY KEY (id),
|
|
218
|
+
{index}
|
|
219
|
+
) ENGINE = ReplacingMergeTree ORDER BY id""",
|
|
220
|
+
parameters=parameters,
|
|
221
|
+
)
|
|
144
222
|
|
|
145
223
|
def name_exists(self, name: str) -> bool:
|
|
146
224
|
"""
|
|
@@ -156,7 +234,20 @@ class ClickhouseDb(VectorDb):
|
|
|
156
234
|
"SELECT name FROM {database_name:Identifier}.{table_name:Identifier} WHERE name = {name:String}",
|
|
157
235
|
parameters=parameters,
|
|
158
236
|
)
|
|
159
|
-
return
|
|
237
|
+
return len(result.result_rows) > 0 if result.result_rows else False
|
|
238
|
+
|
|
239
|
+
async def async_name_exists(self, name: str) -> bool:
|
|
240
|
+
"""Check if a document with given name exists asynchronously."""
|
|
241
|
+
parameters = self._get_base_parameters()
|
|
242
|
+
async_client = await self._ensure_async_client()
|
|
243
|
+
|
|
244
|
+
parameters["name"] = name
|
|
245
|
+
|
|
246
|
+
result = await async_client.query(
|
|
247
|
+
"SELECT name FROM {database_name:Identifier}.{table_name:Identifier} WHERE name = {name:String}",
|
|
248
|
+
parameters=parameters,
|
|
249
|
+
)
|
|
250
|
+
return len(result.result_rows) > 0 if result.result_rows else False
|
|
160
251
|
|
|
161
252
|
def id_exists(self, id: str) -> bool:
|
|
162
253
|
"""
|
|
@@ -172,10 +263,11 @@ class ClickhouseDb(VectorDb):
|
|
|
172
263
|
"SELECT id FROM {database_name:Identifier}.{table_name:Identifier} WHERE id = {id:String}",
|
|
173
264
|
parameters=parameters,
|
|
174
265
|
)
|
|
175
|
-
return
|
|
266
|
+
return len(result.result_rows) > 0 if result.result_rows else False
|
|
176
267
|
|
|
177
268
|
def insert(
|
|
178
269
|
self,
|
|
270
|
+
content_hash: str,
|
|
179
271
|
documents: List[Document],
|
|
180
272
|
filters: Optional[Dict[str, Any]] = None,
|
|
181
273
|
) -> None:
|
|
@@ -183,8 +275,7 @@ class ClickhouseDb(VectorDb):
|
|
|
183
275
|
for document in documents:
|
|
184
276
|
document.embed(embedder=self.embedder)
|
|
185
277
|
cleaned_content = document.content.replace("\x00", "\ufffd")
|
|
186
|
-
|
|
187
|
-
_id = document.id or content_hash
|
|
278
|
+
_id = md5(cleaned_content.encode()).hexdigest()
|
|
188
279
|
|
|
189
280
|
row: List[Any] = [
|
|
190
281
|
_id,
|
|
@@ -192,6 +283,7 @@ class ClickhouseDb(VectorDb):
|
|
|
192
283
|
document.meta_data,
|
|
193
284
|
filters,
|
|
194
285
|
cleaned_content,
|
|
286
|
+
document.content_id,
|
|
195
287
|
document.embedding,
|
|
196
288
|
document.usage,
|
|
197
289
|
content_hash,
|
|
@@ -207,18 +299,113 @@ class ClickhouseDb(VectorDb):
|
|
|
207
299
|
"meta_data",
|
|
208
300
|
"filters",
|
|
209
301
|
"content",
|
|
302
|
+
"content_id",
|
|
210
303
|
"embedding",
|
|
211
304
|
"usage",
|
|
212
305
|
"content_hash",
|
|
213
306
|
],
|
|
214
307
|
)
|
|
215
|
-
|
|
308
|
+
log_debug(f"Inserted {len(documents)} documents")
|
|
309
|
+
|
|
310
|
+
async def async_insert(
|
|
311
|
+
self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
|
|
312
|
+
) -> None:
|
|
313
|
+
"""Insert documents asynchronously."""
|
|
314
|
+
rows: List[List[Any]] = []
|
|
315
|
+
async_client = await self._ensure_async_client()
|
|
316
|
+
|
|
317
|
+
if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
|
|
318
|
+
# Use batch embedding when enabled and supported
|
|
319
|
+
try:
|
|
320
|
+
# Extract content from all documents
|
|
321
|
+
doc_contents = [doc.content for doc in documents]
|
|
322
|
+
|
|
323
|
+
# Get batch embeddings and usage
|
|
324
|
+
embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
|
|
325
|
+
|
|
326
|
+
# Process documents with pre-computed embeddings
|
|
327
|
+
for j, doc in enumerate(documents):
|
|
328
|
+
try:
|
|
329
|
+
if j < len(embeddings):
|
|
330
|
+
doc.embedding = embeddings[j]
|
|
331
|
+
doc.usage = usages[j] if j < len(usages) else None
|
|
332
|
+
except Exception as e:
|
|
333
|
+
logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")
|
|
334
|
+
|
|
335
|
+
except Exception as e:
|
|
336
|
+
# Check if this is a rate limit error - don't fall back as it would make things worse
|
|
337
|
+
error_str = str(e).lower()
|
|
338
|
+
is_rate_limit = any(
|
|
339
|
+
phrase in error_str
|
|
340
|
+
for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
|
|
341
|
+
)
|
|
342
|
+
|
|
343
|
+
if is_rate_limit:
|
|
344
|
+
logger.error(f"Rate limit detected during batch embedding. {e}")
|
|
345
|
+
raise e
|
|
346
|
+
else:
|
|
347
|
+
logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
|
|
348
|
+
# Fall back to individual embedding
|
|
349
|
+
embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
|
|
350
|
+
await asyncio.gather(*embed_tasks, return_exceptions=True)
|
|
351
|
+
else:
|
|
352
|
+
# Use individual embedding
|
|
353
|
+
embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
|
|
354
|
+
await asyncio.gather(*embed_tasks, return_exceptions=True)
|
|
355
|
+
|
|
356
|
+
for document in documents:
|
|
357
|
+
cleaned_content = document.content.replace("\x00", "\ufffd")
|
|
358
|
+
_id = md5(cleaned_content.encode()).hexdigest()
|
|
359
|
+
|
|
360
|
+
row: List[Any] = [
|
|
361
|
+
_id,
|
|
362
|
+
document.name,
|
|
363
|
+
document.meta_data,
|
|
364
|
+
filters,
|
|
365
|
+
cleaned_content,
|
|
366
|
+
document.content_id,
|
|
367
|
+
document.embedding,
|
|
368
|
+
document.usage,
|
|
369
|
+
content_hash,
|
|
370
|
+
]
|
|
371
|
+
rows.append(row)
|
|
372
|
+
|
|
373
|
+
await async_client.insert(
|
|
374
|
+
f"{self.database_name}.{self.table_name}",
|
|
375
|
+
rows,
|
|
376
|
+
column_names=[
|
|
377
|
+
"id",
|
|
378
|
+
"name",
|
|
379
|
+
"meta_data",
|
|
380
|
+
"filters",
|
|
381
|
+
"content",
|
|
382
|
+
"content_id",
|
|
383
|
+
"embedding",
|
|
384
|
+
"usage",
|
|
385
|
+
"content_hash",
|
|
386
|
+
],
|
|
387
|
+
)
|
|
388
|
+
log_debug(f"Async inserted {len(documents)} documents")
|
|
216
389
|
|
|
217
390
|
def upsert_available(self) -> bool:
|
|
218
391
|
return True
|
|
219
392
|
|
|
220
393
|
def upsert(
|
|
221
394
|
self,
|
|
395
|
+
content_hash: str,
|
|
396
|
+
documents: List[Document],
|
|
397
|
+
filters: Optional[Dict[str, Any]] = None,
|
|
398
|
+
) -> None:
|
|
399
|
+
"""
|
|
400
|
+
Upsert documents into the database.
|
|
401
|
+
"""
|
|
402
|
+
if self.content_hash_exists(content_hash):
|
|
403
|
+
self._delete_by_content_hash(content_hash)
|
|
404
|
+
self.insert(content_hash=content_hash, documents=documents, filters=filters)
|
|
405
|
+
|
|
406
|
+
def _upsert(
|
|
407
|
+
self,
|
|
408
|
+
content_hash: str,
|
|
222
409
|
documents: List[Document],
|
|
223
410
|
filters: Optional[Dict[str, Any]] = None,
|
|
224
411
|
) -> None:
|
|
@@ -232,7 +419,7 @@ class ClickhouseDb(VectorDb):
|
|
|
232
419
|
"""
|
|
233
420
|
# We are using ReplacingMergeTree engine in our table, so we need to insert the documents,
|
|
234
421
|
# then call SELECT with FINAL
|
|
235
|
-
self.insert(documents=documents, filters=filters)
|
|
422
|
+
self.insert(content_hash=content_hash, documents=documents, filters=filters)
|
|
236
423
|
|
|
237
424
|
parameters = self._get_base_parameters()
|
|
238
425
|
self.client.query(
|
|
@@ -240,7 +427,33 @@ class ClickhouseDb(VectorDb):
|
|
|
240
427
|
parameters=parameters,
|
|
241
428
|
)
|
|
242
429
|
|
|
243
|
-
def
|
|
430
|
+
async def async_upsert(
|
|
431
|
+
self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
|
|
432
|
+
) -> None:
|
|
433
|
+
"""Upsert documents asynchronously."""
|
|
434
|
+
if self.content_hash_exists(content_hash):
|
|
435
|
+
self._delete_by_content_hash(content_hash)
|
|
436
|
+
await self._async_upsert(content_hash=content_hash, documents=documents, filters=filters)
|
|
437
|
+
|
|
438
|
+
async def _async_upsert(
|
|
439
|
+
self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
|
|
440
|
+
) -> None:
|
|
441
|
+
"""Upsert documents asynchronously."""
|
|
442
|
+
# We are using ReplacingMergeTree engine in our table, so we need to insert the documents,
|
|
443
|
+
# then call SELECT with FINAL
|
|
444
|
+
await self.async_insert(content_hash=content_hash, documents=documents, filters=filters)
|
|
445
|
+
|
|
446
|
+
parameters = self._get_base_parameters()
|
|
447
|
+
await self.async_client.query( # type: ignore
|
|
448
|
+
"SELECT id FROM {database_name:Identifier}.{table_name:Identifier} FINAL",
|
|
449
|
+
parameters=parameters,
|
|
450
|
+
)
|
|
451
|
+
|
|
452
|
+
def search(
|
|
453
|
+
self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
|
|
454
|
+
) -> List[Document]:
|
|
455
|
+
if filters is not None:
|
|
456
|
+
log_warning("Filters are not yet supported in Clickhouse. No filters will be applied.")
|
|
244
457
|
query_embedding = self.embedder.get_embedding(query)
|
|
245
458
|
if query_embedding is None:
|
|
246
459
|
logger.error(f"Error getting embedding for Query: {query}")
|
|
@@ -248,13 +461,6 @@ class ClickhouseDb(VectorDb):
|
|
|
248
461
|
|
|
249
462
|
parameters = self._get_base_parameters()
|
|
250
463
|
where_query = ""
|
|
251
|
-
if filters:
|
|
252
|
-
query_filters: List[str] = []
|
|
253
|
-
for key, value in filters.values():
|
|
254
|
-
query_filters.append(f"{{{key}_key:String}} = {{{key}_value:String}}")
|
|
255
|
-
parameters[f"{key}_key"] = key
|
|
256
|
-
parameters[f"{key}_value"] = value
|
|
257
|
-
where_query = f"WHERE {' AND '.join(query_filters)}"
|
|
258
464
|
|
|
259
465
|
order_by_query = ""
|
|
260
466
|
if self.distance == Distance.l2 or self.distance == Distance.max_inner_product:
|
|
@@ -265,12 +471,12 @@ class ClickhouseDb(VectorDb):
|
|
|
265
471
|
parameters["query_embedding"] = query_embedding
|
|
266
472
|
|
|
267
473
|
clickhouse_query = (
|
|
268
|
-
"SELECT name, meta_data, content, embedding, usage FROM "
|
|
474
|
+
"SELECT name, meta_data, content, content_id, embedding, usage FROM "
|
|
269
475
|
"{database_name:Identifier}.{table_name:Identifier} "
|
|
270
476
|
f"{where_query} {order_by_query} LIMIT {limit}"
|
|
271
477
|
)
|
|
272
|
-
|
|
273
|
-
|
|
478
|
+
log_debug(f"Query: {clickhouse_query}")
|
|
479
|
+
log_debug(f"Params: {parameters}")
|
|
274
480
|
|
|
275
481
|
try:
|
|
276
482
|
results = self.client.query(
|
|
@@ -291,9 +497,71 @@ class ClickhouseDb(VectorDb):
|
|
|
291
497
|
name=result[0],
|
|
292
498
|
meta_data=result[1],
|
|
293
499
|
content=result[2],
|
|
500
|
+
content_id=result[3],
|
|
501
|
+
embedder=self.embedder,
|
|
502
|
+
embedding=result[4],
|
|
503
|
+
usage=result[5],
|
|
504
|
+
)
|
|
505
|
+
)
|
|
506
|
+
|
|
507
|
+
return search_results
|
|
508
|
+
|
|
509
|
+
async def async_search(
|
|
510
|
+
self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
|
|
511
|
+
) -> List[Document]:
|
|
512
|
+
"""Search for documents asynchronously."""
|
|
513
|
+
async_client = await self._ensure_async_client()
|
|
514
|
+
|
|
515
|
+
if filters is not None:
|
|
516
|
+
log_warning("Filters are not yet supported in Clickhouse. No filters will be applied.")
|
|
517
|
+
|
|
518
|
+
query_embedding = self.embedder.get_embedding(query)
|
|
519
|
+
if query_embedding is None:
|
|
520
|
+
logger.error(f"Error getting embedding for Query: {query}")
|
|
521
|
+
return []
|
|
522
|
+
|
|
523
|
+
parameters = self._get_base_parameters()
|
|
524
|
+
where_query = ""
|
|
525
|
+
|
|
526
|
+
order_by_query = ""
|
|
527
|
+
if self.distance == Distance.l2 or self.distance == Distance.max_inner_product:
|
|
528
|
+
order_by_query = "ORDER BY L2Distance(embedding, {query_embedding:Array(Float32)})"
|
|
529
|
+
parameters["query_embedding"] = query_embedding
|
|
530
|
+
if self.distance == Distance.cosine:
|
|
531
|
+
order_by_query = "ORDER BY cosineDistance(embedding, {query_embedding:Array(Float32)})"
|
|
532
|
+
parameters["query_embedding"] = query_embedding
|
|
533
|
+
|
|
534
|
+
clickhouse_query = (
|
|
535
|
+
"SELECT name, meta_data, content, content_id, embedding, usage FROM "
|
|
536
|
+
"{database_name:Identifier}.{table_name:Identifier} "
|
|
537
|
+
f"{where_query} {order_by_query} LIMIT {limit}"
|
|
538
|
+
)
|
|
539
|
+
log_debug(f"Async Query: {clickhouse_query}")
|
|
540
|
+
log_debug(f"Async Params: {parameters}")
|
|
541
|
+
|
|
542
|
+
try:
|
|
543
|
+
results = await async_client.query(
|
|
544
|
+
clickhouse_query,
|
|
545
|
+
parameters=parameters,
|
|
546
|
+
)
|
|
547
|
+
except Exception as e:
|
|
548
|
+
logger.error(f"Async error searching for documents: {e}")
|
|
549
|
+
logger.error("Table might not exist, creating for future use")
|
|
550
|
+
await self.async_create()
|
|
551
|
+
return []
|
|
552
|
+
|
|
553
|
+
# Build search results
|
|
554
|
+
search_results: List[Document] = []
|
|
555
|
+
for result in results.result_rows:
|
|
556
|
+
search_results.append(
|
|
557
|
+
Document(
|
|
558
|
+
name=result[0],
|
|
559
|
+
meta_data=result[1],
|
|
560
|
+
content=result[2],
|
|
561
|
+
content_id=result[3],
|
|
294
562
|
embedder=self.embedder,
|
|
295
|
-
embedding=result[
|
|
296
|
-
usage=result[
|
|
563
|
+
embedding=result[4],
|
|
564
|
+
usage=result[5],
|
|
297
565
|
)
|
|
298
566
|
)
|
|
299
567
|
|
|
@@ -301,16 +569,29 @@ class ClickhouseDb(VectorDb):
|
|
|
301
569
|
|
|
302
570
|
def drop(self) -> None:
|
|
303
571
|
if self.table_exists():
|
|
304
|
-
|
|
572
|
+
log_debug(f"Deleting table: {self.table_name}")
|
|
305
573
|
parameters = self._get_base_parameters()
|
|
306
574
|
self.client.command(
|
|
307
575
|
"DROP TABLE {database_name:Identifier}.{table_name:Identifier}",
|
|
308
576
|
parameters=parameters,
|
|
309
577
|
)
|
|
310
578
|
|
|
579
|
+
async def async_drop(self) -> None:
|
|
580
|
+
"""Drop the table asynchronously."""
|
|
581
|
+
if await self.async_exists():
|
|
582
|
+
log_debug(f"Async dropping table: {self.table_name}")
|
|
583
|
+
parameters = self._get_base_parameters()
|
|
584
|
+
await self.async_client.command( # type: ignore
|
|
585
|
+
"DROP TABLE {database_name:Identifier}.{table_name:Identifier}",
|
|
586
|
+
parameters=parameters,
|
|
587
|
+
)
|
|
588
|
+
|
|
311
589
|
def exists(self) -> bool:
|
|
312
590
|
return self.table_exists()
|
|
313
591
|
|
|
592
|
+
async def async_exists(self) -> bool:
|
|
593
|
+
return await self.async_table_exists()
|
|
594
|
+
|
|
314
595
|
def get_count(self) -> int:
|
|
315
596
|
parameters = self._get_base_parameters()
|
|
316
597
|
result = self.client.query(
|
|
@@ -323,7 +604,7 @@ class ClickhouseDb(VectorDb):
|
|
|
323
604
|
return 0
|
|
324
605
|
|
|
325
606
|
def optimize(self) -> None:
|
|
326
|
-
|
|
607
|
+
log_debug("==== No need to optimize Clickhouse DB. Skipping this step ====")
|
|
327
608
|
|
|
328
609
|
def delete(self) -> bool:
|
|
329
610
|
parameters = self._get_base_parameters()
|
|
@@ -332,3 +613,223 @@ class ClickhouseDb(VectorDb):
|
|
|
332
613
|
parameters=parameters,
|
|
333
614
|
)
|
|
334
615
|
return True
|
|
616
|
+
|
|
617
|
+
def delete_by_id(self, id: str) -> bool:
|
|
618
|
+
"""
|
|
619
|
+
|
|
620
|
+
Delete a document by its ID.
|
|
621
|
+
|
|
622
|
+
Args:
|
|
623
|
+
id (str): The document ID to delete
|
|
624
|
+
|
|
625
|
+
Returns:
|
|
626
|
+
bool: True if document was deleted, False otherwise
|
|
627
|
+
"""
|
|
628
|
+
try:
|
|
629
|
+
log_debug(f"ClickHouse VectorDB : Deleting document with ID {id}")
|
|
630
|
+
if not self.id_exists(id):
|
|
631
|
+
return False
|
|
632
|
+
|
|
633
|
+
parameters = self._get_base_parameters()
|
|
634
|
+
parameters["id"] = id
|
|
635
|
+
|
|
636
|
+
self.client.command(
|
|
637
|
+
"DELETE FROM {database_name:Identifier}.{table_name:Identifier} WHERE id = {id:String}",
|
|
638
|
+
parameters=parameters,
|
|
639
|
+
)
|
|
640
|
+
return True
|
|
641
|
+
except Exception as e:
|
|
642
|
+
log_info(f"Error deleting document with ID {id}: {e}")
|
|
643
|
+
return False
|
|
644
|
+
|
|
645
|
+
def delete_by_name(self, name: str) -> bool:
|
|
646
|
+
"""
|
|
647
|
+
Delete documents by name.
|
|
648
|
+
|
|
649
|
+
Args:
|
|
650
|
+
name (str): The document name to delete
|
|
651
|
+
|
|
652
|
+
Returns:
|
|
653
|
+
bool: True if documents were deleted, False otherwise
|
|
654
|
+
"""
|
|
655
|
+
try:
|
|
656
|
+
log_debug(f"ClickHouse VectorDB : Deleting documents with name {name}")
|
|
657
|
+
if not self.name_exists(name):
|
|
658
|
+
return False
|
|
659
|
+
|
|
660
|
+
parameters = self._get_base_parameters()
|
|
661
|
+
parameters["name"] = name
|
|
662
|
+
|
|
663
|
+
self.client.command(
|
|
664
|
+
"DELETE FROM {database_name:Identifier}.{table_name:Identifier} WHERE name = {name:String}",
|
|
665
|
+
parameters=parameters,
|
|
666
|
+
)
|
|
667
|
+
return True
|
|
668
|
+
except Exception as e:
|
|
669
|
+
log_info(f"Error deleting documents with name {name}: {e}")
|
|
670
|
+
return False
|
|
671
|
+
|
|
672
|
+
def delete_by_metadata(self, metadata: Dict[str, Any]) -> bool:
|
|
673
|
+
"""
|
|
674
|
+
Delete documents by metadata.
|
|
675
|
+
|
|
676
|
+
Args:
|
|
677
|
+
metadata (Dict[str, Any]): The metadata to match for deletion
|
|
678
|
+
|
|
679
|
+
Returns:
|
|
680
|
+
bool: True if documents were deleted, False otherwise
|
|
681
|
+
"""
|
|
682
|
+
try:
|
|
683
|
+
log_debug(f"ClickHouse VectorDB : Deleting documents with metadata {metadata}")
|
|
684
|
+
parameters = self._get_base_parameters()
|
|
685
|
+
|
|
686
|
+
# Build WHERE clause for metadata matching using proper ClickHouse JSON syntax
|
|
687
|
+
where_conditions = []
|
|
688
|
+
for key, value in metadata.items():
|
|
689
|
+
if isinstance(value, bool):
|
|
690
|
+
where_conditions.append(f"JSONExtractBool(toString(filters), '{key}') = {str(value).lower()}")
|
|
691
|
+
elif isinstance(value, (int, float)):
|
|
692
|
+
where_conditions.append(f"JSONExtractFloat(toString(filters), '{key}') = {value}")
|
|
693
|
+
else:
|
|
694
|
+
where_conditions.append(f"JSONExtractString(toString(filters), '{key}') = '{value}'")
|
|
695
|
+
|
|
696
|
+
if not where_conditions:
|
|
697
|
+
return False
|
|
698
|
+
|
|
699
|
+
where_clause = " AND ".join(where_conditions)
|
|
700
|
+
|
|
701
|
+
self.client.command(
|
|
702
|
+
f"DELETE FROM {{database_name:Identifier}}.{{table_name:Identifier}} WHERE {where_clause}",
|
|
703
|
+
parameters=parameters,
|
|
704
|
+
)
|
|
705
|
+
return True
|
|
706
|
+
except Exception as e:
|
|
707
|
+
log_info(f"Error deleting documents with metadata {metadata}: {e}")
|
|
708
|
+
return False
|
|
709
|
+
|
|
710
|
+
def delete_by_content_id(self, content_id: str) -> bool:
|
|
711
|
+
"""
|
|
712
|
+
Delete documents by content ID.
|
|
713
|
+
|
|
714
|
+
Args:
|
|
715
|
+
content_id (str): The content ID to delete
|
|
716
|
+
|
|
717
|
+
Returns:
|
|
718
|
+
bool: True if documents were deleted, False otherwise
|
|
719
|
+
"""
|
|
720
|
+
try:
|
|
721
|
+
log_debug(f"ClickHouse VectorDB : Deleting documents with content_id {content_id}")
|
|
722
|
+
parameters = self._get_base_parameters()
|
|
723
|
+
parameters["content_id"] = content_id
|
|
724
|
+
|
|
725
|
+
self.client.command(
|
|
726
|
+
"DELETE FROM {database_name:Identifier}.{table_name:Identifier} WHERE content_id = {content_id:String}",
|
|
727
|
+
parameters=parameters,
|
|
728
|
+
)
|
|
729
|
+
return True
|
|
730
|
+
except Exception as e:
|
|
731
|
+
log_info(f"Error deleting documents with content_id {content_id}: {e}")
|
|
732
|
+
return False
|
|
733
|
+
|
|
734
|
+
def content_hash_exists(self, content_hash: str) -> bool:
|
|
735
|
+
"""
|
|
736
|
+
Validate if a row with this content_hash exists or not
|
|
737
|
+
|
|
738
|
+
Args:
|
|
739
|
+
content_hash (str): Content hash to check
|
|
740
|
+
"""
|
|
741
|
+
parameters = self._get_base_parameters()
|
|
742
|
+
parameters["content_hash"] = content_hash
|
|
743
|
+
|
|
744
|
+
result = self.client.query(
|
|
745
|
+
"SELECT content_hash FROM {database_name:Identifier}.{table_name:Identifier} WHERE content_hash = {content_hash:String}",
|
|
746
|
+
parameters=parameters,
|
|
747
|
+
)
|
|
748
|
+
return len(result.result_rows) > 0 if result.result_rows else False
|
|
749
|
+
|
|
750
|
+
def _delete_by_content_hash(self, content_hash: str) -> bool:
|
|
751
|
+
"""
|
|
752
|
+
Delete documents by content hash.
|
|
753
|
+
"""
|
|
754
|
+
try:
|
|
755
|
+
parameters = self._get_base_parameters()
|
|
756
|
+
parameters["content_hash"] = content_hash
|
|
757
|
+
|
|
758
|
+
self.client.command(
|
|
759
|
+
"DELETE FROM {database_name:Identifier}.{table_name:Identifier} WHERE content_hash = {content_hash:String}",
|
|
760
|
+
parameters=parameters,
|
|
761
|
+
)
|
|
762
|
+
return True
|
|
763
|
+
except Exception:
|
|
764
|
+
return False
|
|
765
|
+
|
|
766
|
+
def update_metadata(self, content_id: str, metadata: Dict[str, Any]) -> None:
|
|
767
|
+
"""
|
|
768
|
+
Update the metadata for documents with the given content_id.
|
|
769
|
+
|
|
770
|
+
Args:
|
|
771
|
+
content_id (str): The content ID to update
|
|
772
|
+
metadata (Dict[str, Any]): The metadata to update
|
|
773
|
+
"""
|
|
774
|
+
import json
|
|
775
|
+
|
|
776
|
+
try:
|
|
777
|
+
parameters = self._get_base_parameters()
|
|
778
|
+
parameters["content_id"] = content_id
|
|
779
|
+
|
|
780
|
+
# First, get existing documents with their current metadata and filters
|
|
781
|
+
result = self.client.query(
|
|
782
|
+
"SELECT id, meta_data, filters FROM {database_name:Identifier}.{table_name:Identifier} WHERE content_id = {content_id:String}",
|
|
783
|
+
parameters=parameters,
|
|
784
|
+
)
|
|
785
|
+
|
|
786
|
+
if not result.result_rows:
|
|
787
|
+
logger.debug(f"No documents found with content_id: {content_id}")
|
|
788
|
+
return
|
|
789
|
+
|
|
790
|
+
# Update each document
|
|
791
|
+
updated_count = 0
|
|
792
|
+
for row in result.result_rows:
|
|
793
|
+
doc_id, current_meta_json, current_filters_json = row
|
|
794
|
+
|
|
795
|
+
# Parse existing metadata
|
|
796
|
+
try:
|
|
797
|
+
current_metadata = json.loads(current_meta_json) if current_meta_json else {}
|
|
798
|
+
except (json.JSONDecodeError, TypeError):
|
|
799
|
+
current_metadata = {}
|
|
800
|
+
|
|
801
|
+
# Parse existing filters
|
|
802
|
+
try:
|
|
803
|
+
current_filters = json.loads(current_filters_json) if current_filters_json else {}
|
|
804
|
+
except (json.JSONDecodeError, TypeError):
|
|
805
|
+
current_filters = {}
|
|
806
|
+
|
|
807
|
+
# Merge existing metadata with new metadata
|
|
808
|
+
updated_metadata = current_metadata.copy()
|
|
809
|
+
updated_metadata.update(metadata)
|
|
810
|
+
|
|
811
|
+
# Merge existing filters with new metadata
|
|
812
|
+
updated_filters = current_filters.copy()
|
|
813
|
+
updated_filters.update(metadata)
|
|
814
|
+
|
|
815
|
+
# Update the document
|
|
816
|
+
update_params = parameters.copy()
|
|
817
|
+
update_params["doc_id"] = doc_id
|
|
818
|
+
update_params["metadata_json"] = json.dumps(updated_metadata)
|
|
819
|
+
update_params["filters_json"] = json.dumps(updated_filters)
|
|
820
|
+
|
|
821
|
+
self.client.command(
|
|
822
|
+
"ALTER TABLE {database_name:Identifier}.{table_name:Identifier} UPDATE meta_data = {metadata_json:String}, filters = {filters_json:String} WHERE id = {doc_id:String}",
|
|
823
|
+
parameters=update_params,
|
|
824
|
+
)
|
|
825
|
+
updated_count += 1
|
|
826
|
+
|
|
827
|
+
logger.debug(f"Updated metadata for {updated_count} documents with content_id: {content_id}")
|
|
828
|
+
|
|
829
|
+
except Exception as e:
|
|
830
|
+
logger.error(f"Error updating metadata for content_id '{content_id}': {e}")
|
|
831
|
+
raise
|
|
832
|
+
|
|
833
|
+
def get_supported_search_types(self) -> List[str]:
|
|
834
|
+
"""Get the supported search types for this vector database."""
|
|
835
|
+
return [] # Clickhouse doesn't use SearchType enum
|