agno 2.2.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/__init__.py +8 -0
- agno/agent/__init__.py +51 -0
- agno/agent/agent.py +10405 -0
- agno/api/__init__.py +0 -0
- agno/api/agent.py +28 -0
- agno/api/api.py +40 -0
- agno/api/evals.py +22 -0
- agno/api/os.py +17 -0
- agno/api/routes.py +13 -0
- agno/api/schemas/__init__.py +9 -0
- agno/api/schemas/agent.py +16 -0
- agno/api/schemas/evals.py +16 -0
- agno/api/schemas/os.py +14 -0
- agno/api/schemas/response.py +6 -0
- agno/api/schemas/team.py +16 -0
- agno/api/schemas/utils.py +21 -0
- agno/api/schemas/workflows.py +16 -0
- agno/api/settings.py +53 -0
- agno/api/team.py +30 -0
- agno/api/workflow.py +28 -0
- agno/cloud/aws/base.py +214 -0
- agno/cloud/aws/s3/__init__.py +2 -0
- agno/cloud/aws/s3/api_client.py +43 -0
- agno/cloud/aws/s3/bucket.py +195 -0
- agno/cloud/aws/s3/object.py +57 -0
- agno/culture/__init__.py +3 -0
- agno/culture/manager.py +956 -0
- agno/db/__init__.py +24 -0
- agno/db/async_postgres/__init__.py +3 -0
- agno/db/base.py +598 -0
- agno/db/dynamo/__init__.py +3 -0
- agno/db/dynamo/dynamo.py +2042 -0
- agno/db/dynamo/schemas.py +314 -0
- agno/db/dynamo/utils.py +743 -0
- agno/db/firestore/__init__.py +3 -0
- agno/db/firestore/firestore.py +1795 -0
- agno/db/firestore/schemas.py +140 -0
- agno/db/firestore/utils.py +376 -0
- agno/db/gcs_json/__init__.py +3 -0
- agno/db/gcs_json/gcs_json_db.py +1335 -0
- agno/db/gcs_json/utils.py +228 -0
- agno/db/in_memory/__init__.py +3 -0
- agno/db/in_memory/in_memory_db.py +1160 -0
- agno/db/in_memory/utils.py +230 -0
- agno/db/json/__init__.py +3 -0
- agno/db/json/json_db.py +1328 -0
- agno/db/json/utils.py +230 -0
- agno/db/migrations/__init__.py +0 -0
- agno/db/migrations/v1_to_v2.py +635 -0
- agno/db/mongo/__init__.py +17 -0
- agno/db/mongo/async_mongo.py +2026 -0
- agno/db/mongo/mongo.py +1982 -0
- agno/db/mongo/schemas.py +87 -0
- agno/db/mongo/utils.py +259 -0
- agno/db/mysql/__init__.py +3 -0
- agno/db/mysql/mysql.py +2308 -0
- agno/db/mysql/schemas.py +138 -0
- agno/db/mysql/utils.py +355 -0
- agno/db/postgres/__init__.py +4 -0
- agno/db/postgres/async_postgres.py +1927 -0
- agno/db/postgres/postgres.py +2260 -0
- agno/db/postgres/schemas.py +139 -0
- agno/db/postgres/utils.py +442 -0
- agno/db/redis/__init__.py +3 -0
- agno/db/redis/redis.py +1660 -0
- agno/db/redis/schemas.py +123 -0
- agno/db/redis/utils.py +346 -0
- agno/db/schemas/__init__.py +4 -0
- agno/db/schemas/culture.py +120 -0
- agno/db/schemas/evals.py +33 -0
- agno/db/schemas/knowledge.py +40 -0
- agno/db/schemas/memory.py +46 -0
- agno/db/schemas/metrics.py +0 -0
- agno/db/singlestore/__init__.py +3 -0
- agno/db/singlestore/schemas.py +130 -0
- agno/db/singlestore/singlestore.py +2272 -0
- agno/db/singlestore/utils.py +384 -0
- agno/db/sqlite/__init__.py +4 -0
- agno/db/sqlite/async_sqlite.py +2293 -0
- agno/db/sqlite/schemas.py +133 -0
- agno/db/sqlite/sqlite.py +2288 -0
- agno/db/sqlite/utils.py +431 -0
- agno/db/surrealdb/__init__.py +3 -0
- agno/db/surrealdb/metrics.py +292 -0
- agno/db/surrealdb/models.py +309 -0
- agno/db/surrealdb/queries.py +71 -0
- agno/db/surrealdb/surrealdb.py +1353 -0
- agno/db/surrealdb/utils.py +147 -0
- agno/db/utils.py +116 -0
- agno/debug.py +18 -0
- agno/eval/__init__.py +14 -0
- agno/eval/accuracy.py +834 -0
- agno/eval/performance.py +773 -0
- agno/eval/reliability.py +306 -0
- agno/eval/utils.py +119 -0
- agno/exceptions.py +161 -0
- agno/filters.py +354 -0
- agno/guardrails/__init__.py +6 -0
- agno/guardrails/base.py +19 -0
- agno/guardrails/openai.py +144 -0
- agno/guardrails/pii.py +94 -0
- agno/guardrails/prompt_injection.py +52 -0
- agno/integrations/__init__.py +0 -0
- agno/integrations/discord/__init__.py +3 -0
- agno/integrations/discord/client.py +203 -0
- agno/knowledge/__init__.py +5 -0
- agno/knowledge/chunking/__init__.py +0 -0
- agno/knowledge/chunking/agentic.py +79 -0
- agno/knowledge/chunking/document.py +91 -0
- agno/knowledge/chunking/fixed.py +57 -0
- agno/knowledge/chunking/markdown.py +151 -0
- agno/knowledge/chunking/recursive.py +63 -0
- agno/knowledge/chunking/row.py +39 -0
- agno/knowledge/chunking/semantic.py +86 -0
- agno/knowledge/chunking/strategy.py +165 -0
- agno/knowledge/content.py +74 -0
- agno/knowledge/document/__init__.py +5 -0
- agno/knowledge/document/base.py +58 -0
- agno/knowledge/embedder/__init__.py +5 -0
- agno/knowledge/embedder/aws_bedrock.py +343 -0
- agno/knowledge/embedder/azure_openai.py +210 -0
- agno/knowledge/embedder/base.py +23 -0
- agno/knowledge/embedder/cohere.py +323 -0
- agno/knowledge/embedder/fastembed.py +62 -0
- agno/knowledge/embedder/fireworks.py +13 -0
- agno/knowledge/embedder/google.py +258 -0
- agno/knowledge/embedder/huggingface.py +94 -0
- agno/knowledge/embedder/jina.py +182 -0
- agno/knowledge/embedder/langdb.py +22 -0
- agno/knowledge/embedder/mistral.py +206 -0
- agno/knowledge/embedder/nebius.py +13 -0
- agno/knowledge/embedder/ollama.py +154 -0
- agno/knowledge/embedder/openai.py +195 -0
- agno/knowledge/embedder/sentence_transformer.py +63 -0
- agno/knowledge/embedder/together.py +13 -0
- agno/knowledge/embedder/vllm.py +262 -0
- agno/knowledge/embedder/voyageai.py +165 -0
- agno/knowledge/knowledge.py +1988 -0
- agno/knowledge/reader/__init__.py +7 -0
- agno/knowledge/reader/arxiv_reader.py +81 -0
- agno/knowledge/reader/base.py +95 -0
- agno/knowledge/reader/csv_reader.py +166 -0
- agno/knowledge/reader/docx_reader.py +82 -0
- agno/knowledge/reader/field_labeled_csv_reader.py +292 -0
- agno/knowledge/reader/firecrawl_reader.py +201 -0
- agno/knowledge/reader/json_reader.py +87 -0
- agno/knowledge/reader/markdown_reader.py +137 -0
- agno/knowledge/reader/pdf_reader.py +431 -0
- agno/knowledge/reader/pptx_reader.py +101 -0
- agno/knowledge/reader/reader_factory.py +313 -0
- agno/knowledge/reader/s3_reader.py +89 -0
- agno/knowledge/reader/tavily_reader.py +194 -0
- agno/knowledge/reader/text_reader.py +115 -0
- agno/knowledge/reader/web_search_reader.py +372 -0
- agno/knowledge/reader/website_reader.py +455 -0
- agno/knowledge/reader/wikipedia_reader.py +59 -0
- agno/knowledge/reader/youtube_reader.py +78 -0
- agno/knowledge/remote_content/__init__.py +0 -0
- agno/knowledge/remote_content/remote_content.py +88 -0
- agno/knowledge/reranker/__init__.py +3 -0
- agno/knowledge/reranker/base.py +14 -0
- agno/knowledge/reranker/cohere.py +64 -0
- agno/knowledge/reranker/infinity.py +195 -0
- agno/knowledge/reranker/sentence_transformer.py +54 -0
- agno/knowledge/types.py +39 -0
- agno/knowledge/utils.py +189 -0
- agno/media.py +462 -0
- agno/memory/__init__.py +3 -0
- agno/memory/manager.py +1327 -0
- agno/models/__init__.py +0 -0
- agno/models/aimlapi/__init__.py +5 -0
- agno/models/aimlapi/aimlapi.py +45 -0
- agno/models/anthropic/__init__.py +5 -0
- agno/models/anthropic/claude.py +757 -0
- agno/models/aws/__init__.py +15 -0
- agno/models/aws/bedrock.py +701 -0
- agno/models/aws/claude.py +378 -0
- agno/models/azure/__init__.py +18 -0
- agno/models/azure/ai_foundry.py +485 -0
- agno/models/azure/openai_chat.py +131 -0
- agno/models/base.py +2175 -0
- agno/models/cerebras/__init__.py +12 -0
- agno/models/cerebras/cerebras.py +501 -0
- agno/models/cerebras/cerebras_openai.py +112 -0
- agno/models/cohere/__init__.py +5 -0
- agno/models/cohere/chat.py +389 -0
- agno/models/cometapi/__init__.py +5 -0
- agno/models/cometapi/cometapi.py +57 -0
- agno/models/dashscope/__init__.py +5 -0
- agno/models/dashscope/dashscope.py +91 -0
- agno/models/deepinfra/__init__.py +5 -0
- agno/models/deepinfra/deepinfra.py +28 -0
- agno/models/deepseek/__init__.py +5 -0
- agno/models/deepseek/deepseek.py +61 -0
- agno/models/defaults.py +1 -0
- agno/models/fireworks/__init__.py +5 -0
- agno/models/fireworks/fireworks.py +26 -0
- agno/models/google/__init__.py +5 -0
- agno/models/google/gemini.py +1085 -0
- agno/models/groq/__init__.py +5 -0
- agno/models/groq/groq.py +556 -0
- agno/models/huggingface/__init__.py +5 -0
- agno/models/huggingface/huggingface.py +491 -0
- agno/models/ibm/__init__.py +5 -0
- agno/models/ibm/watsonx.py +422 -0
- agno/models/internlm/__init__.py +3 -0
- agno/models/internlm/internlm.py +26 -0
- agno/models/langdb/__init__.py +1 -0
- agno/models/langdb/langdb.py +48 -0
- agno/models/litellm/__init__.py +14 -0
- agno/models/litellm/chat.py +468 -0
- agno/models/litellm/litellm_openai.py +25 -0
- agno/models/llama_cpp/__init__.py +5 -0
- agno/models/llama_cpp/llama_cpp.py +22 -0
- agno/models/lmstudio/__init__.py +5 -0
- agno/models/lmstudio/lmstudio.py +25 -0
- agno/models/message.py +434 -0
- agno/models/meta/__init__.py +12 -0
- agno/models/meta/llama.py +475 -0
- agno/models/meta/llama_openai.py +78 -0
- agno/models/metrics.py +120 -0
- agno/models/mistral/__init__.py +5 -0
- agno/models/mistral/mistral.py +432 -0
- agno/models/nebius/__init__.py +3 -0
- agno/models/nebius/nebius.py +54 -0
- agno/models/nexus/__init__.py +3 -0
- agno/models/nexus/nexus.py +22 -0
- agno/models/nvidia/__init__.py +5 -0
- agno/models/nvidia/nvidia.py +28 -0
- agno/models/ollama/__init__.py +5 -0
- agno/models/ollama/chat.py +441 -0
- agno/models/openai/__init__.py +9 -0
- agno/models/openai/chat.py +883 -0
- agno/models/openai/like.py +27 -0
- agno/models/openai/responses.py +1050 -0
- agno/models/openrouter/__init__.py +5 -0
- agno/models/openrouter/openrouter.py +66 -0
- agno/models/perplexity/__init__.py +5 -0
- agno/models/perplexity/perplexity.py +187 -0
- agno/models/portkey/__init__.py +3 -0
- agno/models/portkey/portkey.py +81 -0
- agno/models/requesty/__init__.py +5 -0
- agno/models/requesty/requesty.py +52 -0
- agno/models/response.py +199 -0
- agno/models/sambanova/__init__.py +5 -0
- agno/models/sambanova/sambanova.py +28 -0
- agno/models/siliconflow/__init__.py +5 -0
- agno/models/siliconflow/siliconflow.py +25 -0
- agno/models/together/__init__.py +5 -0
- agno/models/together/together.py +25 -0
- agno/models/utils.py +266 -0
- agno/models/vercel/__init__.py +3 -0
- agno/models/vercel/v0.py +26 -0
- agno/models/vertexai/__init__.py +0 -0
- agno/models/vertexai/claude.py +70 -0
- agno/models/vllm/__init__.py +3 -0
- agno/models/vllm/vllm.py +78 -0
- agno/models/xai/__init__.py +3 -0
- agno/models/xai/xai.py +113 -0
- agno/os/__init__.py +3 -0
- agno/os/app.py +876 -0
- agno/os/auth.py +57 -0
- agno/os/config.py +104 -0
- agno/os/interfaces/__init__.py +1 -0
- agno/os/interfaces/a2a/__init__.py +3 -0
- agno/os/interfaces/a2a/a2a.py +42 -0
- agno/os/interfaces/a2a/router.py +250 -0
- agno/os/interfaces/a2a/utils.py +924 -0
- agno/os/interfaces/agui/__init__.py +3 -0
- agno/os/interfaces/agui/agui.py +47 -0
- agno/os/interfaces/agui/router.py +144 -0
- agno/os/interfaces/agui/utils.py +534 -0
- agno/os/interfaces/base.py +25 -0
- agno/os/interfaces/slack/__init__.py +3 -0
- agno/os/interfaces/slack/router.py +148 -0
- agno/os/interfaces/slack/security.py +30 -0
- agno/os/interfaces/slack/slack.py +47 -0
- agno/os/interfaces/whatsapp/__init__.py +3 -0
- agno/os/interfaces/whatsapp/router.py +211 -0
- agno/os/interfaces/whatsapp/security.py +53 -0
- agno/os/interfaces/whatsapp/whatsapp.py +36 -0
- agno/os/mcp.py +292 -0
- agno/os/middleware/__init__.py +7 -0
- agno/os/middleware/jwt.py +233 -0
- agno/os/router.py +1763 -0
- agno/os/routers/__init__.py +3 -0
- agno/os/routers/evals/__init__.py +3 -0
- agno/os/routers/evals/evals.py +430 -0
- agno/os/routers/evals/schemas.py +142 -0
- agno/os/routers/evals/utils.py +162 -0
- agno/os/routers/health.py +31 -0
- agno/os/routers/home.py +52 -0
- agno/os/routers/knowledge/__init__.py +3 -0
- agno/os/routers/knowledge/knowledge.py +997 -0
- agno/os/routers/knowledge/schemas.py +178 -0
- agno/os/routers/memory/__init__.py +3 -0
- agno/os/routers/memory/memory.py +515 -0
- agno/os/routers/memory/schemas.py +62 -0
- agno/os/routers/metrics/__init__.py +3 -0
- agno/os/routers/metrics/metrics.py +190 -0
- agno/os/routers/metrics/schemas.py +47 -0
- agno/os/routers/session/__init__.py +3 -0
- agno/os/routers/session/session.py +997 -0
- agno/os/schema.py +1055 -0
- agno/os/settings.py +43 -0
- agno/os/utils.py +630 -0
- agno/py.typed +0 -0
- agno/reasoning/__init__.py +0 -0
- agno/reasoning/anthropic.py +80 -0
- agno/reasoning/azure_ai_foundry.py +67 -0
- agno/reasoning/deepseek.py +63 -0
- agno/reasoning/default.py +97 -0
- agno/reasoning/gemini.py +73 -0
- agno/reasoning/groq.py +71 -0
- agno/reasoning/helpers.py +63 -0
- agno/reasoning/ollama.py +67 -0
- agno/reasoning/openai.py +86 -0
- agno/reasoning/step.py +31 -0
- agno/reasoning/vertexai.py +76 -0
- agno/run/__init__.py +6 -0
- agno/run/agent.py +787 -0
- agno/run/base.py +229 -0
- agno/run/cancel.py +81 -0
- agno/run/messages.py +32 -0
- agno/run/team.py +753 -0
- agno/run/workflow.py +708 -0
- agno/session/__init__.py +10 -0
- agno/session/agent.py +295 -0
- agno/session/summary.py +265 -0
- agno/session/team.py +392 -0
- agno/session/workflow.py +205 -0
- agno/team/__init__.py +37 -0
- agno/team/team.py +8793 -0
- agno/tools/__init__.py +10 -0
- agno/tools/agentql.py +120 -0
- agno/tools/airflow.py +69 -0
- agno/tools/api.py +122 -0
- agno/tools/apify.py +314 -0
- agno/tools/arxiv.py +127 -0
- agno/tools/aws_lambda.py +53 -0
- agno/tools/aws_ses.py +66 -0
- agno/tools/baidusearch.py +89 -0
- agno/tools/bitbucket.py +292 -0
- agno/tools/brandfetch.py +213 -0
- agno/tools/bravesearch.py +106 -0
- agno/tools/brightdata.py +367 -0
- agno/tools/browserbase.py +209 -0
- agno/tools/calcom.py +255 -0
- agno/tools/calculator.py +151 -0
- agno/tools/cartesia.py +187 -0
- agno/tools/clickup.py +244 -0
- agno/tools/confluence.py +240 -0
- agno/tools/crawl4ai.py +158 -0
- agno/tools/csv_toolkit.py +185 -0
- agno/tools/dalle.py +110 -0
- agno/tools/daytona.py +475 -0
- agno/tools/decorator.py +262 -0
- agno/tools/desi_vocal.py +108 -0
- agno/tools/discord.py +161 -0
- agno/tools/docker.py +716 -0
- agno/tools/duckdb.py +379 -0
- agno/tools/duckduckgo.py +91 -0
- agno/tools/e2b.py +703 -0
- agno/tools/eleven_labs.py +196 -0
- agno/tools/email.py +67 -0
- agno/tools/evm.py +129 -0
- agno/tools/exa.py +396 -0
- agno/tools/fal.py +127 -0
- agno/tools/file.py +240 -0
- agno/tools/file_generation.py +350 -0
- agno/tools/financial_datasets.py +288 -0
- agno/tools/firecrawl.py +143 -0
- agno/tools/function.py +1187 -0
- agno/tools/giphy.py +93 -0
- agno/tools/github.py +1760 -0
- agno/tools/gmail.py +922 -0
- agno/tools/google_bigquery.py +117 -0
- agno/tools/google_drive.py +270 -0
- agno/tools/google_maps.py +253 -0
- agno/tools/googlecalendar.py +674 -0
- agno/tools/googlesearch.py +98 -0
- agno/tools/googlesheets.py +377 -0
- agno/tools/hackernews.py +77 -0
- agno/tools/jina.py +101 -0
- agno/tools/jira.py +170 -0
- agno/tools/knowledge.py +218 -0
- agno/tools/linear.py +426 -0
- agno/tools/linkup.py +58 -0
- agno/tools/local_file_system.py +90 -0
- agno/tools/lumalab.py +183 -0
- agno/tools/mcp/__init__.py +10 -0
- agno/tools/mcp/mcp.py +331 -0
- agno/tools/mcp/multi_mcp.py +347 -0
- agno/tools/mcp/params.py +24 -0
- agno/tools/mcp_toolbox.py +284 -0
- agno/tools/mem0.py +193 -0
- agno/tools/memori.py +339 -0
- agno/tools/memory.py +419 -0
- agno/tools/mlx_transcribe.py +139 -0
- agno/tools/models/__init__.py +0 -0
- agno/tools/models/azure_openai.py +190 -0
- agno/tools/models/gemini.py +203 -0
- agno/tools/models/groq.py +158 -0
- agno/tools/models/morph.py +186 -0
- agno/tools/models/nebius.py +124 -0
- agno/tools/models_labs.py +195 -0
- agno/tools/moviepy_video.py +349 -0
- agno/tools/neo4j.py +134 -0
- agno/tools/newspaper.py +46 -0
- agno/tools/newspaper4k.py +93 -0
- agno/tools/notion.py +204 -0
- agno/tools/openai.py +202 -0
- agno/tools/openbb.py +160 -0
- agno/tools/opencv.py +321 -0
- agno/tools/openweather.py +233 -0
- agno/tools/oxylabs.py +385 -0
- agno/tools/pandas.py +102 -0
- agno/tools/parallel.py +314 -0
- agno/tools/postgres.py +257 -0
- agno/tools/pubmed.py +188 -0
- agno/tools/python.py +205 -0
- agno/tools/reasoning.py +283 -0
- agno/tools/reddit.py +467 -0
- agno/tools/replicate.py +117 -0
- agno/tools/resend.py +62 -0
- agno/tools/scrapegraph.py +222 -0
- agno/tools/searxng.py +152 -0
- agno/tools/serpapi.py +116 -0
- agno/tools/serper.py +255 -0
- agno/tools/shell.py +53 -0
- agno/tools/slack.py +136 -0
- agno/tools/sleep.py +20 -0
- agno/tools/spider.py +116 -0
- agno/tools/sql.py +154 -0
- agno/tools/streamlit/__init__.py +0 -0
- agno/tools/streamlit/components.py +113 -0
- agno/tools/tavily.py +254 -0
- agno/tools/telegram.py +48 -0
- agno/tools/todoist.py +218 -0
- agno/tools/tool_registry.py +1 -0
- agno/tools/toolkit.py +146 -0
- agno/tools/trafilatura.py +388 -0
- agno/tools/trello.py +274 -0
- agno/tools/twilio.py +186 -0
- agno/tools/user_control_flow.py +78 -0
- agno/tools/valyu.py +228 -0
- agno/tools/visualization.py +467 -0
- agno/tools/webbrowser.py +28 -0
- agno/tools/webex.py +76 -0
- agno/tools/website.py +54 -0
- agno/tools/webtools.py +45 -0
- agno/tools/whatsapp.py +286 -0
- agno/tools/wikipedia.py +63 -0
- agno/tools/workflow.py +278 -0
- agno/tools/x.py +335 -0
- agno/tools/yfinance.py +257 -0
- agno/tools/youtube.py +184 -0
- agno/tools/zendesk.py +82 -0
- agno/tools/zep.py +454 -0
- agno/tools/zoom.py +382 -0
- agno/utils/__init__.py +0 -0
- agno/utils/agent.py +820 -0
- agno/utils/audio.py +49 -0
- agno/utils/certs.py +27 -0
- agno/utils/code_execution.py +11 -0
- agno/utils/common.py +132 -0
- agno/utils/dttm.py +13 -0
- agno/utils/enum.py +22 -0
- agno/utils/env.py +11 -0
- agno/utils/events.py +696 -0
- agno/utils/format_str.py +16 -0
- agno/utils/functions.py +166 -0
- agno/utils/gemini.py +426 -0
- agno/utils/hooks.py +57 -0
- agno/utils/http.py +74 -0
- agno/utils/json_schema.py +234 -0
- agno/utils/knowledge.py +36 -0
- agno/utils/location.py +19 -0
- agno/utils/log.py +255 -0
- agno/utils/mcp.py +214 -0
- agno/utils/media.py +352 -0
- agno/utils/merge_dict.py +41 -0
- agno/utils/message.py +118 -0
- agno/utils/models/__init__.py +0 -0
- agno/utils/models/ai_foundry.py +43 -0
- agno/utils/models/claude.py +358 -0
- agno/utils/models/cohere.py +87 -0
- agno/utils/models/llama.py +78 -0
- agno/utils/models/mistral.py +98 -0
- agno/utils/models/openai_responses.py +140 -0
- agno/utils/models/schema_utils.py +153 -0
- agno/utils/models/watsonx.py +41 -0
- agno/utils/openai.py +257 -0
- agno/utils/pickle.py +32 -0
- agno/utils/pprint.py +178 -0
- agno/utils/print_response/__init__.py +0 -0
- agno/utils/print_response/agent.py +842 -0
- agno/utils/print_response/team.py +1724 -0
- agno/utils/print_response/workflow.py +1668 -0
- agno/utils/prompts.py +111 -0
- agno/utils/reasoning.py +108 -0
- agno/utils/response.py +163 -0
- agno/utils/response_iterator.py +17 -0
- agno/utils/safe_formatter.py +24 -0
- agno/utils/serialize.py +32 -0
- agno/utils/shell.py +22 -0
- agno/utils/streamlit.py +487 -0
- agno/utils/string.py +231 -0
- agno/utils/team.py +139 -0
- agno/utils/timer.py +41 -0
- agno/utils/tools.py +102 -0
- agno/utils/web.py +23 -0
- agno/utils/whatsapp.py +305 -0
- agno/utils/yaml_io.py +25 -0
- agno/vectordb/__init__.py +3 -0
- agno/vectordb/base.py +127 -0
- agno/vectordb/cassandra/__init__.py +5 -0
- agno/vectordb/cassandra/cassandra.py +501 -0
- agno/vectordb/cassandra/extra_param_mixin.py +11 -0
- agno/vectordb/cassandra/index.py +13 -0
- agno/vectordb/chroma/__init__.py +5 -0
- agno/vectordb/chroma/chromadb.py +929 -0
- agno/vectordb/clickhouse/__init__.py +9 -0
- agno/vectordb/clickhouse/clickhousedb.py +835 -0
- agno/vectordb/clickhouse/index.py +9 -0
- agno/vectordb/couchbase/__init__.py +3 -0
- agno/vectordb/couchbase/couchbase.py +1442 -0
- agno/vectordb/distance.py +7 -0
- agno/vectordb/lancedb/__init__.py +6 -0
- agno/vectordb/lancedb/lance_db.py +995 -0
- agno/vectordb/langchaindb/__init__.py +5 -0
- agno/vectordb/langchaindb/langchaindb.py +163 -0
- agno/vectordb/lightrag/__init__.py +5 -0
- agno/vectordb/lightrag/lightrag.py +388 -0
- agno/vectordb/llamaindex/__init__.py +3 -0
- agno/vectordb/llamaindex/llamaindexdb.py +166 -0
- agno/vectordb/milvus/__init__.py +4 -0
- agno/vectordb/milvus/milvus.py +1182 -0
- agno/vectordb/mongodb/__init__.py +9 -0
- agno/vectordb/mongodb/mongodb.py +1417 -0
- agno/vectordb/pgvector/__init__.py +12 -0
- agno/vectordb/pgvector/index.py +23 -0
- agno/vectordb/pgvector/pgvector.py +1462 -0
- agno/vectordb/pineconedb/__init__.py +5 -0
- agno/vectordb/pineconedb/pineconedb.py +747 -0
- agno/vectordb/qdrant/__init__.py +5 -0
- agno/vectordb/qdrant/qdrant.py +1134 -0
- agno/vectordb/redis/__init__.py +9 -0
- agno/vectordb/redis/redisdb.py +694 -0
- agno/vectordb/search.py +7 -0
- agno/vectordb/singlestore/__init__.py +10 -0
- agno/vectordb/singlestore/index.py +41 -0
- agno/vectordb/singlestore/singlestore.py +763 -0
- agno/vectordb/surrealdb/__init__.py +3 -0
- agno/vectordb/surrealdb/surrealdb.py +699 -0
- agno/vectordb/upstashdb/__init__.py +5 -0
- agno/vectordb/upstashdb/upstashdb.py +718 -0
- agno/vectordb/weaviate/__init__.py +8 -0
- agno/vectordb/weaviate/index.py +15 -0
- agno/vectordb/weaviate/weaviate.py +1005 -0
- agno/workflow/__init__.py +23 -0
- agno/workflow/agent.py +299 -0
- agno/workflow/condition.py +738 -0
- agno/workflow/loop.py +735 -0
- agno/workflow/parallel.py +824 -0
- agno/workflow/router.py +702 -0
- agno/workflow/step.py +1432 -0
- agno/workflow/steps.py +592 -0
- agno/workflow/types.py +520 -0
- agno/workflow/workflow.py +4321 -0
- agno-2.2.13.dist-info/METADATA +614 -0
- agno-2.2.13.dist-info/RECORD +575 -0
- agno-2.2.13.dist-info/WHEEL +5 -0
- agno-2.2.13.dist-info/licenses/LICENSE +201 -0
- agno-2.2.13.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,835 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from hashlib import md5
|
|
3
|
+
from typing import Any, Dict, List, Optional, Union
|
|
4
|
+
|
|
5
|
+
from agno.vectordb.clickhouse.index import HNSW
|
|
6
|
+
|
|
7
|
+
try:
|
|
8
|
+
import clickhouse_connect
|
|
9
|
+
import clickhouse_connect.driver.asyncclient
|
|
10
|
+
import clickhouse_connect.driver.client
|
|
11
|
+
except ImportError:
|
|
12
|
+
raise ImportError("`clickhouse-connect` not installed. Use `pip install clickhouse-connect` to install it")
|
|
13
|
+
|
|
14
|
+
from agno.filters import FilterExpr
|
|
15
|
+
from agno.knowledge.document import Document
|
|
16
|
+
from agno.knowledge.embedder import Embedder
|
|
17
|
+
from agno.utils.log import log_debug, log_info, log_warning, logger
|
|
18
|
+
from agno.vectordb.base import VectorDb
|
|
19
|
+
from agno.vectordb.distance import Distance
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class Clickhouse(VectorDb):
|
|
23
|
+
def __init__(
    self,
    table_name: str,
    host: str,
    name: Optional[str] = None,
    description: Optional[str] = None,
    username: Optional[str] = None,
    password: str = "",
    port: int = 0,
    database_name: str = "ai",
    dsn: Optional[str] = None,
    compress: str = "lz4",
    client: Optional[clickhouse_connect.driver.client.Client] = None,
    asyncclient: Optional[clickhouse_connect.driver.asyncclient.AsyncClient] = None,
    embedder: Optional[Embedder] = None,
    distance: Distance = Distance.cosine,
    index: Optional[HNSW] = None,
):
    """Initialize the ClickHouse-backed vector store.

    Args:
        table_name: Table the embeddings are stored in.
        host: ClickHouse server host.
        name: Optional name forwarded to the VectorDb base class.
        description: Optional description forwarded to the VectorDb base class.
        username: Username for the connection (may be None).
        password: Password for the connection.
        port: Server port (0 lets clickhouse-connect pick a default).
        database_name: Database the table lives in; created on demand by create().
        dsn: Optional DSN string passed through to clickhouse-connect.
        compress: Compression scheme for the connection (default "lz4").
        client: Pre-built synchronous client; one is created here otherwise.
        asyncclient: Pre-built async client; created lazily on first async use.
        embedder: Embedder for document contents; defaults to OpenAIEmbedder.
        distance: Distance metric used for similarity search.
        index: HNSW index configuration; a fresh HNSW() is used when omitted.
    """
    # Store connection parameters as instance attributes
    self.host = host
    self.username = username
    self.password = password
    self.port = port
    self.dsn = dsn
    # Initialize base class with name and description
    super().__init__(name=name, description=description)

    self.compress = compress
    self.database_name = database_name
    if not client:
        client = clickhouse_connect.get_client(
            host=self.host,
            username=self.username,  # type: ignore
            password=self.password,
            database=self.database_name,
            port=self.port,
            dsn=self.dsn,
            compress=self.compress,
        )

    # Database attributes
    self.client = client
    self.async_client = asyncclient
    self.table_name = table_name

    # Embedder for embedding the document contents
    _embedder = embedder
    if _embedder is None:
        from agno.knowledge.embedder.openai import OpenAIEmbedder

        _embedder = OpenAIEmbedder()
        log_info("Embedder not provided, using OpenAIEmbedder as default.")
    self.embedder: Embedder = _embedder
    self.dimensions: Optional[int] = self.embedder.dimensions

    # Distance metric
    self.distance: Distance = distance

    # Index for the collection.
    # Fix: the previous signature used a mutable default argument
    # (index=HNSW()), so every instance constructed without an explicit
    # index shared the same HNSW object. Build a fresh one per instance.
    self.index: Optional[HNSW] = index if index is not None else HNSW()
|
|
83
|
+
|
|
84
|
+
async def _ensure_async_client(self):
    """Lazily create, cache, and return the async ClickHouse client."""
    if self.async_client is not None:
        return self.async_client
    self.async_client = await clickhouse_connect.get_async_client(
        host=self.host,
        username=self.username,  # type: ignore
        password=self.password,
        database=self.database_name,
        port=self.port,
        dsn=self.dsn,
        compress=self.compress,
        settings={"allow_experimental_vector_similarity_index": 1},
    )
    return self.async_client
|
|
98
|
+
|
|
99
|
+
def _get_base_parameters(self) -> Dict[str, Any]:
|
|
100
|
+
return {
|
|
101
|
+
"table_name": self.table_name,
|
|
102
|
+
"database_name": self.database_name,
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
def table_exists(self) -> bool:
    """Return True if the configured table exists; False on any error."""
    log_debug(f"Checking if table exists: {self.table_name}")
    try:
        response = self.client.command(
            "EXISTS TABLE {database_name:Identifier}.{table_name:Identifier}",
            parameters=self._get_base_parameters(),
        )
        return bool(response)
    except Exception as e:
        logger.error(e)
        return False
|
|
118
|
+
|
|
119
|
+
async def async_table_exists(self) -> bool:
    """Check if a table exists asynchronously."""
    log_debug(f"Async checking if table exists: {self.table_name}")
    try:
        client = await self._ensure_async_client()
        response = await client.command(
            "EXISTS TABLE {database_name:Identifier}.{table_name:Identifier}",
            parameters=self._get_base_parameters(),
        )
        return bool(response)
    except Exception as e:
        logger.error(f"Async error checking if table exists: {e}")
        return False
|
|
134
|
+
|
|
135
|
+
def create(self) -> None:
    # Create the database (if needed) and the embeddings table with an HNSW
    # vector-similarity index. No-op when the table already exists.
    if not self.table_exists():
        log_debug(f"Creating Database: {self.database_name}")
        parameters = {"database_name": self.database_name}
        self.client.command(
            "CREATE DATABASE IF NOT EXISTS {database_name:Identifier}",
            parameters=parameters,
        )

        log_debug(f"Creating table: {self.table_name}")

        # Re-bind to the database/table identifier parameters for CREATE TABLE.
        parameters = self._get_base_parameters()

        if isinstance(self.index, HNSW):
            # vector_similarity arguments, in order: method, metric, embedding
            # dimensions, quantization, max connections per layer, candidate
            # list size for construction.
            index = (
                f"INDEX embedding_index embedding TYPE vector_similarity('hnsw', 'L2Distance', {self.embedder.dimensions}, {self.index.quantization}, "
                f"{self.index.hnsw_max_connections_per_layer}, {self.index.hnsw_candidate_list_size_for_construction})"
            )
            # Vector-similarity indexes are experimental and must be enabled
            # before the CREATE TABLE below.
            self.client.command("SET allow_experimental_vector_similarity_index = 1")
        else:
            # Only HNSW is supported; reject anything else up front.
            raise NotImplementedError(f"Not implemented index {type(self.index)!r} is passed")

        # The JSON column type is gated behind its own setting as well.
        self.client.command("SET enable_json_type = 1")

        # NOTE: the doubled braces are f-string escapes — the driver receives
        # {database_name:Identifier} etc. as bound server-side parameters.
        self.client.command(
            f"""CREATE TABLE IF NOT EXISTS {{database_name:Identifier}}.{{table_name:Identifier}}
            (
                id String,
                name String,
                meta_data JSON DEFAULT '{{}}',
                filters JSON DEFAULT '{{}}',
                content String,
                content_id String,
                embedding Array(Float32),
                usage JSON,
                created_at DateTime('UTC') DEFAULT now(),
                content_hash String,
                PRIMARY KEY (id),
                {index}
            ) ENGINE = ReplacingMergeTree ORDER BY id""",
            parameters=parameters,
        )
|
|
177
|
+
|
|
178
|
+
async def async_create(self) -> None:
    """Create the database and embeddings table asynchronously.

    Mirrors ``create()``: ensures the database exists, enables the
    experimental vector-similarity index and JSON settings, then creates
    the table with an HNSW index. No-op when the table already exists.

    Raises:
        NotImplementedError: If ``self.index`` is not an ``HNSW`` instance.
    """
    if await self.async_table_exists():
        return

    log_debug(f"Async creating Database: {self.database_name}")
    async_client = await self._ensure_async_client()

    await async_client.command(
        "CREATE DATABASE IF NOT EXISTS {database_name:Identifier}",
        parameters={"database_name": self.database_name},
    )

    log_debug(f"Async creating table: {self.table_name}")
    parameters = self._get_base_parameters()

    if isinstance(self.index, HNSW):
        # Bug fix: this clause previously omitted the embedding-dimensions
        # argument that the synchronous create() passes, shifting every
        # subsequent vector_similarity() argument by one position. Order is:
        # method, metric, dimensions, quantization, max connections per
        # layer, candidate list size for construction.
        index = (
            f"INDEX embedding_index embedding TYPE vector_similarity('hnsw', 'L2Distance', {self.embedder.dimensions}, {self.index.quantization}, "
            f"{self.index.hnsw_max_connections_per_layer}, {self.index.hnsw_candidate_list_size_for_construction})"
        )
        await async_client.command("SET allow_experimental_vector_similarity_index = 1")
    else:
        raise NotImplementedError(f"Not implemented index {type(self.index)!r} is passed")

    # Use the ensured local client consistently (the original mixed
    # async_client and self.async_client; they refer to the same object).
    await async_client.command("SET enable_json_type = 1")

    # Doubled braces are f-string escapes; the server receives
    # {database_name:Identifier} etc. as bound parameters.
    await async_client.command(
        f"""CREATE TABLE IF NOT EXISTS {{database_name:Identifier}}.{{table_name:Identifier}}
        (
            id String,
            name String,
            meta_data JSON DEFAULT '{{}}',
            filters JSON DEFAULT '{{}}',
            content String,
            content_id String,
            embedding Array(Float32),
            usage JSON,
            created_at DateTime('UTC') DEFAULT now(),
            content_hash String,
            PRIMARY KEY (id),
            {index}
        ) ENGINE = ReplacingMergeTree ORDER BY id""",
        parameters=parameters,
    )
|
|
222
|
+
|
|
223
|
+
def name_exists(self, name: str) -> bool:
    """
    Validate if a row with this name exists or not

    Args:
        name (str): Name to check
    """
    params = self._get_base_parameters()
    params["name"] = name

    response = self.client.query(
        "SELECT name FROM {database_name:Identifier}.{table_name:Identifier} WHERE name = {name:String}",
        parameters=params,
    )
    rows = response.result_rows
    return len(rows) > 0 if rows else False
|
|
238
|
+
|
|
239
|
+
async def async_name_exists(self, name: str) -> bool:
    """Check if a document with given name exists asynchronously."""
    params = self._get_base_parameters()
    client = await self._ensure_async_client()

    params["name"] = name

    response = await client.query(
        "SELECT name FROM {database_name:Identifier}.{table_name:Identifier} WHERE name = {name:String}",
        parameters=params,
    )
    rows = response.result_rows
    return len(rows) > 0 if rows else False
|
|
251
|
+
|
|
252
|
+
def id_exists(self, id: str) -> bool:
    """Return True when a row with this primary-key id is present.

    Args:
        id (str): Id to check
    """
    lookup_params = self._get_base_parameters()
    lookup_params["id"] = id
    response = self.client.query(
        "SELECT id FROM {database_name:Identifier}.{table_name:Identifier} WHERE id = {id:String}",
        parameters=lookup_params,
    )
    return bool(response.result_rows)
|
|
267
|
+
|
|
268
|
+
def insert(
    self,
    content_hash: str,
    documents: List[Document],
    filters: Optional[Dict[str, Any]] = None,
) -> None:
    """Embed each document and insert all of them in one batch.

    Args:
        content_hash (str): Hash stored on every inserted row.
        documents (List[Document]): Documents to embed and persist.
        filters (Optional[Dict[str, Any]]): Filters stored alongside each row.
    """
    column_names = [
        "id",
        "name",
        "meta_data",
        "filters",
        "content",
        "content_id",
        "embedding",
        "usage",
        "content_hash",
    ]
    rows: List[List[Any]] = []
    for doc in documents:
        doc.embed(embedder=self.embedder)
        # NUL bytes are not representable in ClickHouse strings; replace them.
        content = doc.content.replace("\x00", "\ufffd")
        rows.append(
            [
                md5(content.encode()).hexdigest(),  # row id derived from content
                doc.name,
                doc.meta_data,
                filters,
                content,
                doc.content_id,
                doc.embedding,
                doc.usage,
                content_hash,
            ]
        )

    self.client.insert(
        f"{self.database_name}.{self.table_name}",
        rows,
        column_names=column_names,
    )
    log_debug(f"Inserted {len(documents)} documents")
|
|
309
|
+
|
|
310
|
+
async def async_insert(
    self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
) -> None:
    """Insert documents asynchronously.

    Embeds all documents first (batched when the embedder supports it, with a
    fallback to per-document embedding), then performs a single batched insert.

    Args:
        content_hash (str): Hash stored on every inserted row.
        documents (List[Document]): Documents to embed and persist.
        filters (Optional[Dict[str, Any]]): Filters stored alongside each row.

    Raises:
        Exception: Re-raised when batch embedding hits a rate limit.
    """
    rows: List[List[Any]] = []
    async_client = await self._ensure_async_client()

    if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
        # Use batch embedding when enabled and supported
        try:
            # Extract content from all documents
            doc_contents = [doc.content for doc in documents]

            # Get batch embeddings and usage
            embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)

            # Process documents with pre-computed embeddings; embeddings/usages
            # may be shorter than documents, so guard by index.
            for j, doc in enumerate(documents):
                try:
                    if j < len(embeddings):
                        doc.embedding = embeddings[j]
                        doc.usage = usages[j] if j < len(usages) else None
                except Exception as e:
                    logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")

        except Exception as e:
            # Check if this is a rate limit error - don't fall back as it would make things worse
            error_str = str(e).lower()
            is_rate_limit = any(
                phrase in error_str
                for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
            )

            if is_rate_limit:
                logger.error(f"Rate limit detected during batch embedding. {e}")
                raise e
            else:
                logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
                # Fall back to individual embedding; return_exceptions=True means
                # per-document failures are swallowed here and surface later as
                # rows with missing embeddings.
                embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
                await asyncio.gather(*embed_tasks, return_exceptions=True)
    else:
        # Use individual embedding
        embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
        await asyncio.gather(*embed_tasks, return_exceptions=True)

    for document in documents:
        # NUL bytes are not representable in ClickHouse strings; replace them.
        cleaned_content = document.content.replace("\x00", "\ufffd")
        _id = md5(cleaned_content.encode()).hexdigest()

        row: List[Any] = [
            _id,
            document.name,
            document.meta_data,
            filters,
            cleaned_content,
            document.content_id,
            document.embedding,
            document.usage,
            content_hash,
        ]
        rows.append(row)

    await async_client.insert(
        f"{self.database_name}.{self.table_name}",
        rows,
        column_names=[
            "id",
            "name",
            "meta_data",
            "filters",
            "content",
            "content_id",
            "embedding",
            "usage",
            "content_hash",
        ],
    )
    log_debug(f"Async inserted {len(documents)} documents")
|
|
389
|
+
|
|
390
|
+
def upsert_available(self) -> bool:
    """Signal that this vector store supports upsert operations."""
    return True
|
|
392
|
+
|
|
393
|
+
def upsert(
    self,
    content_hash: str,
    documents: List[Document],
    filters: Optional[Dict[str, Any]] = None,
) -> None:
    """Upsert documents: drop any rows sharing this content hash, then insert."""
    already_stored = self.content_hash_exists(content_hash)
    if already_stored:
        self._delete_by_content_hash(content_hash)
    self.insert(content_hash=content_hash, documents=documents, filters=filters)
|
|
405
|
+
|
|
406
|
+
def _upsert(
    self,
    content_hash: str,
    documents: List[Document],
    filters: Optional[Dict[str, Any]] = None,
) -> None:
    """
    Upsert documents into the database.

    Args:
        content_hash (str): Hash stored on every inserted row
        documents (List[Document]): List of documents to upsert
        filters (Optional[Dict[str, Any]]): Filters to apply while upserting documents
    """
    # We are using ReplacingMergeTree engine in our table, so we need to insert the documents,
    # then call SELECT with FINAL
    self.insert(content_hash=content_hash, documents=documents, filters=filters)

    # The result is intentionally discarded: the SELECT ... FINAL forces
    # deduplicated reads; its rows are not needed here.
    parameters = self._get_base_parameters()
    self.client.query(
        "SELECT id FROM {database_name:Identifier}.{table_name:Identifier} FINAL",
        parameters=parameters,
    )
|
|
429
|
+
|
|
430
|
+
async def async_upsert(
    self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
) -> None:
    """Upsert documents asynchronously: purge rows with this content hash, then insert."""
    already_stored = self.content_hash_exists(content_hash)
    if already_stored:
        self._delete_by_content_hash(content_hash)
    await self._async_upsert(content_hash=content_hash, documents=documents, filters=filters)
|
|
437
|
+
|
|
438
|
+
async def _async_upsert(
    self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
) -> None:
    """Upsert documents asynchronously.

    Args:
        content_hash (str): Hash stored on every inserted row
        documents (List[Document]): List of documents to upsert
        filters (Optional[Dict[str, Any]]): Filters to apply while upserting documents
    """
    # We are using ReplacingMergeTree engine in our table, so we need to insert the documents,
    # then call SELECT with FINAL
    await self.async_insert(content_hash=content_hash, documents=documents, filters=filters)

    parameters = self._get_base_parameters()
    # Consistency/robustness fix: go through _ensure_async_client() like every
    # other async method, instead of touching self.async_client directly
    # (which may not be initialized yet).
    async_client = await self._ensure_async_client()
    await async_client.query(
        "SELECT id FROM {database_name:Identifier}.{table_name:Identifier} FINAL",
        parameters=parameters,
    )
|
|
451
|
+
|
|
452
|
+
def search(
    self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
) -> List[Document]:
    """Embed *query* and return up to *limit* nearest documents.

    Args:
        query (str): Free-text query to embed and search with.
        limit (int): Maximum number of rows returned.
        filters: Accepted for interface compatibility but ignored (filters are
            not yet supported for ClickHouse); a warning is logged when given.

    Returns:
        List[Document]: Matching documents; [] when embedding or the query fails.
    """
    if filters is not None:
        log_warning("Filters are not yet supported in Clickhouse. No filters will be applied.")
    query_embedding = self.embedder.get_embedding(query)
    if query_embedding is None:
        logger.error(f"Error getting embedding for Query: {query}")
        return []

    parameters = self._get_base_parameters()
    # Reserved for future filter support; currently always empty.
    where_query = ""

    order_by_query = ""
    # NOTE(review): max_inner_product is ordered via L2Distance here — confirm intended.
    if self.distance == Distance.l2 or self.distance == Distance.max_inner_product:
        order_by_query = "ORDER BY L2Distance(embedding, {query_embedding:Array(Float32)})"
        parameters["query_embedding"] = query_embedding
    if self.distance == Distance.cosine:
        order_by_query = "ORDER BY cosineDistance(embedding, {query_embedding:Array(Float32)})"
        parameters["query_embedding"] = query_embedding

    clickhouse_query = (
        "SELECT name, meta_data, content, content_id, embedding, usage FROM "
        "{database_name:Identifier}.{table_name:Identifier} "
        f"{where_query} {order_by_query} LIMIT {limit}"
    )
    log_debug(f"Query: {clickhouse_query}")
    log_debug(f"Params: {parameters}")

    try:
        results = self.client.query(
            clickhouse_query,
            parameters=parameters,
        )
    except Exception as e:
        # A failed query is treated as "table missing": create it for next time.
        logger.error(f"Error searching for documents: {e}")
        logger.error("Table might not exist, creating for future use")
        self.create()
        return []

    # Build search results
    search_results: List[Document] = []
    for result in results.result_rows:
        search_results.append(
            Document(
                name=result[0],
                meta_data=result[1],
                content=result[2],
                content_id=result[3],
                embedder=self.embedder,
                embedding=result[4],
                usage=result[5],
            )
        )

    return search_results
|
|
508
|
+
|
|
509
|
+
async def async_search(
    self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
) -> List[Document]:
    """Search for documents asynchronously.

    Args:
        query (str): Free-text query to embed and search with.
        limit (int): Maximum number of rows returned.
        filters: Accepted for interface compatibility but ignored (filters are
            not yet supported for ClickHouse); a warning is logged when given.

    Returns:
        List[Document]: Matching documents; [] when embedding or the query fails.
    """
    async_client = await self._ensure_async_client()

    if filters is not None:
        log_warning("Filters are not yet supported in Clickhouse. No filters will be applied.")

    # NOTE(review): embedding is computed synchronously even on the async path —
    # confirm whether an async embedder call should be used here.
    query_embedding = self.embedder.get_embedding(query)
    if query_embedding is None:
        logger.error(f"Error getting embedding for Query: {query}")
        return []

    parameters = self._get_base_parameters()
    # Reserved for future filter support; currently always empty.
    where_query = ""

    order_by_query = ""
    # NOTE(review): max_inner_product is ordered via L2Distance here — confirm intended.
    if self.distance == Distance.l2 or self.distance == Distance.max_inner_product:
        order_by_query = "ORDER BY L2Distance(embedding, {query_embedding:Array(Float32)})"
        parameters["query_embedding"] = query_embedding
    if self.distance == Distance.cosine:
        order_by_query = "ORDER BY cosineDistance(embedding, {query_embedding:Array(Float32)})"
        parameters["query_embedding"] = query_embedding

    clickhouse_query = (
        "SELECT name, meta_data, content, content_id, embedding, usage FROM "
        "{database_name:Identifier}.{table_name:Identifier} "
        f"{where_query} {order_by_query} LIMIT {limit}"
    )
    log_debug(f"Async Query: {clickhouse_query}")
    log_debug(f"Async Params: {parameters}")

    try:
        results = await async_client.query(
            clickhouse_query,
            parameters=parameters,
        )
    except Exception as e:
        # A failed query is treated as "table missing": create it for next time.
        logger.error(f"Async error searching for documents: {e}")
        logger.error("Table might not exist, creating for future use")
        await self.async_create()
        return []

    # Build search results
    search_results: List[Document] = []
    for result in results.result_rows:
        search_results.append(
            Document(
                name=result[0],
                meta_data=result[1],
                content=result[2],
                content_id=result[3],
                embedder=self.embedder,
                embedding=result[4],
                usage=result[5],
            )
        )

    return search_results
|
|
569
|
+
|
|
570
|
+
def drop(self) -> None:
    """Drop the backing table when it exists; otherwise do nothing."""
    if not self.table_exists():
        return
    log_debug(f"Deleting table: {self.table_name}")
    self.client.command(
        "DROP TABLE {database_name:Identifier}.{table_name:Identifier}",
        parameters=self._get_base_parameters(),
    )
|
|
578
|
+
|
|
579
|
+
async def async_drop(self) -> None:
    """Drop the table asynchronously (no-op when the table does not exist)."""
    if await self.async_exists():
        log_debug(f"Async dropping table: {self.table_name}")
        parameters = self._get_base_parameters()
        # Consistency/robustness fix: obtain the client via
        # _ensure_async_client() like the other async methods, rather than
        # using self.async_client directly (which may not be initialized).
        async_client = await self._ensure_async_client()
        await async_client.command(
            "DROP TABLE {database_name:Identifier}.{table_name:Identifier}",
            parameters=parameters,
        )
|
|
588
|
+
|
|
589
|
+
def exists(self) -> bool:
    """Interface alias: delegates to table_exists()."""
    return self.table_exists()
|
|
591
|
+
|
|
592
|
+
async def async_exists(self) -> bool:
    """Async interface alias: delegates to async_table_exists()."""
    return await self.async_table_exists()
|
|
594
|
+
|
|
595
|
+
def get_count(self) -> int:
    """Return the total row count of the table, or 0 when no row comes back."""
    outcome = self.client.query(
        "SELECT count(*) FROM {database_name:Identifier}.{table_name:Identifier}",
        parameters=self._get_base_parameters(),
    )
    first = outcome.first_row
    return int(first[0]) if first else 0
|
|
605
|
+
|
|
606
|
+
def optimize(self) -> None:
    """Intentional no-op: ClickHouse manages merges itself, nothing to do here."""
    log_debug("==== No need to optimize Clickhouse DB. Skipping this step ====")
|
|
608
|
+
|
|
609
|
+
def delete(self) -> bool:
    """Delete every row from the table; always reports success."""
    self.client.command(
        "DELETE FROM {database_name:Identifier}.{table_name:Identifier}",
        parameters=self._get_base_parameters(),
    )
    return True
|
|
616
|
+
|
|
617
|
+
def delete_by_id(self, id: str) -> bool:
    """Delete a single document by its primary-key id.

    Args:
        id (str): The document ID to delete

    Returns:
        bool: True if the document existed and the delete was issued, False otherwise
    """
    try:
        log_debug(f"ClickHouse VectorDB : Deleting document with ID {id}")
        if not self.id_exists(id):
            return False
        delete_params = self._get_base_parameters()
        delete_params["id"] = id
        self.client.command(
            "DELETE FROM {database_name:Identifier}.{table_name:Identifier} WHERE id = {id:String}",
            parameters=delete_params,
        )
    except Exception as e:
        # Best-effort: failures are logged, never raised to the caller.
        log_info(f"Error deleting document with ID {id}: {e}")
        return False
    return True
|
|
644
|
+
|
|
645
|
+
def delete_by_name(self, name: str) -> bool:
    """Delete all documents carrying the given name.

    Args:
        name (str): The document name to delete

    Returns:
        bool: True if documents existed and the delete was issued, False otherwise
    """
    try:
        log_debug(f"ClickHouse VectorDB : Deleting documents with name {name}")
        if not self.name_exists(name):
            return False
        delete_params = self._get_base_parameters()
        delete_params["name"] = name
        self.client.command(
            "DELETE FROM {database_name:Identifier}.{table_name:Identifier} WHERE name = {name:String}",
            parameters=delete_params,
        )
    except Exception as e:
        # Best-effort: failures are logged, never raised to the caller.
        log_info(f"Error deleting documents with name {name}: {e}")
        return False
    return True
|
|
671
|
+
|
|
672
|
+
def delete_by_metadata(self, metadata: Dict[str, Any]) -> bool:
    """
    Delete documents whose stored filters match all given metadata key/value pairs.

    Args:
        metadata (Dict[str, Any]): The metadata to match for deletion

    Returns:
        bool: True if the delete statement was issued, False otherwise
    """
    try:
        log_debug(f"ClickHouse VectorDB : Deleting documents with metadata {metadata}")
        parameters = self._get_base_parameters()

        # Security fix: bind metadata keys/values as query parameters instead
        # of interpolating them into the SQL text (prevents SQL injection via
        # caller-supplied metadata).
        where_conditions = []
        for i, (key, value) in enumerate(metadata.items()):
            key_param = f"meta_key_{i}"
            parameters[key_param] = key
            if isinstance(value, bool):  # bool first: bool is a subclass of int
                where_conditions.append(
                    f"JSONExtractBool(toString(filters), {{{key_param}:String}}) = {str(value).lower()}"
                )
            elif isinstance(value, (int, float)):
                value_param = f"meta_val_{i}"
                parameters[value_param] = float(value)
                where_conditions.append(
                    f"JSONExtractFloat(toString(filters), {{{key_param}:String}}) = {{{value_param}:Float64}}"
                )
            else:
                value_param = f"meta_val_{i}"
                parameters[value_param] = str(value)
                where_conditions.append(
                    f"JSONExtractString(toString(filters), {{{key_param}:String}}) = {{{value_param}:String}}"
                )

        if not where_conditions:
            return False

        where_clause = " AND ".join(where_conditions)

        self.client.command(
            f"DELETE FROM {{database_name:Identifier}}.{{table_name:Identifier}} WHERE {where_clause}",
            parameters=parameters,
        )
        return True
    except Exception as e:
        log_info(f"Error deleting documents with metadata {metadata}: {e}")
        return False
|
|
709
|
+
|
|
710
|
+
def delete_by_content_id(self, content_id: str) -> bool:
    """Delete all documents carrying the given content ID.

    Args:
        content_id (str): The content ID to delete

    Returns:
        bool: True if the delete was issued, False otherwise
    """
    try:
        log_debug(f"ClickHouse VectorDB : Deleting documents with content_id {content_id}")
        delete_params = self._get_base_parameters()
        delete_params["content_id"] = content_id
        self.client.command(
            "DELETE FROM {database_name:Identifier}.{table_name:Identifier} WHERE content_id = {content_id:String}",
            parameters=delete_params,
        )
    except Exception as e:
        # Best-effort: failures are logged, never raised to the caller.
        log_info(f"Error deleting documents with content_id {content_id}: {e}")
        return False
    return True
|
|
733
|
+
|
|
734
|
+
def content_hash_exists(self, content_hash: str) -> bool:
    """Return True when any row carries the given content hash.

    Args:
        content_hash (str): Content hash to check
    """
    lookup_params = self._get_base_parameters()
    lookup_params["content_hash"] = content_hash
    response = self.client.query(
        "SELECT content_hash FROM {database_name:Identifier}.{table_name:Identifier} WHERE content_hash = {content_hash:String}",
        parameters=lookup_params,
    )
    return bool(response.result_rows)
|
|
749
|
+
|
|
750
|
+
def _delete_by_content_hash(self, content_hash: str) -> bool:
|
|
751
|
+
"""
|
|
752
|
+
Delete documents by content hash.
|
|
753
|
+
"""
|
|
754
|
+
try:
|
|
755
|
+
parameters = self._get_base_parameters()
|
|
756
|
+
parameters["content_hash"] = content_hash
|
|
757
|
+
|
|
758
|
+
self.client.command(
|
|
759
|
+
"DELETE FROM {database_name:Identifier}.{table_name:Identifier} WHERE content_hash = {content_hash:String}",
|
|
760
|
+
parameters=parameters,
|
|
761
|
+
)
|
|
762
|
+
return True
|
|
763
|
+
except Exception:
|
|
764
|
+
return False
|
|
765
|
+
|
|
766
|
+
def update_metadata(self, content_id: str, metadata: Dict[str, Any]) -> None:
    """
    Update the metadata for documents with the given content_id.

    Merges *metadata* into both the meta_data and filters JSON columns of
    every matching row (new keys overwrite existing ones) using
    ALTER TABLE ... UPDATE, one statement per row.

    Args:
        content_id (str): The content ID to update
        metadata (Dict[str, Any]): The metadata to update

    Raises:
        Exception: Re-raises any error from the read or update statements.
    """
    import json

    try:
        parameters = self._get_base_parameters()
        parameters["content_id"] = content_id

        # First, get existing documents with their current metadata and filters
        result = self.client.query(
            "SELECT id, meta_data, filters FROM {database_name:Identifier}.{table_name:Identifier} WHERE content_id = {content_id:String}",
            parameters=parameters,
        )

        if not result.result_rows:
            logger.debug(f"No documents found with content_id: {content_id}")
            return

        # Update each document
        updated_count = 0
        for row in result.result_rows:
            doc_id, current_meta_json, current_filters_json = row

            # Parse existing metadata (unparseable/empty values fall back to {})
            try:
                current_metadata = json.loads(current_meta_json) if current_meta_json else {}
            except (json.JSONDecodeError, TypeError):
                current_metadata = {}

            # Parse existing filters (same fallback)
            try:
                current_filters = json.loads(current_filters_json) if current_filters_json else {}
            except (json.JSONDecodeError, TypeError):
                current_filters = {}

            # Merge existing metadata with new metadata
            updated_metadata = current_metadata.copy()
            updated_metadata.update(metadata)

            # Merge existing filters with new metadata
            updated_filters = current_filters.copy()
            updated_filters.update(metadata)

            # Update the document row in place; parameters are copied so the
            # shared base dict is not mutated across iterations.
            update_params = parameters.copy()
            update_params["doc_id"] = doc_id
            update_params["metadata_json"] = json.dumps(updated_metadata)
            update_params["filters_json"] = json.dumps(updated_filters)

            self.client.command(
                "ALTER TABLE {database_name:Identifier}.{table_name:Identifier} UPDATE meta_data = {metadata_json:String}, filters = {filters_json:String} WHERE id = {doc_id:String}",
                parameters=update_params,
            )
            updated_count += 1

        logger.debug(f"Updated metadata for {updated_count} documents with content_id: {content_id}")

    except Exception as e:
        logger.error(f"Error updating metadata for content_id '{content_id}': {e}")
        raise
|
|
832
|
+
|
|
833
|
+
def get_supported_search_types(self) -> List[str]:
    """Return the supported search types (empty: ClickHouse ignores SearchType)."""
    return []
|