agno 2.2.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/__init__.py +8 -0
- agno/agent/__init__.py +51 -0
- agno/agent/agent.py +10405 -0
- agno/api/__init__.py +0 -0
- agno/api/agent.py +28 -0
- agno/api/api.py +40 -0
- agno/api/evals.py +22 -0
- agno/api/os.py +17 -0
- agno/api/routes.py +13 -0
- agno/api/schemas/__init__.py +9 -0
- agno/api/schemas/agent.py +16 -0
- agno/api/schemas/evals.py +16 -0
- agno/api/schemas/os.py +14 -0
- agno/api/schemas/response.py +6 -0
- agno/api/schemas/team.py +16 -0
- agno/api/schemas/utils.py +21 -0
- agno/api/schemas/workflows.py +16 -0
- agno/api/settings.py +53 -0
- agno/api/team.py +30 -0
- agno/api/workflow.py +28 -0
- agno/cloud/aws/base.py +214 -0
- agno/cloud/aws/s3/__init__.py +2 -0
- agno/cloud/aws/s3/api_client.py +43 -0
- agno/cloud/aws/s3/bucket.py +195 -0
- agno/cloud/aws/s3/object.py +57 -0
- agno/culture/__init__.py +3 -0
- agno/culture/manager.py +956 -0
- agno/db/__init__.py +24 -0
- agno/db/async_postgres/__init__.py +3 -0
- agno/db/base.py +598 -0
- agno/db/dynamo/__init__.py +3 -0
- agno/db/dynamo/dynamo.py +2042 -0
- agno/db/dynamo/schemas.py +314 -0
- agno/db/dynamo/utils.py +743 -0
- agno/db/firestore/__init__.py +3 -0
- agno/db/firestore/firestore.py +1795 -0
- agno/db/firestore/schemas.py +140 -0
- agno/db/firestore/utils.py +376 -0
- agno/db/gcs_json/__init__.py +3 -0
- agno/db/gcs_json/gcs_json_db.py +1335 -0
- agno/db/gcs_json/utils.py +228 -0
- agno/db/in_memory/__init__.py +3 -0
- agno/db/in_memory/in_memory_db.py +1160 -0
- agno/db/in_memory/utils.py +230 -0
- agno/db/json/__init__.py +3 -0
- agno/db/json/json_db.py +1328 -0
- agno/db/json/utils.py +230 -0
- agno/db/migrations/__init__.py +0 -0
- agno/db/migrations/v1_to_v2.py +635 -0
- agno/db/mongo/__init__.py +17 -0
- agno/db/mongo/async_mongo.py +2026 -0
- agno/db/mongo/mongo.py +1982 -0
- agno/db/mongo/schemas.py +87 -0
- agno/db/mongo/utils.py +259 -0
- agno/db/mysql/__init__.py +3 -0
- agno/db/mysql/mysql.py +2308 -0
- agno/db/mysql/schemas.py +138 -0
- agno/db/mysql/utils.py +355 -0
- agno/db/postgres/__init__.py +4 -0
- agno/db/postgres/async_postgres.py +1927 -0
- agno/db/postgres/postgres.py +2260 -0
- agno/db/postgres/schemas.py +139 -0
- agno/db/postgres/utils.py +442 -0
- agno/db/redis/__init__.py +3 -0
- agno/db/redis/redis.py +1660 -0
- agno/db/redis/schemas.py +123 -0
- agno/db/redis/utils.py +346 -0
- agno/db/schemas/__init__.py +4 -0
- agno/db/schemas/culture.py +120 -0
- agno/db/schemas/evals.py +33 -0
- agno/db/schemas/knowledge.py +40 -0
- agno/db/schemas/memory.py +46 -0
- agno/db/schemas/metrics.py +0 -0
- agno/db/singlestore/__init__.py +3 -0
- agno/db/singlestore/schemas.py +130 -0
- agno/db/singlestore/singlestore.py +2272 -0
- agno/db/singlestore/utils.py +384 -0
- agno/db/sqlite/__init__.py +4 -0
- agno/db/sqlite/async_sqlite.py +2293 -0
- agno/db/sqlite/schemas.py +133 -0
- agno/db/sqlite/sqlite.py +2288 -0
- agno/db/sqlite/utils.py +431 -0
- agno/db/surrealdb/__init__.py +3 -0
- agno/db/surrealdb/metrics.py +292 -0
- agno/db/surrealdb/models.py +309 -0
- agno/db/surrealdb/queries.py +71 -0
- agno/db/surrealdb/surrealdb.py +1353 -0
- agno/db/surrealdb/utils.py +147 -0
- agno/db/utils.py +116 -0
- agno/debug.py +18 -0
- agno/eval/__init__.py +14 -0
- agno/eval/accuracy.py +834 -0
- agno/eval/performance.py +773 -0
- agno/eval/reliability.py +306 -0
- agno/eval/utils.py +119 -0
- agno/exceptions.py +161 -0
- agno/filters.py +354 -0
- agno/guardrails/__init__.py +6 -0
- agno/guardrails/base.py +19 -0
- agno/guardrails/openai.py +144 -0
- agno/guardrails/pii.py +94 -0
- agno/guardrails/prompt_injection.py +52 -0
- agno/integrations/__init__.py +0 -0
- agno/integrations/discord/__init__.py +3 -0
- agno/integrations/discord/client.py +203 -0
- agno/knowledge/__init__.py +5 -0
- agno/knowledge/chunking/__init__.py +0 -0
- agno/knowledge/chunking/agentic.py +79 -0
- agno/knowledge/chunking/document.py +91 -0
- agno/knowledge/chunking/fixed.py +57 -0
- agno/knowledge/chunking/markdown.py +151 -0
- agno/knowledge/chunking/recursive.py +63 -0
- agno/knowledge/chunking/row.py +39 -0
- agno/knowledge/chunking/semantic.py +86 -0
- agno/knowledge/chunking/strategy.py +165 -0
- agno/knowledge/content.py +74 -0
- agno/knowledge/document/__init__.py +5 -0
- agno/knowledge/document/base.py +58 -0
- agno/knowledge/embedder/__init__.py +5 -0
- agno/knowledge/embedder/aws_bedrock.py +343 -0
- agno/knowledge/embedder/azure_openai.py +210 -0
- agno/knowledge/embedder/base.py +23 -0
- agno/knowledge/embedder/cohere.py +323 -0
- agno/knowledge/embedder/fastembed.py +62 -0
- agno/knowledge/embedder/fireworks.py +13 -0
- agno/knowledge/embedder/google.py +258 -0
- agno/knowledge/embedder/huggingface.py +94 -0
- agno/knowledge/embedder/jina.py +182 -0
- agno/knowledge/embedder/langdb.py +22 -0
- agno/knowledge/embedder/mistral.py +206 -0
- agno/knowledge/embedder/nebius.py +13 -0
- agno/knowledge/embedder/ollama.py +154 -0
- agno/knowledge/embedder/openai.py +195 -0
- agno/knowledge/embedder/sentence_transformer.py +63 -0
- agno/knowledge/embedder/together.py +13 -0
- agno/knowledge/embedder/vllm.py +262 -0
- agno/knowledge/embedder/voyageai.py +165 -0
- agno/knowledge/knowledge.py +1988 -0
- agno/knowledge/reader/__init__.py +7 -0
- agno/knowledge/reader/arxiv_reader.py +81 -0
- agno/knowledge/reader/base.py +95 -0
- agno/knowledge/reader/csv_reader.py +166 -0
- agno/knowledge/reader/docx_reader.py +82 -0
- agno/knowledge/reader/field_labeled_csv_reader.py +292 -0
- agno/knowledge/reader/firecrawl_reader.py +201 -0
- agno/knowledge/reader/json_reader.py +87 -0
- agno/knowledge/reader/markdown_reader.py +137 -0
- agno/knowledge/reader/pdf_reader.py +431 -0
- agno/knowledge/reader/pptx_reader.py +101 -0
- agno/knowledge/reader/reader_factory.py +313 -0
- agno/knowledge/reader/s3_reader.py +89 -0
- agno/knowledge/reader/tavily_reader.py +194 -0
- agno/knowledge/reader/text_reader.py +115 -0
- agno/knowledge/reader/web_search_reader.py +372 -0
- agno/knowledge/reader/website_reader.py +455 -0
- agno/knowledge/reader/wikipedia_reader.py +59 -0
- agno/knowledge/reader/youtube_reader.py +78 -0
- agno/knowledge/remote_content/__init__.py +0 -0
- agno/knowledge/remote_content/remote_content.py +88 -0
- agno/knowledge/reranker/__init__.py +3 -0
- agno/knowledge/reranker/base.py +14 -0
- agno/knowledge/reranker/cohere.py +64 -0
- agno/knowledge/reranker/infinity.py +195 -0
- agno/knowledge/reranker/sentence_transformer.py +54 -0
- agno/knowledge/types.py +39 -0
- agno/knowledge/utils.py +189 -0
- agno/media.py +462 -0
- agno/memory/__init__.py +3 -0
- agno/memory/manager.py +1327 -0
- agno/models/__init__.py +0 -0
- agno/models/aimlapi/__init__.py +5 -0
- agno/models/aimlapi/aimlapi.py +45 -0
- agno/models/anthropic/__init__.py +5 -0
- agno/models/anthropic/claude.py +757 -0
- agno/models/aws/__init__.py +15 -0
- agno/models/aws/bedrock.py +701 -0
- agno/models/aws/claude.py +378 -0
- agno/models/azure/__init__.py +18 -0
- agno/models/azure/ai_foundry.py +485 -0
- agno/models/azure/openai_chat.py +131 -0
- agno/models/base.py +2175 -0
- agno/models/cerebras/__init__.py +12 -0
- agno/models/cerebras/cerebras.py +501 -0
- agno/models/cerebras/cerebras_openai.py +112 -0
- agno/models/cohere/__init__.py +5 -0
- agno/models/cohere/chat.py +389 -0
- agno/models/cometapi/__init__.py +5 -0
- agno/models/cometapi/cometapi.py +57 -0
- agno/models/dashscope/__init__.py +5 -0
- agno/models/dashscope/dashscope.py +91 -0
- agno/models/deepinfra/__init__.py +5 -0
- agno/models/deepinfra/deepinfra.py +28 -0
- agno/models/deepseek/__init__.py +5 -0
- agno/models/deepseek/deepseek.py +61 -0
- agno/models/defaults.py +1 -0
- agno/models/fireworks/__init__.py +5 -0
- agno/models/fireworks/fireworks.py +26 -0
- agno/models/google/__init__.py +5 -0
- agno/models/google/gemini.py +1085 -0
- agno/models/groq/__init__.py +5 -0
- agno/models/groq/groq.py +556 -0
- agno/models/huggingface/__init__.py +5 -0
- agno/models/huggingface/huggingface.py +491 -0
- agno/models/ibm/__init__.py +5 -0
- agno/models/ibm/watsonx.py +422 -0
- agno/models/internlm/__init__.py +3 -0
- agno/models/internlm/internlm.py +26 -0
- agno/models/langdb/__init__.py +1 -0
- agno/models/langdb/langdb.py +48 -0
- agno/models/litellm/__init__.py +14 -0
- agno/models/litellm/chat.py +468 -0
- agno/models/litellm/litellm_openai.py +25 -0
- agno/models/llama_cpp/__init__.py +5 -0
- agno/models/llama_cpp/llama_cpp.py +22 -0
- agno/models/lmstudio/__init__.py +5 -0
- agno/models/lmstudio/lmstudio.py +25 -0
- agno/models/message.py +434 -0
- agno/models/meta/__init__.py +12 -0
- agno/models/meta/llama.py +475 -0
- agno/models/meta/llama_openai.py +78 -0
- agno/models/metrics.py +120 -0
- agno/models/mistral/__init__.py +5 -0
- agno/models/mistral/mistral.py +432 -0
- agno/models/nebius/__init__.py +3 -0
- agno/models/nebius/nebius.py +54 -0
- agno/models/nexus/__init__.py +3 -0
- agno/models/nexus/nexus.py +22 -0
- agno/models/nvidia/__init__.py +5 -0
- agno/models/nvidia/nvidia.py +28 -0
- agno/models/ollama/__init__.py +5 -0
- agno/models/ollama/chat.py +441 -0
- agno/models/openai/__init__.py +9 -0
- agno/models/openai/chat.py +883 -0
- agno/models/openai/like.py +27 -0
- agno/models/openai/responses.py +1050 -0
- agno/models/openrouter/__init__.py +5 -0
- agno/models/openrouter/openrouter.py +66 -0
- agno/models/perplexity/__init__.py +5 -0
- agno/models/perplexity/perplexity.py +187 -0
- agno/models/portkey/__init__.py +3 -0
- agno/models/portkey/portkey.py +81 -0
- agno/models/requesty/__init__.py +5 -0
- agno/models/requesty/requesty.py +52 -0
- agno/models/response.py +199 -0
- agno/models/sambanova/__init__.py +5 -0
- agno/models/sambanova/sambanova.py +28 -0
- agno/models/siliconflow/__init__.py +5 -0
- agno/models/siliconflow/siliconflow.py +25 -0
- agno/models/together/__init__.py +5 -0
- agno/models/together/together.py +25 -0
- agno/models/utils.py +266 -0
- agno/models/vercel/__init__.py +3 -0
- agno/models/vercel/v0.py +26 -0
- agno/models/vertexai/__init__.py +0 -0
- agno/models/vertexai/claude.py +70 -0
- agno/models/vllm/__init__.py +3 -0
- agno/models/vllm/vllm.py +78 -0
- agno/models/xai/__init__.py +3 -0
- agno/models/xai/xai.py +113 -0
- agno/os/__init__.py +3 -0
- agno/os/app.py +876 -0
- agno/os/auth.py +57 -0
- agno/os/config.py +104 -0
- agno/os/interfaces/__init__.py +1 -0
- agno/os/interfaces/a2a/__init__.py +3 -0
- agno/os/interfaces/a2a/a2a.py +42 -0
- agno/os/interfaces/a2a/router.py +250 -0
- agno/os/interfaces/a2a/utils.py +924 -0
- agno/os/interfaces/agui/__init__.py +3 -0
- agno/os/interfaces/agui/agui.py +47 -0
- agno/os/interfaces/agui/router.py +144 -0
- agno/os/interfaces/agui/utils.py +534 -0
- agno/os/interfaces/base.py +25 -0
- agno/os/interfaces/slack/__init__.py +3 -0
- agno/os/interfaces/slack/router.py +148 -0
- agno/os/interfaces/slack/security.py +30 -0
- agno/os/interfaces/slack/slack.py +47 -0
- agno/os/interfaces/whatsapp/__init__.py +3 -0
- agno/os/interfaces/whatsapp/router.py +211 -0
- agno/os/interfaces/whatsapp/security.py +53 -0
- agno/os/interfaces/whatsapp/whatsapp.py +36 -0
- agno/os/mcp.py +292 -0
- agno/os/middleware/__init__.py +7 -0
- agno/os/middleware/jwt.py +233 -0
- agno/os/router.py +1763 -0
- agno/os/routers/__init__.py +3 -0
- agno/os/routers/evals/__init__.py +3 -0
- agno/os/routers/evals/evals.py +430 -0
- agno/os/routers/evals/schemas.py +142 -0
- agno/os/routers/evals/utils.py +162 -0
- agno/os/routers/health.py +31 -0
- agno/os/routers/home.py +52 -0
- agno/os/routers/knowledge/__init__.py +3 -0
- agno/os/routers/knowledge/knowledge.py +997 -0
- agno/os/routers/knowledge/schemas.py +178 -0
- agno/os/routers/memory/__init__.py +3 -0
- agno/os/routers/memory/memory.py +515 -0
- agno/os/routers/memory/schemas.py +62 -0
- agno/os/routers/metrics/__init__.py +3 -0
- agno/os/routers/metrics/metrics.py +190 -0
- agno/os/routers/metrics/schemas.py +47 -0
- agno/os/routers/session/__init__.py +3 -0
- agno/os/routers/session/session.py +997 -0
- agno/os/schema.py +1055 -0
- agno/os/settings.py +43 -0
- agno/os/utils.py +630 -0
- agno/py.typed +0 -0
- agno/reasoning/__init__.py +0 -0
- agno/reasoning/anthropic.py +80 -0
- agno/reasoning/azure_ai_foundry.py +67 -0
- agno/reasoning/deepseek.py +63 -0
- agno/reasoning/default.py +97 -0
- agno/reasoning/gemini.py +73 -0
- agno/reasoning/groq.py +71 -0
- agno/reasoning/helpers.py +63 -0
- agno/reasoning/ollama.py +67 -0
- agno/reasoning/openai.py +86 -0
- agno/reasoning/step.py +31 -0
- agno/reasoning/vertexai.py +76 -0
- agno/run/__init__.py +6 -0
- agno/run/agent.py +787 -0
- agno/run/base.py +229 -0
- agno/run/cancel.py +81 -0
- agno/run/messages.py +32 -0
- agno/run/team.py +753 -0
- agno/run/workflow.py +708 -0
- agno/session/__init__.py +10 -0
- agno/session/agent.py +295 -0
- agno/session/summary.py +265 -0
- agno/session/team.py +392 -0
- agno/session/workflow.py +205 -0
- agno/team/__init__.py +37 -0
- agno/team/team.py +8793 -0
- agno/tools/__init__.py +10 -0
- agno/tools/agentql.py +120 -0
- agno/tools/airflow.py +69 -0
- agno/tools/api.py +122 -0
- agno/tools/apify.py +314 -0
- agno/tools/arxiv.py +127 -0
- agno/tools/aws_lambda.py +53 -0
- agno/tools/aws_ses.py +66 -0
- agno/tools/baidusearch.py +89 -0
- agno/tools/bitbucket.py +292 -0
- agno/tools/brandfetch.py +213 -0
- agno/tools/bravesearch.py +106 -0
- agno/tools/brightdata.py +367 -0
- agno/tools/browserbase.py +209 -0
- agno/tools/calcom.py +255 -0
- agno/tools/calculator.py +151 -0
- agno/tools/cartesia.py +187 -0
- agno/tools/clickup.py +244 -0
- agno/tools/confluence.py +240 -0
- agno/tools/crawl4ai.py +158 -0
- agno/tools/csv_toolkit.py +185 -0
- agno/tools/dalle.py +110 -0
- agno/tools/daytona.py +475 -0
- agno/tools/decorator.py +262 -0
- agno/tools/desi_vocal.py +108 -0
- agno/tools/discord.py +161 -0
- agno/tools/docker.py +716 -0
- agno/tools/duckdb.py +379 -0
- agno/tools/duckduckgo.py +91 -0
- agno/tools/e2b.py +703 -0
- agno/tools/eleven_labs.py +196 -0
- agno/tools/email.py +67 -0
- agno/tools/evm.py +129 -0
- agno/tools/exa.py +396 -0
- agno/tools/fal.py +127 -0
- agno/tools/file.py +240 -0
- agno/tools/file_generation.py +350 -0
- agno/tools/financial_datasets.py +288 -0
- agno/tools/firecrawl.py +143 -0
- agno/tools/function.py +1187 -0
- agno/tools/giphy.py +93 -0
- agno/tools/github.py +1760 -0
- agno/tools/gmail.py +922 -0
- agno/tools/google_bigquery.py +117 -0
- agno/tools/google_drive.py +270 -0
- agno/tools/google_maps.py +253 -0
- agno/tools/googlecalendar.py +674 -0
- agno/tools/googlesearch.py +98 -0
- agno/tools/googlesheets.py +377 -0
- agno/tools/hackernews.py +77 -0
- agno/tools/jina.py +101 -0
- agno/tools/jira.py +170 -0
- agno/tools/knowledge.py +218 -0
- agno/tools/linear.py +426 -0
- agno/tools/linkup.py +58 -0
- agno/tools/local_file_system.py +90 -0
- agno/tools/lumalab.py +183 -0
- agno/tools/mcp/__init__.py +10 -0
- agno/tools/mcp/mcp.py +331 -0
- agno/tools/mcp/multi_mcp.py +347 -0
- agno/tools/mcp/params.py +24 -0
- agno/tools/mcp_toolbox.py +284 -0
- agno/tools/mem0.py +193 -0
- agno/tools/memori.py +339 -0
- agno/tools/memory.py +419 -0
- agno/tools/mlx_transcribe.py +139 -0
- agno/tools/models/__init__.py +0 -0
- agno/tools/models/azure_openai.py +190 -0
- agno/tools/models/gemini.py +203 -0
- agno/tools/models/groq.py +158 -0
- agno/tools/models/morph.py +186 -0
- agno/tools/models/nebius.py +124 -0
- agno/tools/models_labs.py +195 -0
- agno/tools/moviepy_video.py +349 -0
- agno/tools/neo4j.py +134 -0
- agno/tools/newspaper.py +46 -0
- agno/tools/newspaper4k.py +93 -0
- agno/tools/notion.py +204 -0
- agno/tools/openai.py +202 -0
- agno/tools/openbb.py +160 -0
- agno/tools/opencv.py +321 -0
- agno/tools/openweather.py +233 -0
- agno/tools/oxylabs.py +385 -0
- agno/tools/pandas.py +102 -0
- agno/tools/parallel.py +314 -0
- agno/tools/postgres.py +257 -0
- agno/tools/pubmed.py +188 -0
- agno/tools/python.py +205 -0
- agno/tools/reasoning.py +283 -0
- agno/tools/reddit.py +467 -0
- agno/tools/replicate.py +117 -0
- agno/tools/resend.py +62 -0
- agno/tools/scrapegraph.py +222 -0
- agno/tools/searxng.py +152 -0
- agno/tools/serpapi.py +116 -0
- agno/tools/serper.py +255 -0
- agno/tools/shell.py +53 -0
- agno/tools/slack.py +136 -0
- agno/tools/sleep.py +20 -0
- agno/tools/spider.py +116 -0
- agno/tools/sql.py +154 -0
- agno/tools/streamlit/__init__.py +0 -0
- agno/tools/streamlit/components.py +113 -0
- agno/tools/tavily.py +254 -0
- agno/tools/telegram.py +48 -0
- agno/tools/todoist.py +218 -0
- agno/tools/tool_registry.py +1 -0
- agno/tools/toolkit.py +146 -0
- agno/tools/trafilatura.py +388 -0
- agno/tools/trello.py +274 -0
- agno/tools/twilio.py +186 -0
- agno/tools/user_control_flow.py +78 -0
- agno/tools/valyu.py +228 -0
- agno/tools/visualization.py +467 -0
- agno/tools/webbrowser.py +28 -0
- agno/tools/webex.py +76 -0
- agno/tools/website.py +54 -0
- agno/tools/webtools.py +45 -0
- agno/tools/whatsapp.py +286 -0
- agno/tools/wikipedia.py +63 -0
- agno/tools/workflow.py +278 -0
- agno/tools/x.py +335 -0
- agno/tools/yfinance.py +257 -0
- agno/tools/youtube.py +184 -0
- agno/tools/zendesk.py +82 -0
- agno/tools/zep.py +454 -0
- agno/tools/zoom.py +382 -0
- agno/utils/__init__.py +0 -0
- agno/utils/agent.py +820 -0
- agno/utils/audio.py +49 -0
- agno/utils/certs.py +27 -0
- agno/utils/code_execution.py +11 -0
- agno/utils/common.py +132 -0
- agno/utils/dttm.py +13 -0
- agno/utils/enum.py +22 -0
- agno/utils/env.py +11 -0
- agno/utils/events.py +696 -0
- agno/utils/format_str.py +16 -0
- agno/utils/functions.py +166 -0
- agno/utils/gemini.py +426 -0
- agno/utils/hooks.py +57 -0
- agno/utils/http.py +74 -0
- agno/utils/json_schema.py +234 -0
- agno/utils/knowledge.py +36 -0
- agno/utils/location.py +19 -0
- agno/utils/log.py +255 -0
- agno/utils/mcp.py +214 -0
- agno/utils/media.py +352 -0
- agno/utils/merge_dict.py +41 -0
- agno/utils/message.py +118 -0
- agno/utils/models/__init__.py +0 -0
- agno/utils/models/ai_foundry.py +43 -0
- agno/utils/models/claude.py +358 -0
- agno/utils/models/cohere.py +87 -0
- agno/utils/models/llama.py +78 -0
- agno/utils/models/mistral.py +98 -0
- agno/utils/models/openai_responses.py +140 -0
- agno/utils/models/schema_utils.py +153 -0
- agno/utils/models/watsonx.py +41 -0
- agno/utils/openai.py +257 -0
- agno/utils/pickle.py +32 -0
- agno/utils/pprint.py +178 -0
- agno/utils/print_response/__init__.py +0 -0
- agno/utils/print_response/agent.py +842 -0
- agno/utils/print_response/team.py +1724 -0
- agno/utils/print_response/workflow.py +1668 -0
- agno/utils/prompts.py +111 -0
- agno/utils/reasoning.py +108 -0
- agno/utils/response.py +163 -0
- agno/utils/response_iterator.py +17 -0
- agno/utils/safe_formatter.py +24 -0
- agno/utils/serialize.py +32 -0
- agno/utils/shell.py +22 -0
- agno/utils/streamlit.py +487 -0
- agno/utils/string.py +231 -0
- agno/utils/team.py +139 -0
- agno/utils/timer.py +41 -0
- agno/utils/tools.py +102 -0
- agno/utils/web.py +23 -0
- agno/utils/whatsapp.py +305 -0
- agno/utils/yaml_io.py +25 -0
- agno/vectordb/__init__.py +3 -0
- agno/vectordb/base.py +127 -0
- agno/vectordb/cassandra/__init__.py +5 -0
- agno/vectordb/cassandra/cassandra.py +501 -0
- agno/vectordb/cassandra/extra_param_mixin.py +11 -0
- agno/vectordb/cassandra/index.py +13 -0
- agno/vectordb/chroma/__init__.py +5 -0
- agno/vectordb/chroma/chromadb.py +929 -0
- agno/vectordb/clickhouse/__init__.py +9 -0
- agno/vectordb/clickhouse/clickhousedb.py +835 -0
- agno/vectordb/clickhouse/index.py +9 -0
- agno/vectordb/couchbase/__init__.py +3 -0
- agno/vectordb/couchbase/couchbase.py +1442 -0
- agno/vectordb/distance.py +7 -0
- agno/vectordb/lancedb/__init__.py +6 -0
- agno/vectordb/lancedb/lance_db.py +995 -0
- agno/vectordb/langchaindb/__init__.py +5 -0
- agno/vectordb/langchaindb/langchaindb.py +163 -0
- agno/vectordb/lightrag/__init__.py +5 -0
- agno/vectordb/lightrag/lightrag.py +388 -0
- agno/vectordb/llamaindex/__init__.py +3 -0
- agno/vectordb/llamaindex/llamaindexdb.py +166 -0
- agno/vectordb/milvus/__init__.py +4 -0
- agno/vectordb/milvus/milvus.py +1182 -0
- agno/vectordb/mongodb/__init__.py +9 -0
- agno/vectordb/mongodb/mongodb.py +1417 -0
- agno/vectordb/pgvector/__init__.py +12 -0
- agno/vectordb/pgvector/index.py +23 -0
- agno/vectordb/pgvector/pgvector.py +1462 -0
- agno/vectordb/pineconedb/__init__.py +5 -0
- agno/vectordb/pineconedb/pineconedb.py +747 -0
- agno/vectordb/qdrant/__init__.py +5 -0
- agno/vectordb/qdrant/qdrant.py +1134 -0
- agno/vectordb/redis/__init__.py +9 -0
- agno/vectordb/redis/redisdb.py +694 -0
- agno/vectordb/search.py +7 -0
- agno/vectordb/singlestore/__init__.py +10 -0
- agno/vectordb/singlestore/index.py +41 -0
- agno/vectordb/singlestore/singlestore.py +763 -0
- agno/vectordb/surrealdb/__init__.py +3 -0
- agno/vectordb/surrealdb/surrealdb.py +699 -0
- agno/vectordb/upstashdb/__init__.py +5 -0
- agno/vectordb/upstashdb/upstashdb.py +718 -0
- agno/vectordb/weaviate/__init__.py +8 -0
- agno/vectordb/weaviate/index.py +15 -0
- agno/vectordb/weaviate/weaviate.py +1005 -0
- agno/workflow/__init__.py +23 -0
- agno/workflow/agent.py +299 -0
- agno/workflow/condition.py +738 -0
- agno/workflow/loop.py +735 -0
- agno/workflow/parallel.py +824 -0
- agno/workflow/router.py +702 -0
- agno/workflow/step.py +1432 -0
- agno/workflow/steps.py +592 -0
- agno/workflow/types.py +520 -0
- agno/workflow/workflow.py +4321 -0
- agno-2.2.13.dist-info/METADATA +614 -0
- agno-2.2.13.dist-info/RECORD +575 -0
- agno-2.2.13.dist-info/WHEEL +5 -0
- agno-2.2.13.dist-info/licenses/LICENSE +201 -0
- agno-2.2.13.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,763 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import json
|
|
3
|
+
from hashlib import md5
|
|
4
|
+
from typing import Any, Dict, List, Optional, Union
|
|
5
|
+
|
|
6
|
+
try:
|
|
7
|
+
from sqlalchemy.dialects import mysql
|
|
8
|
+
from sqlalchemy.engine import Engine, create_engine
|
|
9
|
+
from sqlalchemy.inspection import inspect
|
|
10
|
+
from sqlalchemy.orm import Session, sessionmaker
|
|
11
|
+
from sqlalchemy.schema import Column, MetaData, Table
|
|
12
|
+
from sqlalchemy.sql.expression import func, select, text, update
|
|
13
|
+
from sqlalchemy.types import DateTime
|
|
14
|
+
except ImportError:
|
|
15
|
+
raise ImportError("`sqlalchemy` not installed")
|
|
16
|
+
|
|
17
|
+
from agno.filters import FilterExpr
|
|
18
|
+
from agno.knowledge.document import Document
|
|
19
|
+
from agno.knowledge.embedder import Embedder
|
|
20
|
+
from agno.knowledge.reranker.base import Reranker
|
|
21
|
+
from agno.utils.log import log_debug, log_error, log_info, log_warning
|
|
22
|
+
from agno.vectordb.base import VectorDb
|
|
23
|
+
from agno.vectordb.distance import Distance
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class SingleStore(VectorDb):
|
|
27
|
+
def __init__(
    self,
    collection: str,
    schema: Optional[str] = "ai",
    db_url: Optional[str] = None,
    db_engine: Optional[Engine] = None,
    embedder: Optional[Embedder] = None,
    distance: Distance = Distance.cosine,
    reranker: Optional[Reranker] = None,
    name: Optional[str] = None,
    description: Optional[str] = None,
    # index: Optional[Union[Ivfflat, HNSW]] = HNSW(),
):
    """
    Set up a SingleStore-backed vector store.

    Args:
        collection (str): Name of the table used for this collection.
        schema (Optional[str]): Schema passed to the SQLAlchemy MetaData.
        db_url (Optional[str]): Connection URL; an engine is created from it when `db_engine` is not given.
        db_engine (Optional[Engine]): Existing SQLAlchemy engine; takes precedence over `db_url`.
        embedder (Optional[Embedder]): Embedder for documents; OpenAIEmbedder is used when omitted.
        distance (Distance): Distance metric stored on the instance.
        reranker (Optional[Reranker]): Reranker stored on the instance.
        name (Optional[str]): Forwarded to the base class initializer.
        description (Optional[str]): Forwarded to the base class initializer.

    Raises:
        ValueError: If neither `db_url` nor `db_engine` is provided.
    """
    # An explicitly supplied engine wins; otherwise one is built from the URL.
    engine: Optional[Engine] = db_engine
    if engine is None:
        if db_url is not None:
            engine = create_engine(db_url)
    if engine is None:
        raise ValueError("Must provide either db_url or db_engine")

    self.collection: str = collection
    self.schema: Optional[str] = schema
    self.db_url: Optional[str] = db_url

    # Hand name and description to the base class
    super().__init__(name=name, description=description)

    self.db_engine: Engine = engine
    self.metadata: MetaData = MetaData(schema=self.schema)

    if embedder is None:
        # Imported lazily so the OpenAI embedder is only loaded when it is the fallback.
        from agno.knowledge.embedder.openai import OpenAIEmbedder

        embedder = OpenAIEmbedder()
        log_info("Embedder not provided, using OpenAIEmbedder as default.")
    self.embedder: Embedder = embedder
    self.dimensions: Optional[int] = self.embedder.dimensions

    self.distance: Distance = distance
    # self.index: Optional[Union[Ivfflat, HNSW]] = index
    self.Session: sessionmaker[Session] = sessionmaker(bind=self.db_engine)
    self.reranker: Optional[Reranker] = reranker
    self.table: Table = self.get_table()
|
|
68
|
+
|
|
69
|
+
def get_table(self) -> Table:
    """
    Build the SQLAlchemy table definition for this collection.

    Returns:
        Table: Table named after `self.collection`, registered on `self.metadata`.
    """
    columns = [
        Column(column_name, mysql.TEXT)
        # "embedding" is declared as TEXT here only as a placeholder for the vector column
        for column_name in ("id", "name", "meta_data", "content", "embedding", "usage")
    ]
    columns.append(Column("created_at", DateTime(timezone=True), server_default=text("now()")))
    columns.append(Column("updated_at", DateTime(timezone=True), onupdate=text("now()")))
    columns.extend(Column(column_name, mysql.TEXT) for column_name in ("content_hash", "content_id"))

    # extend_existing lets the definition be re-declared on the same MetaData.
    return Table(self.collection, self.metadata, *columns, extend_existing=True)
|
|
91
|
+
|
|
92
|
+
def create(self) -> None:
    """
    Create the table if it does not exist, then call `optimize()`.

    The table name in the DDL is prefixed with `self.schema` only when a
    schema is configured; `schema` is Optional, and interpolating it
    unconditionally would produce a name such as `None.<collection>`.
    """
    if self.table_exists():
        return

    log_info(f"Creating table: {self.collection}")
    # Qualify the name only when a schema is actually set.
    qualified_name = f"{self.schema}.{self.collection}" if self.schema else self.collection
    with self.db_engine.connect() as connection:
        connection.execute(
            text(f"""
                CREATE TABLE IF NOT EXISTS {qualified_name} (
                    id TEXT,
                    name TEXT,
                    meta_data TEXT,
                    content TEXT,
                    embedding VECTOR({self.dimensions}) NOT NULL,
                    `usage` TEXT,
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
                    content_hash TEXT,
                    content_id TEXT
                );
                """)
        )
    # Call optimize to create indexes
    self.optimize()
|
|
117
|
+
|
|
118
|
+
def table_exists(self) -> bool:
    """
    Report whether the collection's table is present in the database.

    Returns:
        bool: True if the table exists; False if it does not or if the
        check itself raised (the error is logged).
    """
    table_name = self.table.name
    log_debug(f"Checking if table exists: {table_name}")
    try:
        inspector = inspect(self.db_engine)
        found = inspector.has_table(table_name, schema=self.schema)
    except Exception as exc:
        # Any failure during inspection is logged and treated as "not found".
        log_error(exc)
        return False
    return found
|
|
131
|
+
|
|
132
|
+
def content_hash_exists(self, content_hash: str) -> bool:
    """
    Check whether any row carries the given content hash.

    Args:
        content_hash (str): Content hash to look for.

    Returns:
        bool: True if at least one matching row exists, False otherwise.
    """
    with self.Session.begin() as session:
        columns = self.table.c
        query = select(columns.name).where(columns.content_hash == content_hash)
        first_row = session.execute(query).first()
    return first_row is not None
|
|
143
|
+
|
|
144
|
+
def name_exists(self, name: str) -> bool:
    """
    Check whether any row has the given name.

    Args:
        name (str): Name to look for.

    Returns:
        bool: True if at least one matching row exists, False otherwise.
    """
    with self.Session.begin() as session:
        name_column = self.table.c.name
        query = select(name_column).where(name_column == name)
        first_row = session.execute(query).first()
    return first_row is not None
|
|
155
|
+
|
|
156
|
+
def id_exists(self, id: str) -> bool:
    """
    Check whether any row has the given id.

    Args:
        id (str): Id to look for.

    Returns:
        bool: True if at least one matching row exists, False otherwise.
    """
    with self.Session.begin() as session:
        id_column = self.table.c.id
        query = select(id_column).where(id_column == id)
        first_row = session.execute(query).first()
    return first_row is not None
|
|
167
|
+
|
|
168
|
+
def insert(
    self,
    content_hash: str,
    documents: List[Document],
    filters: Optional[Dict[str, Any]] = None,
    batch_size: int = 10,
) -> None:
    """
    Embed the given documents and insert them as rows in a single transaction.

    Args:
        content_hash (str): Value stored in the `content_hash` column of every inserted row.
        documents (List[Document]): Documents to insert.
        filters (Optional[Dict[str, Any]]): Accepted but not used by this method.
        batch_size (int): Accepted but not used by this method.
    """
    with self.Session.begin() as session:
        inserted = 0
        for doc in documents:
            doc.embed(embedder=self.embedder)

            # NUL characters are replaced with U+FFFD before the content is hashed and stored.
            content = doc.content.replace("\x00", "\ufffd")
            fallback_id = md5(content.encode()).hexdigest()

            meta_data_json = json.dumps(doc.meta_data)
            usage_json = json.dumps(doc.usage)

            # Convert embedding list to SingleStore VECTOR format
            vector = None
            if doc.embedding:
                vector = "[" + ",".join(str(component) for component in doc.embedding) + "]"

            row = {
                # The document's own id is preferred; the content hash is the fallback.
                "id": doc.id or fallback_id,
                "name": doc.name,
                "meta_data": meta_data_json,
                "content": content,
                "embedding": vector,
                "usage": usage_json,
                "content_hash": content_hash,
                "content_id": doc.content_id,
            }
            session.execute(mysql.insert(self.table).values(**row))
            inserted += 1
            log_debug(f"Inserted document: {doc.name} ({doc.meta_data})")

        session.commit()
        log_debug(f"Committed {inserted} documents")
|
|
213
|
+
|
|
214
|
+
def upsert_available(self) -> bool:
    """Report whether upsert is supported by this vector store (always True)."""
    supported = True
    return supported
|
|
217
|
+
|
|
218
|
+
def upsert(
    self,
    content_hash: str,
    documents: List[Document],
    filters: Optional[Dict[str, Any]] = None,
    batch_size: int = 20,
) -> None:
    """
    Upsert documents under a content hash.

    If rows with `content_hash` already exist, `_delete_by_content_hash` is
    called first; the documents are then handed to `_upsert`.

    Args:
        content_hash (str): Content hash the documents are stored under.
        documents (List[Document]): Documents to upsert.
        filters (Optional[Dict[str, Any]]): Forwarded to `_upsert`.
        batch_size (int): Forwarded to `_upsert`.
    """
    already_stored = self.content_hash_exists(content_hash)
    if already_stored:
        self._delete_by_content_hash(content_hash)

    self._upsert(
        content_hash=content_hash,
        documents=documents,
        filters=filters,
        batch_size=batch_size,
    )
|
|
228
|
+
|
|
229
|
+
def _upsert(
    self,
    content_hash: str,
    documents: List[Document],
    filters: Optional[Dict[str, Any]] = None,
    batch_size: int = 20,
) -> None:
    """
    Embed each document and write it with INSERT ... ON DUPLICATE KEY UPDATE.

    All documents are written inside a single session transaction.

    Args:
        content_hash (str): Value stored in the ``content_hash`` column of every row.
        documents (List[Document]): Documents to embed and upsert.
        filters (Optional[Dict[str, Any]]): Accepted but not used by this method.
        batch_size (int): Accepted but not used by this method.
    """
    with self.Session.begin() as sess:
        counter = 0
        for counter, document in enumerate(documents, start=1):
            document.embed(embedder=self.embedder)

            # NUL characters are replaced with U+FFFD before the content is stored.
            cleaned_content = document.content.replace("\x00", "\ufffd")
            content_digest = md5(cleaned_content.encode()).hexdigest()
            # An explicit document id wins; otherwise the MD5 of the cleaned content is the key.
            row_id = document.id or content_digest

            meta_data_json = json.dumps(document.meta_data)
            usage_json = json.dumps(document.usage)

            # Convert embedding list to SingleStore VECTOR format
            vector_literal = None
            if document.embedding:
                vector_literal = "[" + ",".join(str(component) for component in document.embedding) + "]"

            # Every column except the id is both inserted and, on a key clash, overwritten.
            payload = {
                "name": document.name,
                "meta_data": meta_data_json,
                "content": cleaned_content,
                "embedding": vector_literal,
                "usage": usage_json,
                "content_hash": content_hash,
                "content_id": document.content_id,
            }
            insert_stmt = mysql.insert(self.table).values(id=row_id, **payload)
            sess.execute(insert_stmt.on_duplicate_key_update(**payload))
            log_debug(f"Upserted document: {document.name} ({document.meta_data})")

        sess.commit()
        log_debug(f"Committed {counter} documents")
|
|
285
|
+
|
|
286
|
+
def search(
    self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
) -> List[Document]:
    """
    Return the stored documents ranked closest to ``query``.

    The query is embedded with ``self.embedder`` and rows are ordered according to
    ``self.distance``:

    * ``Distance.l2``: ascending Euclidean distance.
    * ``Distance.cosine`` / ``Distance.max_inner_product``: descending dot product.

    Any other distance value leaves the rows unordered.

    Args:
        query (str): The search query.
        limit (int): The maximum number of results to return.
        filters (Optional[Union[Dict[str, Any], List[FilterExpr]]]): Not supported;
            a warning is logged and the value is ignored.

    Returns:
        List[Document]: Matching documents (reranked when ``self.reranker`` is set),
        or an empty list when the query could not be embedded.
    """
    if filters is not None:
        log_warning("Filters are not supported in SingleStore. No filters will be applied.")

    query_embedding = self.embedder.get_embedding(query)
    if query_embedding is None:
        log_error(f"Error getting embedding for Query: {query}")
        return []

    stmt = select(
        self.table.c.name,
        self.table.c.meta_data,
        self.table.c.content,
        self.table.c.embedding,
        self.table.c.usage,
        self.table.c.content_id,
    )

    # Rank with SingleStore's vector functions; the query vector is bound as a JSON array.
    ranking = None
    if self.distance == Distance.l2:
        # Smaller Euclidean distance means a closer match, hence ascending order.
        ranking = func.euclidean_distance(self.table.c.embedding, text(":embedding")).asc()
    elif self.distance in (Distance.cosine, Distance.max_inner_product):
        # NOTE(review): dot product ranks like cosine similarity only when the stored
        # embeddings are unit-length; confirm the embedder normalizes its output.
        ranking = func.dot_product(self.table.c.embedding, text(":embedding")).desc()

    if ranking is not None:
        stmt = stmt.order_by(ranking).params(embedding=json.dumps(query_embedding))

    stmt = stmt.limit(limit=limit)
    log_debug(f"Query: {stmt}")

    # Get neighbors
    with self.Session.begin() as sess:
        # Ask the server to project VECTOR columns as JSON so they can be parsed below.
        sess.execute(text("SET vector_type_project_format = JSON"))
        neighbors = sess.execute(stmt).fetchall() or []

    # Build search results
    search_results: List[Document] = []
    for neighbor in neighbors:
        meta_data_dict = json.loads(neighbor.meta_data) if neighbor.meta_data else {}
        usage_dict = json.loads(neighbor.usage) if neighbor.usage else {}

        # Convert SingleStore VECTOR type to list; an unparsable value degrades to [].
        embedding_list = []
        if neighbor.embedding:
            try:
                embedding_list = json.loads(neighbor.embedding)
            except Exception as e:
                log_error(f"Error extracting vector: {e}")
                embedding_list = []

        search_results.append(
            Document(
                name=neighbor.name,
                meta_data=meta_data_dict,
                content=neighbor.content,
                embedder=self.embedder,
                embedding=embedding_list,
                usage=usage_dict,
            )
        )

    if self.reranker:
        search_results = self.reranker.rerank(query=query, documents=search_results)

    return search_results
|
|
382
|
+
|
|
383
|
+
def drop(self) -> None:
    """Drop the backing table when it exists; otherwise do nothing."""
    if not self.table_exists():
        return

    log_debug(f"Deleting table: {self.collection}")
    self.table.drop(self.db_engine)
|
|
390
|
+
|
|
391
|
+
def exists(self) -> bool:
    """
    Tell whether the backing table exists.

    Returns:
        bool: The result of ``self.table_exists()``.
    """
    table_present = self.table_exists()
    return table_present
|
|
399
|
+
|
|
400
|
+
def get_count(self) -> int:
    """
    Count the stored rows.

    The query is ``COUNT(name)``, i.e. it counts rows whose ``name`` column is set.

    Returns:
        int: The count, or 0 when the query yields no value.
    """
    with self.Session.begin() as sess:
        count_stmt = select(func.count(self.table.c.name)).select_from(self.table)
        row_count = sess.execute(count_stmt).scalar()
        return 0 if row_count is None else int(row_count)
|
|
413
|
+
|
|
414
|
+
def optimize(self) -> None:
    """No-op: this method performs no optimization work."""
    return None
|
|
416
|
+
|
|
417
|
+
def delete(self) -> bool:
    """
    Remove every row from the table (the table itself is kept).

    Returns:
        bool: Always True when the statement runs; database errors propagate
        to the caller instead of being turned into False.
    """
    from sqlalchemy import delete as sa_delete

    with self.Session.begin() as sess:
        # A DELETE without a WHERE clause clears the whole table.
        sess.execute(sa_delete(self.table))
        return True
|
|
430
|
+
|
|
431
|
+
def delete_by_id(self, id: str) -> bool:
    """
    Delete the rows whose ``id`` column equals ``id``.

    Returns:
        bool: True when at least one row was deleted; False when nothing matched
        or when an error occurred (the error is logged, not raised).
    """
    from sqlalchemy import delete as sa_delete

    try:
        with self.Session.begin() as sess:
            outcome = sess.execute(sa_delete(self.table).where(self.table.c.id == id))  # type: ignore
            log_info(f"Deleted {outcome.rowcount} records with ID {id} from table '{self.table.name}'.")  # type: ignore
            any_removed = outcome.rowcount > 0  # type: ignore
            return any_removed
    except Exception as e:
        log_error(f"Error deleting document with ID {id}: {e}")
        return False
|
|
446
|
+
|
|
447
|
+
def delete_by_content_id(self, content_id: str) -> bool:
    """
    Delete the rows whose ``content_id`` column equals ``content_id``.

    Returns:
        bool: True when at least one row was deleted; False when nothing matched
        or when an error occurred (the error is logged, not raised).
    """
    from sqlalchemy import delete as sa_delete

    try:
        with self.Session.begin() as sess:
            matches_content = self.table.c.content_id == content_id
            outcome = sess.execute(sa_delete(self.table).where(matches_content))  # type: ignore
            summary = f"Deleted {outcome.rowcount} records with content_id {content_id} from table '{self.table.name}'."  # type: ignore
            log_info(summary)
            any_removed = outcome.rowcount > 0  # type: ignore
            return any_removed
    except Exception as e:
        log_error(f"Error deleting document with content_id {content_id}: {e}")
        return False
|
|
464
|
+
|
|
465
|
+
def delete_by_name(self, name: str) -> bool:
    """
    Delete the rows whose ``name`` column equals ``name``.

    Returns:
        bool: True when at least one row was deleted; False when nothing matched
        or when an error occurred (the error is logged, not raised).
    """
    from sqlalchemy import delete as sa_delete

    try:
        with self.Session.begin() as sess:
            outcome = sess.execute(sa_delete(self.table).where(self.table.c.name == name))  # type: ignore
            log_info(f"Deleted {outcome.rowcount} records with name '{name}' from table '{self.table.name}'.")  # type: ignore
            any_removed = outcome.rowcount > 0  # type: ignore
            return any_removed
    except Exception as e:
        log_error(f"Error deleting document with name {name}: {e}")
        return False
|
|
480
|
+
|
|
481
|
+
def delete_by_metadata(self, metadata: Dict[str, Any]) -> bool:
    """
    Delete the rows whose stored metadata equals ``metadata``.

    The ``meta_data`` column holds a JSON string, so matching is an exact string
    comparison. Rows are written with ``json.dumps(document.meta_data)`` (keys in
    insertion order), so both the key-sorted and the insertion-order serialization
    of ``metadata`` are tried; comparing only the sorted form would miss rows whose
    keys were stored unsorted.

    Args:
        metadata (Dict[str, Any]): Metadata the rows must match exactly.

    Returns:
        bool: True when at least one row was deleted; False when nothing matched
        or when an error occurred (the error is logged, not raised).
    """
    from sqlalchemy import delete

    try:
        with self.Session.begin() as sess:
            # dict.fromkeys de-duplicates while keeping a stable order, so an
            # already-sorted dict produces a single candidate.
            candidates = list(
                dict.fromkeys(
                    [
                        json.dumps(metadata, sort_keys=True),
                        json.dumps(metadata),
                    ]
                )
            )
            stmt = delete(self.table).where(self.table.c.meta_data.in_(candidates))
            result = sess.execute(stmt)  # type: ignore
            log_info(f"Deleted {result.rowcount} records with metadata {metadata} from table '{self.table.name}'.")  # type: ignore
            return result.rowcount > 0  # type: ignore
    except Exception as e:
        log_error(f"Error deleting documents with metadata {metadata}: {e}")
        return False
|
|
498
|
+
|
|
499
|
+
async def async_create(self) -> None:
    """Unsupported: always raises ``NotImplementedError``."""
    backend_name = self.__class__.__name__
    raise NotImplementedError(f"Async not supported on {backend_name}.")
|
|
501
|
+
|
|
502
|
+
async def async_insert(
    self,
    content_hash: str,
    documents: List[Document],
    filters: Optional[Dict[str, Any]] = None,
) -> None:
    """
    Embed ``documents`` asynchronously and insert them as new rows.

    Embedding uses one batch call when ``self.embedder.enable_batch`` is set and the
    embedder exposes ``async_get_embeddings_batch_and_usage``; otherwise each
    document's ``async_embed`` is awaited concurrently. A failed batch call falls
    back to per-document embedding unless the error text looks like a rate limit,
    in which case the error is re-raised. Per-document embedding failures are
    logged and the document is still inserted with whatever embedding it has.

    The database write uses the synchronous session and is not awaited.

    Args:
        content_hash (str): Value stored in the ``content_hash`` column of every row.
        documents (List[Document]): Documents to embed and insert.
        filters (Optional[Dict[str, Any]]): Accepted but not used by this method.

    Raises:
        Exception: The batch embedding error, when it is classified as a rate limit.
    """
    use_batch = self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage")
    embed_individually = not use_batch

    if use_batch:
        # Use batch embedding when enabled and supported
        try:
            doc_contents = [doc.content for doc in documents]
            embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)

            # Attach the pre-computed embeddings; documents beyond the returned
            # list are left untouched.
            for j, doc in enumerate(documents):
                try:
                    if j < len(embeddings):
                        doc.embedding = embeddings[j]
                        doc.usage = usages[j] if j < len(usages) else None
                except Exception as e:
                    log_error(f"Error assigning batch embedding to document '{doc.name}': {e}")

        except Exception as e:
            # Check if this is a rate limit error - don't fall back as it would make things worse
            error_str = str(e).lower()
            is_rate_limit = any(
                phrase in error_str
                for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
            )
            if is_rate_limit:
                log_error(f"Rate limit detected during batch embedding. {e}")
                raise

            log_error(f"Async batch embedding failed, falling back to individual embeddings: {e}")
            embed_individually = True

    if embed_individually:
        embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
        # return_exceptions=True keeps one failure from aborting the others; the
        # collected exceptions are logged here instead of being silently dropped.
        outcomes = await asyncio.gather(*embed_tasks, return_exceptions=True)
        for document, outcome in zip(documents, outcomes):
            if isinstance(outcome, BaseException):
                log_error(f"Error embedding document '{document.name}': {outcome}")

    with self.Session.begin() as sess:
        counter = 0
        for document in documents:
            # NUL characters are replaced with U+FFFD before the content is stored.
            cleaned_content = document.content.replace("\x00", "\ufffd")
            record_id = md5(cleaned_content.encode()).hexdigest()
            # An explicit document id wins; otherwise the MD5 of the cleaned content is the key.
            _id = document.id or record_id

            meta_data_json = json.dumps(document.meta_data)
            usage_json = json.dumps(document.usage)

            # Convert embedding list to SingleStore VECTOR format
            embeddings = f"[{','.join(map(str, document.embedding))}]" if document.embedding else None

            stmt = mysql.insert(self.table).values(
                id=_id,
                name=document.name,
                meta_data=meta_data_json,
                content=cleaned_content,
                embedding=embeddings,
                usage=usage_json,
                content_hash=content_hash,
                content_id=document.content_id,
            )
            sess.execute(stmt)
            counter += 1
            log_debug(f"Inserted document: {document.name} ({document.meta_data})")

        sess.commit()
        log_debug(f"Committed {counter} documents")
|
|
576
|
+
|
|
577
|
+
async def async_upsert(
    self,
    content_hash: str,
    documents: List[Document],
    filters: Optional[Dict[str, Any]] = None,
) -> None:
    """
    Embed ``documents`` asynchronously and upsert (insert or update) them.

    Embedding uses one batch call when ``self.embedder.enable_batch`` is set and the
    embedder exposes ``async_get_embeddings_batch_and_usage``; otherwise each
    document's ``async_embed`` is awaited concurrently. A failed batch call falls
    back to per-document embedding unless the error text looks like a rate limit,
    in which case the error is re-raised. Per-document embedding failures are
    logged and the document is still written with whatever embedding it has.

    Rows are written with INSERT ... ON DUPLICATE KEY UPDATE through the
    synchronous session; the write is not awaited.

    Args:
        content_hash (str): Value stored in the ``content_hash`` column of every row.
        documents (List[Document]): List of documents to upsert.
        filters (Optional[Dict[str, Any]]): Accepted but not used by this method.

    Raises:
        Exception: The batch embedding error, when it is classified as a rate limit.
    """
    use_batch = self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage")
    embed_individually = not use_batch

    if use_batch:
        # Use batch embedding when enabled and supported
        try:
            doc_contents = [doc.content for doc in documents]
            embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)

            # Attach the pre-computed embeddings; documents beyond the returned
            # list are left untouched.
            for j, doc in enumerate(documents):
                try:
                    if j < len(embeddings):
                        doc.embedding = embeddings[j]
                        doc.usage = usages[j] if j < len(usages) else None
                except Exception as e:
                    log_error(f"Error assigning batch embedding to document '{doc.name}': {e}")

        except Exception as e:
            # Check if this is a rate limit error - don't fall back as it would make things worse
            error_str = str(e).lower()
            is_rate_limit = any(
                phrase in error_str
                for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
            )
            if is_rate_limit:
                log_error(f"Rate limit detected during batch embedding. {e}")
                raise

            log_error(f"Async batch embedding failed, falling back to individual embeddings: {e}")
            embed_individually = True

    if embed_individually:
        embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
        # return_exceptions=True keeps one failure from aborting the others; the
        # collected exceptions are logged here instead of being silently dropped.
        outcomes = await asyncio.gather(*embed_tasks, return_exceptions=True)
        for document, outcome in zip(documents, outcomes):
            if isinstance(outcome, BaseException):
                log_error(f"Error embedding document '{document.name}': {outcome}")

    with self.Session.begin() as sess:
        counter = 0
        for document in documents:
            # NUL characters are replaced with U+FFFD before the content is stored.
            cleaned_content = document.content.replace("\x00", "\ufffd")
            record_id = md5(cleaned_content.encode()).hexdigest()
            # An explicit document id wins; otherwise the MD5 of the cleaned content is the key.
            _id = document.id or record_id

            meta_data_json = json.dumps(document.meta_data)
            usage_json = json.dumps(document.usage)

            # Convert embedding list to SingleStore VECTOR format
            embeddings = f"[{','.join(map(str, document.embedding))}]" if document.embedding else None
            stmt = (
                mysql.insert(self.table)
                .values(
                    id=_id,
                    name=document.name,
                    meta_data=meta_data_json,
                    content=cleaned_content,
                    embedding=embeddings,
                    usage=usage_json,
                    content_hash=content_hash,
                    content_id=document.content_id,
                )
                .on_duplicate_key_update(
                    name=document.name,
                    meta_data=meta_data_json,
                    content=cleaned_content,
                    embedding=embeddings,
                    usage=usage_json,
                    content_hash=content_hash,
                    content_id=document.content_id,
                )
            )
            sess.execute(stmt)
            counter += 1
            log_debug(f"Upserted document: {document.name} ({document.meta_data})")

        sess.commit()
        log_debug(f"Committed {counter} documents")
|
|
671
|
+
|
|
672
|
+
async def async_search(
    self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
) -> List[Document]:
    """
    Async-callable wrapper around ``search``.

    The call is delegated directly to the synchronous ``search`` method with the
    same arguments; nothing is awaited.

    Returns:
        List[Document]: Whatever ``search`` returns.
    """
    matches = self.search(query=query, limit=limit, filters=filters)
    return matches
|
|
676
|
+
|
|
677
|
+
async def async_drop(self) -> None:
    """Unsupported: always raises ``NotImplementedError``."""
    backend_name = self.__class__.__name__
    raise NotImplementedError(f"Async not supported on {backend_name}.")
|
|
679
|
+
|
|
680
|
+
async def async_exists(self) -> bool:
    """Unsupported: always raises ``NotImplementedError``."""
    backend_name = self.__class__.__name__
    raise NotImplementedError(f"Async not supported on {backend_name}.")
|
|
682
|
+
|
|
683
|
+
async def async_name_exists(self, name: str) -> bool:
    """Unsupported: always raises ``NotImplementedError``; ``name`` is not inspected."""
    backend_name = self.__class__.__name__
    raise NotImplementedError(f"Async not supported on {backend_name}.")
|
|
685
|
+
|
|
686
|
+
def _delete_by_content_hash(self, content_hash: str) -> bool:
    """
    Delete every row stored under ``content_hash``.

    Args:
        content_hash (str): The content hash whose rows should be removed.

    Returns:
        bool: True when at least one row was deleted; False when nothing matched
        or when an error occurred (the error is logged, not raised).
    """
    from sqlalchemy import delete as sa_delete

    try:
        with self.Session.begin() as sess:
            matches_hash = self.table.c.content_hash == content_hash
            outcome = sess.execute(sa_delete(self.table).where(matches_hash))  # type: ignore
            summary = f"Deleted {outcome.rowcount} records with content_hash '{content_hash}' from table '{self.table.name}'."  # type: ignore
            log_info(summary)
            any_removed = outcome.rowcount > 0  # type: ignore
            return any_removed
    except Exception as e:
        log_error(f"Error deleting documents with content_hash {content_hash}: {e}")
        return False
|
|
709
|
+
|
|
710
|
+
def update_metadata(self, content_id: str, metadata: Dict[str, Any]) -> None:
    """
    Merge ``metadata`` into the stored metadata of every row with ``content_id``.

    For each matching row the stored JSON metadata is parsed, overlaid with
    ``metadata`` at the top level, and overlaid again inside its ``filters`` entry
    (created when missing, replaced by ``metadata`` when it is not a dict). The
    result is written back to the ``meta_data`` column.

    Args:
        content_id (str): The content ID to update
        metadata (Dict[str, Any]): The metadata to update

    Raises:
        Exception: Any error raised while reading or writing is logged and re-raised.
    """
    import json

    try:
        with self.Session.begin() as sess:
            # Find documents with the given content_id
            lookup = select(self.table).where(self.table.c.content_id == content_id)
            matching_rows = sess.execute(lookup)  # type: ignore

            updated_count = 0
            for row in matching_rows:
                # An empty/NULL column is treated as having no metadata yet.
                stored = json.loads(row.meta_data) if row.meta_data else {}

                # Overlay the new values on a copy of the stored metadata.
                merged = stored.copy()
                merged.update(metadata)

                # Mirror the new values into the nested "filters" entry as well.
                merged.setdefault("filters", {})
                nested_filters = merged["filters"]
                if isinstance(nested_filters, dict):
                    nested_filters.update(metadata)
                else:
                    merged["filters"] = metadata

                # Update the document (only meta_data column exists)
                write_back = update(self.table).where(self.table.c.id == row.id).values(meta_data=json.dumps(merged))
                sess.execute(write_back)
                updated_count += 1

            if updated_count:
                log_debug(f"Updated metadata for {updated_count} documents with content_id: {content_id}")
            else:
                log_debug(f"No documents found with content_id: {content_id}")

    except Exception as e:
        log_error(f"Error updating metadata for content_id '{content_id}': {e}")
        raise
|
|
760
|
+
|
|
761
|
+
def get_supported_search_types(self) -> List[str]:
    """Return the search types this vector database supports; always an empty list."""
    supported: List[str] = []  # SingleStore doesn't use SearchType enum
    return supported
|