agno-2.2.13-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/__init__.py +8 -0
- agno/agent/__init__.py +51 -0
- agno/agent/agent.py +10405 -0
- agno/api/__init__.py +0 -0
- agno/api/agent.py +28 -0
- agno/api/api.py +40 -0
- agno/api/evals.py +22 -0
- agno/api/os.py +17 -0
- agno/api/routes.py +13 -0
- agno/api/schemas/__init__.py +9 -0
- agno/api/schemas/agent.py +16 -0
- agno/api/schemas/evals.py +16 -0
- agno/api/schemas/os.py +14 -0
- agno/api/schemas/response.py +6 -0
- agno/api/schemas/team.py +16 -0
- agno/api/schemas/utils.py +21 -0
- agno/api/schemas/workflows.py +16 -0
- agno/api/settings.py +53 -0
- agno/api/team.py +30 -0
- agno/api/workflow.py +28 -0
- agno/cloud/aws/base.py +214 -0
- agno/cloud/aws/s3/__init__.py +2 -0
- agno/cloud/aws/s3/api_client.py +43 -0
- agno/cloud/aws/s3/bucket.py +195 -0
- agno/cloud/aws/s3/object.py +57 -0
- agno/culture/__init__.py +3 -0
- agno/culture/manager.py +956 -0
- agno/db/__init__.py +24 -0
- agno/db/async_postgres/__init__.py +3 -0
- agno/db/base.py +598 -0
- agno/db/dynamo/__init__.py +3 -0
- agno/db/dynamo/dynamo.py +2042 -0
- agno/db/dynamo/schemas.py +314 -0
- agno/db/dynamo/utils.py +743 -0
- agno/db/firestore/__init__.py +3 -0
- agno/db/firestore/firestore.py +1795 -0
- agno/db/firestore/schemas.py +140 -0
- agno/db/firestore/utils.py +376 -0
- agno/db/gcs_json/__init__.py +3 -0
- agno/db/gcs_json/gcs_json_db.py +1335 -0
- agno/db/gcs_json/utils.py +228 -0
- agno/db/in_memory/__init__.py +3 -0
- agno/db/in_memory/in_memory_db.py +1160 -0
- agno/db/in_memory/utils.py +230 -0
- agno/db/json/__init__.py +3 -0
- agno/db/json/json_db.py +1328 -0
- agno/db/json/utils.py +230 -0
- agno/db/migrations/__init__.py +0 -0
- agno/db/migrations/v1_to_v2.py +635 -0
- agno/db/mongo/__init__.py +17 -0
- agno/db/mongo/async_mongo.py +2026 -0
- agno/db/mongo/mongo.py +1982 -0
- agno/db/mongo/schemas.py +87 -0
- agno/db/mongo/utils.py +259 -0
- agno/db/mysql/__init__.py +3 -0
- agno/db/mysql/mysql.py +2308 -0
- agno/db/mysql/schemas.py +138 -0
- agno/db/mysql/utils.py +355 -0
- agno/db/postgres/__init__.py +4 -0
- agno/db/postgres/async_postgres.py +1927 -0
- agno/db/postgres/postgres.py +2260 -0
- agno/db/postgres/schemas.py +139 -0
- agno/db/postgres/utils.py +442 -0
- agno/db/redis/__init__.py +3 -0
- agno/db/redis/redis.py +1660 -0
- agno/db/redis/schemas.py +123 -0
- agno/db/redis/utils.py +346 -0
- agno/db/schemas/__init__.py +4 -0
- agno/db/schemas/culture.py +120 -0
- agno/db/schemas/evals.py +33 -0
- agno/db/schemas/knowledge.py +40 -0
- agno/db/schemas/memory.py +46 -0
- agno/db/schemas/metrics.py +0 -0
- agno/db/singlestore/__init__.py +3 -0
- agno/db/singlestore/schemas.py +130 -0
- agno/db/singlestore/singlestore.py +2272 -0
- agno/db/singlestore/utils.py +384 -0
- agno/db/sqlite/__init__.py +4 -0
- agno/db/sqlite/async_sqlite.py +2293 -0
- agno/db/sqlite/schemas.py +133 -0
- agno/db/sqlite/sqlite.py +2288 -0
- agno/db/sqlite/utils.py +431 -0
- agno/db/surrealdb/__init__.py +3 -0
- agno/db/surrealdb/metrics.py +292 -0
- agno/db/surrealdb/models.py +309 -0
- agno/db/surrealdb/queries.py +71 -0
- agno/db/surrealdb/surrealdb.py +1353 -0
- agno/db/surrealdb/utils.py +147 -0
- agno/db/utils.py +116 -0
- agno/debug.py +18 -0
- agno/eval/__init__.py +14 -0
- agno/eval/accuracy.py +834 -0
- agno/eval/performance.py +773 -0
- agno/eval/reliability.py +306 -0
- agno/eval/utils.py +119 -0
- agno/exceptions.py +161 -0
- agno/filters.py +354 -0
- agno/guardrails/__init__.py +6 -0
- agno/guardrails/base.py +19 -0
- agno/guardrails/openai.py +144 -0
- agno/guardrails/pii.py +94 -0
- agno/guardrails/prompt_injection.py +52 -0
- agno/integrations/__init__.py +0 -0
- agno/integrations/discord/__init__.py +3 -0
- agno/integrations/discord/client.py +203 -0
- agno/knowledge/__init__.py +5 -0
- agno/knowledge/chunking/__init__.py +0 -0
- agno/knowledge/chunking/agentic.py +79 -0
- agno/knowledge/chunking/document.py +91 -0
- agno/knowledge/chunking/fixed.py +57 -0
- agno/knowledge/chunking/markdown.py +151 -0
- agno/knowledge/chunking/recursive.py +63 -0
- agno/knowledge/chunking/row.py +39 -0
- agno/knowledge/chunking/semantic.py +86 -0
- agno/knowledge/chunking/strategy.py +165 -0
- agno/knowledge/content.py +74 -0
- agno/knowledge/document/__init__.py +5 -0
- agno/knowledge/document/base.py +58 -0
- agno/knowledge/embedder/__init__.py +5 -0
- agno/knowledge/embedder/aws_bedrock.py +343 -0
- agno/knowledge/embedder/azure_openai.py +210 -0
- agno/knowledge/embedder/base.py +23 -0
- agno/knowledge/embedder/cohere.py +323 -0
- agno/knowledge/embedder/fastembed.py +62 -0
- agno/knowledge/embedder/fireworks.py +13 -0
- agno/knowledge/embedder/google.py +258 -0
- agno/knowledge/embedder/huggingface.py +94 -0
- agno/knowledge/embedder/jina.py +182 -0
- agno/knowledge/embedder/langdb.py +22 -0
- agno/knowledge/embedder/mistral.py +206 -0
- agno/knowledge/embedder/nebius.py +13 -0
- agno/knowledge/embedder/ollama.py +154 -0
- agno/knowledge/embedder/openai.py +195 -0
- agno/knowledge/embedder/sentence_transformer.py +63 -0
- agno/knowledge/embedder/together.py +13 -0
- agno/knowledge/embedder/vllm.py +262 -0
- agno/knowledge/embedder/voyageai.py +165 -0
- agno/knowledge/knowledge.py +1988 -0
- agno/knowledge/reader/__init__.py +7 -0
- agno/knowledge/reader/arxiv_reader.py +81 -0
- agno/knowledge/reader/base.py +95 -0
- agno/knowledge/reader/csv_reader.py +166 -0
- agno/knowledge/reader/docx_reader.py +82 -0
- agno/knowledge/reader/field_labeled_csv_reader.py +292 -0
- agno/knowledge/reader/firecrawl_reader.py +201 -0
- agno/knowledge/reader/json_reader.py +87 -0
- agno/knowledge/reader/markdown_reader.py +137 -0
- agno/knowledge/reader/pdf_reader.py +431 -0
- agno/knowledge/reader/pptx_reader.py +101 -0
- agno/knowledge/reader/reader_factory.py +313 -0
- agno/knowledge/reader/s3_reader.py +89 -0
- agno/knowledge/reader/tavily_reader.py +194 -0
- agno/knowledge/reader/text_reader.py +115 -0
- agno/knowledge/reader/web_search_reader.py +372 -0
- agno/knowledge/reader/website_reader.py +455 -0
- agno/knowledge/reader/wikipedia_reader.py +59 -0
- agno/knowledge/reader/youtube_reader.py +78 -0
- agno/knowledge/remote_content/__init__.py +0 -0
- agno/knowledge/remote_content/remote_content.py +88 -0
- agno/knowledge/reranker/__init__.py +3 -0
- agno/knowledge/reranker/base.py +14 -0
- agno/knowledge/reranker/cohere.py +64 -0
- agno/knowledge/reranker/infinity.py +195 -0
- agno/knowledge/reranker/sentence_transformer.py +54 -0
- agno/knowledge/types.py +39 -0
- agno/knowledge/utils.py +189 -0
- agno/media.py +462 -0
- agno/memory/__init__.py +3 -0
- agno/memory/manager.py +1327 -0
- agno/models/__init__.py +0 -0
- agno/models/aimlapi/__init__.py +5 -0
- agno/models/aimlapi/aimlapi.py +45 -0
- agno/models/anthropic/__init__.py +5 -0
- agno/models/anthropic/claude.py +757 -0
- agno/models/aws/__init__.py +15 -0
- agno/models/aws/bedrock.py +701 -0
- agno/models/aws/claude.py +378 -0
- agno/models/azure/__init__.py +18 -0
- agno/models/azure/ai_foundry.py +485 -0
- agno/models/azure/openai_chat.py +131 -0
- agno/models/base.py +2175 -0
- agno/models/cerebras/__init__.py +12 -0
- agno/models/cerebras/cerebras.py +501 -0
- agno/models/cerebras/cerebras_openai.py +112 -0
- agno/models/cohere/__init__.py +5 -0
- agno/models/cohere/chat.py +389 -0
- agno/models/cometapi/__init__.py +5 -0
- agno/models/cometapi/cometapi.py +57 -0
- agno/models/dashscope/__init__.py +5 -0
- agno/models/dashscope/dashscope.py +91 -0
- agno/models/deepinfra/__init__.py +5 -0
- agno/models/deepinfra/deepinfra.py +28 -0
- agno/models/deepseek/__init__.py +5 -0
- agno/models/deepseek/deepseek.py +61 -0
- agno/models/defaults.py +1 -0
- agno/models/fireworks/__init__.py +5 -0
- agno/models/fireworks/fireworks.py +26 -0
- agno/models/google/__init__.py +5 -0
- agno/models/google/gemini.py +1085 -0
- agno/models/groq/__init__.py +5 -0
- agno/models/groq/groq.py +556 -0
- agno/models/huggingface/__init__.py +5 -0
- agno/models/huggingface/huggingface.py +491 -0
- agno/models/ibm/__init__.py +5 -0
- agno/models/ibm/watsonx.py +422 -0
- agno/models/internlm/__init__.py +3 -0
- agno/models/internlm/internlm.py +26 -0
- agno/models/langdb/__init__.py +1 -0
- agno/models/langdb/langdb.py +48 -0
- agno/models/litellm/__init__.py +14 -0
- agno/models/litellm/chat.py +468 -0
- agno/models/litellm/litellm_openai.py +25 -0
- agno/models/llama_cpp/__init__.py +5 -0
- agno/models/llama_cpp/llama_cpp.py +22 -0
- agno/models/lmstudio/__init__.py +5 -0
- agno/models/lmstudio/lmstudio.py +25 -0
- agno/models/message.py +434 -0
- agno/models/meta/__init__.py +12 -0
- agno/models/meta/llama.py +475 -0
- agno/models/meta/llama_openai.py +78 -0
- agno/models/metrics.py +120 -0
- agno/models/mistral/__init__.py +5 -0
- agno/models/mistral/mistral.py +432 -0
- agno/models/nebius/__init__.py +3 -0
- agno/models/nebius/nebius.py +54 -0
- agno/models/nexus/__init__.py +3 -0
- agno/models/nexus/nexus.py +22 -0
- agno/models/nvidia/__init__.py +5 -0
- agno/models/nvidia/nvidia.py +28 -0
- agno/models/ollama/__init__.py +5 -0
- agno/models/ollama/chat.py +441 -0
- agno/models/openai/__init__.py +9 -0
- agno/models/openai/chat.py +883 -0
- agno/models/openai/like.py +27 -0
- agno/models/openai/responses.py +1050 -0
- agno/models/openrouter/__init__.py +5 -0
- agno/models/openrouter/openrouter.py +66 -0
- agno/models/perplexity/__init__.py +5 -0
- agno/models/perplexity/perplexity.py +187 -0
- agno/models/portkey/__init__.py +3 -0
- agno/models/portkey/portkey.py +81 -0
- agno/models/requesty/__init__.py +5 -0
- agno/models/requesty/requesty.py +52 -0
- agno/models/response.py +199 -0
- agno/models/sambanova/__init__.py +5 -0
- agno/models/sambanova/sambanova.py +28 -0
- agno/models/siliconflow/__init__.py +5 -0
- agno/models/siliconflow/siliconflow.py +25 -0
- agno/models/together/__init__.py +5 -0
- agno/models/together/together.py +25 -0
- agno/models/utils.py +266 -0
- agno/models/vercel/__init__.py +3 -0
- agno/models/vercel/v0.py +26 -0
- agno/models/vertexai/__init__.py +0 -0
- agno/models/vertexai/claude.py +70 -0
- agno/models/vllm/__init__.py +3 -0
- agno/models/vllm/vllm.py +78 -0
- agno/models/xai/__init__.py +3 -0
- agno/models/xai/xai.py +113 -0
- agno/os/__init__.py +3 -0
- agno/os/app.py +876 -0
- agno/os/auth.py +57 -0
- agno/os/config.py +104 -0
- agno/os/interfaces/__init__.py +1 -0
- agno/os/interfaces/a2a/__init__.py +3 -0
- agno/os/interfaces/a2a/a2a.py +42 -0
- agno/os/interfaces/a2a/router.py +250 -0
- agno/os/interfaces/a2a/utils.py +924 -0
- agno/os/interfaces/agui/__init__.py +3 -0
- agno/os/interfaces/agui/agui.py +47 -0
- agno/os/interfaces/agui/router.py +144 -0
- agno/os/interfaces/agui/utils.py +534 -0
- agno/os/interfaces/base.py +25 -0
- agno/os/interfaces/slack/__init__.py +3 -0
- agno/os/interfaces/slack/router.py +148 -0
- agno/os/interfaces/slack/security.py +30 -0
- agno/os/interfaces/slack/slack.py +47 -0
- agno/os/interfaces/whatsapp/__init__.py +3 -0
- agno/os/interfaces/whatsapp/router.py +211 -0
- agno/os/interfaces/whatsapp/security.py +53 -0
- agno/os/interfaces/whatsapp/whatsapp.py +36 -0
- agno/os/mcp.py +292 -0
- agno/os/middleware/__init__.py +7 -0
- agno/os/middleware/jwt.py +233 -0
- agno/os/router.py +1763 -0
- agno/os/routers/__init__.py +3 -0
- agno/os/routers/evals/__init__.py +3 -0
- agno/os/routers/evals/evals.py +430 -0
- agno/os/routers/evals/schemas.py +142 -0
- agno/os/routers/evals/utils.py +162 -0
- agno/os/routers/health.py +31 -0
- agno/os/routers/home.py +52 -0
- agno/os/routers/knowledge/__init__.py +3 -0
- agno/os/routers/knowledge/knowledge.py +997 -0
- agno/os/routers/knowledge/schemas.py +178 -0
- agno/os/routers/memory/__init__.py +3 -0
- agno/os/routers/memory/memory.py +515 -0
- agno/os/routers/memory/schemas.py +62 -0
- agno/os/routers/metrics/__init__.py +3 -0
- agno/os/routers/metrics/metrics.py +190 -0
- agno/os/routers/metrics/schemas.py +47 -0
- agno/os/routers/session/__init__.py +3 -0
- agno/os/routers/session/session.py +997 -0
- agno/os/schema.py +1055 -0
- agno/os/settings.py +43 -0
- agno/os/utils.py +630 -0
- agno/py.typed +0 -0
- agno/reasoning/__init__.py +0 -0
- agno/reasoning/anthropic.py +80 -0
- agno/reasoning/azure_ai_foundry.py +67 -0
- agno/reasoning/deepseek.py +63 -0
- agno/reasoning/default.py +97 -0
- agno/reasoning/gemini.py +73 -0
- agno/reasoning/groq.py +71 -0
- agno/reasoning/helpers.py +63 -0
- agno/reasoning/ollama.py +67 -0
- agno/reasoning/openai.py +86 -0
- agno/reasoning/step.py +31 -0
- agno/reasoning/vertexai.py +76 -0
- agno/run/__init__.py +6 -0
- agno/run/agent.py +787 -0
- agno/run/base.py +229 -0
- agno/run/cancel.py +81 -0
- agno/run/messages.py +32 -0
- agno/run/team.py +753 -0
- agno/run/workflow.py +708 -0
- agno/session/__init__.py +10 -0
- agno/session/agent.py +295 -0
- agno/session/summary.py +265 -0
- agno/session/team.py +392 -0
- agno/session/workflow.py +205 -0
- agno/team/__init__.py +37 -0
- agno/team/team.py +8793 -0
- agno/tools/__init__.py +10 -0
- agno/tools/agentql.py +120 -0
- agno/tools/airflow.py +69 -0
- agno/tools/api.py +122 -0
- agno/tools/apify.py +314 -0
- agno/tools/arxiv.py +127 -0
- agno/tools/aws_lambda.py +53 -0
- agno/tools/aws_ses.py +66 -0
- agno/tools/baidusearch.py +89 -0
- agno/tools/bitbucket.py +292 -0
- agno/tools/brandfetch.py +213 -0
- agno/tools/bravesearch.py +106 -0
- agno/tools/brightdata.py +367 -0
- agno/tools/browserbase.py +209 -0
- agno/tools/calcom.py +255 -0
- agno/tools/calculator.py +151 -0
- agno/tools/cartesia.py +187 -0
- agno/tools/clickup.py +244 -0
- agno/tools/confluence.py +240 -0
- agno/tools/crawl4ai.py +158 -0
- agno/tools/csv_toolkit.py +185 -0
- agno/tools/dalle.py +110 -0
- agno/tools/daytona.py +475 -0
- agno/tools/decorator.py +262 -0
- agno/tools/desi_vocal.py +108 -0
- agno/tools/discord.py +161 -0
- agno/tools/docker.py +716 -0
- agno/tools/duckdb.py +379 -0
- agno/tools/duckduckgo.py +91 -0
- agno/tools/e2b.py +703 -0
- agno/tools/eleven_labs.py +196 -0
- agno/tools/email.py +67 -0
- agno/tools/evm.py +129 -0
- agno/tools/exa.py +396 -0
- agno/tools/fal.py +127 -0
- agno/tools/file.py +240 -0
- agno/tools/file_generation.py +350 -0
- agno/tools/financial_datasets.py +288 -0
- agno/tools/firecrawl.py +143 -0
- agno/tools/function.py +1187 -0
- agno/tools/giphy.py +93 -0
- agno/tools/github.py +1760 -0
- agno/tools/gmail.py +922 -0
- agno/tools/google_bigquery.py +117 -0
- agno/tools/google_drive.py +270 -0
- agno/tools/google_maps.py +253 -0
- agno/tools/googlecalendar.py +674 -0
- agno/tools/googlesearch.py +98 -0
- agno/tools/googlesheets.py +377 -0
- agno/tools/hackernews.py +77 -0
- agno/tools/jina.py +101 -0
- agno/tools/jira.py +170 -0
- agno/tools/knowledge.py +218 -0
- agno/tools/linear.py +426 -0
- agno/tools/linkup.py +58 -0
- agno/tools/local_file_system.py +90 -0
- agno/tools/lumalab.py +183 -0
- agno/tools/mcp/__init__.py +10 -0
- agno/tools/mcp/mcp.py +331 -0
- agno/tools/mcp/multi_mcp.py +347 -0
- agno/tools/mcp/params.py +24 -0
- agno/tools/mcp_toolbox.py +284 -0
- agno/tools/mem0.py +193 -0
- agno/tools/memori.py +339 -0
- agno/tools/memory.py +419 -0
- agno/tools/mlx_transcribe.py +139 -0
- agno/tools/models/__init__.py +0 -0
- agno/tools/models/azure_openai.py +190 -0
- agno/tools/models/gemini.py +203 -0
- agno/tools/models/groq.py +158 -0
- agno/tools/models/morph.py +186 -0
- agno/tools/models/nebius.py +124 -0
- agno/tools/models_labs.py +195 -0
- agno/tools/moviepy_video.py +349 -0
- agno/tools/neo4j.py +134 -0
- agno/tools/newspaper.py +46 -0
- agno/tools/newspaper4k.py +93 -0
- agno/tools/notion.py +204 -0
- agno/tools/openai.py +202 -0
- agno/tools/openbb.py +160 -0
- agno/tools/opencv.py +321 -0
- agno/tools/openweather.py +233 -0
- agno/tools/oxylabs.py +385 -0
- agno/tools/pandas.py +102 -0
- agno/tools/parallel.py +314 -0
- agno/tools/postgres.py +257 -0
- agno/tools/pubmed.py +188 -0
- agno/tools/python.py +205 -0
- agno/tools/reasoning.py +283 -0
- agno/tools/reddit.py +467 -0
- agno/tools/replicate.py +117 -0
- agno/tools/resend.py +62 -0
- agno/tools/scrapegraph.py +222 -0
- agno/tools/searxng.py +152 -0
- agno/tools/serpapi.py +116 -0
- agno/tools/serper.py +255 -0
- agno/tools/shell.py +53 -0
- agno/tools/slack.py +136 -0
- agno/tools/sleep.py +20 -0
- agno/tools/spider.py +116 -0
- agno/tools/sql.py +154 -0
- agno/tools/streamlit/__init__.py +0 -0
- agno/tools/streamlit/components.py +113 -0
- agno/tools/tavily.py +254 -0
- agno/tools/telegram.py +48 -0
- agno/tools/todoist.py +218 -0
- agno/tools/tool_registry.py +1 -0
- agno/tools/toolkit.py +146 -0
- agno/tools/trafilatura.py +388 -0
- agno/tools/trello.py +274 -0
- agno/tools/twilio.py +186 -0
- agno/tools/user_control_flow.py +78 -0
- agno/tools/valyu.py +228 -0
- agno/tools/visualization.py +467 -0
- agno/tools/webbrowser.py +28 -0
- agno/tools/webex.py +76 -0
- agno/tools/website.py +54 -0
- agno/tools/webtools.py +45 -0
- agno/tools/whatsapp.py +286 -0
- agno/tools/wikipedia.py +63 -0
- agno/tools/workflow.py +278 -0
- agno/tools/x.py +335 -0
- agno/tools/yfinance.py +257 -0
- agno/tools/youtube.py +184 -0
- agno/tools/zendesk.py +82 -0
- agno/tools/zep.py +454 -0
- agno/tools/zoom.py +382 -0
- agno/utils/__init__.py +0 -0
- agno/utils/agent.py +820 -0
- agno/utils/audio.py +49 -0
- agno/utils/certs.py +27 -0
- agno/utils/code_execution.py +11 -0
- agno/utils/common.py +132 -0
- agno/utils/dttm.py +13 -0
- agno/utils/enum.py +22 -0
- agno/utils/env.py +11 -0
- agno/utils/events.py +696 -0
- agno/utils/format_str.py +16 -0
- agno/utils/functions.py +166 -0
- agno/utils/gemini.py +426 -0
- agno/utils/hooks.py +57 -0
- agno/utils/http.py +74 -0
- agno/utils/json_schema.py +234 -0
- agno/utils/knowledge.py +36 -0
- agno/utils/location.py +19 -0
- agno/utils/log.py +255 -0
- agno/utils/mcp.py +214 -0
- agno/utils/media.py +352 -0
- agno/utils/merge_dict.py +41 -0
- agno/utils/message.py +118 -0
- agno/utils/models/__init__.py +0 -0
- agno/utils/models/ai_foundry.py +43 -0
- agno/utils/models/claude.py +358 -0
- agno/utils/models/cohere.py +87 -0
- agno/utils/models/llama.py +78 -0
- agno/utils/models/mistral.py +98 -0
- agno/utils/models/openai_responses.py +140 -0
- agno/utils/models/schema_utils.py +153 -0
- agno/utils/models/watsonx.py +41 -0
- agno/utils/openai.py +257 -0
- agno/utils/pickle.py +32 -0
- agno/utils/pprint.py +178 -0
- agno/utils/print_response/__init__.py +0 -0
- agno/utils/print_response/agent.py +842 -0
- agno/utils/print_response/team.py +1724 -0
- agno/utils/print_response/workflow.py +1668 -0
- agno/utils/prompts.py +111 -0
- agno/utils/reasoning.py +108 -0
- agno/utils/response.py +163 -0
- agno/utils/response_iterator.py +17 -0
- agno/utils/safe_formatter.py +24 -0
- agno/utils/serialize.py +32 -0
- agno/utils/shell.py +22 -0
- agno/utils/streamlit.py +487 -0
- agno/utils/string.py +231 -0
- agno/utils/team.py +139 -0
- agno/utils/timer.py +41 -0
- agno/utils/tools.py +102 -0
- agno/utils/web.py +23 -0
- agno/utils/whatsapp.py +305 -0
- agno/utils/yaml_io.py +25 -0
- agno/vectordb/__init__.py +3 -0
- agno/vectordb/base.py +127 -0
- agno/vectordb/cassandra/__init__.py +5 -0
- agno/vectordb/cassandra/cassandra.py +501 -0
- agno/vectordb/cassandra/extra_param_mixin.py +11 -0
- agno/vectordb/cassandra/index.py +13 -0
- agno/vectordb/chroma/__init__.py +5 -0
- agno/vectordb/chroma/chromadb.py +929 -0
- agno/vectordb/clickhouse/__init__.py +9 -0
- agno/vectordb/clickhouse/clickhousedb.py +835 -0
- agno/vectordb/clickhouse/index.py +9 -0
- agno/vectordb/couchbase/__init__.py +3 -0
- agno/vectordb/couchbase/couchbase.py +1442 -0
- agno/vectordb/distance.py +7 -0
- agno/vectordb/lancedb/__init__.py +6 -0
- agno/vectordb/lancedb/lance_db.py +995 -0
- agno/vectordb/langchaindb/__init__.py +5 -0
- agno/vectordb/langchaindb/langchaindb.py +163 -0
- agno/vectordb/lightrag/__init__.py +5 -0
- agno/vectordb/lightrag/lightrag.py +388 -0
- agno/vectordb/llamaindex/__init__.py +3 -0
- agno/vectordb/llamaindex/llamaindexdb.py +166 -0
- agno/vectordb/milvus/__init__.py +4 -0
- agno/vectordb/milvus/milvus.py +1182 -0
- agno/vectordb/mongodb/__init__.py +9 -0
- agno/vectordb/mongodb/mongodb.py +1417 -0
- agno/vectordb/pgvector/__init__.py +12 -0
- agno/vectordb/pgvector/index.py +23 -0
- agno/vectordb/pgvector/pgvector.py +1462 -0
- agno/vectordb/pineconedb/__init__.py +5 -0
- agno/vectordb/pineconedb/pineconedb.py +747 -0
- agno/vectordb/qdrant/__init__.py +5 -0
- agno/vectordb/qdrant/qdrant.py +1134 -0
- agno/vectordb/redis/__init__.py +9 -0
- agno/vectordb/redis/redisdb.py +694 -0
- agno/vectordb/search.py +7 -0
- agno/vectordb/singlestore/__init__.py +10 -0
- agno/vectordb/singlestore/index.py +41 -0
- agno/vectordb/singlestore/singlestore.py +763 -0
- agno/vectordb/surrealdb/__init__.py +3 -0
- agno/vectordb/surrealdb/surrealdb.py +699 -0
- agno/vectordb/upstashdb/__init__.py +5 -0
- agno/vectordb/upstashdb/upstashdb.py +718 -0
- agno/vectordb/weaviate/__init__.py +8 -0
- agno/vectordb/weaviate/index.py +15 -0
- agno/vectordb/weaviate/weaviate.py +1005 -0
- agno/workflow/__init__.py +23 -0
- agno/workflow/agent.py +299 -0
- agno/workflow/condition.py +738 -0
- agno/workflow/loop.py +735 -0
- agno/workflow/parallel.py +824 -0
- agno/workflow/router.py +702 -0
- agno/workflow/step.py +1432 -0
- agno/workflow/steps.py +592 -0
- agno/workflow/types.py +520 -0
- agno/workflow/workflow.py +4321 -0
- agno-2.2.13.dist-info/METADATA +614 -0
- agno-2.2.13.dist-info/RECORD +575 -0
- agno-2.2.13.dist-info/WHEEL +5 -0
- agno-2.2.13.dist-info/licenses/LICENSE +201 -0
- agno-2.2.13.dist-info/top_level.txt +1 -0
@@ -0,0 +1,747 @@
+import asyncio
+from typing import Any, Dict, List, Optional, Union
+
+try:
+    from packaging import version
+    from pinecone import __version__
+
+    if version.parse(__version__).major >= 6:
+        import warnings
+
+        warnings.warn(
+            "We do not yet support Pinecone v6.x.x. We are actively working to achieve compatibility. "
+            "In the meantime, we recommend using Pinecone v5.4.2 for the best experience. Please run `pip install pinecone==5.4.2`",
+            UserWarning,
+        )
+        raise RuntimeError("Incompatible Pinecone version detected. Execution halted.")
+
+    from pinecone import Pinecone, PodSpec, ServerlessSpec
+    from pinecone.config import Config
+
+except ImportError:
+    raise ImportError("The `pinecone` package is not installed, please install using `pip install pinecone`.")
+
+
+from agno.filters import FilterExpr
+from agno.knowledge.document import Document
+from agno.knowledge.embedder import Embedder
+from agno.knowledge.reranker.base import Reranker
+from agno.utils.log import log_debug, log_info, log_warning, logger
+from agno.vectordb.base import VectorDb
+
+
+class PineconeDb(VectorDb):
+    """A class representing a Pinecone database.
+
+    Args:
+        name (str): The name of the index.
+        dimension (int): The dimension of the embeddings.
+        spec (Union[Dict, ServerlessSpec, PodSpec]): The index spec.
+        metric (Optional[str], optional): The metric used for similarity search. Defaults to "cosine".
+        additional_headers (Optional[Dict[str, str]], optional): Additional headers to pass to the Pinecone client. Defaults to {}.
+        pool_threads (Optional[int], optional): The number of threads to use for the Pinecone client. Defaults to 1.
+        namespace: (Optional[str], optional): The namespace partition within the index that will be used. Defaults to None.
+        timeout (Optional[int], optional): The timeout for Pinecone operations. Defaults to None.
+        index_api (Optional[Any], optional): The Index API object. Defaults to None.
+        api_key (Optional[str], optional): The Pinecone API key. Defaults to None.
+        host (Optional[str], optional): The Pinecone host. Defaults to None.
+        config (Optional[Config], optional): The Pinecone config. Defaults to None.
+        **kwargs: Additional keyword arguments.
+
+    Attributes:
+        client (Pinecone): The Pinecone client.
+        index: The Pinecone index.
+        api_key (Optional[str]): The Pinecone API key.
+        host (Optional[str]): The Pinecone host.
+        config (Optional[Config]): The Pinecone config.
+        additional_headers (Optional[Dict[str, str]]): Additional headers to pass to the Pinecone client.
+        pool_threads (Optional[int]): The number of threads to use for the Pinecone client.
+        index_api (Optional[Any]): The Index API object.
+        name (str): The name of the index.
+        dimension (int): The dimension of the embeddings.
+        spec (Union[Dict, ServerlessSpec, PodSpec]): The index spec.
+        metric (Optional[str]): The metric used for similarity search.
+        timeout (Optional[int]): The timeout for Pinecone operations.
+        kwargs (Optional[Dict[str, str]]): Additional keyword arguments.
+    """
+
+    def __init__(
+        self,
+        dimension: int,
+        spec: Union[Dict, ServerlessSpec, PodSpec],
+        name: Optional[str] = None,
+        description: Optional[str] = None,
+        id: Optional[str] = None,
+        embedder: Optional[Embedder] = None,
+        metric: Optional[str] = "cosine",
+        additional_headers: Optional[Dict[str, str]] = None,
+        pool_threads: Optional[int] = 1,
+        namespace: Optional[str] = None,
+        timeout: Optional[int] = None,
+        index_api: Optional[Any] = None,
+        api_key: Optional[str] = None,
+        host: Optional[str] = None,
+        config: Optional[Config] = None,
+        use_hybrid_search: bool = False,
+        hybrid_alpha: float = 0.5,
+        reranker: Optional[Reranker] = None,
+        **kwargs,
+    ):
+        # Validate required parameters
+        if dimension is None or dimension <= 0:
+            raise ValueError("Dimension must be provided and greater than 0.")
+        if spec is None:
+            raise ValueError("Spec must be provided for Pinecone index.")
+
+        # Dynamic ID generation based on unique identifiers
+        if id is None:
+            from agno.utils.string import generate_id
+
+            index_name = name or "default_index"
+            seed = f"{host or 'pinecone'}#{index_name}#{dimension}"
+            id = generate_id(seed)
+
+        # Initialize base class with name, description, and generated ID
+        super().__init__(id=id, name=name, description=description)
+
+        self._client = None
+        self._index = None
+        self.api_key: Optional[str] = api_key
+        self.host: Optional[str] = host
+        self.config: Optional[Config] = config
+        self.additional_headers: Dict[str, str] = additional_headers or {}
+        self.pool_threads: Optional[int] = pool_threads
+        self.namespace: Optional[str] = namespace
+        self.index_api: Optional[Any] = index_api
+        self.dimension: Optional[int] = dimension
+        self.spec: Union[Dict, ServerlessSpec, PodSpec] = spec
+        self.metric: Optional[str] = metric
+        self.timeout: Optional[int] = timeout
+        self.kwargs: Optional[Dict[str, str]] = kwargs
+        self.use_hybrid_search: bool = use_hybrid_search
+        self.hybrid_alpha: float = hybrid_alpha
+        if self.use_hybrid_search:
+            try:
+                from pinecone_text.sparse import BM25Encoder
+            except ImportError:
+                raise ImportError(
+                    "The `pinecone_text` package is not installed, please install using `pip install pinecone-text`."
+                )
+
+            self.sparse_encoder = BM25Encoder().default()
+
+        # Embedder for embedding the document contents
+        _embedder = embedder
+        if _embedder is None:
+            from agno.knowledge.embedder.openai import OpenAIEmbedder
+
+            _embedder = OpenAIEmbedder()
+            log_info("Embedder not provided, using OpenAIEmbedder as default.")
+        self.embedder: Embedder = _embedder
+        self.reranker: Optional[Reranker] = reranker
+
+    @property
+    def client(self) -> Pinecone:
+        """The Pinecone client.
+
+        Returns:
+            Pinecone: The Pinecone client.
+
+        """
+        if self._client is None:
+            log_debug("Creating Pinecone Client")
+            self._client = Pinecone(
+                api_key=self.api_key,
+                host=self.host,
+                config=self.config,
+                additional_headers=self.additional_headers,
+                pool_threads=self.pool_threads,
+                index_api=self.index_api,
+                **self.kwargs,
+            )
+        return self._client
+
+    @property
+    def index(self):
+        """The Pinecone index.
+
+        Returns:
+            Pinecone.Index: The Pinecone index.
+
+        """
+        if self._index is None:
+            log_debug(f"Connecting to Pinecone Index: {self.name}")
+            self._index = self.client.Index(self.name)
+        return self._index
+
+    def exists(self) -> bool:
+        """Check if the index exists.
+
+        Returns:
+            bool: True if the index exists, False otherwise.
+
+        """
+        list_indexes = self.client.list_indexes()
+        return self.name in list_indexes.names()
+
+    async def async_exists(self) -> bool:
+        """Check if the index exists asynchronously."""
+        return await asyncio.to_thread(self.exists)
+
+    def create(self) -> None:
+        """Create the index if it does not exist."""
+        if not self.exists():
+            log_debug(f"Creating index: {self.name}")
+
+            if self.use_hybrid_search:
+                self.metric = "dotproduct"
+
+            if self.dimension is None:
+                raise ValueError("Dimension is not set for this Pinecone index")
+
+            self.client.create_index(
+                name=self.name,
+                dimension=self.dimension,
+                spec=self.spec,
+                metric=self.metric if self.metric is not None else "cosine",
+                timeout=self.timeout,
+            )
+
+    async def async_create(self) -> None:
+        """Create the index asynchronously if it does not exist."""
+        await asyncio.to_thread(self.create)
+
+    def drop(self) -> None:
+        """Delete the index if it exists."""
+        if self.exists():
+            log_debug(f"Deleting index: {self.name}")
+            self.client.delete_index(name=self.name, timeout=self.timeout)
+
+    def doc_exists(self, document: Document) -> bool:
+        """Check if a document exists in the index.
+
+        Args:
+            document (Document): The document to check.
+
+        Returns:
+            bool: True if the document exists, False otherwise.
+
+        """
+        response = self.index.fetch(ids=[document.id], namespace=self.namespace)
+        return len(response.vectors) > 0
+
+    async def async_doc_exists(self, document: Document) -> bool:
+        """Check if a document exists in the index asynchronously."""
+        return await asyncio.to_thread(self.doc_exists, document)
+
+    def name_exists(self, name: str) -> bool:
+        """Check if an index with the given name exists.
+
+        Args:
+            name (str): The name of the index.
+
+        Returns:
+            bool: True if the index exists, False otherwise.
+
+        """
+        try:
+            self.client.describe_index(name)
+            return True
+        except Exception:
+            return False
+
+    async def async_name_exists(self, name: str) -> bool:
+        """Check if an index with the given name exists asynchronously."""
+        return await asyncio.to_thread(self.name_exists, name)
+
+    def upsert(
+        self,
+        content_hash: str,
+        documents: List[Document],
+        filters: Optional[Dict[str, Any]] = None,
+    ) -> None:
+        if self.content_hash_exists(content_hash):
+            self._delete_by_content_hash(content_hash)
+        self._upsert(content_hash=content_hash, documents=documents, filters=filters)
+
+    def _upsert(
+        self,
+        content_hash: str,
+        documents: List[Document],
+        filters: Optional[Dict[str, Any]] = None,
+        namespace: Optional[str] = None,
+        batch_size: Optional[int] = None,
+        show_progress: bool = False,
+    ) -> None:
+        """insert documents into the index.
+
+        Args:
+            documents (List[Document]): The documents to upsert.
+            filters (Optional[Dict[str, Any]], optional): The filters for the upsert. Defaults to None.
+            namespace (Optional[str], optional): The namespace for the documents. Defaults to None.
+            batch_size (Optional[int], optional): The batch size for upsert. Defaults to None.
+            show_progress (bool, optional): Whether to show progress during upsert. Defaults to False.
+
+        """
+
+        vectors = []
+        for document in documents:
+            document.embed(embedder=self.embedder)
+            document.meta_data["text"] = document.content
+            # Include name and content_id in metadata
+            metadata = document.meta_data.copy()
+            if filters:
+                metadata.update(filters)
+
+            if document.name:
+                metadata["name"] = document.name
+            if document.content_id:
+                metadata["content_id"] = document.content_id
+
+            metadata["content_hash"] = content_hash
+
+            data_to_upsert = {
+                "id": document.id,
+                "values": document.embedding,
+                "metadata": metadata,
+            }
+            if self.use_hybrid_search:
+                data_to_upsert["sparse_values"] = self.sparse_encoder.encode_documents(document.content)
+            vectors.append(data_to_upsert)
+
+        self.index.upsert(
+            vectors=vectors,
+            namespace=namespace or self.namespace,
+            batch_size=batch_size,
+            show_progress=show_progress,
+        )
+
+    async def async_upsert(
+        self,
+        content_hash: str,
+        documents: List[Document],
+        filters: Optional[Dict[str, Any]] = None,
+        namespace: Optional[str] = None,
+        batch_size: Optional[int] = None,
+        show_progress: bool = False,
+    ) -> None:
+        """Upsert documents into the index asynchronously with batching."""
+        if self.content_hash_exists(content_hash):
+            await asyncio.to_thread(self._delete_by_content_hash, content_hash)
+        if not documents:
+            return
+
+        # Pinecone has its own batching mechanism, but we'll add an additional layer
+        # to process document embedding in parallel
+        _batch_size = batch_size or 100
+
+        # Split documents into batches
+        batches = [documents[i : i + _batch_size] for i in range(0, len(documents), _batch_size)]
+        log_debug(f"Processing {len(documents)} documents in {len(batches)} batches for upsert")
+
+        # Process each batch in parallel
+        async def process_batch(batch_docs):
+            return await self._prepare_vectors(batch_docs, content_hash, filters)
+
+        # Run all batches in parallel
+        batch_vectors = await asyncio.gather(*[process_batch(batch) for batch in batches])
+
+        # Flatten vectors
+        all_vectors = [vector for batch in batch_vectors for vector in batch]
+
+        # Upsert all vectors
+        await asyncio.to_thread(
+            self._upsert_vectors, all_vectors, namespace or self.namespace, batch_size, show_progress
+        )
+
+        log_debug(f"Finished async upsert of {len(documents)} documents")
+
+    async def _prepare_vectors(
+        self, documents: List[Document], content_hash: str, filters: Optional[Dict[str, Any]] = None
+    ) -> List[Dict[str, Any]]:
+        """Prepare vectors for upsert."""
+        vectors = []
+
+        if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
+            # Use batch embedding when enabled and supported
+            try:
+                # Extract content from all documents
+                doc_contents = [doc.content for doc in documents]
+
+                # Get batch embeddings and usage
+                embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
+
+                # Process documents with pre-computed embeddings
+                for j, doc in enumerate(documents):
+                    try:
+                        if j < len(embeddings):
+                            doc.embedding = embeddings[j]
+                            doc.usage = usages[j] if j < len(usages) else None
+                    except Exception as e:
+                        logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")
+
+            except Exception as e:
+                # Check if this is a rate limit error - don't fall back as it would make things worse
+                error_str = str(e).lower()
+                is_rate_limit = any(
+                    phrase in error_str
+                    for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
+                )
+
+                if is_rate_limit:
+                    logger.error(f"Rate limit detected during batch embedding. {e}")
+                    raise e
+                else:
+                    logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
+                    # Fall back to individual embedding
+                    embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
+                    await asyncio.gather(*embed_tasks, return_exceptions=True)
+        else:
+            # Use individual embedding
+            embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
+            await asyncio.gather(*embed_tasks, return_exceptions=True)
+
+        for doc in documents:
+            doc.meta_data["text"] = doc.content
+            # Include name and content_id in metadata
+            metadata = doc.meta_data.copy()
+            if filters:
+                metadata.update(filters)
+
+            if doc.name:
+                metadata["name"] = doc.name
+            if doc.content_id:
+                metadata["content_id"] = doc.content_id
+
+            metadata["content_hash"] = content_hash
+
+            data_to_upsert = {
+                "id": doc.id,
+                "values": doc.embedding,
+                "metadata": metadata,
+            }
+            if self.use_hybrid_search:
+                data_to_upsert["sparse_values"] = self.sparse_encoder.encode_documents(doc.content)
+            vectors.append(data_to_upsert)
+        return vectors
+
+    def _upsert_vectors(self, vectors, namespace, batch_size, show_progress):
+        """Upsert vectors to the index."""
+        self.index.upsert(
+            vectors=vectors,
+            namespace=namespace,
+            batch_size=batch_size,
+            show_progress=show_progress,
+        )
+
+    async def async_insert(
+        self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
+    ) -> None:
+        log_warning("Pinecone does not support insert operations. Redirecting to async_upsert instead.")
+        await self.async_upsert(content_hash=content_hash, documents=documents, filters=filters)
+
+    def upsert_available(self) -> bool:
+        """Check if upsert operation is available.
+
+        Returns:
+            bool: True if upsert is available, False otherwise.
+
+        """
+        return True
+
+    def insert(self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
+        log_warning("Pinecone does not support insert operations. Redirecting to upsert instead.")
+        self.upsert(content_hash=content_hash, documents=documents, filters=filters)
+
+    def _hybrid_scale(self, dense: List[float], sparse: Dict[str, Any], alpha: float):
+        """Hybrid vector scaling using a convex combination
+        1 is pure semantic search, 0 is pure keyword search
+        alpha * dense + (1 - alpha) * sparse
+
+        Args:
+            dense: Array of floats representing
+            sparse: a dict of `indices` and `values`
+            alpha: float between 0 and 1 where 0 == sparse only
+                and 1 == dense only
+        """
+        if alpha < 0 or alpha > 1:
+            raise ValueError("Alpha must be between 0 and 1")
+        # scale sparse and dense vectors to create hybrid search vecs
+        hsparse = {"indices": sparse["indices"], "values": [v * (1 - alpha) for v in sparse["values"]]}
+        hdense = [v * alpha for v in dense]
+        return hdense, hsparse
+
+    def search(
+        self,
+        query: str,
+        limit: int = 5,
+        filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None,
+        namespace: Optional[str] = None,
+        include_values: Optional[bool] = None,
+    ) -> List[Document]:
+        """Search for similar documents in the index.
+
+        Args:
+            query (str): The query to search for.
+            limit (int, optional): The maximum number of results to return. Defaults to 5.
+            filters (Optional[Dict[str, Union[str, float, int, bool, List, dict]]], optional): The filter for the search. Defaults to None.
+            namespace (Optional[str], optional): The namespace to search in. Defaults to None.
+            include_values (Optional[bool], optional): Whether to include values in the search results. Defaults to None.
+            include_metadata (Optional[bool], optional): Whether to include metadata in the search results. Defaults to None.
+
+        Returns:
+            List[Document]: The list of matching documents.
+
+        """
+        if isinstance(filters, List):
+            log_warning("Filters Expressions are not supported in PineconeDB. No filters will be applied.")
+            filters = None
+        dense_embedding = self.embedder.get_embedding(query)
+
+        if self.use_hybrid_search:
+            sparse_embedding = self.sparse_encoder.encode_queries(query)
+
+        if dense_embedding is None:
+            logger.error(f"Error getting embedding for Query: {query}")
+            return []
+
+        if self.use_hybrid_search:
+            hdense, hsparse = self._hybrid_scale(dense_embedding, sparse_embedding, alpha=self.hybrid_alpha)
+            response = self.index.query(
+                vector=hdense,
+                sparse_vector=hsparse,
+                top_k=limit,
+                namespace=namespace or self.namespace,
+                filter=filters,
+                include_values=include_values,
+                include_metadata=True,
+            )
+        else:
+            response = self.index.query(
+                vector=dense_embedding,
+                top_k=limit,
+                namespace=namespace or self.namespace,
+                filter=filters,
+                include_values=include_values,
+                include_metadata=True,
+            )
+
+        search_results = [
+            Document(
+                content=(result.metadata.get("text", "") if result.metadata is not None else ""),
+                id=result.id,
+                embedding=result.values,
+                meta_data=result.metadata,
+            )
+            for result in response.matches
+        ]
+
+        if self.reranker:
+            search_results = self.reranker.rerank(query=query, documents=search_results)
+        return search_results
+
+    async def async_search(
+        self,
+        query: str,
+        limit: int = 5,
+        filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None,
+        namespace: Optional[str] = None,
+        include_values: Optional[bool] = None,
+    ) -> List[Document]:
+        """Search for similar documents in the index asynchronously."""
+        return await asyncio.to_thread(self.search, query, limit, filters, namespace, include_values)
+
+    def optimize(self) -> None:
+        """Optimize the index.
+
+        This method can be left empty as Pinecone automatically optimizes indexes.
+
+        """
+        pass
+
+    def delete(self, namespace: Optional[str] = None) -> bool:
+        """Clear the index.
+
+        Args:
+            namespace (Optional[str], optional): The namespace to clear. Defaults to None.
+
+        """
+        try:
+            self.index.delete(delete_all=True, namespace=namespace)
+            return True
+        except Exception:
+            return False
+
+    async def async_drop(self) -> None:
+        raise NotImplementedError(f"Async not supported on {self.__class__.__name__}.")
+
+    def delete_by_id(self, id: str) -> bool:
+        """Delete a document by ID."""
+        try:
+            self.index.delete(ids=[id])
+            return True
+        except Exception as e:
+            log_warning(f"Error deleting document with ID {id}: {e}")
+            return False
+
+    def delete_by_name(self, name: str) -> bool:
+        """Delete documents by name (stored in metadata)."""
+        try:
+            # Delete all documents where metadata.name equals the given name
+            self.index.delete(filter={"name": {"$eq": name}})
+            return True
+        except Exception as e:
+            log_warning(f"Error deleting documents with name {name}: {e}")
+            return False
+
+    def delete_by_metadata(self, metadata: Dict[str, Any]) -> bool:
+        """Delete documents by metadata."""
+        try:
+            # Build filter for metadata matching
+            filter_conditions = {}
+            for key, value in metadata.items():
+                filter_conditions[key] = {"$eq": value}
+
+            self.index.delete(filter=filter_conditions)
+            return True
+        except Exception as e:
+            log_warning(f"Error deleting documents with metadata {metadata}: {e}")
+            return False
+
+    def delete_by_content_id(self, content_id: str) -> bool:
+        """Delete documents by content ID (stored in metadata)."""
+        try:
+            # Delete all documents where metadata.content_id equals the given content_id
+            self.index.delete(filter={"content_id": {"$eq": content_id}})
+            return True
+        except Exception as e:
+            log_warning(f"Error deleting documents with content_id {content_id}: {e}")
+            return False
+
+    def get_count(self) -> int:
+        """Get the count of documents in the index."""
+        try:
+            # Pinecone doesn't have a direct count method, so we use describe_index_stats
+            stats = self.index.describe_index_stats()
+            # The stats include total_vector_count which gives us the count
+            return stats.total_vector_count
+        except Exception as e:
+            log_warning(f"Error getting document count: {e}")
+            return 0
+
+    def id_exists(self, id: str) -> bool:
+        """Check if a document with the given ID exists in the index.
+
+        Args:
+            id (str): The ID to check.
+
+        Returns:
+            bool: True if the document exists, False otherwise.
+        """
+        try:
+            response = self.index.fetch(ids=[id], namespace=self.namespace)
+            return len(response.vectors) > 0
+        except Exception as e:
+            log_warning(f"Error checking if ID {id} exists: {e}")
+            return False
+
+    def content_hash_exists(self, content_hash: str) -> bool:
+        """Check if documents with the given content hash exist in the index.
+
+        Args:
+            content_hash (str): The content hash to check.
+
+        Returns:
+            bool: True if documents with the content hash exist, False otherwise.
+        """
+        try:
+            # Use a dummy vector to perform a minimal query with filter
+            # We only need to check if any results exist
+            if self.dimension is None:
+                raise ValueError("Dimension is not set for this Pinecone index")
+            dummy_vector = [0.0] * self.dimension
+            response = self.index.query(
+                vector=dummy_vector,
+                top_k=1,
+                namespace=self.namespace,
+                filter={"content_hash": {"$eq": content_hash}},
+                include_metadata=False,
+                include_values=False,
+            )
+            return len(response.matches) > 0
+        except Exception as e:
+            log_warning(f"Error checking if content_hash {content_hash} exists: {e}")
+            return False
+
+    def _delete_by_content_hash(self, content_hash: str) -> bool:
+        """Delete documents by content hash (stored in metadata).
+
+        Args:
+            content_hash (str): The content hash to delete.
+
+        Returns:
+            bool: True if documents were deleted, False otherwise.
+        """
+        try:
+            # Delete all documents where metadata.content_hash equals the given content_hash
+            self.index.delete(filter={"content_hash": {"$eq": content_hash}}, namespace=self.namespace)
+            return True
+        except Exception as e:
+            log_warning(f"Error deleting documents with content_hash {content_hash}: {e}")
+            return False
+
+    def update_metadata(self, content_id: str, metadata: Dict[str, Any]) -> None:
+        """
+        Update the metadata for documents with the given content_id.
+
+        Args:
+            content_id (str): The content ID to update
+            metadata (Dict[str, Any]): The metadata to update
+        """
+        try:
+            # Query for vectors with the given content_id
+            query_response = self.index.query(
+                filter={"content_id": {"$eq": content_id}},
+                top_k=10000,  # Get all matching vectors
+                include_metadata=True,
+                namespace=self.namespace,
+            )
+
+            if not query_response.matches:
+                logger.debug(f"No documents found with content_id: {content_id}")
+                return
+
+            # Prepare updates for each matching vector
+            update_data = []
+            for match in query_response.matches:
+                vector_id = match.id
+                current_metadata = match.metadata or {}
+
+                # Merge existing metadata with new metadata
+                updated_metadata = current_metadata.copy()
+                updated_metadata.update(metadata)
+
+                if "filters" not in updated_metadata:
+                    updated_metadata["filters"] = {}
+                if isinstance(updated_metadata["filters"], dict):
+                    updated_metadata["filters"].update(metadata)
+                else:
+                    updated_metadata["filters"] = metadata
+
+                update_data.append({"id": vector_id, "metadata": updated_metadata})
+
+            # Update vectors in batches
+            batch_size = 100
+            for i in range(0, len(update_data), batch_size):
+                batch = update_data[i : i + batch_size]
+                self.index.update(vectors=batch, namespace=self.namespace)
+
+            logger.debug(f"Updated metadata for {len(update_data)} documents with content_id: {content_id}")
+
+        except Exception as e:
+            logger.error(f"Error updating metadata for content_id '{content_id}': {e}")
+            raise
+
+    def get_supported_search_types(self) -> List[str]:
+        """Get the supported search types for this vector database."""
+        return []  # PineconeDb doesn't use SearchType enum
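
For orientation, a minimal usage sketch of the PineconeDb class added above. It is not part of the package diff: the index name, region, dimension, and document values are placeholder assumptions, the import path assumes agno/vectordb/pineconedb/__init__.py re-exports PineconeDb, and it presumes a Pinecone API key plus credentials for the default OpenAIEmbedder are available.

import os

from pinecone import ServerlessSpec

from agno.knowledge.document import Document
from agno.vectordb.pineconedb import PineconeDb

# Placeholder settings; the dimension must match the embedder's output size.
vector_db = PineconeDb(
    name="example-index",
    dimension=1536,
    spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    api_key=os.getenv("PINECONE_API_KEY"),
)

# Creates the index only if it does not already exist.
vector_db.create()

# upsert() takes a content_hash and embeds documents with the configured embedder.
docs = [Document(id="doc-1", content="Pinecone stores dense vectors for similarity search.")]
vector_db.upsert(content_hash="example-hash", documents=docs)

# Dense similarity search; returns agno Document objects.
results = vector_db.search(query="What does Pinecone store?", limit=3)
print([d.content for d in results])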