agno 0.1.2__py3-none-any.whl → 2.3.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/__init__.py +8 -0
- agno/agent/__init__.py +44 -5
- agno/agent/agent.py +10531 -2975
- agno/api/agent.py +14 -53
- agno/api/api.py +7 -46
- agno/api/evals.py +22 -0
- agno/api/os.py +17 -0
- agno/api/routes.py +6 -25
- agno/api/schemas/__init__.py +9 -0
- agno/api/schemas/agent.py +6 -9
- agno/api/schemas/evals.py +16 -0
- agno/api/schemas/os.py +14 -0
- agno/api/schemas/team.py +10 -10
- agno/api/schemas/utils.py +21 -0
- agno/api/schemas/workflows.py +16 -0
- agno/api/settings.py +53 -0
- agno/api/team.py +22 -26
- agno/api/workflow.py +28 -0
- agno/cloud/aws/base.py +214 -0
- agno/cloud/aws/s3/__init__.py +2 -0
- agno/cloud/aws/s3/api_client.py +43 -0
- agno/cloud/aws/s3/bucket.py +195 -0
- agno/cloud/aws/s3/object.py +57 -0
- agno/compression/__init__.py +3 -0
- agno/compression/manager.py +247 -0
- agno/culture/__init__.py +3 -0
- agno/culture/manager.py +956 -0
- agno/db/__init__.py +24 -0
- agno/db/async_postgres/__init__.py +3 -0
- agno/db/base.py +946 -0
- agno/db/dynamo/__init__.py +3 -0
- agno/db/dynamo/dynamo.py +2781 -0
- agno/db/dynamo/schemas.py +442 -0
- agno/db/dynamo/utils.py +743 -0
- agno/db/firestore/__init__.py +3 -0
- agno/db/firestore/firestore.py +2379 -0
- agno/db/firestore/schemas.py +181 -0
- agno/db/firestore/utils.py +376 -0
- agno/db/gcs_json/__init__.py +3 -0
- agno/db/gcs_json/gcs_json_db.py +1791 -0
- agno/db/gcs_json/utils.py +228 -0
- agno/db/in_memory/__init__.py +3 -0
- agno/db/in_memory/in_memory_db.py +1312 -0
- agno/db/in_memory/utils.py +230 -0
- agno/db/json/__init__.py +3 -0
- agno/db/json/json_db.py +1777 -0
- agno/db/json/utils.py +230 -0
- agno/db/migrations/manager.py +199 -0
- agno/db/migrations/v1_to_v2.py +635 -0
- agno/db/migrations/versions/v2_3_0.py +938 -0
- agno/db/mongo/__init__.py +17 -0
- agno/db/mongo/async_mongo.py +2760 -0
- agno/db/mongo/mongo.py +2597 -0
- agno/db/mongo/schemas.py +119 -0
- agno/db/mongo/utils.py +276 -0
- agno/db/mysql/__init__.py +4 -0
- agno/db/mysql/async_mysql.py +2912 -0
- agno/db/mysql/mysql.py +2923 -0
- agno/db/mysql/schemas.py +186 -0
- agno/db/mysql/utils.py +488 -0
- agno/db/postgres/__init__.py +4 -0
- agno/db/postgres/async_postgres.py +2579 -0
- agno/db/postgres/postgres.py +2870 -0
- agno/db/postgres/schemas.py +187 -0
- agno/db/postgres/utils.py +442 -0
- agno/db/redis/__init__.py +3 -0
- agno/db/redis/redis.py +2141 -0
- agno/db/redis/schemas.py +159 -0
- agno/db/redis/utils.py +346 -0
- agno/db/schemas/__init__.py +4 -0
- agno/db/schemas/culture.py +120 -0
- agno/db/schemas/evals.py +34 -0
- agno/db/schemas/knowledge.py +40 -0
- agno/db/schemas/memory.py +61 -0
- agno/db/singlestore/__init__.py +3 -0
- agno/db/singlestore/schemas.py +179 -0
- agno/db/singlestore/singlestore.py +2877 -0
- agno/db/singlestore/utils.py +384 -0
- agno/db/sqlite/__init__.py +4 -0
- agno/db/sqlite/async_sqlite.py +2911 -0
- agno/db/sqlite/schemas.py +181 -0
- agno/db/sqlite/sqlite.py +2908 -0
- agno/db/sqlite/utils.py +429 -0
- agno/db/surrealdb/__init__.py +3 -0
- agno/db/surrealdb/metrics.py +292 -0
- agno/db/surrealdb/models.py +334 -0
- agno/db/surrealdb/queries.py +71 -0
- agno/db/surrealdb/surrealdb.py +1908 -0
- agno/db/surrealdb/utils.py +147 -0
- agno/db/utils.py +118 -0
- agno/eval/__init__.py +24 -0
- agno/eval/accuracy.py +666 -276
- agno/eval/agent_as_judge.py +861 -0
- agno/eval/base.py +29 -0
- agno/eval/performance.py +779 -0
- agno/eval/reliability.py +241 -62
- agno/eval/utils.py +120 -0
- agno/exceptions.py +143 -1
- agno/filters.py +354 -0
- agno/guardrails/__init__.py +6 -0
- agno/guardrails/base.py +19 -0
- agno/guardrails/openai.py +144 -0
- agno/guardrails/pii.py +94 -0
- agno/guardrails/prompt_injection.py +52 -0
- agno/hooks/__init__.py +3 -0
- agno/hooks/decorator.py +164 -0
- agno/integrations/discord/__init__.py +3 -0
- agno/integrations/discord/client.py +203 -0
- agno/knowledge/__init__.py +5 -1
- agno/{document → knowledge}/chunking/agentic.py +22 -14
- agno/{document → knowledge}/chunking/document.py +2 -2
- agno/{document → knowledge}/chunking/fixed.py +7 -6
- agno/knowledge/chunking/markdown.py +151 -0
- agno/{document → knowledge}/chunking/recursive.py +15 -3
- agno/knowledge/chunking/row.py +39 -0
- agno/knowledge/chunking/semantic.py +91 -0
- agno/knowledge/chunking/strategy.py +165 -0
- agno/knowledge/content.py +74 -0
- agno/knowledge/document/__init__.py +5 -0
- agno/{document → knowledge/document}/base.py +12 -2
- agno/knowledge/embedder/__init__.py +5 -0
- agno/knowledge/embedder/aws_bedrock.py +343 -0
- agno/knowledge/embedder/azure_openai.py +210 -0
- agno/{embedder → knowledge/embedder}/base.py +8 -0
- agno/knowledge/embedder/cohere.py +323 -0
- agno/knowledge/embedder/fastembed.py +62 -0
- agno/{embedder → knowledge/embedder}/fireworks.py +1 -1
- agno/knowledge/embedder/google.py +258 -0
- agno/knowledge/embedder/huggingface.py +94 -0
- agno/knowledge/embedder/jina.py +182 -0
- agno/knowledge/embedder/langdb.py +22 -0
- agno/knowledge/embedder/mistral.py +206 -0
- agno/knowledge/embedder/nebius.py +13 -0
- agno/knowledge/embedder/ollama.py +154 -0
- agno/knowledge/embedder/openai.py +195 -0
- agno/knowledge/embedder/sentence_transformer.py +63 -0
- agno/{embedder → knowledge/embedder}/together.py +1 -1
- agno/knowledge/embedder/vllm.py +262 -0
- agno/knowledge/embedder/voyageai.py +165 -0
- agno/knowledge/knowledge.py +3006 -0
- agno/knowledge/reader/__init__.py +7 -0
- agno/knowledge/reader/arxiv_reader.py +81 -0
- agno/knowledge/reader/base.py +95 -0
- agno/knowledge/reader/csv_reader.py +164 -0
- agno/knowledge/reader/docx_reader.py +82 -0
- agno/knowledge/reader/field_labeled_csv_reader.py +290 -0
- agno/knowledge/reader/firecrawl_reader.py +201 -0
- agno/knowledge/reader/json_reader.py +88 -0
- agno/knowledge/reader/markdown_reader.py +137 -0
- agno/knowledge/reader/pdf_reader.py +431 -0
- agno/knowledge/reader/pptx_reader.py +101 -0
- agno/knowledge/reader/reader_factory.py +313 -0
- agno/knowledge/reader/s3_reader.py +89 -0
- agno/knowledge/reader/tavily_reader.py +193 -0
- agno/knowledge/reader/text_reader.py +127 -0
- agno/knowledge/reader/web_search_reader.py +325 -0
- agno/knowledge/reader/website_reader.py +455 -0
- agno/knowledge/reader/wikipedia_reader.py +91 -0
- agno/knowledge/reader/youtube_reader.py +78 -0
- agno/knowledge/remote_content/remote_content.py +88 -0
- agno/knowledge/reranker/__init__.py +3 -0
- agno/{reranker → knowledge/reranker}/base.py +1 -1
- agno/{reranker → knowledge/reranker}/cohere.py +2 -2
- agno/knowledge/reranker/infinity.py +195 -0
- agno/knowledge/reranker/sentence_transformer.py +54 -0
- agno/knowledge/types.py +39 -0
- agno/knowledge/utils.py +234 -0
- agno/media.py +439 -95
- agno/memory/__init__.py +16 -3
- agno/memory/manager.py +1474 -123
- agno/memory/strategies/__init__.py +15 -0
- agno/memory/strategies/base.py +66 -0
- agno/memory/strategies/summarize.py +196 -0
- agno/memory/strategies/types.py +37 -0
- agno/models/aimlapi/__init__.py +5 -0
- agno/models/aimlapi/aimlapi.py +62 -0
- agno/models/anthropic/__init__.py +4 -0
- agno/models/anthropic/claude.py +960 -496
- agno/models/aws/__init__.py +15 -0
- agno/models/aws/bedrock.py +686 -451
- agno/models/aws/claude.py +190 -183
- agno/models/azure/__init__.py +18 -1
- agno/models/azure/ai_foundry.py +489 -0
- agno/models/azure/openai_chat.py +89 -40
- agno/models/base.py +2477 -550
- agno/models/cerebras/__init__.py +12 -0
- agno/models/cerebras/cerebras.py +565 -0
- agno/models/cerebras/cerebras_openai.py +131 -0
- agno/models/cohere/__init__.py +4 -0
- agno/models/cohere/chat.py +306 -492
- agno/models/cometapi/__init__.py +5 -0
- agno/models/cometapi/cometapi.py +74 -0
- agno/models/dashscope/__init__.py +5 -0
- agno/models/dashscope/dashscope.py +90 -0
- agno/models/deepinfra/__init__.py +5 -0
- agno/models/deepinfra/deepinfra.py +45 -0
- agno/models/deepseek/__init__.py +4 -0
- agno/models/deepseek/deepseek.py +110 -9
- agno/models/fireworks/__init__.py +4 -0
- agno/models/fireworks/fireworks.py +19 -22
- agno/models/google/__init__.py +3 -7
- agno/models/google/gemini.py +1717 -662
- agno/models/google/utils.py +22 -0
- agno/models/groq/__init__.py +4 -0
- agno/models/groq/groq.py +391 -666
- agno/models/huggingface/__init__.py +4 -0
- agno/models/huggingface/huggingface.py +266 -538
- agno/models/ibm/__init__.py +5 -0
- agno/models/ibm/watsonx.py +432 -0
- agno/models/internlm/__init__.py +3 -0
- agno/models/internlm/internlm.py +20 -3
- agno/models/langdb/__init__.py +1 -0
- agno/models/langdb/langdb.py +60 -0
- agno/models/litellm/__init__.py +14 -0
- agno/models/litellm/chat.py +503 -0
- agno/models/litellm/litellm_openai.py +42 -0
- agno/models/llama_cpp/__init__.py +5 -0
- agno/models/llama_cpp/llama_cpp.py +22 -0
- agno/models/lmstudio/__init__.py +5 -0
- agno/models/lmstudio/lmstudio.py +25 -0
- agno/models/message.py +361 -39
- agno/models/meta/__init__.py +12 -0
- agno/models/meta/llama.py +502 -0
- agno/models/meta/llama_openai.py +79 -0
- agno/models/metrics.py +120 -0
- agno/models/mistral/__init__.py +4 -0
- agno/models/mistral/mistral.py +293 -393
- agno/models/nebius/__init__.py +3 -0
- agno/models/nebius/nebius.py +53 -0
- agno/models/nexus/__init__.py +3 -0
- agno/models/nexus/nexus.py +22 -0
- agno/models/nvidia/__init__.py +4 -0
- agno/models/nvidia/nvidia.py +22 -3
- agno/models/ollama/__init__.py +4 -2
- agno/models/ollama/chat.py +257 -492
- agno/models/openai/__init__.py +7 -0
- agno/models/openai/chat.py +725 -770
- agno/models/openai/like.py +16 -2
- agno/models/openai/responses.py +1121 -0
- agno/models/openrouter/__init__.py +4 -0
- agno/models/openrouter/openrouter.py +62 -5
- agno/models/perplexity/__init__.py +5 -0
- agno/models/perplexity/perplexity.py +203 -0
- agno/models/portkey/__init__.py +3 -0
- agno/models/portkey/portkey.py +82 -0
- agno/models/requesty/__init__.py +5 -0
- agno/models/requesty/requesty.py +69 -0
- agno/models/response.py +177 -7
- agno/models/sambanova/__init__.py +4 -0
- agno/models/sambanova/sambanova.py +23 -4
- agno/models/siliconflow/__init__.py +5 -0
- agno/models/siliconflow/siliconflow.py +42 -0
- agno/models/together/__init__.py +4 -0
- agno/models/together/together.py +21 -164
- agno/models/utils.py +266 -0
- agno/models/vercel/__init__.py +3 -0
- agno/models/vercel/v0.py +43 -0
- agno/models/vertexai/__init__.py +0 -1
- agno/models/vertexai/claude.py +190 -0
- agno/models/vllm/__init__.py +3 -0
- agno/models/vllm/vllm.py +83 -0
- agno/models/xai/__init__.py +2 -0
- agno/models/xai/xai.py +111 -7
- agno/os/__init__.py +3 -0
- agno/os/app.py +1027 -0
- agno/os/auth.py +244 -0
- agno/os/config.py +126 -0
- agno/os/interfaces/__init__.py +1 -0
- agno/os/interfaces/a2a/__init__.py +3 -0
- agno/os/interfaces/a2a/a2a.py +42 -0
- agno/os/interfaces/a2a/router.py +249 -0
- agno/os/interfaces/a2a/utils.py +924 -0
- agno/os/interfaces/agui/__init__.py +3 -0
- agno/os/interfaces/agui/agui.py +47 -0
- agno/os/interfaces/agui/router.py +147 -0
- agno/os/interfaces/agui/utils.py +574 -0
- agno/os/interfaces/base.py +25 -0
- agno/os/interfaces/slack/__init__.py +3 -0
- agno/os/interfaces/slack/router.py +148 -0
- agno/os/interfaces/slack/security.py +30 -0
- agno/os/interfaces/slack/slack.py +47 -0
- agno/os/interfaces/whatsapp/__init__.py +3 -0
- agno/os/interfaces/whatsapp/router.py +210 -0
- agno/os/interfaces/whatsapp/security.py +55 -0
- agno/os/interfaces/whatsapp/whatsapp.py +36 -0
- agno/os/mcp.py +293 -0
- agno/os/middleware/__init__.py +9 -0
- agno/os/middleware/jwt.py +797 -0
- agno/os/router.py +258 -0
- agno/os/routers/__init__.py +3 -0
- agno/os/routers/agents/__init__.py +3 -0
- agno/os/routers/agents/router.py +599 -0
- agno/os/routers/agents/schema.py +261 -0
- agno/os/routers/evals/__init__.py +3 -0
- agno/os/routers/evals/evals.py +450 -0
- agno/os/routers/evals/schemas.py +174 -0
- agno/os/routers/evals/utils.py +231 -0
- agno/os/routers/health.py +31 -0
- agno/os/routers/home.py +52 -0
- agno/os/routers/knowledge/__init__.py +3 -0
- agno/os/routers/knowledge/knowledge.py +1008 -0
- agno/os/routers/knowledge/schemas.py +178 -0
- agno/os/routers/memory/__init__.py +3 -0
- agno/os/routers/memory/memory.py +661 -0
- agno/os/routers/memory/schemas.py +88 -0
- agno/os/routers/metrics/__init__.py +3 -0
- agno/os/routers/metrics/metrics.py +190 -0
- agno/os/routers/metrics/schemas.py +47 -0
- agno/os/routers/session/__init__.py +3 -0
- agno/os/routers/session/session.py +997 -0
- agno/os/routers/teams/__init__.py +3 -0
- agno/os/routers/teams/router.py +512 -0
- agno/os/routers/teams/schema.py +257 -0
- agno/os/routers/traces/__init__.py +3 -0
- agno/os/routers/traces/schemas.py +414 -0
- agno/os/routers/traces/traces.py +499 -0
- agno/os/routers/workflows/__init__.py +3 -0
- agno/os/routers/workflows/router.py +624 -0
- agno/os/routers/workflows/schema.py +75 -0
- agno/os/schema.py +534 -0
- agno/os/scopes.py +469 -0
- agno/{playground → os}/settings.py +7 -15
- agno/os/utils.py +973 -0
- agno/reasoning/anthropic.py +80 -0
- agno/reasoning/azure_ai_foundry.py +67 -0
- agno/reasoning/deepseek.py +63 -0
- agno/reasoning/default.py +97 -0
- agno/reasoning/gemini.py +73 -0
- agno/reasoning/groq.py +71 -0
- agno/reasoning/helpers.py +24 -1
- agno/reasoning/ollama.py +67 -0
- agno/reasoning/openai.py +86 -0
- agno/reasoning/step.py +2 -1
- agno/reasoning/vertexai.py +76 -0
- agno/run/__init__.py +6 -0
- agno/run/agent.py +822 -0
- agno/run/base.py +247 -0
- agno/run/cancel.py +81 -0
- agno/run/requirement.py +181 -0
- agno/run/team.py +767 -0
- agno/run/workflow.py +708 -0
- agno/session/__init__.py +10 -0
- agno/session/agent.py +260 -0
- agno/session/summary.py +265 -0
- agno/session/team.py +342 -0
- agno/session/workflow.py +501 -0
- agno/table.py +10 -0
- agno/team/__init__.py +37 -0
- agno/team/team.py +9536 -0
- agno/tools/__init__.py +7 -0
- agno/tools/agentql.py +120 -0
- agno/tools/airflow.py +22 -12
- agno/tools/api.py +122 -0
- agno/tools/apify.py +276 -83
- agno/tools/{arxiv_toolkit.py → arxiv.py} +20 -12
- agno/tools/aws_lambda.py +28 -7
- agno/tools/aws_ses.py +66 -0
- agno/tools/baidusearch.py +11 -4
- agno/tools/bitbucket.py +292 -0
- agno/tools/brandfetch.py +213 -0
- agno/tools/bravesearch.py +106 -0
- agno/tools/brightdata.py +367 -0
- agno/tools/browserbase.py +209 -0
- agno/tools/calcom.py +32 -23
- agno/tools/calculator.py +24 -37
- agno/tools/cartesia.py +187 -0
- agno/tools/{clickup_tool.py → clickup.py} +17 -28
- agno/tools/confluence.py +91 -26
- agno/tools/crawl4ai.py +139 -43
- agno/tools/csv_toolkit.py +28 -22
- agno/tools/dalle.py +36 -22
- agno/tools/daytona.py +475 -0
- agno/tools/decorator.py +169 -14
- agno/tools/desi_vocal.py +23 -11
- agno/tools/discord.py +32 -29
- agno/tools/docker.py +716 -0
- agno/tools/duckdb.py +76 -81
- agno/tools/duckduckgo.py +43 -40
- agno/tools/e2b.py +703 -0
- agno/tools/eleven_labs.py +65 -54
- agno/tools/email.py +13 -5
- agno/tools/evm.py +129 -0
- agno/tools/exa.py +324 -42
- agno/tools/fal.py +39 -35
- agno/tools/file.py +196 -30
- agno/tools/file_generation.py +356 -0
- agno/tools/financial_datasets.py +288 -0
- agno/tools/firecrawl.py +108 -33
- agno/tools/function.py +960 -122
- agno/tools/giphy.py +34 -12
- agno/tools/github.py +1294 -97
- agno/tools/gmail.py +922 -0
- agno/tools/google_bigquery.py +117 -0
- agno/tools/google_drive.py +271 -0
- agno/tools/google_maps.py +253 -0
- agno/tools/googlecalendar.py +607 -107
- agno/tools/googlesheets.py +377 -0
- agno/tools/hackernews.py +20 -12
- agno/tools/jina.py +24 -14
- agno/tools/jira.py +48 -19
- agno/tools/knowledge.py +218 -0
- agno/tools/linear.py +82 -43
- agno/tools/linkup.py +58 -0
- agno/tools/local_file_system.py +15 -7
- agno/tools/lumalab.py +41 -26
- agno/tools/mcp/__init__.py +10 -0
- agno/tools/mcp/mcp.py +331 -0
- agno/tools/mcp/multi_mcp.py +347 -0
- agno/tools/mcp/params.py +24 -0
- agno/tools/mcp_toolbox.py +284 -0
- agno/tools/mem0.py +193 -0
- agno/tools/memory.py +419 -0
- agno/tools/mlx_transcribe.py +11 -9
- agno/tools/models/azure_openai.py +190 -0
- agno/tools/models/gemini.py +203 -0
- agno/tools/models/groq.py +158 -0
- agno/tools/models/morph.py +186 -0
- agno/tools/models/nebius.py +124 -0
- agno/tools/models_labs.py +163 -82
- agno/tools/moviepy_video.py +18 -13
- agno/tools/nano_banana.py +151 -0
- agno/tools/neo4j.py +134 -0
- agno/tools/newspaper.py +15 -4
- agno/tools/newspaper4k.py +19 -6
- agno/tools/notion.py +204 -0
- agno/tools/openai.py +181 -17
- agno/tools/openbb.py +27 -20
- agno/tools/opencv.py +321 -0
- agno/tools/openweather.py +233 -0
- agno/tools/oxylabs.py +385 -0
- agno/tools/pandas.py +25 -15
- agno/tools/parallel.py +314 -0
- agno/tools/postgres.py +238 -185
- agno/tools/pubmed.py +125 -13
- agno/tools/python.py +48 -35
- agno/tools/reasoning.py +283 -0
- agno/tools/reddit.py +207 -29
- agno/tools/redshift.py +406 -0
- agno/tools/replicate.py +69 -26
- agno/tools/resend.py +11 -6
- agno/tools/scrapegraph.py +179 -19
- agno/tools/searxng.py +23 -31
- agno/tools/serpapi.py +15 -10
- agno/tools/serper.py +255 -0
- agno/tools/shell.py +23 -12
- agno/tools/shopify.py +1519 -0
- agno/tools/slack.py +56 -14
- agno/tools/sleep.py +8 -6
- agno/tools/spider.py +35 -11
- agno/tools/spotify.py +919 -0
- agno/tools/sql.py +34 -19
- agno/tools/tavily.py +158 -8
- agno/tools/telegram.py +18 -8
- agno/tools/todoist.py +218 -0
- agno/tools/toolkit.py +134 -9
- agno/tools/trafilatura.py +388 -0
- agno/tools/trello.py +25 -28
- agno/tools/twilio.py +18 -9
- agno/tools/user_control_flow.py +78 -0
- agno/tools/valyu.py +228 -0
- agno/tools/visualization.py +467 -0
- agno/tools/webbrowser.py +28 -0
- agno/tools/webex.py +76 -0
- agno/tools/website.py +23 -19
- agno/tools/webtools.py +45 -0
- agno/tools/whatsapp.py +286 -0
- agno/tools/wikipedia.py +28 -19
- agno/tools/workflow.py +285 -0
- agno/tools/{twitter.py → x.py} +142 -46
- agno/tools/yfinance.py +41 -39
- agno/tools/youtube.py +34 -17
- agno/tools/zendesk.py +15 -5
- agno/tools/zep.py +454 -0
- agno/tools/zoom.py +86 -37
- agno/tracing/__init__.py +12 -0
- agno/tracing/exporter.py +157 -0
- agno/tracing/schemas.py +276 -0
- agno/tracing/setup.py +111 -0
- agno/utils/agent.py +938 -0
- agno/utils/audio.py +37 -1
- agno/utils/certs.py +27 -0
- agno/utils/code_execution.py +11 -0
- agno/utils/common.py +103 -20
- agno/utils/cryptography.py +22 -0
- agno/utils/dttm.py +33 -0
- agno/utils/events.py +700 -0
- agno/utils/functions.py +107 -37
- agno/utils/gemini.py +426 -0
- agno/utils/hooks.py +171 -0
- agno/utils/http.py +185 -0
- agno/utils/json_schema.py +159 -37
- agno/utils/knowledge.py +36 -0
- agno/utils/location.py +19 -0
- agno/utils/log.py +221 -8
- agno/utils/mcp.py +214 -0
- agno/utils/media.py +335 -14
- agno/utils/merge_dict.py +22 -1
- agno/utils/message.py +77 -2
- agno/utils/models/ai_foundry.py +50 -0
- agno/utils/models/claude.py +373 -0
- agno/utils/models/cohere.py +94 -0
- agno/utils/models/llama.py +85 -0
- agno/utils/models/mistral.py +100 -0
- agno/utils/models/openai_responses.py +140 -0
- agno/utils/models/schema_utils.py +153 -0
- agno/utils/models/watsonx.py +41 -0
- agno/utils/openai.py +257 -0
- agno/utils/pickle.py +1 -1
- agno/utils/pprint.py +124 -8
- agno/utils/print_response/agent.py +930 -0
- agno/utils/print_response/team.py +1914 -0
- agno/utils/print_response/workflow.py +1668 -0
- agno/utils/prompts.py +111 -0
- agno/utils/reasoning.py +108 -0
- agno/utils/response.py +163 -0
- agno/utils/serialize.py +32 -0
- agno/utils/shell.py +4 -4
- agno/utils/streamlit.py +487 -0
- agno/utils/string.py +204 -51
- agno/utils/team.py +139 -0
- agno/utils/timer.py +9 -2
- agno/utils/tokens.py +657 -0
- agno/utils/tools.py +19 -1
- agno/utils/whatsapp.py +305 -0
- agno/utils/yaml_io.py +3 -3
- agno/vectordb/__init__.py +2 -0
- agno/vectordb/base.py +87 -9
- agno/vectordb/cassandra/__init__.py +5 -1
- agno/vectordb/cassandra/cassandra.py +383 -27
- agno/vectordb/chroma/__init__.py +4 -0
- agno/vectordb/chroma/chromadb.py +748 -83
- agno/vectordb/clickhouse/__init__.py +7 -1
- agno/vectordb/clickhouse/clickhousedb.py +554 -53
- agno/vectordb/couchbase/__init__.py +3 -0
- agno/vectordb/couchbase/couchbase.py +1446 -0
- agno/vectordb/lancedb/__init__.py +5 -0
- agno/vectordb/lancedb/lance_db.py +730 -98
- agno/vectordb/langchaindb/__init__.py +5 -0
- agno/vectordb/langchaindb/langchaindb.py +163 -0
- agno/vectordb/lightrag/__init__.py +5 -0
- agno/vectordb/lightrag/lightrag.py +388 -0
- agno/vectordb/llamaindex/__init__.py +3 -0
- agno/vectordb/llamaindex/llamaindexdb.py +166 -0
- agno/vectordb/milvus/__init__.py +3 -0
- agno/vectordb/milvus/milvus.py +966 -78
- agno/vectordb/mongodb/__init__.py +9 -1
- agno/vectordb/mongodb/mongodb.py +1175 -172
- agno/vectordb/pgvector/__init__.py +8 -0
- agno/vectordb/pgvector/pgvector.py +599 -115
- agno/vectordb/pineconedb/__init__.py +5 -1
- agno/vectordb/pineconedb/pineconedb.py +406 -43
- agno/vectordb/qdrant/__init__.py +4 -0
- agno/vectordb/qdrant/qdrant.py +914 -61
- agno/vectordb/redis/__init__.py +9 -0
- agno/vectordb/redis/redisdb.py +682 -0
- agno/vectordb/singlestore/__init__.py +8 -1
- agno/vectordb/singlestore/singlestore.py +771 -0
- agno/vectordb/surrealdb/__init__.py +3 -0
- agno/vectordb/surrealdb/surrealdb.py +663 -0
- agno/vectordb/upstashdb/__init__.py +5 -0
- agno/vectordb/upstashdb/upstashdb.py +718 -0
- agno/vectordb/weaviate/__init__.py +8 -0
- agno/vectordb/weaviate/index.py +15 -0
- agno/vectordb/weaviate/weaviate.py +1009 -0
- agno/workflow/__init__.py +23 -1
- agno/workflow/agent.py +299 -0
- agno/workflow/condition.py +759 -0
- agno/workflow/loop.py +756 -0
- agno/workflow/parallel.py +853 -0
- agno/workflow/router.py +723 -0
- agno/workflow/step.py +1564 -0
- agno/workflow/steps.py +613 -0
- agno/workflow/types.py +556 -0
- agno/workflow/workflow.py +4327 -514
- agno-2.3.13.dist-info/METADATA +639 -0
- agno-2.3.13.dist-info/RECORD +613 -0
- {agno-0.1.2.dist-info → agno-2.3.13.dist-info}/WHEEL +1 -1
- agno-2.3.13.dist-info/licenses/LICENSE +201 -0
- agno/api/playground.py +0 -91
- agno/api/schemas/playground.py +0 -22
- agno/api/schemas/user.py +0 -22
- agno/api/schemas/workspace.py +0 -46
- agno/api/user.py +0 -160
- agno/api/workspace.py +0 -151
- agno/cli/auth_server.py +0 -118
- agno/cli/config.py +0 -275
- agno/cli/console.py +0 -88
- agno/cli/credentials.py +0 -23
- agno/cli/entrypoint.py +0 -571
- agno/cli/operator.py +0 -355
- agno/cli/settings.py +0 -85
- agno/cli/ws/ws_cli.py +0 -817
- agno/constants.py +0 -13
- agno/document/__init__.py +0 -1
- agno/document/chunking/semantic.py +0 -47
- agno/document/chunking/strategy.py +0 -31
- agno/document/reader/__init__.py +0 -1
- agno/document/reader/arxiv_reader.py +0 -41
- agno/document/reader/base.py +0 -22
- agno/document/reader/csv_reader.py +0 -84
- agno/document/reader/docx_reader.py +0 -46
- agno/document/reader/firecrawl_reader.py +0 -99
- agno/document/reader/json_reader.py +0 -43
- agno/document/reader/pdf_reader.py +0 -219
- agno/document/reader/s3/pdf_reader.py +0 -46
- agno/document/reader/s3/text_reader.py +0 -51
- agno/document/reader/text_reader.py +0 -41
- agno/document/reader/website_reader.py +0 -175
- agno/document/reader/youtube_reader.py +0 -50
- agno/embedder/__init__.py +0 -1
- agno/embedder/azure_openai.py +0 -86
- agno/embedder/cohere.py +0 -72
- agno/embedder/fastembed.py +0 -37
- agno/embedder/google.py +0 -73
- agno/embedder/huggingface.py +0 -54
- agno/embedder/mistral.py +0 -80
- agno/embedder/ollama.py +0 -57
- agno/embedder/openai.py +0 -74
- agno/embedder/sentence_transformer.py +0 -38
- agno/embedder/voyageai.py +0 -64
- agno/eval/perf.py +0 -201
- agno/file/__init__.py +0 -1
- agno/file/file.py +0 -16
- agno/file/local/csv.py +0 -32
- agno/file/local/txt.py +0 -19
- agno/infra/app.py +0 -240
- agno/infra/base.py +0 -144
- agno/infra/context.py +0 -20
- agno/infra/db_app.py +0 -52
- agno/infra/resource.py +0 -205
- agno/infra/resources.py +0 -55
- agno/knowledge/agent.py +0 -230
- agno/knowledge/arxiv.py +0 -22
- agno/knowledge/combined.py +0 -22
- agno/knowledge/csv.py +0 -28
- agno/knowledge/csv_url.py +0 -19
- agno/knowledge/document.py +0 -20
- agno/knowledge/docx.py +0 -30
- agno/knowledge/json.py +0 -28
- agno/knowledge/langchain.py +0 -71
- agno/knowledge/llamaindex.py +0 -66
- agno/knowledge/pdf.py +0 -28
- agno/knowledge/pdf_url.py +0 -26
- agno/knowledge/s3/base.py +0 -60
- agno/knowledge/s3/pdf.py +0 -21
- agno/knowledge/s3/text.py +0 -23
- agno/knowledge/text.py +0 -30
- agno/knowledge/website.py +0 -88
- agno/knowledge/wikipedia.py +0 -31
- agno/knowledge/youtube.py +0 -22
- agno/memory/agent.py +0 -392
- agno/memory/classifier.py +0 -104
- agno/memory/db/__init__.py +0 -1
- agno/memory/db/base.py +0 -42
- agno/memory/db/mongodb.py +0 -189
- agno/memory/db/postgres.py +0 -203
- agno/memory/db/sqlite.py +0 -193
- agno/memory/memory.py +0 -15
- agno/memory/row.py +0 -36
- agno/memory/summarizer.py +0 -192
- agno/memory/summary.py +0 -19
- agno/memory/workflow.py +0 -38
- agno/models/google/gemini_openai.py +0 -26
- agno/models/ollama/hermes.py +0 -221
- agno/models/ollama/tools.py +0 -362
- agno/models/vertexai/gemini.py +0 -595
- agno/playground/__init__.py +0 -3
- agno/playground/async_router.py +0 -421
- agno/playground/deploy.py +0 -249
- agno/playground/operator.py +0 -92
- agno/playground/playground.py +0 -91
- agno/playground/schemas.py +0 -76
- agno/playground/serve.py +0 -55
- agno/playground/sync_router.py +0 -405
- agno/reasoning/agent.py +0 -68
- agno/run/response.py +0 -112
- agno/storage/agent/__init__.py +0 -0
- agno/storage/agent/base.py +0 -38
- agno/storage/agent/dynamodb.py +0 -350
- agno/storage/agent/json.py +0 -92
- agno/storage/agent/mongodb.py +0 -228
- agno/storage/agent/postgres.py +0 -367
- agno/storage/agent/session.py +0 -79
- agno/storage/agent/singlestore.py +0 -303
- agno/storage/agent/sqlite.py +0 -357
- agno/storage/agent/yaml.py +0 -93
- agno/storage/workflow/__init__.py +0 -0
- agno/storage/workflow/base.py +0 -40
- agno/storage/workflow/mongodb.py +0 -233
- agno/storage/workflow/postgres.py +0 -366
- agno/storage/workflow/session.py +0 -60
- agno/storage/workflow/sqlite.py +0 -359
- agno/tools/googlesearch.py +0 -88
- agno/utils/defaults.py +0 -57
- agno/utils/filesystem.py +0 -39
- agno/utils/git.py +0 -52
- agno/utils/json_io.py +0 -30
- agno/utils/load_env.py +0 -19
- agno/utils/py_io.py +0 -19
- agno/utils/pyproject.py +0 -18
- agno/utils/resource_filter.py +0 -31
- agno/vectordb/singlestore/s2vectordb.py +0 -390
- agno/vectordb/singlestore/s2vectordb2.py +0 -355
- agno/workspace/__init__.py +0 -0
- agno/workspace/config.py +0 -325
- agno/workspace/enums.py +0 -6
- agno/workspace/helpers.py +0 -48
- agno/workspace/operator.py +0 -758
- agno/workspace/settings.py +0 -63
- agno-0.1.2.dist-info/LICENSE +0 -375
- agno-0.1.2.dist-info/METADATA +0 -502
- agno-0.1.2.dist-info/RECORD +0 -352
- agno-0.1.2.dist-info/entry_points.txt +0 -3
- /agno/{cli → db/migrations}/__init__.py +0 -0
- /agno/{cli/ws → db/migrations/versions}/__init__.py +0 -0
- /agno/{document/chunking/__init__.py → db/schemas/metrics.py} +0 -0
- /agno/{document/reader/s3 → integrations}/__init__.py +0 -0
- /agno/{file/local → knowledge/chunking}/__init__.py +0 -0
- /agno/{infra → knowledge/remote_content}/__init__.py +0 -0
- /agno/{knowledge/s3 → tools/models}/__init__.py +0 -0
- /agno/{reranker → utils/models}/__init__.py +0 -0
- /agno/{storage → utils/print_response}/__init__.py +0 -0
- {agno-0.1.2.dist-info → agno-2.3.13.dist-info}/top_level.txt +0 -0
|
@@ -1,19 +1,36 @@
|
|
|
1
|
+
import asyncio
|
|
1
2
|
from typing import Any, Dict, List, Optional, Union
|
|
2
3
|
|
|
3
4
|
try:
|
|
5
|
+
from packaging import version
|
|
6
|
+
from pinecone import __version__
|
|
7
|
+
|
|
8
|
+
if version.parse(__version__).major >= 6:
|
|
9
|
+
import warnings
|
|
10
|
+
|
|
11
|
+
warnings.warn(
|
|
12
|
+
"We do not yet support Pinecone v6.x.x. We are actively working to achieve compatibility. "
|
|
13
|
+
"In the meantime, we recommend using Pinecone v5.4.2 for the best experience. Please run `pip install pinecone==5.4.2`",
|
|
14
|
+
UserWarning,
|
|
15
|
+
)
|
|
16
|
+
raise RuntimeError("Incompatible Pinecone version detected. Execution halted.")
|
|
17
|
+
|
|
4
18
|
from pinecone import Pinecone, PodSpec, ServerlessSpec
|
|
5
19
|
from pinecone.config import Config
|
|
20
|
+
|
|
6
21
|
except ImportError:
|
|
7
22
|
raise ImportError("The `pinecone` package is not installed, please install using `pip install pinecone`.")
|
|
8
23
|
|
|
9
|
-
|
|
10
|
-
from agno.
|
|
11
|
-
from agno.
|
|
12
|
-
from agno.
|
|
24
|
+
|
|
25
|
+
from agno.filters import FilterExpr
|
|
26
|
+
from agno.knowledge.document import Document
|
|
27
|
+
from agno.knowledge.embedder import Embedder
|
|
28
|
+
from agno.knowledge.reranker.base import Reranker
|
|
29
|
+
from agno.utils.log import log_debug, log_info, log_warning, logger
|
|
13
30
|
from agno.vectordb.base import VectorDb
|
|
14
31
|
|
|
15
32
|
|
|
16
|
-
class
|
|
33
|
+
class PineconeDb(VectorDb):
|
|
17
34
|
"""A class representing a Pinecone database.
|
|
18
35
|
|
|
19
36
|
Args:
|
|
@@ -23,6 +40,7 @@ class PineconeDB(VectorDb):
|
|
|
23
40
|
metric (Optional[str], optional): The metric used for similarity search. Defaults to "cosine".
|
|
24
41
|
additional_headers (Optional[Dict[str, str]], optional): Additional headers to pass to the Pinecone client. Defaults to {}.
|
|
25
42
|
pool_threads (Optional[int], optional): The number of threads to use for the Pinecone client. Defaults to 1.
|
|
43
|
+
namespace (Optional[str], optional): The namespace partition within the index that will be used. Defaults to None.
|
|
26
44
|
timeout (Optional[int], optional): The timeout for Pinecone operations. Defaults to None.
|
|
27
45
|
index_api (Optional[Any], optional): The Index API object. Defaults to None.
|
|
28
46
|
api_key (Optional[str], optional): The Pinecone API key. Defaults to None.
|
|
@@ -49,9 +67,11 @@ class PineconeDB(VectorDb):
|
|
|
49
67
|
|
|
50
68
|
def __init__(
|
|
51
69
|
self,
|
|
52
|
-
name: str,
|
|
53
70
|
dimension: int,
|
|
54
71
|
spec: Union[Dict, ServerlessSpec, PodSpec],
|
|
72
|
+
name: Optional[str] = None,
|
|
73
|
+
description: Optional[str] = None,
|
|
74
|
+
id: Optional[str] = None,
|
|
55
75
|
embedder: Optional[Embedder] = None,
|
|
56
76
|
metric: Optional[str] = "cosine",
|
|
57
77
|
additional_headers: Optional[Dict[str, str]] = None,
|
|
@@ -67,6 +87,23 @@ class PineconeDB(VectorDb):
|
|
|
67
87
|
reranker: Optional[Reranker] = None,
|
|
68
88
|
**kwargs,
|
|
69
89
|
):
|
|
90
|
+
# Validate required parameters
|
|
91
|
+
if dimension is None or dimension <= 0:
|
|
92
|
+
raise ValueError("Dimension must be provided and greater than 0.")
|
|
93
|
+
if spec is None:
|
|
94
|
+
raise ValueError("Spec must be provided for Pinecone index.")
|
|
95
|
+
|
|
96
|
+
# Dynamic ID generation based on unique identifiers
|
|
97
|
+
if id is None:
|
|
98
|
+
from agno.utils.string import generate_id
|
|
99
|
+
|
|
100
|
+
index_name = name or "default_index"
|
|
101
|
+
seed = f"{host or 'pinecone'}#{index_name}#{dimension}"
|
|
102
|
+
id = generate_id(seed)
|
|
103
|
+
|
|
104
|
+
# Initialize base class with name, description, and generated ID
|
|
105
|
+
super().__init__(id=id, name=name, description=description)
|
|
106
|
+
|
|
70
107
|
self._client = None
|
|
71
108
|
self._index = None
|
|
72
109
|
self.api_key: Optional[str] = api_key
|
|
@@ -76,7 +113,6 @@ class PineconeDB(VectorDb):
|
|
|
76
113
|
self.pool_threads: Optional[int] = pool_threads
|
|
77
114
|
self.namespace: Optional[str] = namespace
|
|
78
115
|
self.index_api: Optional[Any] = index_api
|
|
79
|
-
self.name: str = name
|
|
80
116
|
self.dimension: Optional[int] = dimension
|
|
81
117
|
self.spec: Union[Dict, ServerlessSpec, PodSpec] = spec
|
|
82
118
|
self.metric: Optional[str] = metric
|
|
@@ -97,9 +133,10 @@ class PineconeDB(VectorDb):
|
|
|
97
133
|
# Embedder for embedding the document contents
|
|
98
134
|
_embedder = embedder
|
|
99
135
|
if _embedder is None:
|
|
100
|
-
from agno.embedder.openai import OpenAIEmbedder
|
|
136
|
+
from agno.knowledge.embedder.openai import OpenAIEmbedder
|
|
101
137
|
|
|
102
138
|
_embedder = OpenAIEmbedder()
|
|
139
|
+
log_info("Embedder not provided, using OpenAIEmbedder as default.")
|
|
103
140
|
self.embedder: Embedder = _embedder
|
|
104
141
|
self.reranker: Optional[Reranker] = reranker
|
|
105
142
|
|
|
@@ -112,7 +149,7 @@ class PineconeDB(VectorDb):
|
|
|
112
149
|
|
|
113
150
|
"""
|
|
114
151
|
if self._client is None:
|
|
115
|
-
|
|
152
|
+
log_debug("Creating Pinecone Client")
|
|
116
153
|
self._client = Pinecone(
|
|
117
154
|
api_key=self.api_key,
|
|
118
155
|
host=self.host,
|
|
@@ -133,7 +170,7 @@ class PineconeDB(VectorDb):
|
|
|
133
170
|
|
|
134
171
|
"""
|
|
135
172
|
if self._index is None:
|
|
136
|
-
|
|
173
|
+
log_debug(f"Connecting to Pinecone Index: {self.name}")
|
|
137
174
|
self._index = self.client.Index(self.name)
|
|
138
175
|
return self._index
|
|
139
176
|
|
|
@@ -147,14 +184,21 @@ class PineconeDB(VectorDb):
|
|
|
147
184
|
list_indexes = self.client.list_indexes()
|
|
148
185
|
return self.name in list_indexes.names()
|
|
149
186
|
|
|
187
|
+
async def async_exists(self) -> bool:
|
|
188
|
+
"""Check if the index exists asynchronously."""
|
|
189
|
+
return await asyncio.to_thread(self.exists)
|
|
190
|
+
|
|
150
191
|
def create(self) -> None:
|
|
151
192
|
"""Create the index if it does not exist."""
|
|
152
193
|
if not self.exists():
|
|
153
|
-
|
|
194
|
+
log_debug(f"Creating index: {self.name}")
|
|
154
195
|
|
|
155
196
|
if self.use_hybrid_search:
|
|
156
197
|
self.metric = "dotproduct"
|
|
157
198
|
|
|
199
|
+
if self.dimension is None:
|
|
200
|
+
raise ValueError("Dimension is not set for this Pinecone index")
|
|
201
|
+
|
|
158
202
|
self.client.create_index(
|
|
159
203
|
name=self.name,
|
|
160
204
|
dimension=self.dimension,
|
|
@@ -163,25 +207,16 @@ class PineconeDB(VectorDb):
|
|
|
163
207
|
timeout=self.timeout,
|
|
164
208
|
)
|
|
165
209
|
|
|
210
|
+
async def async_create(self) -> None:
|
|
211
|
+
"""Create the index asynchronously if it does not exist."""
|
|
212
|
+
await asyncio.to_thread(self.create)
|
|
213
|
+
|
|
166
214
|
def drop(self) -> None:
|
|
167
215
|
"""Delete the index if it exists."""
|
|
168
216
|
if self.exists():
|
|
169
|
-
|
|
217
|
+
log_debug(f"Deleting index: {self.name}")
|
|
170
218
|
self.client.delete_index(name=self.name, timeout=self.timeout)
|
|
171
219
|
|
|
172
|
-
def doc_exists(self, document: Document) -> bool:
|
|
173
|
-
"""Check if a document exists in the index.
|
|
174
|
-
|
|
175
|
-
Args:
|
|
176
|
-
document (Document): The document to check.
|
|
177
|
-
|
|
178
|
-
Returns:
|
|
179
|
-
bool: True if the document exists, False otherwise.
|
|
180
|
-
|
|
181
|
-
"""
|
|
182
|
-
response = self.index.fetch(ids=[document.id])
|
|
183
|
-
return len(response.vectors) > 0
|
|
184
|
-
|
|
185
220
|
def name_exists(self, name: str) -> bool:
|
|
186
221
|
"""Check if an index with the given name exists.
|
|
187
222
|
|
|
@@ -198,8 +233,23 @@ class PineconeDB(VectorDb):
|
|
|
198
233
|
except Exception:
|
|
199
234
|
return False
|
|
200
235
|
|
|
236
|
+
async def async_name_exists(self, name: str) -> bool:
|
|
237
|
+
"""Check if an index with the given name exists asynchronously."""
|
|
238
|
+
return await asyncio.to_thread(self.name_exists, name)
|
|
239
|
+
|
|
201
240
|
def upsert(
|
|
202
241
|
self,
|
|
242
|
+
content_hash: str,
|
|
243
|
+
documents: List[Document],
|
|
244
|
+
filters: Optional[Dict[str, Any]] = None,
|
|
245
|
+
) -> None:
|
|
246
|
+
if self.content_hash_exists(content_hash):
|
|
247
|
+
self._delete_by_content_hash(content_hash)
|
|
248
|
+
self._upsert(content_hash=content_hash, documents=documents, filters=filters)
|
|
249
|
+
|
|
250
|
+
def _upsert(
|
|
251
|
+
self,
|
|
252
|
+
content_hash: str,
|
|
203
253
|
documents: List[Document],
|
|
204
254
|
filters: Optional[Dict[str, Any]] = None,
|
|
205
255
|
namespace: Optional[str] = None,
|
|
@@ -221,15 +271,145 @@ class PineconeDB(VectorDb):
|
|
|
221
271
|
for document in documents:
|
|
222
272
|
document.embed(embedder=self.embedder)
|
|
223
273
|
document.meta_data["text"] = document.content
|
|
274
|
+
# Include name and content_id in metadata
|
|
275
|
+
metadata = document.meta_data.copy()
|
|
276
|
+
if filters:
|
|
277
|
+
metadata.update(filters)
|
|
278
|
+
|
|
279
|
+
if document.name:
|
|
280
|
+
metadata["name"] = document.name
|
|
281
|
+
if document.content_id:
|
|
282
|
+
metadata["content_id"] = document.content_id
|
|
283
|
+
|
|
284
|
+
metadata["content_hash"] = content_hash
|
|
285
|
+
|
|
224
286
|
data_to_upsert = {
|
|
225
287
|
"id": document.id,
|
|
226
288
|
"values": document.embedding,
|
|
227
|
-
"metadata":
|
|
289
|
+
"metadata": metadata,
|
|
228
290
|
}
|
|
229
291
|
if self.use_hybrid_search:
|
|
230
292
|
data_to_upsert["sparse_values"] = self.sparse_encoder.encode_documents(document.content)
|
|
231
293
|
vectors.append(data_to_upsert)
|
|
232
294
|
|
|
295
|
+
self.index.upsert(
|
|
296
|
+
vectors=vectors,
|
|
297
|
+
namespace=namespace or self.namespace,
|
|
298
|
+
batch_size=batch_size,
|
|
299
|
+
show_progress=show_progress,
|
|
300
|
+
)
|
|
301
|
+
|
|
302
|
+
async def async_upsert(
|
|
303
|
+
self,
|
|
304
|
+
content_hash: str,
|
|
305
|
+
documents: List[Document],
|
|
306
|
+
filters: Optional[Dict[str, Any]] = None,
|
|
307
|
+
namespace: Optional[str] = None,
|
|
308
|
+
batch_size: Optional[int] = None,
|
|
309
|
+
show_progress: bool = False,
|
|
310
|
+
) -> None:
|
|
311
|
+
"""Upsert documents into the index asynchronously with batching."""
|
|
312
|
+
if self.content_hash_exists(content_hash):
|
|
313
|
+
await asyncio.to_thread(self._delete_by_content_hash, content_hash)
|
|
314
|
+
if not documents:
|
|
315
|
+
return
|
|
316
|
+
|
|
317
|
+
# Pinecone has its own batching mechanism, but we'll add an additional layer
|
|
318
|
+
# to process document embedding in parallel
|
|
319
|
+
_batch_size = batch_size or 100
|
|
320
|
+
|
|
321
|
+
# Split documents into batches
|
|
322
|
+
batches = [documents[i : i + _batch_size] for i in range(0, len(documents), _batch_size)]
|
|
323
|
+
log_debug(f"Processing {len(documents)} documents in {len(batches)} batches for upsert")
|
|
324
|
+
|
|
325
|
+
# Process each batch in parallel
|
|
326
|
+
async def process_batch(batch_docs):
|
|
327
|
+
return await self._prepare_vectors(batch_docs, content_hash, filters)
|
|
328
|
+
|
|
329
|
+
# Run all batches in parallel
|
|
330
|
+
batch_vectors = await asyncio.gather(*[process_batch(batch) for batch in batches])
|
|
331
|
+
|
|
332
|
+
# Flatten vectors
|
|
333
|
+
all_vectors = [vector for batch in batch_vectors for vector in batch]
|
|
334
|
+
|
|
335
|
+
# Upsert all vectors
|
|
336
|
+
await asyncio.to_thread(
|
|
337
|
+
self._upsert_vectors, all_vectors, namespace or self.namespace, batch_size, show_progress
|
|
338
|
+
)
|
|
339
|
+
|
|
340
|
+
log_debug(f"Finished async upsert of {len(documents)} documents")
|
|
341
|
+
|
|
342
|
+
async def _prepare_vectors(
|
|
343
|
+
self, documents: List[Document], content_hash: str, filters: Optional[Dict[str, Any]] = None
|
|
344
|
+
) -> List[Dict[str, Any]]:
|
|
345
|
+
"""Prepare vectors for upsert."""
|
|
346
|
+
vectors = []
|
|
347
|
+
|
|
348
|
+
if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
|
|
349
|
+
# Use batch embedding when enabled and supported
|
|
350
|
+
try:
|
|
351
|
+
# Extract content from all documents
|
|
352
|
+
doc_contents = [doc.content for doc in documents]
|
|
353
|
+
|
|
354
|
+
# Get batch embeddings and usage
|
|
355
|
+
embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
|
|
356
|
+
|
|
357
|
+
# Process documents with pre-computed embeddings
|
|
358
|
+
for j, doc in enumerate(documents):
|
|
359
|
+
try:
|
|
360
|
+
if j < len(embeddings):
|
|
361
|
+
doc.embedding = embeddings[j]
|
|
362
|
+
doc.usage = usages[j] if j < len(usages) else None
|
|
363
|
+
except Exception as e:
|
|
364
|
+
logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")
|
|
365
|
+
|
|
366
|
+
except Exception as e:
|
|
367
|
+
# Check if this is a rate limit error - don't fall back as it would make things worse
|
|
368
|
+
error_str = str(e).lower()
|
|
369
|
+
is_rate_limit = any(
|
|
370
|
+
phrase in error_str
|
|
371
|
+
for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
|
|
372
|
+
)
|
|
373
|
+
|
|
374
|
+
if is_rate_limit:
|
|
375
|
+
logger.error(f"Rate limit detected during batch embedding. {e}")
|
|
376
|
+
raise e
|
|
377
|
+
else:
|
|
378
|
+
logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
|
|
379
|
+
# Fall back to individual embedding
|
|
380
|
+
embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
|
|
381
|
+
await asyncio.gather(*embed_tasks, return_exceptions=True)
|
|
382
|
+
else:
|
|
383
|
+
# Use individual embedding
|
|
384
|
+
embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
|
|
385
|
+
await asyncio.gather(*embed_tasks, return_exceptions=True)
|
|
386
|
+
|
|
387
|
+
for doc in documents:
|
|
388
|
+
doc.meta_data["text"] = doc.content
|
|
389
|
+
# Include name and content_id in metadata
|
|
390
|
+
metadata = doc.meta_data.copy()
|
|
391
|
+
if filters:
|
|
392
|
+
metadata.update(filters)
|
|
393
|
+
|
|
394
|
+
if doc.name:
|
|
395
|
+
metadata["name"] = doc.name
|
|
396
|
+
if doc.content_id:
|
|
397
|
+
metadata["content_id"] = doc.content_id
|
|
398
|
+
|
|
399
|
+
metadata["content_hash"] = content_hash
|
|
400
|
+
|
|
401
|
+
data_to_upsert = {
|
|
402
|
+
"id": doc.id,
|
|
403
|
+
"values": doc.embedding,
|
|
404
|
+
"metadata": metadata,
|
|
405
|
+
}
|
|
406
|
+
if self.use_hybrid_search:
|
|
407
|
+
data_to_upsert["sparse_values"] = self.sparse_encoder.encode_documents(doc.content)
|
|
408
|
+
vectors.append(data_to_upsert)
|
|
409
|
+
return vectors
|
|
410
|
+
|
|
411
|
+
def _upsert_vectors(self, vectors, namespace, batch_size, show_progress):
|
|
412
|
+
"""Upsert vectors to the index."""
|
|
233
413
|
self.index.upsert(
|
|
234
414
|
vectors=vectors,
|
|
235
415
|
namespace=namespace,
|
|
@@ -237,6 +417,12 @@ class PineconeDB(VectorDb):
|
|
|
237
417
|
show_progress=show_progress,
|
|
238
418
|
)
|
|
239
419
|
|
|
420
|
+
async def async_insert(
|
|
421
|
+
self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
|
|
422
|
+
) -> None:
|
|
423
|
+
log_warning("Pinecone does not support insert operations. Redirecting to async_upsert instead.")
|
|
424
|
+
await self.async_upsert(content_hash=content_hash, documents=documents, filters=filters)
|
|
425
|
+
|
|
240
426
|
def upsert_available(self) -> bool:
|
|
241
427
|
"""Check if upsert operation is available.
|
|
242
428
|
|
|
@@ -246,20 +432,9 @@ class PineconeDB(VectorDb):
|
|
|
246
432
|
"""
|
|
247
433
|
return True
|
|
248
434
|
|
|
249
|
-
def insert(self, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
|
|
250
|
-
"
|
|
251
|
-
|
|
252
|
-
This method is not supported by Pinecone. Use `upsert` instead.
|
|
253
|
-
|
|
254
|
-
Args:
|
|
255
|
-
documents (List[Document]): The documents to insert.
|
|
256
|
-
filters (Optional[Dict[str, Any]], optional): The filters for the insert. Defaults to None.
|
|
257
|
-
|
|
258
|
-
Raises:
|
|
259
|
-
NotImplementedError: This method is not supported by Pinecone.
|
|
260
|
-
|
|
261
|
-
"""
|
|
262
|
-
raise NotImplementedError("Pinecone does not support insert operations. Use upsert instead.")
|
|
435
|
+
def insert(self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
|
|
436
|
+
log_warning("Pinecone does not support insert operations. Redirecting to upsert instead.")
|
|
437
|
+
self.upsert(content_hash=content_hash, documents=documents, filters=filters)
|
|
263
438
|
|
|
264
439
|
def _hybrid_scale(self, dense: List[float], sparse: Dict[str, Any], alpha: float):
|
|
265
440
|
"""Hybrid vector scaling using a convex combination
|
|
@@ -283,7 +458,7 @@ class PineconeDB(VectorDb):
|
|
|
283
458
|
self,
|
|
284
459
|
query: str,
|
|
285
460
|
limit: int = 5,
|
|
286
|
-
filters: Optional[Dict[str,
|
|
461
|
+
filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None,
|
|
287
462
|
namespace: Optional[str] = None,
|
|
288
463
|
include_values: Optional[bool] = None,
|
|
289
464
|
) -> List[Document]:
|
|
@@ -301,6 +476,9 @@ class PineconeDB(VectorDb):
|
|
|
301
476
|
List[Document]: The list of matching documents.
|
|
302
477
|
|
|
303
478
|
"""
|
|
479
|
+
if isinstance(filters, List):
|
|
480
|
+
log_warning("Filters Expressions are not supported in PineconeDB. No filters will be applied.")
|
|
481
|
+
filters = None
|
|
304
482
|
dense_embedding = self.embedder.get_embedding(query)
|
|
305
483
|
|
|
306
484
|
if self.use_hybrid_search:
|
|
@@ -316,7 +494,7 @@ class PineconeDB(VectorDb):
|
|
|
316
494
|
vector=hdense,
|
|
317
495
|
sparse_vector=hsparse,
|
|
318
496
|
top_k=limit,
|
|
319
|
-
namespace=namespace,
|
|
497
|
+
namespace=namespace or self.namespace,
|
|
320
498
|
filter=filters,
|
|
321
499
|
include_values=include_values,
|
|
322
500
|
include_metadata=True,
|
|
@@ -325,7 +503,7 @@ class PineconeDB(VectorDb):
|
|
|
325
503
|
response = self.index.query(
|
|
326
504
|
vector=dense_embedding,
|
|
327
505
|
top_k=limit,
|
|
328
|
-
namespace=namespace,
|
|
506
|
+
namespace=namespace or self.namespace,
|
|
329
507
|
filter=filters,
|
|
330
508
|
include_values=include_values,
|
|
331
509
|
include_metadata=True,
|
|
@@ -345,6 +523,17 @@ class PineconeDB(VectorDb):
|
|
|
345
523
|
search_results = self.reranker.rerank(query=query, documents=search_results)
|
|
346
524
|
return search_results
|
|
347
525
|
|
|
526
|
+
async def async_search(
|
|
527
|
+
self,
|
|
528
|
+
query: str,
|
|
529
|
+
limit: int = 5,
|
|
530
|
+
filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None,
|
|
531
|
+
namespace: Optional[str] = None,
|
|
532
|
+
include_values: Optional[bool] = None,
|
|
533
|
+
) -> List[Document]:
|
|
534
|
+
"""Search for similar documents in the index asynchronously."""
|
|
535
|
+
return await asyncio.to_thread(self.search, query, limit, filters, namespace, include_values)
|
|
536
|
+
|
|
348
537
|
def optimize(self) -> None:
|
|
349
538
|
"""Optimize the index.
|
|
350
539
|
|
|
@@ -365,3 +554,177 @@ class PineconeDB(VectorDb):
|
|
|
365
554
|
return True
|
|
366
555
|
except Exception:
|
|
367
556
|
return False
|
|
557
|
+
|
|
558
|
+
async def async_drop(self) -> None:
|
|
559
|
+
raise NotImplementedError(f"Async not supported on {self.__class__.__name__}.")
|
|
560
|
+
|
|
561
|
+
def delete_by_id(self, id: str) -> bool:
|
|
562
|
+
"""Delete a document by ID."""
|
|
563
|
+
try:
|
|
564
|
+
self.index.delete(ids=[id])
|
|
565
|
+
return True
|
|
566
|
+
except Exception as e:
|
|
567
|
+
log_warning(f"Error deleting document with ID {id}: {e}")
|
|
568
|
+
return False
|
|
569
|
+
|
|
570
|
+
def delete_by_name(self, name: str) -> bool:
|
|
571
|
+
"""Delete documents by name (stored in metadata)."""
|
|
572
|
+
try:
|
|
573
|
+
# Delete all documents where metadata.name equals the given name
|
|
574
|
+
self.index.delete(filter={"name": {"$eq": name}})
|
|
575
|
+
return True
|
|
576
|
+
except Exception as e:
|
|
577
|
+
log_warning(f"Error deleting documents with name {name}: {e}")
|
|
578
|
+
return False
|
|
579
|
+
|
|
580
|
+
def delete_by_metadata(self, metadata: Dict[str, Any]) -> bool:
|
|
581
|
+
"""Delete documents by metadata."""
|
|
582
|
+
try:
|
|
583
|
+
# Build filter for metadata matching
|
|
584
|
+
filter_conditions = {}
|
|
585
|
+
for key, value in metadata.items():
|
|
586
|
+
filter_conditions[key] = {"$eq": value}
|
|
587
|
+
|
|
588
|
+
self.index.delete(filter=filter_conditions)
|
|
589
|
+
return True
|
|
590
|
+
except Exception as e:
|
|
591
|
+
log_warning(f"Error deleting documents with metadata {metadata}: {e}")
|
|
592
|
+
return False
|
|
593
|
+
|
|
594
|
+
def delete_by_content_id(self, content_id: str) -> bool:
|
|
595
|
+
"""Delete documents by content ID (stored in metadata)."""
|
|
596
|
+
try:
|
|
597
|
+
# Delete all documents where metadata.content_id equals the given content_id
|
|
598
|
+
self.index.delete(filter={"content_id": {"$eq": content_id}})
|
|
599
|
+
return True
|
|
600
|
+
except Exception as e:
|
|
601
|
+
log_warning(f"Error deleting documents with content_id {content_id}: {e}")
|
|
602
|
+
return False
|
|
603
|
+
|
|
604
|
+
def get_count(self) -> int:
|
|
605
|
+
"""Get the count of documents in the index."""
|
|
606
|
+
try:
|
|
607
|
+
# Pinecone doesn't have a direct count method, so we use describe_index_stats
|
|
608
|
+
stats = self.index.describe_index_stats()
|
|
609
|
+
# The stats include total_vector_count which gives us the count
|
|
610
|
+
return stats.total_vector_count
|
|
611
|
+
except Exception as e:
|
|
612
|
+
log_warning(f"Error getting document count: {e}")
|
|
613
|
+
return 0
|
|
614
|
+
|
|
615
|
+
def id_exists(self, id: str) -> bool:
|
|
616
|
+
"""Check if a document with the given ID exists in the index.
|
|
617
|
+
|
|
618
|
+
Args:
|
|
619
|
+
id (str): The ID to check.
|
|
620
|
+
|
|
621
|
+
Returns:
|
|
622
|
+
bool: True if the document exists, False otherwise.
|
|
623
|
+
"""
|
|
624
|
+
try:
|
|
625
|
+
response = self.index.fetch(ids=[id], namespace=self.namespace)
|
|
626
|
+
return len(response.vectors) > 0
|
|
627
|
+
except Exception as e:
|
|
628
|
+
log_warning(f"Error checking if ID {id} exists: {e}")
|
|
629
|
+
return False
|
|
630
|
+
|
|
631
|
+
def content_hash_exists(self, content_hash: str) -> bool:
|
|
632
|
+
"""Check if documents with the given content hash exist in the index.
|
|
633
|
+
|
|
634
|
+
Args:
|
|
635
|
+
content_hash (str): The content hash to check.
|
|
636
|
+
|
|
637
|
+
Returns:
|
|
638
|
+
bool: True if documents with the content hash exist, False otherwise.
|
|
639
|
+
"""
|
|
640
|
+
try:
|
|
641
|
+
# Use a dummy vector to perform a minimal query with filter
|
|
642
|
+
# We only need to check if any results exist
|
|
643
|
+
if self.dimension is None:
|
|
644
|
+
raise ValueError("Dimension is not set for this Pinecone index")
|
|
645
|
+
dummy_vector = [0.0] * self.dimension
|
|
646
|
+
response = self.index.query(
|
|
647
|
+
vector=dummy_vector,
|
|
648
|
+
top_k=1,
|
|
649
|
+
namespace=self.namespace,
|
|
650
|
+
filter={"content_hash": {"$eq": content_hash}},
|
|
651
|
+
include_metadata=False,
|
|
652
|
+
include_values=False,
|
|
653
|
+
)
|
|
654
|
+
return len(response.matches) > 0
|
|
655
|
+
except Exception as e:
|
|
656
|
+
log_warning(f"Error checking if content_hash {content_hash} exists: {e}")
|
|
657
|
+
return False
|
|
658
|
+
|
|
659
|
+
def _delete_by_content_hash(self, content_hash: str) -> bool:
|
|
660
|
+
"""Delete documents by content hash (stored in metadata).
|
|
661
|
+
|
|
662
|
+
Args:
|
|
663
|
+
content_hash (str): The content hash to delete.
|
|
664
|
+
|
|
665
|
+
Returns:
|
|
666
|
+
bool: True if documents were deleted, False otherwise.
|
|
667
|
+
"""
|
|
668
|
+
try:
|
|
669
|
+
# Delete all documents where metadata.content_hash equals the given content_hash
|
|
670
|
+
self.index.delete(filter={"content_hash": {"$eq": content_hash}}, namespace=self.namespace)
|
|
671
|
+
return True
|
|
672
|
+
except Exception as e:
|
|
673
|
+
log_warning(f"Error deleting documents with content_hash {content_hash}: {e}")
|
|
674
|
+
return False
|
|
675
|
+
|
|
676
|
+
def update_metadata(self, content_id: str, metadata: Dict[str, Any]) -> None:
|
|
677
|
+
"""
|
|
678
|
+
Update the metadata for documents with the given content_id.
|
|
679
|
+
|
|
680
|
+
Args:
|
|
681
|
+
content_id (str): The content ID to update
|
|
682
|
+
metadata (Dict[str, Any]): The metadata to update
|
|
683
|
+
"""
|
|
684
|
+
try:
|
|
685
|
+
# Query for vectors with the given content_id
|
|
686
|
+
query_response = self.index.query(
|
|
687
|
+
filter={"content_id": {"$eq": content_id}},
|
|
688
|
+
top_k=10000, # Get all matching vectors
|
|
689
|
+
include_metadata=True,
|
|
690
|
+
namespace=self.namespace,
|
|
691
|
+
)
|
|
692
|
+
|
|
693
|
+
if not query_response.matches:
|
|
694
|
+
logger.debug(f"No documents found with content_id: {content_id}")
|
|
695
|
+
return
|
|
696
|
+
|
|
697
|
+
# Prepare updates for each matching vector
|
|
698
|
+
update_data = []
|
|
699
|
+
for match in query_response.matches:
|
|
700
|
+
vector_id = match.id
|
|
701
|
+
current_metadata = match.metadata or {}
|
|
702
|
+
|
|
703
|
+
# Merge existing metadata with new metadata
|
|
704
|
+
updated_metadata = current_metadata.copy()
|
|
705
|
+
updated_metadata.update(metadata)
|
|
706
|
+
|
|
707
|
+
if "filters" not in updated_metadata:
|
|
708
|
+
updated_metadata["filters"] = {}
|
|
709
|
+
if isinstance(updated_metadata["filters"], dict):
|
|
710
|
+
updated_metadata["filters"].update(metadata)
|
|
711
|
+
else:
|
|
712
|
+
updated_metadata["filters"] = metadata
|
|
713
|
+
|
|
714
|
+
update_data.append({"id": vector_id, "metadata": updated_metadata})
|
|
715
|
+
|
|
716
|
+
# Update vectors in batches
|
|
717
|
+
batch_size = 100
|
|
718
|
+
for i in range(0, len(update_data), batch_size):
|
|
719
|
+
batch = update_data[i : i + batch_size]
|
|
720
|
+
self.index.update(vectors=batch, namespace=self.namespace)
|
|
721
|
+
|
|
722
|
+
logger.debug(f"Updated metadata for {len(update_data)} documents with content_id: {content_id}")
|
|
723
|
+
|
|
724
|
+
except Exception as e:
|
|
725
|
+
logger.error(f"Error updating metadata for content_id '{content_id}': {e}")
|
|
726
|
+
raise
|
|
727
|
+
|
|
728
|
+
def get_supported_search_types(self) -> List[str]:
|
|
729
|
+
"""Get the supported search types for this vector database."""
|
|
730
|
+
return [] # PineconeDb doesn't use SearchType enum
|