agno 1.8.1__py3-none-any.whl → 2.0.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/__init__.py +8 -0
- agno/agent/__init__.py +19 -27
- agno/agent/agent.py +2778 -4123
- agno/api/agent.py +9 -65
- agno/api/api.py +5 -46
- agno/api/evals.py +6 -17
- agno/api/os.py +17 -0
- agno/api/routes.py +6 -41
- agno/api/schemas/__init__.py +9 -0
- agno/api/schemas/agent.py +5 -21
- agno/api/schemas/evals.py +7 -16
- agno/api/schemas/os.py +14 -0
- agno/api/schemas/team.py +5 -21
- agno/api/schemas/utils.py +21 -0
- agno/api/schemas/workflows.py +11 -7
- agno/api/settings.py +53 -0
- agno/api/team.py +9 -64
- agno/api/workflow.py +28 -0
- agno/cloud/aws/base.py +214 -0
- agno/cloud/aws/s3/__init__.py +2 -0
- agno/cloud/aws/s3/api_client.py +43 -0
- agno/cloud/aws/s3/bucket.py +195 -0
- agno/cloud/aws/s3/object.py +57 -0
- agno/db/__init__.py +24 -0
- agno/db/base.py +245 -0
- agno/db/dynamo/__init__.py +3 -0
- agno/db/dynamo/dynamo.py +1749 -0
- agno/db/dynamo/schemas.py +278 -0
- agno/db/dynamo/utils.py +684 -0
- agno/db/firestore/__init__.py +3 -0
- agno/db/firestore/firestore.py +1438 -0
- agno/db/firestore/schemas.py +130 -0
- agno/db/firestore/utils.py +278 -0
- agno/db/gcs_json/__init__.py +3 -0
- agno/db/gcs_json/gcs_json_db.py +1001 -0
- agno/db/gcs_json/utils.py +194 -0
- agno/db/in_memory/__init__.py +3 -0
- agno/db/in_memory/in_memory_db.py +888 -0
- agno/db/in_memory/utils.py +172 -0
- agno/db/json/__init__.py +3 -0
- agno/db/json/json_db.py +1051 -0
- agno/db/json/utils.py +196 -0
- agno/db/migrations/v1_to_v2.py +162 -0
- agno/db/mongo/__init__.py +3 -0
- agno/db/mongo/mongo.py +1417 -0
- agno/db/mongo/schemas.py +77 -0
- agno/db/mongo/utils.py +204 -0
- agno/db/mysql/__init__.py +3 -0
- agno/db/mysql/mysql.py +1719 -0
- agno/db/mysql/schemas.py +124 -0
- agno/db/mysql/utils.py +298 -0
- agno/db/postgres/__init__.py +3 -0
- agno/db/postgres/postgres.py +1720 -0
- agno/db/postgres/schemas.py +124 -0
- agno/db/postgres/utils.py +281 -0
- agno/db/redis/__init__.py +3 -0
- agno/db/redis/redis.py +1371 -0
- agno/db/redis/schemas.py +109 -0
- agno/db/redis/utils.py +288 -0
- agno/db/schemas/__init__.py +3 -0
- agno/db/schemas/evals.py +33 -0
- agno/db/schemas/knowledge.py +40 -0
- agno/db/schemas/memory.py +46 -0
- agno/db/singlestore/__init__.py +3 -0
- agno/db/singlestore/schemas.py +116 -0
- agno/db/singlestore/singlestore.py +1722 -0
- agno/db/singlestore/utils.py +327 -0
- agno/db/sqlite/__init__.py +3 -0
- agno/db/sqlite/schemas.py +119 -0
- agno/db/sqlite/sqlite.py +1680 -0
- agno/db/sqlite/utils.py +269 -0
- agno/db/utils.py +88 -0
- agno/eval/__init__.py +14 -0
- agno/eval/accuracy.py +142 -43
- agno/eval/performance.py +88 -23
- agno/eval/reliability.py +73 -20
- agno/eval/utils.py +23 -13
- agno/integrations/discord/__init__.py +3 -0
- agno/{app → integrations}/discord/client.py +10 -10
- agno/knowledge/__init__.py +2 -2
- agno/{document → knowledge}/chunking/agentic.py +2 -2
- agno/{document → knowledge}/chunking/document.py +2 -2
- agno/{document → knowledge}/chunking/fixed.py +3 -3
- agno/{document → knowledge}/chunking/markdown.py +2 -2
- agno/{document → knowledge}/chunking/recursive.py +2 -2
- agno/{document → knowledge}/chunking/row.py +2 -2
- agno/knowledge/chunking/semantic.py +59 -0
- agno/knowledge/chunking/strategy.py +121 -0
- agno/knowledge/content.py +74 -0
- agno/knowledge/document/__init__.py +5 -0
- agno/{document → knowledge/document}/base.py +12 -2
- agno/knowledge/embedder/__init__.py +5 -0
- agno/{embedder → knowledge/embedder}/aws_bedrock.py +127 -1
- agno/{embedder → knowledge/embedder}/azure_openai.py +65 -1
- agno/{embedder → knowledge/embedder}/base.py +6 -0
- agno/{embedder → knowledge/embedder}/cohere.py +72 -1
- agno/{embedder → knowledge/embedder}/fastembed.py +17 -1
- agno/{embedder → knowledge/embedder}/fireworks.py +1 -1
- agno/{embedder → knowledge/embedder}/google.py +74 -1
- agno/{embedder → knowledge/embedder}/huggingface.py +36 -2
- agno/{embedder → knowledge/embedder}/jina.py +48 -2
- agno/knowledge/embedder/langdb.py +22 -0
- agno/knowledge/embedder/mistral.py +139 -0
- agno/{embedder → knowledge/embedder}/nebius.py +1 -1
- agno/{embedder → knowledge/embedder}/ollama.py +54 -3
- agno/knowledge/embedder/openai.py +223 -0
- agno/{embedder → knowledge/embedder}/sentence_transformer.py +16 -1
- agno/{embedder → knowledge/embedder}/together.py +1 -1
- agno/{embedder → knowledge/embedder}/voyageai.py +49 -1
- agno/knowledge/knowledge.py +1515 -0
- agno/knowledge/reader/__init__.py +7 -0
- agno/{document → knowledge}/reader/arxiv_reader.py +32 -4
- agno/knowledge/reader/base.py +88 -0
- agno/{document → knowledge}/reader/csv_reader.py +68 -15
- agno/knowledge/reader/docx_reader.py +83 -0
- agno/{document → knowledge}/reader/firecrawl_reader.py +42 -21
- agno/knowledge/reader/gcs_reader.py +67 -0
- agno/{document → knowledge}/reader/json_reader.py +30 -9
- agno/{document → knowledge}/reader/markdown_reader.py +36 -9
- agno/{document → knowledge}/reader/pdf_reader.py +79 -21
- agno/knowledge/reader/reader_factory.py +275 -0
- agno/knowledge/reader/s3_reader.py +171 -0
- agno/{document → knowledge}/reader/text_reader.py +31 -10
- agno/knowledge/reader/url_reader.py +84 -0
- agno/knowledge/reader/web_search_reader.py +389 -0
- agno/{document → knowledge}/reader/website_reader.py +37 -10
- agno/knowledge/reader/wikipedia_reader.py +59 -0
- agno/knowledge/reader/youtube_reader.py +78 -0
- agno/knowledge/remote_content/remote_content.py +88 -0
- agno/{reranker → knowledge/reranker}/base.py +1 -1
- agno/{reranker → knowledge/reranker}/cohere.py +2 -2
- agno/{reranker → knowledge/reranker}/infinity.py +2 -2
- agno/{reranker → knowledge/reranker}/sentence_transformer.py +2 -2
- agno/knowledge/types.py +30 -0
- agno/knowledge/utils.py +169 -0
- agno/memory/__init__.py +2 -10
- agno/memory/manager.py +1003 -148
- agno/models/aimlapi/__init__.py +2 -2
- agno/models/aimlapi/aimlapi.py +6 -6
- agno/models/anthropic/claude.py +129 -82
- agno/models/aws/bedrock.py +107 -175
- agno/models/aws/claude.py +64 -18
- agno/models/azure/ai_foundry.py +73 -23
- agno/models/base.py +347 -287
- agno/models/cerebras/cerebras.py +84 -27
- agno/models/cohere/chat.py +106 -98
- agno/models/google/gemini.py +100 -42
- agno/models/groq/groq.py +97 -35
- agno/models/huggingface/huggingface.py +92 -27
- agno/models/ibm/watsonx.py +72 -13
- agno/models/litellm/chat.py +85 -13
- agno/models/message.py +38 -144
- agno/models/meta/llama.py +85 -49
- agno/models/metrics.py +120 -0
- agno/models/mistral/mistral.py +90 -21
- agno/models/ollama/__init__.py +0 -2
- agno/models/ollama/chat.py +84 -46
- agno/models/openai/chat.py +121 -23
- agno/models/openai/responses.py +178 -105
- agno/models/perplexity/perplexity.py +26 -2
- agno/models/portkey/portkey.py +0 -7
- agno/models/response.py +14 -8
- agno/models/utils.py +20 -0
- agno/models/vercel/__init__.py +2 -2
- agno/models/vercel/v0.py +1 -1
- agno/models/vllm/__init__.py +2 -2
- agno/models/vllm/vllm.py +3 -3
- agno/models/xai/xai.py +10 -10
- agno/os/__init__.py +3 -0
- agno/os/app.py +393 -0
- agno/os/auth.py +47 -0
- agno/os/config.py +103 -0
- agno/os/interfaces/agui/__init__.py +3 -0
- agno/os/interfaces/agui/agui.py +31 -0
- agno/{app/agui/async_router.py → os/interfaces/agui/router.py} +16 -16
- agno/{app → os/interfaces}/agui/utils.py +65 -28
- agno/os/interfaces/base.py +21 -0
- agno/os/interfaces/slack/__init__.py +3 -0
- agno/{app/slack/async_router.py → os/interfaces/slack/router.py} +3 -5
- agno/os/interfaces/slack/slack.py +33 -0
- agno/os/interfaces/whatsapp/__init__.py +3 -0
- agno/{app/whatsapp/async_router.py → os/interfaces/whatsapp/router.py} +4 -7
- agno/os/interfaces/whatsapp/whatsapp.py +30 -0
- agno/os/router.py +843 -0
- agno/os/routers/__init__.py +3 -0
- agno/os/routers/evals/__init__.py +3 -0
- agno/os/routers/evals/evals.py +204 -0
- agno/os/routers/evals/schemas.py +142 -0
- agno/os/routers/evals/utils.py +161 -0
- agno/os/routers/knowledge/__init__.py +3 -0
- agno/os/routers/knowledge/knowledge.py +413 -0
- agno/os/routers/knowledge/schemas.py +118 -0
- agno/os/routers/memory/__init__.py +3 -0
- agno/os/routers/memory/memory.py +179 -0
- agno/os/routers/memory/schemas.py +58 -0
- agno/os/routers/metrics/__init__.py +3 -0
- agno/os/routers/metrics/metrics.py +58 -0
- agno/os/routers/metrics/schemas.py +47 -0
- agno/os/routers/session/__init__.py +3 -0
- agno/os/routers/session/session.py +163 -0
- agno/os/schema.py +892 -0
- agno/{app/playground → os}/settings.py +8 -15
- agno/os/utils.py +270 -0
- agno/reasoning/azure_ai_foundry.py +4 -4
- agno/reasoning/deepseek.py +4 -4
- agno/reasoning/default.py +6 -11
- agno/reasoning/groq.py +4 -4
- agno/reasoning/helpers.py +4 -6
- agno/reasoning/ollama.py +4 -4
- agno/reasoning/openai.py +4 -4
- agno/run/{response.py → agent.py} +144 -72
- agno/run/base.py +44 -58
- agno/run/cancel.py +83 -0
- agno/run/team.py +133 -77
- agno/run/workflow.py +537 -12
- agno/session/__init__.py +10 -0
- agno/session/agent.py +244 -0
- agno/session/summary.py +225 -0
- agno/session/team.py +262 -0
- agno/{storage/session/v2 → session}/workflow.py +47 -24
- agno/team/__init__.py +15 -16
- agno/team/team.py +2961 -4253
- agno/tools/agentql.py +14 -5
- agno/tools/airflow.py +9 -4
- agno/tools/api.py +7 -3
- agno/tools/apify.py +2 -46
- agno/tools/arxiv.py +8 -3
- agno/tools/aws_lambda.py +7 -5
- agno/tools/aws_ses.py +7 -1
- agno/tools/baidusearch.py +4 -1
- agno/tools/bitbucket.py +4 -4
- agno/tools/brandfetch.py +14 -11
- agno/tools/bravesearch.py +4 -1
- agno/tools/brightdata.py +42 -22
- agno/tools/browserbase.py +13 -4
- agno/tools/calcom.py +12 -10
- agno/tools/calculator.py +10 -27
- agno/tools/cartesia.py +18 -13
- agno/tools/{clickup_tool.py → clickup.py} +12 -25
- agno/tools/confluence.py +8 -8
- agno/tools/crawl4ai.py +7 -1
- agno/tools/csv_toolkit.py +9 -8
- agno/tools/dalle.py +18 -11
- agno/tools/daytona.py +13 -16
- agno/tools/decorator.py +6 -3
- agno/tools/desi_vocal.py +16 -7
- agno/tools/discord.py +11 -8
- agno/tools/docker.py +30 -42
- agno/tools/duckdb.py +34 -53
- agno/tools/duckduckgo.py +8 -7
- agno/tools/e2b.py +61 -61
- agno/tools/eleven_labs.py +35 -28
- agno/tools/email.py +4 -1
- agno/tools/evm.py +7 -1
- agno/tools/exa.py +19 -14
- agno/tools/fal.py +29 -29
- agno/tools/file.py +9 -8
- agno/tools/financial_datasets.py +25 -44
- agno/tools/firecrawl.py +22 -22
- agno/tools/function.py +68 -17
- agno/tools/giphy.py +22 -10
- agno/tools/github.py +48 -126
- agno/tools/gmail.py +45 -61
- agno/tools/google_bigquery.py +7 -6
- agno/tools/google_maps.py +11 -26
- agno/tools/googlesearch.py +7 -2
- agno/tools/googlesheets.py +21 -17
- agno/tools/hackernews.py +9 -5
- agno/tools/jina.py +5 -4
- agno/tools/jira.py +18 -9
- agno/tools/knowledge.py +31 -32
- agno/tools/linear.py +18 -33
- agno/tools/linkup.py +5 -1
- agno/tools/local_file_system.py +8 -5
- agno/tools/lumalab.py +31 -19
- agno/tools/mem0.py +18 -12
- agno/tools/memori.py +14 -10
- agno/tools/mlx_transcribe.py +3 -2
- agno/tools/models/azure_openai.py +32 -14
- agno/tools/models/gemini.py +58 -31
- agno/tools/models/groq.py +29 -20
- agno/tools/models/nebius.py +27 -11
- agno/tools/models_labs.py +39 -15
- agno/tools/moviepy_video.py +7 -6
- agno/tools/neo4j.py +10 -8
- agno/tools/newspaper.py +7 -2
- agno/tools/newspaper4k.py +8 -3
- agno/tools/openai.py +57 -26
- agno/tools/openbb.py +12 -11
- agno/tools/opencv.py +62 -46
- agno/tools/openweather.py +14 -12
- agno/tools/pandas.py +11 -3
- agno/tools/postgres.py +4 -12
- agno/tools/pubmed.py +4 -1
- agno/tools/python.py +9 -22
- agno/tools/reasoning.py +35 -27
- agno/tools/reddit.py +11 -26
- agno/tools/replicate.py +54 -41
- agno/tools/resend.py +4 -1
- agno/tools/scrapegraph.py +15 -14
- agno/tools/searxng.py +10 -23
- agno/tools/serpapi.py +6 -3
- agno/tools/serper.py +13 -4
- agno/tools/shell.py +9 -2
- agno/tools/slack.py +12 -11
- agno/tools/sleep.py +3 -2
- agno/tools/spider.py +24 -4
- agno/tools/sql.py +7 -6
- agno/tools/tavily.py +6 -4
- agno/tools/telegram.py +12 -4
- agno/tools/todoist.py +11 -31
- agno/tools/toolkit.py +1 -1
- agno/tools/trafilatura.py +22 -6
- agno/tools/trello.py +9 -22
- agno/tools/twilio.py +10 -3
- agno/tools/user_control_flow.py +6 -1
- agno/tools/valyu.py +34 -5
- agno/tools/visualization.py +19 -28
- agno/tools/webbrowser.py +4 -3
- agno/tools/webex.py +11 -7
- agno/tools/website.py +15 -46
- agno/tools/webtools.py +12 -4
- agno/tools/whatsapp.py +5 -9
- agno/tools/wikipedia.py +20 -13
- agno/tools/x.py +14 -13
- agno/tools/yfinance.py +13 -40
- agno/tools/youtube.py +26 -20
- agno/tools/zendesk.py +7 -2
- agno/tools/zep.py +10 -7
- agno/tools/zoom.py +10 -9
- agno/utils/common.py +1 -19
- agno/utils/events.py +95 -118
- agno/utils/knowledge.py +29 -0
- agno/utils/log.py +2 -2
- agno/utils/mcp.py +11 -5
- agno/utils/media.py +39 -0
- agno/utils/message.py +12 -1
- agno/utils/models/claude.py +6 -4
- agno/utils/models/mistral.py +8 -7
- agno/utils/models/schema_utils.py +3 -3
- agno/utils/pprint.py +33 -32
- agno/utils/print_response/agent.py +779 -0
- agno/utils/print_response/team.py +1565 -0
- agno/utils/print_response/workflow.py +1451 -0
- agno/utils/prompts.py +14 -14
- agno/utils/reasoning.py +87 -0
- agno/utils/response.py +42 -42
- agno/utils/string.py +8 -22
- agno/utils/team.py +50 -0
- agno/utils/timer.py +2 -2
- agno/vectordb/base.py +33 -21
- agno/vectordb/cassandra/cassandra.py +287 -23
- agno/vectordb/chroma/chromadb.py +482 -59
- agno/vectordb/clickhouse/clickhousedb.py +270 -63
- agno/vectordb/couchbase/couchbase.py +309 -29
- agno/vectordb/lancedb/lance_db.py +360 -21
- agno/vectordb/langchaindb/__init__.py +5 -0
- agno/vectordb/langchaindb/langchaindb.py +145 -0
- agno/vectordb/lightrag/__init__.py +5 -0
- agno/vectordb/lightrag/lightrag.py +374 -0
- agno/vectordb/llamaindex/llamaindexdb.py +127 -0
- agno/vectordb/milvus/milvus.py +242 -32
- agno/vectordb/mongodb/mongodb.py +200 -24
- agno/vectordb/pgvector/pgvector.py +319 -37
- agno/vectordb/pineconedb/pineconedb.py +221 -27
- agno/vectordb/qdrant/qdrant.py +334 -14
- agno/vectordb/singlestore/singlestore.py +286 -29
- agno/vectordb/surrealdb/surrealdb.py +187 -7
- agno/vectordb/upstashdb/upstashdb.py +342 -26
- agno/vectordb/weaviate/weaviate.py +227 -165
- agno/workflow/__init__.py +17 -13
- agno/workflow/{v2/condition.py → condition.py} +135 -32
- agno/workflow/{v2/loop.py → loop.py} +115 -28
- agno/workflow/{v2/parallel.py → parallel.py} +138 -108
- agno/workflow/{v2/router.py → router.py} +133 -32
- agno/workflow/{v2/step.py → step.py} +200 -42
- agno/workflow/{v2/steps.py → steps.py} +147 -66
- agno/workflow/types.py +482 -0
- agno/workflow/workflow.py +2394 -696
- agno-2.0.0a1.dist-info/METADATA +355 -0
- agno-2.0.0a1.dist-info/RECORD +514 -0
- agno/agent/metrics.py +0 -107
- agno/api/app.py +0 -35
- agno/api/playground.py +0 -92
- agno/api/schemas/app.py +0 -12
- agno/api/schemas/playground.py +0 -22
- agno/api/schemas/user.py +0 -35
- agno/api/schemas/workspace.py +0 -46
- agno/api/user.py +0 -160
- agno/api/workflows.py +0 -33
- agno/api/workspace.py +0 -175
- agno/app/agui/__init__.py +0 -3
- agno/app/agui/app.py +0 -17
- agno/app/agui/sync_router.py +0 -120
- agno/app/base.py +0 -186
- agno/app/discord/__init__.py +0 -3
- agno/app/fastapi/__init__.py +0 -3
- agno/app/fastapi/app.py +0 -107
- agno/app/fastapi/async_router.py +0 -457
- agno/app/fastapi/sync_router.py +0 -448
- agno/app/playground/app.py +0 -228
- agno/app/playground/async_router.py +0 -1050
- agno/app/playground/deploy.py +0 -249
- agno/app/playground/operator.py +0 -183
- agno/app/playground/schemas.py +0 -220
- agno/app/playground/serve.py +0 -55
- agno/app/playground/sync_router.py +0 -1042
- agno/app/playground/utils.py +0 -46
- agno/app/settings.py +0 -15
- agno/app/slack/__init__.py +0 -3
- agno/app/slack/app.py +0 -19
- agno/app/slack/sync_router.py +0 -92
- agno/app/utils.py +0 -54
- agno/app/whatsapp/__init__.py +0 -3
- agno/app/whatsapp/app.py +0 -15
- agno/app/whatsapp/sync_router.py +0 -197
- agno/cli/auth_server.py +0 -249
- agno/cli/config.py +0 -274
- agno/cli/console.py +0 -88
- agno/cli/credentials.py +0 -23
- agno/cli/entrypoint.py +0 -571
- agno/cli/operator.py +0 -357
- agno/cli/settings.py +0 -96
- agno/cli/ws/ws_cli.py +0 -817
- agno/constants.py +0 -13
- agno/document/__init__.py +0 -5
- agno/document/chunking/semantic.py +0 -45
- agno/document/chunking/strategy.py +0 -31
- agno/document/reader/__init__.py +0 -5
- agno/document/reader/base.py +0 -47
- agno/document/reader/docx_reader.py +0 -60
- agno/document/reader/gcs/pdf_reader.py +0 -44
- agno/document/reader/s3/pdf_reader.py +0 -59
- agno/document/reader/s3/text_reader.py +0 -63
- agno/document/reader/url_reader.py +0 -59
- agno/document/reader/youtube_reader.py +0 -58
- agno/embedder/__init__.py +0 -5
- agno/embedder/langdb.py +0 -80
- agno/embedder/mistral.py +0 -82
- agno/embedder/openai.py +0 -78
- agno/file/__init__.py +0 -5
- agno/file/file.py +0 -16
- agno/file/local/csv.py +0 -32
- agno/file/local/txt.py +0 -19
- agno/infra/app.py +0 -240
- agno/infra/base.py +0 -144
- agno/infra/context.py +0 -20
- agno/infra/db_app.py +0 -52
- agno/infra/resource.py +0 -205
- agno/infra/resources.py +0 -55
- agno/knowledge/agent.py +0 -702
- agno/knowledge/arxiv.py +0 -33
- agno/knowledge/combined.py +0 -36
- agno/knowledge/csv.py +0 -144
- agno/knowledge/csv_url.py +0 -124
- agno/knowledge/document.py +0 -223
- agno/knowledge/docx.py +0 -137
- agno/knowledge/firecrawl.py +0 -34
- agno/knowledge/gcs/__init__.py +0 -0
- agno/knowledge/gcs/base.py +0 -39
- agno/knowledge/gcs/pdf.py +0 -125
- agno/knowledge/json.py +0 -137
- agno/knowledge/langchain.py +0 -71
- agno/knowledge/light_rag.py +0 -273
- agno/knowledge/llamaindex.py +0 -66
- agno/knowledge/markdown.py +0 -154
- agno/knowledge/pdf.py +0 -164
- agno/knowledge/pdf_bytes.py +0 -42
- agno/knowledge/pdf_url.py +0 -148
- agno/knowledge/s3/__init__.py +0 -0
- agno/knowledge/s3/base.py +0 -64
- agno/knowledge/s3/pdf.py +0 -33
- agno/knowledge/s3/text.py +0 -34
- agno/knowledge/text.py +0 -141
- agno/knowledge/url.py +0 -46
- agno/knowledge/website.py +0 -179
- agno/knowledge/wikipedia.py +0 -32
- agno/knowledge/youtube.py +0 -35
- agno/memory/agent.py +0 -423
- agno/memory/classifier.py +0 -104
- agno/memory/db/__init__.py +0 -5
- agno/memory/db/base.py +0 -42
- agno/memory/db/mongodb.py +0 -189
- agno/memory/db/postgres.py +0 -203
- agno/memory/db/sqlite.py +0 -193
- agno/memory/memory.py +0 -22
- agno/memory/row.py +0 -36
- agno/memory/summarizer.py +0 -201
- agno/memory/summary.py +0 -19
- agno/memory/team.py +0 -415
- agno/memory/v2/__init__.py +0 -2
- agno/memory/v2/db/__init__.py +0 -1
- agno/memory/v2/db/base.py +0 -42
- agno/memory/v2/db/firestore.py +0 -339
- agno/memory/v2/db/mongodb.py +0 -196
- agno/memory/v2/db/postgres.py +0 -214
- agno/memory/v2/db/redis.py +0 -187
- agno/memory/v2/db/schema.py +0 -54
- agno/memory/v2/db/sqlite.py +0 -209
- agno/memory/v2/manager.py +0 -437
- agno/memory/v2/memory.py +0 -1097
- agno/memory/v2/schema.py +0 -55
- agno/memory/v2/summarizer.py +0 -215
- agno/memory/workflow.py +0 -38
- agno/models/ollama/tools.py +0 -430
- agno/models/qwen/__init__.py +0 -5
- agno/playground/__init__.py +0 -10
- agno/playground/deploy.py +0 -3
- agno/playground/playground.py +0 -3
- agno/playground/serve.py +0 -3
- agno/playground/settings.py +0 -3
- agno/reranker/__init__.py +0 -0
- agno/run/v2/__init__.py +0 -0
- agno/run/v2/workflow.py +0 -567
- agno/storage/__init__.py +0 -0
- agno/storage/agent/__init__.py +0 -0
- agno/storage/agent/dynamodb.py +0 -1
- agno/storage/agent/json.py +0 -1
- agno/storage/agent/mongodb.py +0 -1
- agno/storage/agent/postgres.py +0 -1
- agno/storage/agent/singlestore.py +0 -1
- agno/storage/agent/sqlite.py +0 -1
- agno/storage/agent/yaml.py +0 -1
- agno/storage/base.py +0 -60
- agno/storage/dynamodb.py +0 -673
- agno/storage/firestore.py +0 -297
- agno/storage/gcs_json.py +0 -261
- agno/storage/in_memory.py +0 -234
- agno/storage/json.py +0 -237
- agno/storage/mongodb.py +0 -328
- agno/storage/mysql.py +0 -685
- agno/storage/postgres.py +0 -682
- agno/storage/redis.py +0 -336
- agno/storage/session/__init__.py +0 -16
- agno/storage/session/agent.py +0 -64
- agno/storage/session/team.py +0 -63
- agno/storage/session/v2/__init__.py +0 -5
- agno/storage/session/workflow.py +0 -61
- agno/storage/singlestore.py +0 -606
- agno/storage/sqlite.py +0 -646
- agno/storage/workflow/__init__.py +0 -0
- agno/storage/workflow/mongodb.py +0 -1
- agno/storage/workflow/postgres.py +0 -1
- agno/storage/workflow/sqlite.py +0 -1
- agno/storage/yaml.py +0 -241
- agno/tools/thinking.py +0 -73
- agno/utils/defaults.py +0 -57
- agno/utils/filesystem.py +0 -39
- agno/utils/git.py +0 -52
- agno/utils/json_io.py +0 -30
- agno/utils/load_env.py +0 -19
- agno/utils/py_io.py +0 -19
- agno/utils/pyproject.py +0 -18
- agno/utils/resource_filter.py +0 -31
- agno/workflow/v2/__init__.py +0 -21
- agno/workflow/v2/types.py +0 -357
- agno/workflow/v2/workflow.py +0 -3312
- agno/workspace/__init__.py +0 -0
- agno/workspace/config.py +0 -325
- agno/workspace/enums.py +0 -6
- agno/workspace/helpers.py +0 -52
- agno/workspace/operator.py +0 -757
- agno/workspace/settings.py +0 -158
- agno-1.8.1.dist-info/METADATA +0 -982
- agno-1.8.1.dist-info/RECORD +0 -566
- agno-1.8.1.dist-info/entry_points.txt +0 -3
- /agno/{app → db/migrations}/__init__.py +0 -0
- /agno/{app/playground/__init__.py → db/schemas/metrics.py} +0 -0
- /agno/{cli → integrations}/__init__.py +0 -0
- /agno/{cli/ws → knowledge/chunking}/__init__.py +0 -0
- /agno/{document/chunking → knowledge/remote_content}/__init__.py +0 -0
- /agno/{document/reader/gcs → knowledge/reranker}/__init__.py +0 -0
- /agno/{document/reader/s3 → os/interfaces}/__init__.py +0 -0
- /agno/{app → os/interfaces}/slack/security.py +0 -0
- /agno/{app → os/interfaces}/whatsapp/security.py +0 -0
- /agno/{file/local → utils/print_response}/__init__.py +0 -0
- /agno/{infra → vectordb/llamaindex}/__init__.py +0 -0
- {agno-1.8.1.dist-info → agno-2.0.0a1.dist-info}/WHEEL +0 -0
- {agno-1.8.1.dist-info → agno-2.0.0a1.dist-info}/licenses/LICENSE +0 -0
- {agno-1.8.1.dist-info → agno-2.0.0a1.dist-info}/top_level.txt +0 -0
agno/vectordb/chroma/chromadb.py
CHANGED
|
@@ -1,21 +1,21 @@
|
|
|
1
1
|
import asyncio
|
|
2
2
|
from hashlib import md5
|
|
3
|
-
from typing import Any, Dict, List, Optional
|
|
3
|
+
from typing import Any, Dict, List, Mapping, Optional, Union, cast
|
|
4
4
|
|
|
5
5
|
try:
|
|
6
6
|
from chromadb import Client as ChromaDbClient
|
|
7
7
|
from chromadb import PersistentClient as PersistentChromaDbClient
|
|
8
8
|
from chromadb.api.client import ClientAPI
|
|
9
9
|
from chromadb.api.models.Collection import Collection
|
|
10
|
-
from chromadb.api.types import
|
|
10
|
+
from chromadb.api.types import QueryResult
|
|
11
11
|
|
|
12
12
|
except ImportError:
|
|
13
13
|
raise ImportError("The `chromadb` package is not installed. Please install it via `pip install chromadb`.")
|
|
14
14
|
|
|
15
|
-
from agno.document import Document
|
|
16
|
-
from agno.embedder import Embedder
|
|
17
|
-
from agno.reranker.base import Reranker
|
|
18
|
-
from agno.utils.log import log_debug, log_info, logger
|
|
15
|
+
from agno.knowledge.document import Document
|
|
16
|
+
from agno.knowledge.embedder import Embedder
|
|
17
|
+
from agno.knowledge.reranker.base import Reranker
|
|
18
|
+
from agno.utils.log import log_debug, log_error, log_info, logger
|
|
19
19
|
from agno.vectordb.base import VectorDb
|
|
20
20
|
from agno.vectordb.distance import Distance
|
|
21
21
|
|
|
@@ -36,7 +36,7 @@ class ChromaDb(VectorDb):
|
|
|
36
36
|
|
|
37
37
|
# Embedder for embedding the document contents
|
|
38
38
|
if embedder is None:
|
|
39
|
-
from agno.embedder.openai import OpenAIEmbedder
|
|
39
|
+
from agno.knowledge.embedder.openai import OpenAIEmbedder
|
|
40
40
|
|
|
41
41
|
embedder = OpenAIEmbedder()
|
|
42
42
|
log_info("Embedder not provided, using OpenAIEmbedder as default.")
|
|
@@ -91,60 +91,36 @@ class ChromaDb(VectorDb):
|
|
|
91
91
|
"""Create the collection asynchronously by running in a thread."""
|
|
92
92
|
await asyncio.to_thread(self.create)
|
|
93
93
|
|
|
94
|
-
def
|
|
95
|
-
"""Check if a document exists in the collection.
|
|
94
|
+
def name_exists(self, name: str) -> bool:
|
|
95
|
+
"""Check if a document with a given name exists in the collection.
|
|
96
96
|
Args:
|
|
97
|
-
|
|
97
|
+
name (str): Name of the document to check.
|
|
98
98
|
Returns:
|
|
99
|
-
bool: True if document exists, False otherwise.
|
|
100
|
-
"""
|
|
99
|
+
bool: True if document exists, False otherwise."""
|
|
101
100
|
if not self.client:
|
|
102
101
|
logger.warning("Client not initialized")
|
|
103
102
|
return False
|
|
104
103
|
|
|
105
104
|
try:
|
|
106
105
|
collection: Collection = self.client.get_collection(name=self.collection_name)
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
cleaned_content = document.content.replace("\x00", "\ufffd")
|
|
110
|
-
if cleaned_content in existing_documents: # type: ignore
|
|
111
|
-
return True
|
|
106
|
+
result = collection.get(where=cast(Any, {"name": {"$eq": name}}), limit=1)
|
|
107
|
+
return len(result.get("ids", [])) > 0
|
|
112
108
|
except Exception as e:
|
|
113
|
-
logger.error(f"
|
|
114
|
-
return False
|
|
115
|
-
|
|
116
|
-
async def async_doc_exists(self, document: Document) -> bool:
|
|
117
|
-
"""Check if a document exists asynchronously."""
|
|
118
|
-
return await asyncio.to_thread(self.doc_exists, document)
|
|
119
|
-
|
|
120
|
-
def name_exists(self, name: str) -> bool:
|
|
121
|
-
"""Check if a document with a given name exists in the collection.
|
|
122
|
-
Args:
|
|
123
|
-
name (str): Name of the document to check.
|
|
124
|
-
Returns:
|
|
125
|
-
bool: True if document exists, False otherwise."""
|
|
126
|
-
if self.client:
|
|
127
|
-
try:
|
|
128
|
-
collections: Collection = self.client.get_collection(name=self.collection_name)
|
|
129
|
-
for collection in collections: # type: ignore
|
|
130
|
-
if name in collection:
|
|
131
|
-
return True
|
|
132
|
-
except Exception as e:
|
|
133
|
-
logger.error(f"Document with given name does not exist: {e}")
|
|
109
|
+
logger.error(f"Error checking name existence: {e}")
|
|
134
110
|
return False
|
|
135
111
|
|
|
136
112
|
async def async_name_exists(self, name: str) -> bool:
|
|
137
113
|
"""Check if a document with given name exists asynchronously."""
|
|
138
114
|
return await asyncio.to_thread(self.name_exists, name)
|
|
139
115
|
|
|
140
|
-
def insert(self, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
|
|
116
|
+
def insert(self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
|
|
141
117
|
"""Insert documents into the collection.
|
|
142
118
|
|
|
143
119
|
Args:
|
|
144
120
|
documents (List[Document]): List of documents to insert
|
|
145
121
|
filters (Optional[Dict[str, Any]]): Filters to merge with document metadata
|
|
146
122
|
"""
|
|
147
|
-
|
|
123
|
+
log_info(f"Inserting {len(documents)} documents")
|
|
148
124
|
ids: List = []
|
|
149
125
|
docs: List = []
|
|
150
126
|
docs_embeddings: List = []
|
|
@@ -163,6 +139,14 @@ class ChromaDb(VectorDb):
|
|
|
163
139
|
if filters:
|
|
164
140
|
metadata.update(filters)
|
|
165
141
|
|
|
142
|
+
# Add name, content_id to metadata
|
|
143
|
+
if document.name is not None:
|
|
144
|
+
metadata["name"] = document.name
|
|
145
|
+
if document.content_id is not None:
|
|
146
|
+
metadata["content_id"] = document.content_id
|
|
147
|
+
|
|
148
|
+
metadata["content_hash"] = content_hash
|
|
149
|
+
|
|
166
150
|
docs_embeddings.append(document.embedding)
|
|
167
151
|
docs.append(cleaned_content)
|
|
168
152
|
ids.append(doc_id)
|
|
@@ -176,22 +160,82 @@ class ChromaDb(VectorDb):
|
|
|
176
160
|
self._collection.add(ids=ids, embeddings=docs_embeddings, documents=docs, metadatas=docs_metadata)
|
|
177
161
|
log_debug(f"Committed {len(docs)} documents")
|
|
178
162
|
|
|
179
|
-
async def async_insert(
|
|
163
|
+
async def async_insert(
|
|
164
|
+
self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
|
|
165
|
+
) -> None:
|
|
180
166
|
"""Insert documents asynchronously by running in a thread."""
|
|
181
|
-
|
|
167
|
+
log_info(f"Async Inserting {len(documents)} documents")
|
|
168
|
+
ids: List = []
|
|
169
|
+
docs: List = []
|
|
170
|
+
docs_embeddings: List = []
|
|
171
|
+
docs_metadata: List = []
|
|
172
|
+
|
|
173
|
+
if not self._collection:
|
|
174
|
+
self._collection = self.client.get_collection(name=self.collection_name)
|
|
175
|
+
|
|
176
|
+
try:
|
|
177
|
+
embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
|
|
178
|
+
await asyncio.gather(*embed_tasks, return_exceptions=True)
|
|
179
|
+
except Exception as e:
|
|
180
|
+
log_error(f"Error processing document: {e}")
|
|
181
|
+
|
|
182
|
+
for document in documents:
|
|
183
|
+
cleaned_content = document.content.replace("\x00", "\ufffd")
|
|
184
|
+
doc_id = md5(cleaned_content.encode()).hexdigest()
|
|
185
|
+
|
|
186
|
+
# Handle metadata and filters
|
|
187
|
+
metadata = document.meta_data or {}
|
|
188
|
+
if filters:
|
|
189
|
+
metadata.update(filters)
|
|
190
|
+
|
|
191
|
+
# Add name, content_id to metadata
|
|
192
|
+
if document.name is not None:
|
|
193
|
+
metadata["name"] = document.name
|
|
194
|
+
if document.content_id is not None:
|
|
195
|
+
metadata["content_id"] = document.content_id
|
|
196
|
+
|
|
197
|
+
metadata["content_hash"] = content_hash
|
|
198
|
+
|
|
199
|
+
docs_embeddings.append(document.embedding)
|
|
200
|
+
docs.append(cleaned_content)
|
|
201
|
+
ids.append(doc_id)
|
|
202
|
+
docs_metadata.append(metadata)
|
|
203
|
+
log_debug(f"Prepared document: {document.id} | {document.name} | {metadata}")
|
|
204
|
+
|
|
205
|
+
if self._collection is None:
|
|
206
|
+
logger.warning("Collection does not exist")
|
|
207
|
+
else:
|
|
208
|
+
if len(docs) > 0:
|
|
209
|
+
self._collection.add(ids=ids, embeddings=docs_embeddings, documents=docs, metadatas=docs_metadata)
|
|
210
|
+
log_debug(f"Committed {len(docs)} documents")
|
|
182
211
|
|
|
183
212
|
def upsert_available(self) -> bool:
|
|
184
213
|
"""Check if upsert is available in ChromaDB."""
|
|
185
214
|
return True
|
|
186
215
|
|
|
187
|
-
def upsert(self, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
|
|
216
|
+
def upsert(self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
|
|
188
217
|
"""Upsert documents into the collection.
|
|
189
218
|
|
|
190
219
|
Args:
|
|
191
220
|
documents (List[Document]): List of documents to upsert
|
|
192
221
|
filters (Optional[Dict[str, Any]]): Filters to apply while upserting
|
|
193
222
|
"""
|
|
194
|
-
|
|
223
|
+
try:
|
|
224
|
+
if self.content_hash_exists(content_hash):
|
|
225
|
+
self._delete_by_content_hash(content_hash)
|
|
226
|
+
self._upsert(content_hash, documents, filters)
|
|
227
|
+
except Exception as e:
|
|
228
|
+
logger.error(f"Error upserting documents by content hash: {e}")
|
|
229
|
+
raise
|
|
230
|
+
|
|
231
|
+
def _upsert(self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
|
|
232
|
+
"""Upsert documents into the collection.
|
|
233
|
+
|
|
234
|
+
Args:
|
|
235
|
+
documents (List[Document]): List of documents to upsert
|
|
236
|
+
filters (Optional[Dict[str, Any]]): Filters to apply while upserting
|
|
237
|
+
"""
|
|
238
|
+
log_info(f"Upserting {len(documents)} documents")
|
|
195
239
|
ids: List = []
|
|
196
240
|
docs: List = []
|
|
197
241
|
docs_embeddings: List = []
|
|
@@ -204,11 +248,25 @@ class ChromaDb(VectorDb):
|
|
|
204
248
|
document.embed(embedder=self.embedder)
|
|
205
249
|
cleaned_content = document.content.replace("\x00", "\ufffd")
|
|
206
250
|
doc_id = md5(cleaned_content.encode()).hexdigest()
|
|
251
|
+
|
|
252
|
+
# Handle metadata and filters
|
|
253
|
+
metadata = document.meta_data or {}
|
|
254
|
+
if filters:
|
|
255
|
+
metadata.update(filters)
|
|
256
|
+
|
|
257
|
+
# Add name, content_id to metadata
|
|
258
|
+
if document.name is not None:
|
|
259
|
+
metadata["name"] = document.name
|
|
260
|
+
if document.content_id is not None:
|
|
261
|
+
metadata["content_id"] = document.content_id
|
|
262
|
+
|
|
263
|
+
metadata["content_hash"] = content_hash
|
|
264
|
+
|
|
207
265
|
docs_embeddings.append(document.embedding)
|
|
208
266
|
docs.append(cleaned_content)
|
|
209
267
|
ids.append(doc_id)
|
|
210
|
-
docs_metadata.append(
|
|
211
|
-
log_debug(f"Upserted document: {document.id} | {document.name} | {
|
|
268
|
+
docs_metadata.append(metadata)
|
|
269
|
+
log_debug(f"Upserted document: {document.id} | {document.name} | {metadata}")
|
|
212
270
|
|
|
213
271
|
if self._collection is None:
|
|
214
272
|
logger.warning("Collection does not exist")
|
|
@@ -217,9 +275,68 @@ class ChromaDb(VectorDb):
|
|
|
217
275
|
self._collection.upsert(ids=ids, embeddings=docs_embeddings, documents=docs, metadatas=docs_metadata)
|
|
218
276
|
log_debug(f"Committed {len(docs)} documents")
|
|
219
277
|
|
|
220
|
-
async def
|
|
278
|
+
async def _async_upsert(
    self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
) -> None:
    """Embed the documents concurrently and upsert them into the collection.

    Each document is stored under an ID derived from the MD5 of its cleaned
    content. The stored metadata is the document's own metadata merged with
    ``filters``, plus ``name``, ``content_id`` (when set) and ``content_hash``.

    Args:
        content_hash (str): Hash recorded in every document's stored metadata
        documents (List[Document]): List of documents to upsert
        filters (Optional[Dict[str, Any]]): Filters to apply while upserting
    """
    log_info(f"Async Upserting {len(documents)} documents")
    ids: List = []
    docs: List = []
    docs_embeddings: List = []
    docs_metadata: List = []

    if not self._collection:
        self._collection = self.client.get_collection(name=self.collection_name)

    embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
    embed_results = await asyncio.gather(*embed_tasks, return_exceptions=True)
    # return_exceptions=True hands failures back as values; surface them
    # instead of dropping them silently.
    for document, outcome in zip(documents, embed_results):
        if isinstance(outcome, BaseException):
            logger.warning(f"Error embedding document '{document.name}': {outcome}")

    for document in documents:
        cleaned_content = document.content.replace("\x00", "\ufffd")
        doc_id = md5(cleaned_content.encode()).hexdigest()

        # Handle metadata and filters.
        # Work on a copy so the caller's Document.meta_data is not mutated
        # with filters and bookkeeping keys.
        metadata = dict(document.meta_data or {})
        if filters:
            metadata.update(filters)

        # Add name, content_id to metadata
        if document.name is not None:
            metadata["name"] = document.name
        if document.content_id is not None:
            metadata["content_id"] = document.content_id

        metadata["content_hash"] = content_hash

        docs_embeddings.append(document.embedding)
        docs.append(cleaned_content)
        ids.append(doc_id)
        docs_metadata.append(metadata)
        log_debug(f"Upserted document: {document.id} | {document.name} | {metadata}")

    if self._collection is None:
        logger.warning("Collection does not exist")
    else:
        if len(docs) > 0:
            self._collection.upsert(ids=ids, embeddings=docs_embeddings, documents=docs, metadatas=docs_metadata)
            log_debug(f"Committed {len(docs)} documents")
|
|
328
|
+
|
|
329
|
+
async def async_upsert(
    self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
) -> None:
    """Asynchronously replace the documents stored under a content hash.

    The existence check and the delete of stale entries are plain
    synchronous calls; the write itself is awaited via ``_async_upsert``.

    Args:
        content_hash (str): Hash identifying the content the documents belong to
        documents (List[Document]): List of documents to upsert
        filters (Optional[Dict[str, Any]]): Filters to apply while upserting

    Raises:
        Exception: Any failure in the steps above is logged and re-raised.
    """
    try:
        already_stored = self.content_hash_exists(content_hash)
        if already_stored:
            self._delete_by_content_hash(content_hash)
        await self._async_upsert(content_hash, documents, filters)
    except Exception as exc:
        logger.error(f"Error upserting documents by content hash: {exc}")
        raise
|
|
223
340
|
|
|
224
341
|
def search(self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None) -> List[Document]:
|
|
225
342
|
"""Search the collection for a query.
|
|
@@ -257,24 +374,62 @@ class ChromaDb(VectorDb):
|
|
|
257
374
|
# Build search results
|
|
258
375
|
search_results: List[Document] = []
|
|
259
376
|
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
377
|
+
ids_list = result.get("ids", [[]])
|
|
378
|
+
metadata_list = result.get("metadatas", [[{}]])
|
|
379
|
+
documents_list = result.get("documents", [[]])
|
|
380
|
+
embeddings_list = result.get("embeddings")
|
|
381
|
+
distances_list = result.get("distances", [[]])
|
|
382
|
+
|
|
383
|
+
if not ids_list or not metadata_list or not documents_list or embeddings_list is None or not distances_list:
|
|
384
|
+
return search_results
|
|
385
|
+
|
|
386
|
+
ids = ids_list[0]
|
|
387
|
+
metadata = [dict(m) if m else {} for m in metadata_list[0]] # Convert to mutable dicts
|
|
388
|
+
documents = documents_list[0]
|
|
389
|
+
embeddings_raw = embeddings_list[0] if embeddings_list else []
|
|
390
|
+
embeddings = []
|
|
391
|
+
for e in embeddings_raw:
|
|
392
|
+
if hasattr(e, "tolist") and callable(getattr(e, "tolist", None)):
|
|
393
|
+
try:
|
|
394
|
+
embeddings.append(list(cast(Any, e).tolist()))
|
|
395
|
+
except (AttributeError, TypeError):
|
|
396
|
+
embeddings.append(list(e) if isinstance(e, (list, tuple)) else [])
|
|
397
|
+
elif isinstance(e, (list, tuple)):
|
|
398
|
+
embeddings.append([float(x) for x in e if isinstance(x, (int, float))])
|
|
399
|
+
elif isinstance(e, (int, float)):
|
|
400
|
+
embeddings.append([float(e)])
|
|
401
|
+
else:
|
|
402
|
+
embeddings.append([])
|
|
403
|
+
distances = distances_list[0]
|
|
266
404
|
|
|
267
405
|
for idx, distance in enumerate(distances):
|
|
268
|
-
|
|
406
|
+
if idx < len(metadata):
|
|
407
|
+
metadata[idx]["distances"] = distance
|
|
269
408
|
|
|
270
409
|
try:
|
|
271
|
-
for idx, (id_,
|
|
410
|
+
for idx, (id_, doc_metadata, document) in enumerate(zip(ids, metadata, documents)):
|
|
411
|
+
# Extract the fields we added to metadata
|
|
412
|
+
name_val = doc_metadata.pop("name", None)
|
|
413
|
+
content_id_val = doc_metadata.pop("content_id", None)
|
|
414
|
+
|
|
415
|
+
# Convert types to match Document constructor expectations
|
|
416
|
+
name = str(name_val) if name_val is not None and not isinstance(name_val, str) else name_val
|
|
417
|
+
content_id = (
|
|
418
|
+
str(content_id_val)
|
|
419
|
+
if content_id_val is not None and not isinstance(content_id_val, str)
|
|
420
|
+
else content_id_val
|
|
421
|
+
)
|
|
422
|
+
content = str(document) if document is not None else ""
|
|
423
|
+
embedding = embeddings[idx] if idx < len(embeddings) else None
|
|
424
|
+
|
|
272
425
|
search_results.append(
|
|
273
426
|
Document(
|
|
274
427
|
id=id_,
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
428
|
+
name=name,
|
|
429
|
+
meta_data=doc_metadata,
|
|
430
|
+
content=content,
|
|
431
|
+
embedding=embedding,
|
|
432
|
+
content_id=content_id,
|
|
278
433
|
)
|
|
279
434
|
)
|
|
280
435
|
except Exception as e:
|
|
@@ -360,3 +515,271 @@ class ChromaDb(VectorDb):
|
|
|
360
515
|
except Exception as e:
|
|
361
516
|
logger.error(f"Error clearing collection: {e}")
|
|
362
517
|
return False
|
|
518
|
+
|
|
519
|
+
def delete_by_id(self, id: str) -> bool:
    """Remove the single document stored under ``id``.

    Args:
        id (str): ID of the document to delete.

    Returns:
        bool: True when the document was deleted; False when the client is
        not initialized, the ID is not present, or an error occurred.
    """
    if not self.client:
        logger.error("Client not initialized")
        return False

    try:
        collection: Collection = self.client.get_collection(name=self.collection_name)

        # Only issue the delete when the ID is actually present.
        if self.id_exists(id):
            collection.delete(ids=[id])
            log_info(f"Deleted document with ID '{id}'")
            return True

        log_info(f"Document with ID '{id}' not found")
        return False
    except Exception as exc:
        logger.error(f"Error deleting document by ID '{id}': {exc}")
        return False
|
|
540
|
+
|
|
541
|
+
def delete_by_name(self, name: str) -> bool:
    """Remove every document whose ``name`` metadata equals ``name``.

    Args:
        name (str): Name to match against the stored ``name`` metadata.

    Returns:
        bool: True when at least one document was deleted; False when the
        client is not initialized, nothing matched, or an error occurred.
    """
    if not self.client:
        logger.error("Client not initialized")
        return False

    try:
        collection: Collection = self.client.get_collection(name=self.collection_name)

        # Look up the IDs of all documents carrying this name.
        name_filter = cast(Any, {"name": {"$eq": name}})
        matches = collection.get(where=name_filter)
        ids_to_delete = matches.get("ids", [])

        if ids_to_delete:
            collection.delete(ids=ids_to_delete)
            log_info(f"Deleted {len(ids_to_delete)} documents with name '{name}'")
            return True

        log_info(f"No documents found with name '{name}'")
        return False
    except Exception as exc:
        logger.error(f"Error deleting documents by name '{name}': {exc}")
        return False
|
|
565
|
+
|
|
566
|
+
def delete_by_metadata(self, metadata: Dict[str, Any]) -> bool:
    """Delete every document whose metadata matches all given key/value pairs.

    Args:
        metadata (Dict[str, Any]): Key/value pairs that must all match
            (equality) for a document to be deleted.

    Returns:
        bool: True when at least one document was deleted; False when the
        client is not initialized, ``metadata`` is empty, nothing matched,
        or an error occurred.
    """
    if not self.client:
        logger.error("Client not initialized")
        return False

    if not metadata:
        # An empty filter could match the whole collection; never delete on it.
        logger.warning("No metadata provided; refusing to delete without a filter")
        return False

    try:
        collection: Collection = self.client.get_collection(name=self.collection_name)

        # Build where clause for metadata filtering.
        # Chroma accepts exactly one top-level operator/field in a where
        # clause, so several conditions have to be combined with "$and".
        conditions = [{key: {"$eq": value}} for key, value in metadata.items()]
        where_clause = conditions[0] if len(conditions) == 1 else {"$and": conditions}

        # Find all documents with the matching metadata
        result = collection.get(where=cast(Any, where_clause))
        ids_to_delete = result.get("ids", [])

        if not ids_to_delete:
            log_info(f"No documents found with metadata '{metadata}'")
            return False

        # Delete all matching documents
        collection.delete(ids=ids_to_delete)
        log_info(f"Deleted {len(ids_to_delete)} documents with metadata '{metadata}'")
        return True
    except Exception as e:
        logger.error(f"Error deleting documents by metadata '{metadata}': {e}")
        return False
|
|
595
|
+
|
|
596
|
+
def delete_by_content_id(self, content_id: str) -> bool:
    """Remove every document whose ``content_id`` metadata equals ``content_id``.

    Args:
        content_id (str): Content ID to match against stored metadata.

    Returns:
        bool: True when at least one document was deleted; False when the
        client is not initialized, nothing matched, or an error occurred.
    """
    if not self.client:
        logger.error("Client not initialized")
        return False

    try:
        collection: Collection = self.client.get_collection(name=self.collection_name)

        # Look up the IDs of all documents carrying this content_id.
        content_id_filter = cast(Any, {"content_id": {"$eq": content_id}})
        matches = collection.get(where=content_id_filter)
        ids_to_delete = matches.get("ids", [])

        if ids_to_delete:
            collection.delete(ids=ids_to_delete)
            log_info(f"Deleted {len(ids_to_delete)} documents with content_id '{content_id}'")
            return True

        log_info(f"No documents found with content_id '{content_id}'")
        return False
    except Exception as exc:
        logger.error(f"Error deleting documents by content_id '{content_id}': {exc}")
        return False
|
|
620
|
+
|
|
621
|
+
def _delete_by_content_hash(self, content_hash: str) -> bool:
|
|
622
|
+
"""Delete documents by content hash."""
|
|
623
|
+
if not self.client:
|
|
624
|
+
logger.error("Client not initialized")
|
|
625
|
+
return False
|
|
626
|
+
|
|
627
|
+
try:
|
|
628
|
+
collection: Collection = self.client.get_collection(name=self.collection_name)
|
|
629
|
+
|
|
630
|
+
# Find all documents with the given content_hash
|
|
631
|
+
result = collection.get(where=cast(Any, {"content_hash": {"$eq": content_hash}}))
|
|
632
|
+
ids_to_delete = result.get("ids", [])
|
|
633
|
+
|
|
634
|
+
if not ids_to_delete:
|
|
635
|
+
log_info(f"No documents found with content_hash '{content_hash}'")
|
|
636
|
+
return False
|
|
637
|
+
|
|
638
|
+
# Delete all matching documents
|
|
639
|
+
collection.delete(ids=ids_to_delete)
|
|
640
|
+
log_info(f"Deleted {len(ids_to_delete)} documents with content_hash '{content_hash}'")
|
|
641
|
+
return True
|
|
642
|
+
except Exception as e:
|
|
643
|
+
logger.error(f"Error deleting documents by content_hash '{content_hash}': {e}")
|
|
644
|
+
return False
|
|
645
|
+
|
|
646
|
+
def id_exists(self, id: str) -> bool:
    """Check if a document with the given ID exists in the collection.

    Args:
        id (str): The document ID to check.

    Returns:
        bool: True if the document exists, False otherwise (including when
        the client is not initialized or the lookup raises).
    """
    if not self.client:
        logger.error("Client not initialized")
        return False

    try:
        collection: Collection = self.client.get_collection(name=self.collection_name)
        # Try to get the document by ID
        result = collection.get(ids=[id])
        found_ids = result.get("ids", [])

        # Return True if the document was found
        return len(found_ids) > 0
    except Exception as e:
        logger.error(f"Error checking if ID '{id}' exists: {e}")
        return False
|
|
671
|
+
|
|
672
|
+
def content_hash_exists(self, content_hash: str) -> bool:
    """Tell whether any stored document carries the given content hash.

    Args:
        content_hash (str): Content hash to look for in stored metadata.

    Returns:
        bool: True when at least one matching document is found; False when
        none is found, the client is not initialized, or the lookup fails.
    """
    if not self.client:
        logger.error("Client not initialized")
        return False

    try:
        collection: Collection = self.client.get_collection(name=self.collection_name)
        hash_filter = cast(Any, {"content_hash": {"$eq": content_hash}})

        try:
            result = collection.get(where=hash_filter)

            # Pull the ids out of the result, tolerating non-dict result types.
            if hasattr(result, "get") and callable(result.get):
                found_ids = result.get("ids", [])
            elif hasattr(result, "__getitem__") and "ids" in result:
                found_ids = result["ids"]
            else:
                found_ids = []

            if isinstance(found_ids, (list, tuple)):
                return len(found_ids) > 0
            if isinstance(found_ids, int):
                # Some ChromaDB versions might return a count instead of a list
                return found_ids > 0
            return False

        except TypeError as te:
            if "object of type 'int' has no len()" not in str(te):
                raise te
            # Known issue with ChromaDB 0.5.0 - internal bug
            # As a workaround, assume content doesn't exist to allow processing to continue
            logger.warning(
                f"ChromaDB internal error (version 0.5.0 bug): {te}. Assuming content_hash '{content_hash}' does not exist."
            )
            return False

    except Exception as exc:
        logger.error(f"Error checking if content_hash '{content_hash}' exists: {exc}")
        return False
|
|
715
|
+
|
|
716
|
+
def update_metadata(self, content_id: str, metadata: Dict[str, Any]) -> None:
    """
    Update the metadata for documents with the given content_id.

    The new key/value pairs are merged over each matching document's
    existing metadata and written back with ``collection.update``.

    Args:
        content_id (str): The content ID to update
        metadata (Dict[str, Any]): The metadata to update

    Raises:
        Exception: Any error other than the known ChromaDB 0.5.0 TypeError
            is logged and re-raised.
    """
    try:
        if not self.client:
            logger.error("Client not initialized")
            return

        collection: Collection = self.client.get_collection(name=self.collection_name)

        # Find documents with the given content_id
        try:
            result = collection.get(where=cast(Any, {"content_id": {"$eq": content_id}}))

            # Extract IDs and current metadata
            if hasattr(result, "get") and callable(result.get):
                ids = result.get("ids", [])
                current_metadatas = result.get("metadatas", [])
            elif hasattr(result, "__getitem__"):
                # No usable .get() here, so index the result directly.
                ids = result["ids"] if "ids" in result else []
                current_metadatas = result["metadatas"] if "metadatas" in result else []
            else:
                ids = []
                current_metadatas = []

            if not ids:
                logger.debug(f"No documents found with content_id: {content_id}")
                return

            # Keep exactly one metadata entry per ID, padding with None when
            # the lookup returned fewer metadata entries than IDs.
            existing = list(current_metadatas or [])[: len(ids)]
            existing.extend([None] * (len(ids) - len(existing)))

            # Merge metadata for each document.
            # The values are merged at the top level only: Chroma metadata
            # values must be scalars, so a nested "filters" dict cannot be
            # stored and would make the update fail.
            updated_metadatas: List[Dict[str, Any]] = []
            for current_meta in existing:
                merged: Dict[str, Any] = dict(current_meta) if current_meta else {}
                merged.update(metadata)
                updated_metadatas.append(merged)

            # Update the documents
            # Convert to the expected type for ChromaDB
            chroma_metadatas = cast(List[Mapping[str, Union[str, int, float, bool, None]]], updated_metadatas)
            collection.update(ids=ids, metadatas=chroma_metadatas)
            logger.debug(f"Updated metadata for {len(ids)} documents with content_id: {content_id}")

        except TypeError as te:
            if "object of type 'int' has no len()" in str(te):
                logger.warning(
                    f"ChromaDB internal error (version 0.5.0 bug): {te}. Cannot update metadata for content_id '{content_id}'."
                )
                return
            else:
                raise te

    except Exception as e:
        logger.error(f"Error updating metadata for content_id '{content_id}': {e}")
        raise
|