agno 1.8.1__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/__init__.py +8 -0
- agno/agent/__init__.py +19 -27
- agno/agent/agent.py +3143 -4170
- agno/api/agent.py +11 -67
- agno/api/api.py +5 -46
- agno/api/evals.py +8 -19
- agno/api/os.py +17 -0
- agno/api/routes.py +6 -41
- agno/api/schemas/__init__.py +9 -0
- agno/api/schemas/agent.py +5 -21
- agno/api/schemas/evals.py +7 -16
- agno/api/schemas/os.py +14 -0
- agno/api/schemas/team.py +5 -21
- agno/api/schemas/utils.py +21 -0
- agno/api/schemas/workflows.py +11 -7
- agno/api/settings.py +53 -0
- agno/api/team.py +11 -66
- agno/api/workflow.py +28 -0
- agno/cloud/aws/base.py +214 -0
- agno/cloud/aws/s3/__init__.py +2 -0
- agno/cloud/aws/s3/api_client.py +43 -0
- agno/cloud/aws/s3/bucket.py +195 -0
- agno/cloud/aws/s3/object.py +57 -0
- agno/db/__init__.py +24 -0
- agno/db/base.py +245 -0
- agno/db/dynamo/__init__.py +3 -0
- agno/db/dynamo/dynamo.py +1743 -0
- agno/db/dynamo/schemas.py +278 -0
- agno/db/dynamo/utils.py +684 -0
- agno/db/firestore/__init__.py +3 -0
- agno/db/firestore/firestore.py +1432 -0
- agno/db/firestore/schemas.py +130 -0
- agno/db/firestore/utils.py +278 -0
- agno/db/gcs_json/__init__.py +3 -0
- agno/db/gcs_json/gcs_json_db.py +1001 -0
- agno/db/gcs_json/utils.py +194 -0
- agno/db/in_memory/__init__.py +3 -0
- agno/db/in_memory/in_memory_db.py +882 -0
- agno/db/in_memory/utils.py +172 -0
- agno/db/json/__init__.py +3 -0
- agno/db/json/json_db.py +1045 -0
- agno/db/json/utils.py +196 -0
- agno/db/migrations/v1_to_v2.py +162 -0
- agno/db/mongo/__init__.py +3 -0
- agno/db/mongo/mongo.py +1416 -0
- agno/db/mongo/schemas.py +77 -0
- agno/db/mongo/utils.py +204 -0
- agno/db/mysql/__init__.py +3 -0
- agno/db/mysql/mysql.py +1719 -0
- agno/db/mysql/schemas.py +124 -0
- agno/db/mysql/utils.py +297 -0
- agno/db/postgres/__init__.py +3 -0
- agno/db/postgres/postgres.py +1710 -0
- agno/db/postgres/schemas.py +124 -0
- agno/db/postgres/utils.py +280 -0
- agno/db/redis/__init__.py +3 -0
- agno/db/redis/redis.py +1367 -0
- agno/db/redis/schemas.py +109 -0
- agno/db/redis/utils.py +288 -0
- agno/db/schemas/__init__.py +3 -0
- agno/db/schemas/evals.py +33 -0
- agno/db/schemas/knowledge.py +40 -0
- agno/db/schemas/memory.py +46 -0
- agno/db/singlestore/__init__.py +3 -0
- agno/db/singlestore/schemas.py +116 -0
- agno/db/singlestore/singlestore.py +1712 -0
- agno/db/singlestore/utils.py +326 -0
- agno/db/sqlite/__init__.py +3 -0
- agno/db/sqlite/schemas.py +119 -0
- agno/db/sqlite/sqlite.py +1676 -0
- agno/db/sqlite/utils.py +268 -0
- agno/db/utils.py +88 -0
- agno/eval/__init__.py +14 -0
- agno/eval/accuracy.py +154 -48
- agno/eval/performance.py +88 -23
- agno/eval/reliability.py +73 -20
- agno/eval/utils.py +23 -13
- agno/integrations/discord/__init__.py +3 -0
- agno/{app → integrations}/discord/client.py +15 -11
- agno/knowledge/__init__.py +2 -2
- agno/{document → knowledge}/chunking/agentic.py +2 -2
- agno/{document → knowledge}/chunking/document.py +2 -2
- agno/{document → knowledge}/chunking/fixed.py +3 -3
- agno/{document → knowledge}/chunking/markdown.py +2 -2
- agno/{document → knowledge}/chunking/recursive.py +2 -2
- agno/{document → knowledge}/chunking/row.py +2 -2
- agno/knowledge/chunking/semantic.py +59 -0
- agno/knowledge/chunking/strategy.py +121 -0
- agno/knowledge/content.py +74 -0
- agno/knowledge/document/__init__.py +5 -0
- agno/{document → knowledge/document}/base.py +12 -2
- agno/knowledge/embedder/__init__.py +5 -0
- agno/{embedder → knowledge/embedder}/aws_bedrock.py +127 -1
- agno/{embedder → knowledge/embedder}/azure_openai.py +65 -1
- agno/{embedder → knowledge/embedder}/base.py +6 -0
- agno/{embedder → knowledge/embedder}/cohere.py +72 -1
- agno/{embedder → knowledge/embedder}/fastembed.py +17 -1
- agno/{embedder → knowledge/embedder}/fireworks.py +1 -1
- agno/{embedder → knowledge/embedder}/google.py +74 -1
- agno/{embedder → knowledge/embedder}/huggingface.py +36 -2
- agno/{embedder → knowledge/embedder}/jina.py +48 -2
- agno/knowledge/embedder/langdb.py +22 -0
- agno/knowledge/embedder/mistral.py +139 -0
- agno/{embedder → knowledge/embedder}/nebius.py +1 -1
- agno/{embedder → knowledge/embedder}/ollama.py +54 -3
- agno/knowledge/embedder/openai.py +223 -0
- agno/{embedder → knowledge/embedder}/sentence_transformer.py +16 -1
- agno/{embedder → knowledge/embedder}/together.py +1 -1
- agno/{embedder → knowledge/embedder}/voyageai.py +49 -1
- agno/knowledge/knowledge.py +1551 -0
- agno/knowledge/reader/__init__.py +7 -0
- agno/{document → knowledge}/reader/arxiv_reader.py +32 -4
- agno/knowledge/reader/base.py +88 -0
- agno/{document → knowledge}/reader/csv_reader.py +47 -65
- agno/knowledge/reader/docx_reader.py +83 -0
- agno/{document → knowledge}/reader/firecrawl_reader.py +42 -21
- agno/{document → knowledge}/reader/json_reader.py +30 -9
- agno/{document → knowledge}/reader/markdown_reader.py +58 -9
- agno/{document → knowledge}/reader/pdf_reader.py +71 -126
- agno/knowledge/reader/reader_factory.py +268 -0
- agno/knowledge/reader/s3_reader.py +101 -0
- agno/{document → knowledge}/reader/text_reader.py +31 -10
- agno/knowledge/reader/url_reader.py +128 -0
- agno/knowledge/reader/web_search_reader.py +366 -0
- agno/{document → knowledge}/reader/website_reader.py +37 -10
- agno/knowledge/reader/wikipedia_reader.py +59 -0
- agno/knowledge/reader/youtube_reader.py +78 -0
- agno/knowledge/remote_content/remote_content.py +88 -0
- agno/{reranker → knowledge/reranker}/base.py +1 -1
- agno/{reranker → knowledge/reranker}/cohere.py +2 -2
- agno/{reranker → knowledge/reranker}/infinity.py +2 -2
- agno/{reranker → knowledge/reranker}/sentence_transformer.py +2 -2
- agno/knowledge/types.py +30 -0
- agno/knowledge/utils.py +169 -0
- agno/media.py +269 -268
- agno/memory/__init__.py +2 -10
- agno/memory/manager.py +1003 -148
- agno/models/aimlapi/__init__.py +2 -2
- agno/models/aimlapi/aimlapi.py +6 -6
- agno/models/anthropic/claude.py +131 -131
- agno/models/aws/bedrock.py +110 -182
- agno/models/aws/claude.py +64 -18
- agno/models/azure/ai_foundry.py +73 -23
- agno/models/base.py +346 -290
- agno/models/cerebras/cerebras.py +84 -27
- agno/models/cohere/chat.py +106 -98
- agno/models/google/gemini.py +105 -46
- agno/models/groq/groq.py +97 -35
- agno/models/huggingface/huggingface.py +92 -27
- agno/models/ibm/watsonx.py +72 -13
- agno/models/litellm/chat.py +85 -13
- agno/models/message.py +46 -151
- agno/models/meta/llama.py +85 -49
- agno/models/metrics.py +120 -0
- agno/models/mistral/mistral.py +90 -21
- agno/models/ollama/__init__.py +0 -2
- agno/models/ollama/chat.py +85 -47
- agno/models/openai/chat.py +154 -37
- agno/models/openai/responses.py +178 -105
- agno/models/perplexity/perplexity.py +26 -2
- agno/models/portkey/portkey.py +0 -7
- agno/models/response.py +15 -9
- agno/models/utils.py +20 -0
- agno/models/vercel/__init__.py +2 -2
- agno/models/vercel/v0.py +1 -1
- agno/models/vllm/__init__.py +2 -2
- agno/models/vllm/vllm.py +3 -3
- agno/models/xai/xai.py +10 -10
- agno/os/__init__.py +3 -0
- agno/os/app.py +497 -0
- agno/os/auth.py +47 -0
- agno/os/config.py +103 -0
- agno/os/interfaces/agui/__init__.py +3 -0
- agno/os/interfaces/agui/agui.py +31 -0
- agno/{app/agui/async_router.py → os/interfaces/agui/router.py} +16 -16
- agno/{app → os/interfaces}/agui/utils.py +77 -33
- agno/os/interfaces/base.py +21 -0
- agno/os/interfaces/slack/__init__.py +3 -0
- agno/{app/slack/async_router.py → os/interfaces/slack/router.py} +3 -5
- agno/os/interfaces/slack/slack.py +32 -0
- agno/os/interfaces/whatsapp/__init__.py +3 -0
- agno/{app/whatsapp/async_router.py → os/interfaces/whatsapp/router.py} +4 -7
- agno/os/interfaces/whatsapp/whatsapp.py +29 -0
- agno/os/mcp.py +235 -0
- agno/os/router.py +1400 -0
- agno/os/routers/__init__.py +3 -0
- agno/os/routers/evals/__init__.py +3 -0
- agno/os/routers/evals/evals.py +393 -0
- agno/os/routers/evals/schemas.py +142 -0
- agno/os/routers/evals/utils.py +161 -0
- agno/os/routers/knowledge/__init__.py +3 -0
- agno/os/routers/knowledge/knowledge.py +850 -0
- agno/os/routers/knowledge/schemas.py +118 -0
- agno/os/routers/memory/__init__.py +3 -0
- agno/os/routers/memory/memory.py +410 -0
- agno/os/routers/memory/schemas.py +58 -0
- agno/os/routers/metrics/__init__.py +3 -0
- agno/os/routers/metrics/metrics.py +178 -0
- agno/os/routers/metrics/schemas.py +47 -0
- agno/os/routers/session/__init__.py +3 -0
- agno/os/routers/session/session.py +536 -0
- agno/os/schema.py +945 -0
- agno/{app/playground → os}/settings.py +7 -15
- agno/os/utils.py +270 -0
- agno/reasoning/azure_ai_foundry.py +4 -4
- agno/reasoning/deepseek.py +4 -4
- agno/reasoning/default.py +6 -11
- agno/reasoning/groq.py +4 -4
- agno/reasoning/helpers.py +4 -6
- agno/reasoning/ollama.py +4 -4
- agno/reasoning/openai.py +4 -4
- agno/run/agent.py +633 -0
- agno/run/base.py +53 -77
- agno/run/cancel.py +81 -0
- agno/run/team.py +243 -96
- agno/run/workflow.py +550 -12
- agno/session/__init__.py +10 -0
- agno/session/agent.py +244 -0
- agno/session/summary.py +225 -0
- agno/session/team.py +262 -0
- agno/{storage/session/v2 → session}/workflow.py +47 -24
- agno/team/__init__.py +15 -16
- agno/team/team.py +3260 -4824
- agno/tools/agentql.py +14 -5
- agno/tools/airflow.py +9 -4
- agno/tools/api.py +7 -3
- agno/tools/apify.py +2 -46
- agno/tools/arxiv.py +8 -3
- agno/tools/aws_lambda.py +7 -5
- agno/tools/aws_ses.py +7 -1
- agno/tools/baidusearch.py +4 -1
- agno/tools/bitbucket.py +4 -4
- agno/tools/brandfetch.py +14 -11
- agno/tools/bravesearch.py +4 -1
- agno/tools/brightdata.py +43 -23
- agno/tools/browserbase.py +13 -4
- agno/tools/calcom.py +12 -10
- agno/tools/calculator.py +10 -27
- agno/tools/cartesia.py +20 -17
- agno/tools/{clickup_tool.py → clickup.py} +12 -25
- agno/tools/confluence.py +8 -8
- agno/tools/crawl4ai.py +7 -1
- agno/tools/csv_toolkit.py +9 -8
- agno/tools/dalle.py +22 -12
- agno/tools/daytona.py +13 -16
- agno/tools/decorator.py +6 -3
- agno/tools/desi_vocal.py +17 -8
- agno/tools/discord.py +11 -8
- agno/tools/docker.py +30 -42
- agno/tools/duckdb.py +34 -53
- agno/tools/duckduckgo.py +8 -7
- agno/tools/e2b.py +62 -62
- agno/tools/eleven_labs.py +36 -29
- agno/tools/email.py +4 -1
- agno/tools/evm.py +7 -1
- agno/tools/exa.py +19 -14
- agno/tools/fal.py +30 -30
- agno/tools/file.py +9 -8
- agno/tools/financial_datasets.py +25 -44
- agno/tools/firecrawl.py +22 -22
- agno/tools/function.py +127 -18
- agno/tools/giphy.py +23 -11
- agno/tools/github.py +48 -126
- agno/tools/gmail.py +45 -61
- agno/tools/google_bigquery.py +7 -6
- agno/tools/google_maps.py +11 -26
- agno/tools/googlesearch.py +7 -2
- agno/tools/googlesheets.py +21 -17
- agno/tools/hackernews.py +9 -5
- agno/tools/jina.py +5 -4
- agno/tools/jira.py +18 -9
- agno/tools/knowledge.py +31 -32
- agno/tools/linear.py +19 -34
- agno/tools/linkup.py +5 -1
- agno/tools/local_file_system.py +8 -5
- agno/tools/lumalab.py +32 -20
- agno/tools/mcp.py +1 -2
- agno/tools/mem0.py +18 -12
- agno/tools/memori.py +14 -10
- agno/tools/mlx_transcribe.py +3 -2
- agno/tools/models/azure_openai.py +33 -15
- agno/tools/models/gemini.py +59 -32
- agno/tools/models/groq.py +30 -23
- agno/tools/models/nebius.py +28 -12
- agno/tools/models_labs.py +40 -16
- agno/tools/moviepy_video.py +7 -6
- agno/tools/neo4j.py +10 -8
- agno/tools/newspaper.py +7 -2
- agno/tools/newspaper4k.py +8 -3
- agno/tools/openai.py +58 -32
- agno/tools/openbb.py +12 -11
- agno/tools/opencv.py +63 -47
- agno/tools/openweather.py +14 -12
- agno/tools/pandas.py +11 -3
- agno/tools/postgres.py +4 -12
- agno/tools/pubmed.py +4 -1
- agno/tools/python.py +9 -22
- agno/tools/reasoning.py +35 -27
- agno/tools/reddit.py +11 -26
- agno/tools/replicate.py +55 -42
- agno/tools/resend.py +4 -1
- agno/tools/scrapegraph.py +15 -14
- agno/tools/searxng.py +10 -23
- agno/tools/serpapi.py +6 -3
- agno/tools/serper.py +13 -4
- agno/tools/shell.py +9 -2
- agno/tools/slack.py +12 -11
- agno/tools/sleep.py +3 -2
- agno/tools/spider.py +24 -4
- agno/tools/sql.py +7 -6
- agno/tools/tavily.py +6 -4
- agno/tools/telegram.py +12 -4
- agno/tools/todoist.py +11 -31
- agno/tools/toolkit.py +1 -1
- agno/tools/trafilatura.py +22 -6
- agno/tools/trello.py +9 -22
- agno/tools/twilio.py +10 -3
- agno/tools/user_control_flow.py +6 -1
- agno/tools/valyu.py +34 -5
- agno/tools/visualization.py +19 -28
- agno/tools/webbrowser.py +4 -3
- agno/tools/webex.py +11 -7
- agno/tools/website.py +15 -46
- agno/tools/webtools.py +12 -4
- agno/tools/whatsapp.py +5 -9
- agno/tools/wikipedia.py +20 -13
- agno/tools/x.py +14 -13
- agno/tools/yfinance.py +13 -40
- agno/tools/youtube.py +26 -20
- agno/tools/zendesk.py +7 -2
- agno/tools/zep.py +10 -7
- agno/tools/zoom.py +10 -9
- agno/utils/common.py +1 -19
- agno/utils/events.py +100 -123
- agno/utils/gemini.py +32 -2
- agno/utils/knowledge.py +29 -0
- agno/utils/log.py +54 -4
- agno/utils/mcp.py +68 -10
- agno/utils/media.py +39 -0
- agno/utils/message.py +12 -1
- agno/utils/models/aws_claude.py +1 -1
- agno/utils/models/claude.py +47 -4
- agno/utils/models/cohere.py +1 -1
- agno/utils/models/mistral.py +8 -7
- agno/utils/models/schema_utils.py +3 -3
- agno/utils/models/watsonx.py +1 -1
- agno/utils/openai.py +1 -1
- agno/utils/pprint.py +33 -32
- agno/utils/print_response/agent.py +779 -0
- agno/utils/print_response/team.py +1669 -0
- agno/utils/print_response/workflow.py +1451 -0
- agno/utils/prompts.py +14 -14
- agno/utils/reasoning.py +87 -0
- agno/utils/response.py +42 -42
- agno/utils/streamlit.py +481 -0
- agno/utils/string.py +8 -22
- agno/utils/team.py +50 -0
- agno/utils/timer.py +2 -2
- agno/vectordb/base.py +33 -21
- agno/vectordb/cassandra/cassandra.py +287 -23
- agno/vectordb/chroma/chromadb.py +482 -59
- agno/vectordb/clickhouse/clickhousedb.py +270 -63
- agno/vectordb/couchbase/couchbase.py +309 -29
- agno/vectordb/lancedb/lance_db.py +360 -21
- agno/vectordb/langchaindb/__init__.py +5 -0
- agno/vectordb/langchaindb/langchaindb.py +145 -0
- agno/vectordb/lightrag/__init__.py +5 -0
- agno/vectordb/lightrag/lightrag.py +374 -0
- agno/vectordb/llamaindex/llamaindexdb.py +127 -0
- agno/vectordb/milvus/milvus.py +242 -32
- agno/vectordb/mongodb/mongodb.py +200 -24
- agno/vectordb/pgvector/pgvector.py +319 -37
- agno/vectordb/pineconedb/pineconedb.py +221 -27
- agno/vectordb/qdrant/qdrant.py +334 -14
- agno/vectordb/singlestore/singlestore.py +286 -29
- agno/vectordb/surrealdb/surrealdb.py +187 -7
- agno/vectordb/upstashdb/upstashdb.py +342 -26
- agno/vectordb/weaviate/weaviate.py +227 -165
- agno/workflow/__init__.py +17 -13
- agno/workflow/{v2/condition.py → condition.py} +135 -32
- agno/workflow/{v2/loop.py → loop.py} +115 -28
- agno/workflow/{v2/parallel.py → parallel.py} +138 -108
- agno/workflow/{v2/router.py → router.py} +133 -32
- agno/workflow/{v2/step.py → step.py} +207 -49
- agno/workflow/{v2/steps.py → steps.py} +147 -66
- agno/workflow/types.py +482 -0
- agno/workflow/workflow.py +2410 -696
- agno-2.0.0.dist-info/METADATA +494 -0
- agno-2.0.0.dist-info/RECORD +515 -0
- agno-2.0.0.dist-info/licenses/LICENSE +201 -0
- agno/agent/metrics.py +0 -107
- agno/api/app.py +0 -35
- agno/api/playground.py +0 -92
- agno/api/schemas/app.py +0 -12
- agno/api/schemas/playground.py +0 -22
- agno/api/schemas/user.py +0 -35
- agno/api/schemas/workspace.py +0 -46
- agno/api/user.py +0 -160
- agno/api/workflows.py +0 -33
- agno/api/workspace.py +0 -175
- agno/app/agui/__init__.py +0 -3
- agno/app/agui/app.py +0 -17
- agno/app/agui/sync_router.py +0 -120
- agno/app/base.py +0 -186
- agno/app/discord/__init__.py +0 -3
- agno/app/fastapi/__init__.py +0 -3
- agno/app/fastapi/app.py +0 -107
- agno/app/fastapi/async_router.py +0 -457
- agno/app/fastapi/sync_router.py +0 -448
- agno/app/playground/app.py +0 -228
- agno/app/playground/async_router.py +0 -1050
- agno/app/playground/deploy.py +0 -249
- agno/app/playground/operator.py +0 -183
- agno/app/playground/schemas.py +0 -220
- agno/app/playground/serve.py +0 -55
- agno/app/playground/sync_router.py +0 -1042
- agno/app/playground/utils.py +0 -46
- agno/app/settings.py +0 -15
- agno/app/slack/__init__.py +0 -3
- agno/app/slack/app.py +0 -19
- agno/app/slack/sync_router.py +0 -92
- agno/app/utils.py +0 -54
- agno/app/whatsapp/__init__.py +0 -3
- agno/app/whatsapp/app.py +0 -15
- agno/app/whatsapp/sync_router.py +0 -197
- agno/cli/auth_server.py +0 -249
- agno/cli/config.py +0 -274
- agno/cli/console.py +0 -88
- agno/cli/credentials.py +0 -23
- agno/cli/entrypoint.py +0 -571
- agno/cli/operator.py +0 -357
- agno/cli/settings.py +0 -96
- agno/cli/ws/ws_cli.py +0 -817
- agno/constants.py +0 -13
- agno/document/__init__.py +0 -5
- agno/document/chunking/semantic.py +0 -45
- agno/document/chunking/strategy.py +0 -31
- agno/document/reader/__init__.py +0 -5
- agno/document/reader/base.py +0 -47
- agno/document/reader/docx_reader.py +0 -60
- agno/document/reader/gcs/pdf_reader.py +0 -44
- agno/document/reader/s3/pdf_reader.py +0 -59
- agno/document/reader/s3/text_reader.py +0 -63
- agno/document/reader/url_reader.py +0 -59
- agno/document/reader/youtube_reader.py +0 -58
- agno/embedder/__init__.py +0 -5
- agno/embedder/langdb.py +0 -80
- agno/embedder/mistral.py +0 -82
- agno/embedder/openai.py +0 -78
- agno/file/__init__.py +0 -5
- agno/file/file.py +0 -16
- agno/file/local/csv.py +0 -32
- agno/file/local/txt.py +0 -19
- agno/infra/app.py +0 -240
- agno/infra/base.py +0 -144
- agno/infra/context.py +0 -20
- agno/infra/db_app.py +0 -52
- agno/infra/resource.py +0 -205
- agno/infra/resources.py +0 -55
- agno/knowledge/agent.py +0 -702
- agno/knowledge/arxiv.py +0 -33
- agno/knowledge/combined.py +0 -36
- agno/knowledge/csv.py +0 -144
- agno/knowledge/csv_url.py +0 -124
- agno/knowledge/document.py +0 -223
- agno/knowledge/docx.py +0 -137
- agno/knowledge/firecrawl.py +0 -34
- agno/knowledge/gcs/__init__.py +0 -0
- agno/knowledge/gcs/base.py +0 -39
- agno/knowledge/gcs/pdf.py +0 -125
- agno/knowledge/json.py +0 -137
- agno/knowledge/langchain.py +0 -71
- agno/knowledge/light_rag.py +0 -273
- agno/knowledge/llamaindex.py +0 -66
- agno/knowledge/markdown.py +0 -154
- agno/knowledge/pdf.py +0 -164
- agno/knowledge/pdf_bytes.py +0 -42
- agno/knowledge/pdf_url.py +0 -148
- agno/knowledge/s3/__init__.py +0 -0
- agno/knowledge/s3/base.py +0 -64
- agno/knowledge/s3/pdf.py +0 -33
- agno/knowledge/s3/text.py +0 -34
- agno/knowledge/text.py +0 -141
- agno/knowledge/url.py +0 -46
- agno/knowledge/website.py +0 -179
- agno/knowledge/wikipedia.py +0 -32
- agno/knowledge/youtube.py +0 -35
- agno/memory/agent.py +0 -423
- agno/memory/classifier.py +0 -104
- agno/memory/db/__init__.py +0 -5
- agno/memory/db/base.py +0 -42
- agno/memory/db/mongodb.py +0 -189
- agno/memory/db/postgres.py +0 -203
- agno/memory/db/sqlite.py +0 -193
- agno/memory/memory.py +0 -22
- agno/memory/row.py +0 -36
- agno/memory/summarizer.py +0 -201
- agno/memory/summary.py +0 -19
- agno/memory/team.py +0 -415
- agno/memory/v2/__init__.py +0 -2
- agno/memory/v2/db/__init__.py +0 -1
- agno/memory/v2/db/base.py +0 -42
- agno/memory/v2/db/firestore.py +0 -339
- agno/memory/v2/db/mongodb.py +0 -196
- agno/memory/v2/db/postgres.py +0 -214
- agno/memory/v2/db/redis.py +0 -187
- agno/memory/v2/db/schema.py +0 -54
- agno/memory/v2/db/sqlite.py +0 -209
- agno/memory/v2/manager.py +0 -437
- agno/memory/v2/memory.py +0 -1097
- agno/memory/v2/schema.py +0 -55
- agno/memory/v2/summarizer.py +0 -215
- agno/memory/workflow.py +0 -38
- agno/models/ollama/tools.py +0 -430
- agno/models/qwen/__init__.py +0 -5
- agno/playground/__init__.py +0 -10
- agno/playground/deploy.py +0 -3
- agno/playground/playground.py +0 -3
- agno/playground/serve.py +0 -3
- agno/playground/settings.py +0 -3
- agno/reranker/__init__.py +0 -0
- agno/run/response.py +0 -467
- agno/run/v2/__init__.py +0 -0
- agno/run/v2/workflow.py +0 -567
- agno/storage/__init__.py +0 -0
- agno/storage/agent/__init__.py +0 -0
- agno/storage/agent/dynamodb.py +0 -1
- agno/storage/agent/json.py +0 -1
- agno/storage/agent/mongodb.py +0 -1
- agno/storage/agent/postgres.py +0 -1
- agno/storage/agent/singlestore.py +0 -1
- agno/storage/agent/sqlite.py +0 -1
- agno/storage/agent/yaml.py +0 -1
- agno/storage/base.py +0 -60
- agno/storage/dynamodb.py +0 -673
- agno/storage/firestore.py +0 -297
- agno/storage/gcs_json.py +0 -261
- agno/storage/in_memory.py +0 -234
- agno/storage/json.py +0 -237
- agno/storage/mongodb.py +0 -328
- agno/storage/mysql.py +0 -685
- agno/storage/postgres.py +0 -682
- agno/storage/redis.py +0 -336
- agno/storage/session/__init__.py +0 -16
- agno/storage/session/agent.py +0 -64
- agno/storage/session/team.py +0 -63
- agno/storage/session/v2/__init__.py +0 -5
- agno/storage/session/workflow.py +0 -61
- agno/storage/singlestore.py +0 -606
- agno/storage/sqlite.py +0 -646
- agno/storage/workflow/__init__.py +0 -0
- agno/storage/workflow/mongodb.py +0 -1
- agno/storage/workflow/postgres.py +0 -1
- agno/storage/workflow/sqlite.py +0 -1
- agno/storage/yaml.py +0 -241
- agno/tools/thinking.py +0 -73
- agno/utils/defaults.py +0 -57
- agno/utils/filesystem.py +0 -39
- agno/utils/git.py +0 -52
- agno/utils/json_io.py +0 -30
- agno/utils/load_env.py +0 -19
- agno/utils/py_io.py +0 -19
- agno/utils/pyproject.py +0 -18
- agno/utils/resource_filter.py +0 -31
- agno/workflow/v2/__init__.py +0 -21
- agno/workflow/v2/types.py +0 -357
- agno/workflow/v2/workflow.py +0 -3312
- agno/workspace/__init__.py +0 -0
- agno/workspace/config.py +0 -325
- agno/workspace/enums.py +0 -6
- agno/workspace/helpers.py +0 -52
- agno/workspace/operator.py +0 -757
- agno/workspace/settings.py +0 -158
- agno-1.8.1.dist-info/METADATA +0 -982
- agno-1.8.1.dist-info/RECORD +0 -566
- agno-1.8.1.dist-info/entry_points.txt +0 -3
- agno-1.8.1.dist-info/licenses/LICENSE +0 -375
- /agno/{app → db/migrations}/__init__.py +0 -0
- /agno/{app/playground/__init__.py → db/schemas/metrics.py} +0 -0
- /agno/{cli → integrations}/__init__.py +0 -0
- /agno/{cli/ws → knowledge/chunking}/__init__.py +0 -0
- /agno/{document/chunking → knowledge/remote_content}/__init__.py +0 -0
- /agno/{document/reader/gcs → knowledge/reranker}/__init__.py +0 -0
- /agno/{document/reader/s3 → os/interfaces}/__init__.py +0 -0
- /agno/{app → os/interfaces}/slack/security.py +0 -0
- /agno/{app → os/interfaces}/whatsapp/security.py +0 -0
- /agno/{file/local → utils/print_response}/__init__.py +0 -0
- /agno/{infra → vectordb/llamaindex}/__init__.py +0 -0
- {agno-1.8.1.dist-info → agno-2.0.0.dist-info}/WHEEL +0 -0
- {agno-1.8.1.dist-info → agno-2.0.0.dist-info}/top_level.txt +0 -0
agno/knowledge/agent.py
DELETED
|
@@ -1,702 +0,0 @@
|
|
|
1
|
-
import asyncio
|
|
2
|
-
from pathlib import Path
|
|
3
|
-
from typing import Any, AsyncIterator, Dict, Iterator, List, Optional, Set, Tuple
|
|
4
|
-
|
|
5
|
-
from pydantic import BaseModel, ConfigDict, model_validator
|
|
6
|
-
|
|
7
|
-
from agno.document import Document
|
|
8
|
-
from agno.document.chunking.fixed import FixedSizeChunking
|
|
9
|
-
from agno.document.chunking.strategy import ChunkingStrategy
|
|
10
|
-
from agno.document.reader.base import Reader
|
|
11
|
-
from agno.utils.log import log_debug, log_info, logger
|
|
12
|
-
from agno.vectordb import VectorDb
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
class AgentKnowledge(BaseModel):
|
|
16
|
-
"""Base class for Agent knowledge"""
|
|
17
|
-
|
|
18
|
-
# Reader for reading documents from files, pdfs, urls, etc.
|
|
19
|
-
reader: Optional[Reader] = None
|
|
20
|
-
# Vector db for storing knowledge
|
|
21
|
-
vector_db: Optional[VectorDb] = None
|
|
22
|
-
# Number of relevant documents to return on search
|
|
23
|
-
num_documents: int = 5
|
|
24
|
-
# Number of documents to optimize the vector db on
|
|
25
|
-
optimize_on: Optional[int] = 1000
|
|
26
|
-
|
|
27
|
-
chunking_strategy: Optional[ChunkingStrategy] = None
|
|
28
|
-
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
29
|
-
|
|
30
|
-
valid_metadata_filters: Set[str] = None # type: ignore
|
|
31
|
-
|
|
32
|
-
@model_validator(mode="after")
|
|
33
|
-
def update_reader(self) -> "AgentKnowledge":
|
|
34
|
-
if self.reader is not None and self.reader.chunking_strategy is None:
|
|
35
|
-
self.reader.chunking_strategy = self.chunking_strategy or FixedSizeChunking()
|
|
36
|
-
return self
|
|
37
|
-
|
|
38
|
-
@property
|
|
39
|
-
def document_lists(self) -> Iterator[List[Document]]:
|
|
40
|
-
"""Iterator that yields lists of documents in the knowledge base
|
|
41
|
-
Each object yielded by the iterator is a list of documents.
|
|
42
|
-
"""
|
|
43
|
-
raise NotImplementedError
|
|
44
|
-
|
|
45
|
-
@property
|
|
46
|
-
async def async_document_lists(self) -> AsyncIterator[List[Document]]:
|
|
47
|
-
"""Iterator that yields lists of documents in the knowledge base
|
|
48
|
-
Each object yielded by the iterator is a list of documents.
|
|
49
|
-
"""
|
|
50
|
-
raise NotImplementedError
|
|
51
|
-
|
|
52
|
-
def _upsert_warning(self, upsert) -> None:
|
|
53
|
-
"""Log a warning if upsert is not available"""
|
|
54
|
-
if upsert and self.vector_db is not None and not self.vector_db.upsert_available():
|
|
55
|
-
log_info(
|
|
56
|
-
f"Vector db '{self.vector_db.__class__.__module__}' does not support upsert. Falling back to insert."
|
|
57
|
-
)
|
|
58
|
-
|
|
59
|
-
def _load_init(self, recreate: bool, upsert: bool) -> None:
|
|
60
|
-
"""Initial setup for loading knowledge base"""
|
|
61
|
-
if self.vector_db is None:
|
|
62
|
-
logger.warning("No vector db provided")
|
|
63
|
-
return
|
|
64
|
-
|
|
65
|
-
if recreate:
|
|
66
|
-
log_info("Dropping collection")
|
|
67
|
-
self.vector_db.drop()
|
|
68
|
-
|
|
69
|
-
if not self.vector_db.exists():
|
|
70
|
-
log_info("Creating collection")
|
|
71
|
-
self.vector_db.create()
|
|
72
|
-
|
|
73
|
-
self._upsert_warning(upsert)
|
|
74
|
-
|
|
75
|
-
async def _aload_init(self, recreate: bool, upsert: bool) -> None:
|
|
76
|
-
"""Initial async setup for loading knowledge base"""
|
|
77
|
-
if self.vector_db is None:
|
|
78
|
-
logger.warning("No vector db provided")
|
|
79
|
-
return
|
|
80
|
-
|
|
81
|
-
if recreate:
|
|
82
|
-
log_info("Dropping collection")
|
|
83
|
-
try:
|
|
84
|
-
await self.vector_db.async_drop()
|
|
85
|
-
except NotImplementedError:
|
|
86
|
-
logger.warning("Vector db does not support async drop, falling back to sync drop")
|
|
87
|
-
self.vector_db.drop()
|
|
88
|
-
|
|
89
|
-
if not self.vector_db.exists():
|
|
90
|
-
log_info("Creating collection")
|
|
91
|
-
try:
|
|
92
|
-
await self.vector_db.async_create()
|
|
93
|
-
except NotImplementedError:
|
|
94
|
-
logger.warning("Vector db does not support async create, falling back to sync create")
|
|
95
|
-
self.vector_db.create()
|
|
96
|
-
|
|
97
|
-
self._upsert_warning(upsert)
|
|
98
|
-
|
|
99
|
-
def search(
|
|
100
|
-
self, query: str, num_documents: Optional[int] = None, filters: Optional[Dict[str, Any]] = None
|
|
101
|
-
) -> List[Document]:
|
|
102
|
-
"""Returns relevant documents matching a query"""
|
|
103
|
-
try:
|
|
104
|
-
if self.vector_db is None:
|
|
105
|
-
logger.warning("No vector db provided")
|
|
106
|
-
return []
|
|
107
|
-
|
|
108
|
-
_num_documents = num_documents or self.num_documents
|
|
109
|
-
log_debug(f"Getting {_num_documents} relevant documents for query: {query}")
|
|
110
|
-
return self.vector_db.search(query=query, limit=_num_documents, filters=filters)
|
|
111
|
-
except Exception as e:
|
|
112
|
-
logger.error(f"Error searching for documents: {e}")
|
|
113
|
-
return []
|
|
114
|
-
|
|
115
|
-
async def async_search(
|
|
116
|
-
self, query: str, num_documents: Optional[int] = None, filters: Optional[Dict[str, Any]] = None
|
|
117
|
-
) -> List[Document]:
|
|
118
|
-
"""Returns relevant documents matching a query"""
|
|
119
|
-
try:
|
|
120
|
-
if self.vector_db is None:
|
|
121
|
-
logger.warning("No vector db provided")
|
|
122
|
-
return []
|
|
123
|
-
|
|
124
|
-
_num_documents = num_documents or self.num_documents
|
|
125
|
-
log_debug(f"Getting {_num_documents} relevant documents for query: {query}")
|
|
126
|
-
try:
|
|
127
|
-
return await self.vector_db.async_search(query=query, limit=_num_documents, filters=filters)
|
|
128
|
-
except NotImplementedError:
|
|
129
|
-
log_info("Vector db does not support async search")
|
|
130
|
-
return self.search(query=query, num_documents=_num_documents, filters=filters)
|
|
131
|
-
except Exception as e:
|
|
132
|
-
logger.error(f"Error searching for documents: {e}")
|
|
133
|
-
return []
|
|
134
|
-
|
|
135
|
-
def load(
|
|
136
|
-
self,
|
|
137
|
-
recreate: bool = False,
|
|
138
|
-
upsert: bool = False,
|
|
139
|
-
skip_existing: bool = True,
|
|
140
|
-
) -> None:
|
|
141
|
-
"""Load the knowledge base to the vector db
|
|
142
|
-
|
|
143
|
-
Args:
|
|
144
|
-
recreate (bool): If True, recreates the collection in the vector db. Defaults to False.
|
|
145
|
-
upsert (bool): If True, upserts documents to the vector db. Defaults to False.
|
|
146
|
-
skip_existing (bool): If True, skips documents which already exist in the vector db when inserting. Defaults to True.
|
|
147
|
-
"""
|
|
148
|
-
self._load_init(recreate, upsert)
|
|
149
|
-
if self.vector_db is None:
|
|
150
|
-
return
|
|
151
|
-
|
|
152
|
-
log_info("Loading knowledge base")
|
|
153
|
-
num_documents = 0
|
|
154
|
-
for document_list in self.document_lists:
|
|
155
|
-
documents_to_load = document_list
|
|
156
|
-
|
|
157
|
-
# Track metadata for filtering capabilities
|
|
158
|
-
for doc in document_list:
|
|
159
|
-
if doc.meta_data:
|
|
160
|
-
self._track_metadata_structure(doc.meta_data)
|
|
161
|
-
|
|
162
|
-
# Upsert documents if upsert is True and vector db supports upsert
|
|
163
|
-
if upsert and self.vector_db.upsert_available():
|
|
164
|
-
for doc in documents_to_load:
|
|
165
|
-
self.vector_db.upsert(documents=[doc], filters=doc.meta_data)
|
|
166
|
-
# Insert documents
|
|
167
|
-
else:
|
|
168
|
-
# Filter out documents which already exist in the vector db
|
|
169
|
-
if skip_existing:
|
|
170
|
-
log_debug("Filtering out existing documents before insertion.")
|
|
171
|
-
documents_to_load = self.filter_existing_documents(document_list)
|
|
172
|
-
|
|
173
|
-
if documents_to_load:
|
|
174
|
-
for doc in documents_to_load:
|
|
175
|
-
self.vector_db.insert(documents=[doc], filters=doc.meta_data)
|
|
176
|
-
|
|
177
|
-
num_documents += len(documents_to_load)
|
|
178
|
-
log_info(f"Added {num_documents} documents to knowledge base")
|
|
179
|
-
|
|
180
|
-
async def aload(
    self,
    recreate: bool = False,
    upsert: bool = False,
    skip_existing: bool = True,
) -> None:
    """Asynchronously push every document batch of the knowledge base into the vector db.

    Args:
        recreate (bool): Forwarded to ``_aload_init``; if True, the collection in the vector db is recreated. Defaults to False.
        upsert (bool): If True (and the vector db reports upsert support), documents are upserted one by one. Defaults to False.
        skip_existing (bool): On the insert path, drop documents that already exist in the vector db first. Defaults to True.
    """
    await self._aload_init(recreate, upsert)
    if self.vector_db is None:
        return

    log_info("Loading knowledge base")
    total_loaded = 0
    async for batch in self.async_document_lists:  # type: ignore
        pending = batch

        # Remember which metadata keys occur so they can later be used as filters
        for item in batch:
            if item.meta_data:
                self._track_metadata_structure(item.meta_data)

        if upsert and self.vector_db.upsert_available():
            # Upsert path: each document is written individually with its own metadata as filters
            for item in pending:
                await self.vector_db.async_upsert(documents=[item], filters=item.meta_data)
        else:
            # Insert path: optionally remove documents the vector db already holds
            if skip_existing:
                log_debug("Filtering out existing documents before insertion.")
                pending = await self.async_filter_existing_documents(batch)

            if pending:
                for item in pending:
                    await self.vector_db.async_insert(documents=[item], filters=item.meta_data)

        total_loaded += len(pending)
    log_info(f"Added {total_loaded} documents to knowledge base")
|
|
224
|
-
|
|
225
|
-
def load_documents(
    self,
    documents: List[Document],
    upsert: bool = False,
    skip_existing: bool = True,
    filters: Optional[Dict[str, Any]] = None,
) -> None:
    """Write the given documents to the vector db in a single upsert or insert call.

    Args:
        documents (List[Document]): Documents to store.
        upsert (bool): If True (and the vector db reports upsert support), upsert instead of insert. Defaults to False.
        skip_existing (bool): On the insert path, leave out documents for which ``doc_exists`` is truthy. Defaults to True.
        filters (Optional[Dict[str, Any]]): Passed through to the vector db as ``filters``. Defaults to None.
    """
    self._load_init(recreate=False, upsert=upsert)
    if self.vector_db is None:
        return

    log_info("Loading knowledge base")

    # Upsert path: everything goes to the vector db in one call
    if upsert and self.vector_db.upsert_available():
        self.vector_db.upsert(documents=documents, filters=filters)
        log_info(f"Loaded {len(documents)} documents to knowledge base")
        return

    # Insert path: optionally keep only documents the vector db does not know yet
    if skip_existing:
        pending = [candidate for candidate in documents if not self.vector_db.doc_exists(candidate)]
    else:
        pending = documents

    if len(pending) == 0:
        log_info("No new documents to load")
        return

    self.vector_db.insert(documents=pending, filters=filters)
    log_info(f"Loaded {len(pending)} documents to knowledge base")
|
|
263
|
-
|
|
264
|
-
async def async_load_documents(
    self,
    documents: List[Document],
    upsert: bool = False,
    skip_existing: bool = True,
    filters: Optional[Dict[str, Any]] = None,
) -> None:
    """Load documents to the knowledge base asynchronously

    Each async vector db call falls back to its synchronous counterpart when the
    vector db raises NotImplementedError for the async variant.

    Args:
        documents (List[Document]): List of documents to load
        upsert (bool): If True, upserts documents to the vector db. Defaults to False.
        skip_existing (bool): If True, skips documents which already exist in the vector db when inserting. Defaults to True.
        filters (Optional[Dict[str, Any]]): Filters to add to each row that can be used to limit results during querying. Defaults to None.
    """
    await self._aload_init(recreate=False, upsert=upsert)
    if self.vector_db is None:
        return

    log_info("Loading knowledge base")

    # Upsert documents if upsert is True
    if upsert and self.vector_db.upsert_available():
        try:
            await self.vector_db.async_upsert(documents=documents, filters=filters)
        except NotImplementedError:
            logger.warning("Vector db does not support async upsert")
            self.vector_db.upsert(documents=documents, filters=filters)
        log_info(f"Loaded {len(documents)} documents to knowledge base")
        return

    # Filter out documents which already exist in the vector db
    if skip_existing:
        existence_checks: List[Any] = []
        try:
            # Parallelize existence checks using asyncio.gather
            existence_checks = await asyncio.gather(
                *[self.vector_db.async_doc_exists(document) for document in documents], return_exceptions=True
            )
        except NotImplementedError:
            # Raised synchronously while the awaitables were being created
            async_check_supported = False
        else:
            # With return_exceptions=True a NotImplementedError raised inside a coroutine is
            # returned as a result rather than raised, so it has to be detected explicitly;
            # otherwise the sync fallback below would never run.
            async_check_supported = not any(isinstance(result, NotImplementedError) for result in existence_checks)

        if async_check_supported:
            # Only a literal True counts as "exists"; any other result (including an
            # exception object returned by gather) keeps the document in the load list.
            documents_to_load = [
                doc for doc, exists in zip(documents, existence_checks) if not (isinstance(exists, bool) and exists)
            ]
        else:
            logger.warning("Vector db does not support async doc_exists")
            documents_to_load = [document for document in documents if not self.vector_db.doc_exists(document)]
    else:
        documents_to_load = documents

    # Insert documents
    if len(documents_to_load) > 0:
        try:
            await self.vector_db.async_insert(documents=documents_to_load, filters=filters)
        except NotImplementedError:
            logger.warning("Vector db does not support async insert")
            self.vector_db.insert(documents=documents_to_load, filters=filters)
        log_info(f"Loaded {len(documents_to_load)} documents to knowledge base")
    else:
        log_info("No new documents to load")
|
|
323
|
-
|
|
324
|
-
def load_document(
    self,
    document: Document,
    upsert: bool = False,
    skip_existing: bool = True,
    filters: Optional[Dict[str, Any]] = None,
) -> None:
    """Store a single document by delegating to ``load_documents`` with a one-element list.

    Args:
        document (Document): The document to store.
        upsert (bool): Forwarded to ``load_documents``. Defaults to False.
        skip_existing (bool): Forwarded to ``load_documents``. Defaults to True.
        filters (Optional[Dict[str, Any]]): Forwarded to ``load_documents``. Defaults to None.
    """
    single_batch = [document]
    self.load_documents(
        documents=single_batch,
        upsert=upsert,
        skip_existing=skip_existing,
        filters=filters,
    )
|
|
340
|
-
|
|
341
|
-
async def async_load_document(
    self,
    document: Document,
    upsert: bool = False,
    skip_existing: bool = True,
    filters: Optional[Dict[str, Any]] = None,
) -> None:
    """Store a single document by awaiting ``async_load_documents`` with a one-element list.

    Args:
        document (Document): The document to store.
        upsert (bool): Forwarded to ``async_load_documents``. Defaults to False.
        skip_existing (bool): Forwarded to ``async_load_documents``. Defaults to True.
        filters (Optional[Dict[str, Any]]): Forwarded to ``async_load_documents``. Defaults to None.
    """
    single_batch = [document]
    await self.async_load_documents(
        documents=single_batch,
        upsert=upsert,
        skip_existing=skip_existing,
        filters=filters,
    )
|
|
359
|
-
|
|
360
|
-
def load_dict(
    self,
    document: Dict[str, Any],
    upsert: bool = False,
    skip_existing: bool = True,
    filters: Optional[Dict[str, Any]] = None,
) -> None:
    """Build a Document from a dictionary via ``Document.from_dict`` and store it.

    Args:
        document (Dict[str, Any]): Dictionary form of the document.
        upsert (bool): Forwarded to ``load_documents``. Defaults to False.
        skip_existing (bool): Forwarded to ``load_documents``. Defaults to True.
        filters (Optional[Dict[str, Any]]): Forwarded to ``load_documents``. Defaults to None.
    """
    parsed = Document.from_dict(document)
    self.load_documents(
        documents=[parsed],
        upsert=upsert,
        skip_existing=skip_existing,
        filters=filters,
    )
|
|
378
|
-
|
|
379
|
-
def load_json(
    self, document: str, upsert: bool = False, skip_existing: bool = True, filters: Optional[Dict[str, Any]] = None
) -> None:
    """Build a Document from a JSON string via ``Document.from_json`` and store it.

    Args:
        document (str): JSON form of the document.
        upsert (bool): Forwarded to ``load_documents``. Defaults to False.
        skip_existing (bool): Forwarded to ``load_documents``. Defaults to True.
        filters (Optional[Dict[str, Any]]): Forwarded to ``load_documents``. Defaults to None.
    """
    parsed = Document.from_json(document)
    self.load_documents(
        documents=[parsed],
        upsert=upsert,
        skip_existing=skip_existing,
        filters=filters,
    )
|
|
393
|
-
|
|
394
|
-
def load_text(
    self, text: str, upsert: bool = False, skip_existing: bool = True, filters: Optional[Dict[str, Any]] = None
) -> None:
    """Wrap a raw text in a Document (as its ``content``) and store it.

    Args:
        text (str): Text that becomes the document content.
        upsert (bool): Forwarded to ``load_documents``. Defaults to False.
        skip_existing (bool): Forwarded to ``load_documents``. Defaults to True.
        filters (Optional[Dict[str, Any]]): Forwarded to ``load_documents``. Defaults to None.
    """
    wrapped = Document(content=text)
    self.load_documents(
        documents=[wrapped],
        upsert=upsert,
        skip_existing=skip_existing,
        filters=filters,
    )
|
|
408
|
-
|
|
409
|
-
def exists(self) -> bool:
    """Report whether the vector db says it exists; False (with a warning) when no vector db is set."""
    db = self.vector_db
    if db is not None:
        return db.exists()

    logger.warning("No vector db provided")
    return False
|
|
415
|
-
|
|
416
|
-
def delete(self) -> bool:
    """Delegate deletion to the vector db; returns True (with a warning) when no vector db is set."""
    db = self.vector_db
    if db is not None:
        return db.delete()

    logger.warning("No vector db available")
    return True
|
|
423
|
-
|
|
424
|
-
def filter_existing_documents(self, documents: List[Document]) -> List[Document]:
    """Return the documents that are neither in the vector db nor repeated within the input.

    A document is dropped when an earlier accepted document in the same call had
    equal ``content``, or when ``vector_db.doc_exists`` is truthy for it.

    Args:
        documents (List[Document]): Candidate documents.

    Returns:
        List[Document]: The kept documents, in input order. The input list itself is
        returned unchanged when no vector db is configured.
    """
    if not self.vector_db:
        log_debug("No vector database configured, skipping document filtering")
        return documents

    accepted_content = set()  # content of documents kept so far
    kept = []

    for candidate in documents:
        body = candidate.content
        # The in-batch duplicate test runs first, so the vector db is only queried for new content
        if body in accepted_content or self.vector_db.doc_exists(candidate):
            log_debug(f"Skipping existing document: {candidate.name} (or duplicate content)")
            continue
        accepted_content.add(body)
        kept.append(candidate)

    skipped = len(documents) - len(kept)
    if skipped > 0:
        log_info(f"Skipped {skipped} existing/duplicate documents.")

    return kept
|
|
458
|
-
|
|
459
|
-
async def async_filter_existing_documents(self, documents: List[Document]) -> List[Document]:
    """Async variant: return the documents that are neither in the vector db nor repeated within the input.

    A document is dropped when an earlier accepted document in the same call had
    equal ``content``, or when awaiting ``vector_db.async_doc_exists`` yields a truthy value.
    Existence checks are awaited one document at a time.

    Args:
        documents (List[Document]): Candidate documents.

    Returns:
        List[Document]: The kept documents, in input order. The input list itself is
        returned unchanged when no vector db is configured.
    """
    from agno.utils.log import log_debug, log_info

    if not self.vector_db:
        log_debug("No vector database configured, skipping document filtering")
        return documents

    accepted_content = set()  # content of documents kept so far
    kept = []

    for candidate in documents:
        body = candidate.content
        # The in-batch duplicate test runs first, so the vector db is only queried for new content
        if body in accepted_content or await self.vector_db.async_doc_exists(candidate):
            log_debug(f"Skipping existing document: {candidate.name} (or duplicate content)")
            continue
        accepted_content.add(body)
        kept.append(candidate)

    skipped = len(documents) - len(kept)
    if skipped > 0:
        log_info(f"Skipped {skipped} existing/duplicate documents.")

    return kept
|
|
495
|
-
|
|
496
|
-
def _track_metadata_structure(self, metadata: Optional[Dict[str, Any]]) -> None:
    """Record the top-level keys of ``metadata`` in ``self.valid_metadata_filters``.

    Args:
        metadata (Optional[Dict[str, Any]]): Metadata whose keys should be remembered.
            None or an empty dict leaves the tracked set untouched.
    """
    if not metadata:
        return

    # The set is created lazily on the first non-empty metadata seen
    if self.valid_metadata_filters is None:
        self.valid_metadata_filters = set()

    self.valid_metadata_filters.update(metadata.keys())
|
|
509
|
-
|
|
510
|
-
def validate_filters(self, filters: Optional[Dict[str, Any]]) -> Tuple[Dict[str, Any], List[str]]:
    """Split ``filters`` into entries whose key is a tracked metadata key and the remaining keys.

    A key is accepted when either the full key or its last dot-separated segment
    (e.g. ``key`` for ``meta_data.key``) is in ``self.valid_metadata_filters``.

    Args:
        filters (Optional[Dict[str, Any]]): Filters to check.

    Returns:
        Tuple[Dict[str, Any], List[str]]: The accepted filters and the list of rejected keys.
        Both are empty when ``filters`` is empty or None; everything is rejected when no
        metadata keys have been tracked yet.
    """
    if not filters:
        return {}, []

    known_keys = self.valid_metadata_filters

    # If no metadata filters tracked yet, all keys are considered invalid
    if known_keys is None:
        invalid_keys = list(filters.keys())
        log_debug(f"No valid metadata filters tracked yet. All filter keys considered invalid: {invalid_keys}")
        return {}, invalid_keys

    accepted = {}
    rejected = []
    for key, value in filters.items():
        # Last dot-separated segment; equals the key itself when it contains no dot
        leaf = key.rsplit(".", 1)[-1]
        if leaf in known_keys or key in known_keys:
            accepted[key] = value
            continue
        rejected.append(key)
        log_debug(f"Invalid filter key: {key} - not present in knowledge base")

    return accepted, rejected
|
|
533
|
-
|
|
534
|
-
def initialize_valid_filters(self) -> None:
    """Populate the valid metadata filters from ``self.document_lists`` if none are tracked yet.

    Useful when load/aload has not been run but the tracked metadata keys are still
    needed. Does nothing when ``self.valid_metadata_filters`` is already set.
    """
    if self.valid_metadata_filters is not None:
        return

    for batch in self.document_lists:
        for item in batch:
            meta = item.meta_data
            if meta:
                self._track_metadata_structure(meta)
|
|
543
|
-
|
|
544
|
-
def prepare_load(
    self,
    file_path: Path,
    allowed_formats: Optional[List[str]],
    metadata: Optional[Dict[str, Any]] = None,
    recreate: bool = False,
    is_url: bool = False,
) -> bool:
    """Validate file path and prepare collection for loading.

    Args:
        file_path (Path): Path to validate. Not inspected when ``is_url`` is True.
        allowed_formats (Optional[List[str]]): Accepted file suffixes, compared against ``file_path.suffix``
            (which includes the leading dot). None disables the suffix check.
        metadata (Optional[Dict[str, Any]]): Metadata to track
        recreate (bool): Whether to recreate the collection
        is_url (bool): If True, skip the existence and suffix checks on ``file_path``. Defaults to False.

    Returns:
        bool: True if preparation succeeded, False otherwise
    """
    # 1. Validate file path
    if not is_url:
        if not file_path.exists():
            logger.error(f"File not found: {file_path}")
            return False

        # allowed_formats is Optional: a membership test against None would raise TypeError,
        # so None is treated as "no restriction on the suffix".
        if allowed_formats is not None and file_path.suffix not in allowed_formats:
            logger.error(f"Unsupported file format: {file_path.suffix}")
            return False

    # 2. Track metadata
    if metadata:
        self._track_metadata_structure(metadata)

    # 3. Prepare vector DB
    self._load_init(recreate, upsert=False)
    return self.vector_db is not None
|
|
580
|
-
|
|
581
|
-
async def aprepare_load(
    self,
    file_path: Path,
    allowed_formats: List[str],
    metadata: Optional[Dict[str, Any]] = None,
    recreate: bool = False,
    is_url: bool = False,
) -> bool:
    """Async variant: check the source file and get the vector db ready for loading.

    Args:
        file_path (Path): Path to check. Not inspected when ``is_url`` is True.
        allowed_formats (List[str]): Accepted file suffixes, compared against ``file_path.suffix``.
        metadata (Optional[Dict[str, Any]]): Metadata whose keys should be tracked.
        recreate (bool): Forwarded to ``_aload_init``.
        is_url (bool): If True, skip the existence and suffix checks on ``file_path``.

    Returns:
        bool: False when the file is missing, its suffix is not allowed, or no vector db
        is set after initialisation; True otherwise.
    """
    check_path = not is_url

    # 1. Validate file path
    if check_path and not file_path.exists():
        logger.error(f"File not found: {file_path}")
        return False

    if check_path and file_path.suffix not in allowed_formats:
        logger.error(f"Unsupported file format: {file_path.suffix}")
        return False

    # 2. Track metadata
    if metadata:
        self._track_metadata_structure(metadata)

    # 3. Prepare vector DB
    await self._aload_init(recreate, upsert=False)
    return self.vector_db is not None
|
|
617
|
-
|
|
618
|
-
def process_documents(
    self,
    documents: List[Document],
    metadata: Optional[Dict[str, Any]] = None,
    upsert: bool = False,
    skip_existing: bool = True,
    source_info: str = "documents",
) -> None:
    """Process and load documents synchronously.

    Args:
        documents (List[Document]): Documents to process
        metadata (Optional[Dict[str, Any]]): Metadata passed to the vector db as ``filters``
        upsert (bool): Whether to upsert documents
        skip_existing (bool): Whether to skip existing documents
        source_info (str): Information about document source for logging
    """
    if not documents:
        logger.warning(f"No documents were read from {source_info}")
        return

    # Same guard as the other load methods: without a vector db there is nothing to write to
    if self.vector_db is None:
        logger.warning("No vector db provided")
        return

    log_info(f"Loading {len(documents)} documents from {source_info} with metadata: {metadata}")

    self._upsert_warning(upsert)

    # Decide loading strategy: upsert or insert (with optional skip)
    if upsert and self.vector_db.upsert_available():
        log_debug(f"Upserting {len(documents)} documents.")
        self.vector_db.upsert(documents=documents, filters=metadata)
    else:
        documents_to_insert = documents
        if skip_existing:
            log_debug("Filtering out existing documents before insertion.")
            documents_to_insert = self.filter_existing_documents(documents)

        if documents_to_insert:
            log_debug(f"Inserting {len(documents_to_insert)} new documents.")
            self.vector_db.insert(documents=documents_to_insert, filters=metadata)
        else:
            log_info("No new documents to insert after filtering.")

    log_info(f"Finished loading documents from {source_info}.")
|
|
660
|
-
|
|
661
|
-
async def aprocess_documents(
    self,
    documents: List[Document],
    metadata: Optional[Dict[str, Any]] = None,
    upsert: bool = False,
    skip_existing: bool = True,
    source_info: str = "documents",
) -> None:
    """Process and load documents asynchronously.

    Args:
        documents (List[Document]): Documents to process
        metadata (Optional[Dict[str, Any]]): Metadata passed to the vector db as ``filters``
        upsert (bool): Whether to upsert documents
        skip_existing (bool): Whether to skip existing documents
        source_info (str): Information about document source for logging
    """
    if not documents:
        logger.warning(f"No documents were read from {source_info}")
        return

    # Same guard as the other load methods: without a vector db there is nothing to write to
    if self.vector_db is None:
        logger.warning("No vector db provided")
        return

    self._upsert_warning(upsert)

    log_info(f"Loading {len(documents)} documents from {source_info} with metadata: {metadata}")

    # Decide loading strategy: upsert or insert (with optional skip)
    if upsert and self.vector_db.upsert_available():
        log_debug(f"Upserting {len(documents)} documents.")
        await self.vector_db.async_upsert(documents=documents, filters=metadata)
    else:
        documents_to_insert = documents
        if skip_existing:
            log_debug("Filtering out existing documents before insertion.")
            documents_to_insert = await self.async_filter_existing_documents(documents)

        if documents_to_insert:
            log_debug(f"Inserting {len(documents_to_insert)} new documents.")
            await self.vector_db.async_insert(documents=documents_to_insert, filters=metadata)
        else:
            log_info("No new documents to insert after filtering.")

    log_info(f"Finished loading documents from {source_info}.")
|