agno 1.8.1__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/__init__.py +8 -0
- agno/agent/__init__.py +19 -27
- agno/agent/agent.py +3143 -4170
- agno/api/agent.py +11 -67
- agno/api/api.py +5 -46
- agno/api/evals.py +8 -19
- agno/api/os.py +17 -0
- agno/api/routes.py +6 -41
- agno/api/schemas/__init__.py +9 -0
- agno/api/schemas/agent.py +5 -21
- agno/api/schemas/evals.py +7 -16
- agno/api/schemas/os.py +14 -0
- agno/api/schemas/team.py +5 -21
- agno/api/schemas/utils.py +21 -0
- agno/api/schemas/workflows.py +11 -7
- agno/api/settings.py +53 -0
- agno/api/team.py +11 -66
- agno/api/workflow.py +28 -0
- agno/cloud/aws/base.py +214 -0
- agno/cloud/aws/s3/__init__.py +2 -0
- agno/cloud/aws/s3/api_client.py +43 -0
- agno/cloud/aws/s3/bucket.py +195 -0
- agno/cloud/aws/s3/object.py +57 -0
- agno/db/__init__.py +24 -0
- agno/db/base.py +245 -0
- agno/db/dynamo/__init__.py +3 -0
- agno/db/dynamo/dynamo.py +1743 -0
- agno/db/dynamo/schemas.py +278 -0
- agno/db/dynamo/utils.py +684 -0
- agno/db/firestore/__init__.py +3 -0
- agno/db/firestore/firestore.py +1432 -0
- agno/db/firestore/schemas.py +130 -0
- agno/db/firestore/utils.py +278 -0
- agno/db/gcs_json/__init__.py +3 -0
- agno/db/gcs_json/gcs_json_db.py +1001 -0
- agno/db/gcs_json/utils.py +194 -0
- agno/db/in_memory/__init__.py +3 -0
- agno/db/in_memory/in_memory_db.py +882 -0
- agno/db/in_memory/utils.py +172 -0
- agno/db/json/__init__.py +3 -0
- agno/db/json/json_db.py +1045 -0
- agno/db/json/utils.py +196 -0
- agno/db/migrations/v1_to_v2.py +162 -0
- agno/db/mongo/__init__.py +3 -0
- agno/db/mongo/mongo.py +1416 -0
- agno/db/mongo/schemas.py +77 -0
- agno/db/mongo/utils.py +204 -0
- agno/db/mysql/__init__.py +3 -0
- agno/db/mysql/mysql.py +1719 -0
- agno/db/mysql/schemas.py +124 -0
- agno/db/mysql/utils.py +297 -0
- agno/db/postgres/__init__.py +3 -0
- agno/db/postgres/postgres.py +1710 -0
- agno/db/postgres/schemas.py +124 -0
- agno/db/postgres/utils.py +280 -0
- agno/db/redis/__init__.py +3 -0
- agno/db/redis/redis.py +1367 -0
- agno/db/redis/schemas.py +109 -0
- agno/db/redis/utils.py +288 -0
- agno/db/schemas/__init__.py +3 -0
- agno/db/schemas/evals.py +33 -0
- agno/db/schemas/knowledge.py +40 -0
- agno/db/schemas/memory.py +46 -0
- agno/db/singlestore/__init__.py +3 -0
- agno/db/singlestore/schemas.py +116 -0
- agno/db/singlestore/singlestore.py +1712 -0
- agno/db/singlestore/utils.py +326 -0
- agno/db/sqlite/__init__.py +3 -0
- agno/db/sqlite/schemas.py +119 -0
- agno/db/sqlite/sqlite.py +1676 -0
- agno/db/sqlite/utils.py +268 -0
- agno/db/utils.py +88 -0
- agno/eval/__init__.py +14 -0
- agno/eval/accuracy.py +154 -48
- agno/eval/performance.py +88 -23
- agno/eval/reliability.py +73 -20
- agno/eval/utils.py +23 -13
- agno/integrations/discord/__init__.py +3 -0
- agno/{app → integrations}/discord/client.py +15 -11
- agno/knowledge/__init__.py +2 -2
- agno/{document → knowledge}/chunking/agentic.py +2 -2
- agno/{document → knowledge}/chunking/document.py +2 -2
- agno/{document → knowledge}/chunking/fixed.py +3 -3
- agno/{document → knowledge}/chunking/markdown.py +2 -2
- agno/{document → knowledge}/chunking/recursive.py +2 -2
- agno/{document → knowledge}/chunking/row.py +2 -2
- agno/knowledge/chunking/semantic.py +59 -0
- agno/knowledge/chunking/strategy.py +121 -0
- agno/knowledge/content.py +74 -0
- agno/knowledge/document/__init__.py +5 -0
- agno/{document → knowledge/document}/base.py +12 -2
- agno/knowledge/embedder/__init__.py +5 -0
- agno/{embedder → knowledge/embedder}/aws_bedrock.py +127 -1
- agno/{embedder → knowledge/embedder}/azure_openai.py +65 -1
- agno/{embedder → knowledge/embedder}/base.py +6 -0
- agno/{embedder → knowledge/embedder}/cohere.py +72 -1
- agno/{embedder → knowledge/embedder}/fastembed.py +17 -1
- agno/{embedder → knowledge/embedder}/fireworks.py +1 -1
- agno/{embedder → knowledge/embedder}/google.py +74 -1
- agno/{embedder → knowledge/embedder}/huggingface.py +36 -2
- agno/{embedder → knowledge/embedder}/jina.py +48 -2
- agno/knowledge/embedder/langdb.py +22 -0
- agno/knowledge/embedder/mistral.py +139 -0
- agno/{embedder → knowledge/embedder}/nebius.py +1 -1
- agno/{embedder → knowledge/embedder}/ollama.py +54 -3
- agno/knowledge/embedder/openai.py +223 -0
- agno/{embedder → knowledge/embedder}/sentence_transformer.py +16 -1
- agno/{embedder → knowledge/embedder}/together.py +1 -1
- agno/{embedder → knowledge/embedder}/voyageai.py +49 -1
- agno/knowledge/knowledge.py +1551 -0
- agno/knowledge/reader/__init__.py +7 -0
- agno/{document → knowledge}/reader/arxiv_reader.py +32 -4
- agno/knowledge/reader/base.py +88 -0
- agno/{document → knowledge}/reader/csv_reader.py +47 -65
- agno/knowledge/reader/docx_reader.py +83 -0
- agno/{document → knowledge}/reader/firecrawl_reader.py +42 -21
- agno/{document → knowledge}/reader/json_reader.py +30 -9
- agno/{document → knowledge}/reader/markdown_reader.py +58 -9
- agno/{document → knowledge}/reader/pdf_reader.py +71 -126
- agno/knowledge/reader/reader_factory.py +268 -0
- agno/knowledge/reader/s3_reader.py +101 -0
- agno/{document → knowledge}/reader/text_reader.py +31 -10
- agno/knowledge/reader/url_reader.py +128 -0
- agno/knowledge/reader/web_search_reader.py +366 -0
- agno/{document → knowledge}/reader/website_reader.py +37 -10
- agno/knowledge/reader/wikipedia_reader.py +59 -0
- agno/knowledge/reader/youtube_reader.py +78 -0
- agno/knowledge/remote_content/remote_content.py +88 -0
- agno/{reranker → knowledge/reranker}/base.py +1 -1
- agno/{reranker → knowledge/reranker}/cohere.py +2 -2
- agno/{reranker → knowledge/reranker}/infinity.py +2 -2
- agno/{reranker → knowledge/reranker}/sentence_transformer.py +2 -2
- agno/knowledge/types.py +30 -0
- agno/knowledge/utils.py +169 -0
- agno/media.py +269 -268
- agno/memory/__init__.py +2 -10
- agno/memory/manager.py +1003 -148
- agno/models/aimlapi/__init__.py +2 -2
- agno/models/aimlapi/aimlapi.py +6 -6
- agno/models/anthropic/claude.py +131 -131
- agno/models/aws/bedrock.py +110 -182
- agno/models/aws/claude.py +64 -18
- agno/models/azure/ai_foundry.py +73 -23
- agno/models/base.py +346 -290
- agno/models/cerebras/cerebras.py +84 -27
- agno/models/cohere/chat.py +106 -98
- agno/models/google/gemini.py +105 -46
- agno/models/groq/groq.py +97 -35
- agno/models/huggingface/huggingface.py +92 -27
- agno/models/ibm/watsonx.py +72 -13
- agno/models/litellm/chat.py +85 -13
- agno/models/message.py +46 -151
- agno/models/meta/llama.py +85 -49
- agno/models/metrics.py +120 -0
- agno/models/mistral/mistral.py +90 -21
- agno/models/ollama/__init__.py +0 -2
- agno/models/ollama/chat.py +85 -47
- agno/models/openai/chat.py +154 -37
- agno/models/openai/responses.py +178 -105
- agno/models/perplexity/perplexity.py +26 -2
- agno/models/portkey/portkey.py +0 -7
- agno/models/response.py +15 -9
- agno/models/utils.py +20 -0
- agno/models/vercel/__init__.py +2 -2
- agno/models/vercel/v0.py +1 -1
- agno/models/vllm/__init__.py +2 -2
- agno/models/vllm/vllm.py +3 -3
- agno/models/xai/xai.py +10 -10
- agno/os/__init__.py +3 -0
- agno/os/app.py +497 -0
- agno/os/auth.py +47 -0
- agno/os/config.py +103 -0
- agno/os/interfaces/agui/__init__.py +3 -0
- agno/os/interfaces/agui/agui.py +31 -0
- agno/{app/agui/async_router.py → os/interfaces/agui/router.py} +16 -16
- agno/{app → os/interfaces}/agui/utils.py +77 -33
- agno/os/interfaces/base.py +21 -0
- agno/os/interfaces/slack/__init__.py +3 -0
- agno/{app/slack/async_router.py → os/interfaces/slack/router.py} +3 -5
- agno/os/interfaces/slack/slack.py +32 -0
- agno/os/interfaces/whatsapp/__init__.py +3 -0
- agno/{app/whatsapp/async_router.py → os/interfaces/whatsapp/router.py} +4 -7
- agno/os/interfaces/whatsapp/whatsapp.py +29 -0
- agno/os/mcp.py +235 -0
- agno/os/router.py +1400 -0
- agno/os/routers/__init__.py +3 -0
- agno/os/routers/evals/__init__.py +3 -0
- agno/os/routers/evals/evals.py +393 -0
- agno/os/routers/evals/schemas.py +142 -0
- agno/os/routers/evals/utils.py +161 -0
- agno/os/routers/knowledge/__init__.py +3 -0
- agno/os/routers/knowledge/knowledge.py +850 -0
- agno/os/routers/knowledge/schemas.py +118 -0
- agno/os/routers/memory/__init__.py +3 -0
- agno/os/routers/memory/memory.py +410 -0
- agno/os/routers/memory/schemas.py +58 -0
- agno/os/routers/metrics/__init__.py +3 -0
- agno/os/routers/metrics/metrics.py +178 -0
- agno/os/routers/metrics/schemas.py +47 -0
- agno/os/routers/session/__init__.py +3 -0
- agno/os/routers/session/session.py +536 -0
- agno/os/schema.py +945 -0
- agno/{app/playground → os}/settings.py +7 -15
- agno/os/utils.py +270 -0
- agno/reasoning/azure_ai_foundry.py +4 -4
- agno/reasoning/deepseek.py +4 -4
- agno/reasoning/default.py +6 -11
- agno/reasoning/groq.py +4 -4
- agno/reasoning/helpers.py +4 -6
- agno/reasoning/ollama.py +4 -4
- agno/reasoning/openai.py +4 -4
- agno/run/agent.py +633 -0
- agno/run/base.py +53 -77
- agno/run/cancel.py +81 -0
- agno/run/team.py +243 -96
- agno/run/workflow.py +550 -12
- agno/session/__init__.py +10 -0
- agno/session/agent.py +244 -0
- agno/session/summary.py +225 -0
- agno/session/team.py +262 -0
- agno/{storage/session/v2 → session}/workflow.py +47 -24
- agno/team/__init__.py +15 -16
- agno/team/team.py +3260 -4824
- agno/tools/agentql.py +14 -5
- agno/tools/airflow.py +9 -4
- agno/tools/api.py +7 -3
- agno/tools/apify.py +2 -46
- agno/tools/arxiv.py +8 -3
- agno/tools/aws_lambda.py +7 -5
- agno/tools/aws_ses.py +7 -1
- agno/tools/baidusearch.py +4 -1
- agno/tools/bitbucket.py +4 -4
- agno/tools/brandfetch.py +14 -11
- agno/tools/bravesearch.py +4 -1
- agno/tools/brightdata.py +43 -23
- agno/tools/browserbase.py +13 -4
- agno/tools/calcom.py +12 -10
- agno/tools/calculator.py +10 -27
- agno/tools/cartesia.py +20 -17
- agno/tools/{clickup_tool.py → clickup.py} +12 -25
- agno/tools/confluence.py +8 -8
- agno/tools/crawl4ai.py +7 -1
- agno/tools/csv_toolkit.py +9 -8
- agno/tools/dalle.py +22 -12
- agno/tools/daytona.py +13 -16
- agno/tools/decorator.py +6 -3
- agno/tools/desi_vocal.py +17 -8
- agno/tools/discord.py +11 -8
- agno/tools/docker.py +30 -42
- agno/tools/duckdb.py +34 -53
- agno/tools/duckduckgo.py +8 -7
- agno/tools/e2b.py +62 -62
- agno/tools/eleven_labs.py +36 -29
- agno/tools/email.py +4 -1
- agno/tools/evm.py +7 -1
- agno/tools/exa.py +19 -14
- agno/tools/fal.py +30 -30
- agno/tools/file.py +9 -8
- agno/tools/financial_datasets.py +25 -44
- agno/tools/firecrawl.py +22 -22
- agno/tools/function.py +127 -18
- agno/tools/giphy.py +23 -11
- agno/tools/github.py +48 -126
- agno/tools/gmail.py +45 -61
- agno/tools/google_bigquery.py +7 -6
- agno/tools/google_maps.py +11 -26
- agno/tools/googlesearch.py +7 -2
- agno/tools/googlesheets.py +21 -17
- agno/tools/hackernews.py +9 -5
- agno/tools/jina.py +5 -4
- agno/tools/jira.py +18 -9
- agno/tools/knowledge.py +31 -32
- agno/tools/linear.py +19 -34
- agno/tools/linkup.py +5 -1
- agno/tools/local_file_system.py +8 -5
- agno/tools/lumalab.py +32 -20
- agno/tools/mcp.py +1 -2
- agno/tools/mem0.py +18 -12
- agno/tools/memori.py +14 -10
- agno/tools/mlx_transcribe.py +3 -2
- agno/tools/models/azure_openai.py +33 -15
- agno/tools/models/gemini.py +59 -32
- agno/tools/models/groq.py +30 -23
- agno/tools/models/nebius.py +28 -12
- agno/tools/models_labs.py +40 -16
- agno/tools/moviepy_video.py +7 -6
- agno/tools/neo4j.py +10 -8
- agno/tools/newspaper.py +7 -2
- agno/tools/newspaper4k.py +8 -3
- agno/tools/openai.py +58 -32
- agno/tools/openbb.py +12 -11
- agno/tools/opencv.py +63 -47
- agno/tools/openweather.py +14 -12
- agno/tools/pandas.py +11 -3
- agno/tools/postgres.py +4 -12
- agno/tools/pubmed.py +4 -1
- agno/tools/python.py +9 -22
- agno/tools/reasoning.py +35 -27
- agno/tools/reddit.py +11 -26
- agno/tools/replicate.py +55 -42
- agno/tools/resend.py +4 -1
- agno/tools/scrapegraph.py +15 -14
- agno/tools/searxng.py +10 -23
- agno/tools/serpapi.py +6 -3
- agno/tools/serper.py +13 -4
- agno/tools/shell.py +9 -2
- agno/tools/slack.py +12 -11
- agno/tools/sleep.py +3 -2
- agno/tools/spider.py +24 -4
- agno/tools/sql.py +7 -6
- agno/tools/tavily.py +6 -4
- agno/tools/telegram.py +12 -4
- agno/tools/todoist.py +11 -31
- agno/tools/toolkit.py +1 -1
- agno/tools/trafilatura.py +22 -6
- agno/tools/trello.py +9 -22
- agno/tools/twilio.py +10 -3
- agno/tools/user_control_flow.py +6 -1
- agno/tools/valyu.py +34 -5
- agno/tools/visualization.py +19 -28
- agno/tools/webbrowser.py +4 -3
- agno/tools/webex.py +11 -7
- agno/tools/website.py +15 -46
- agno/tools/webtools.py +12 -4
- agno/tools/whatsapp.py +5 -9
- agno/tools/wikipedia.py +20 -13
- agno/tools/x.py +14 -13
- agno/tools/yfinance.py +13 -40
- agno/tools/youtube.py +26 -20
- agno/tools/zendesk.py +7 -2
- agno/tools/zep.py +10 -7
- agno/tools/zoom.py +10 -9
- agno/utils/common.py +1 -19
- agno/utils/events.py +100 -123
- agno/utils/gemini.py +32 -2
- agno/utils/knowledge.py +29 -0
- agno/utils/log.py +54 -4
- agno/utils/mcp.py +68 -10
- agno/utils/media.py +39 -0
- agno/utils/message.py +12 -1
- agno/utils/models/aws_claude.py +1 -1
- agno/utils/models/claude.py +47 -4
- agno/utils/models/cohere.py +1 -1
- agno/utils/models/mistral.py +8 -7
- agno/utils/models/schema_utils.py +3 -3
- agno/utils/models/watsonx.py +1 -1
- agno/utils/openai.py +1 -1
- agno/utils/pprint.py +33 -32
- agno/utils/print_response/agent.py +779 -0
- agno/utils/print_response/team.py +1669 -0
- agno/utils/print_response/workflow.py +1451 -0
- agno/utils/prompts.py +14 -14
- agno/utils/reasoning.py +87 -0
- agno/utils/response.py +42 -42
- agno/utils/streamlit.py +481 -0
- agno/utils/string.py +8 -22
- agno/utils/team.py +50 -0
- agno/utils/timer.py +2 -2
- agno/vectordb/base.py +33 -21
- agno/vectordb/cassandra/cassandra.py +287 -23
- agno/vectordb/chroma/chromadb.py +482 -59
- agno/vectordb/clickhouse/clickhousedb.py +270 -63
- agno/vectordb/couchbase/couchbase.py +309 -29
- agno/vectordb/lancedb/lance_db.py +360 -21
- agno/vectordb/langchaindb/__init__.py +5 -0
- agno/vectordb/langchaindb/langchaindb.py +145 -0
- agno/vectordb/lightrag/__init__.py +5 -0
- agno/vectordb/lightrag/lightrag.py +374 -0
- agno/vectordb/llamaindex/llamaindexdb.py +127 -0
- agno/vectordb/milvus/milvus.py +242 -32
- agno/vectordb/mongodb/mongodb.py +200 -24
- agno/vectordb/pgvector/pgvector.py +319 -37
- agno/vectordb/pineconedb/pineconedb.py +221 -27
- agno/vectordb/qdrant/qdrant.py +334 -14
- agno/vectordb/singlestore/singlestore.py +286 -29
- agno/vectordb/surrealdb/surrealdb.py +187 -7
- agno/vectordb/upstashdb/upstashdb.py +342 -26
- agno/vectordb/weaviate/weaviate.py +227 -165
- agno/workflow/__init__.py +17 -13
- agno/workflow/{v2/condition.py → condition.py} +135 -32
- agno/workflow/{v2/loop.py → loop.py} +115 -28
- agno/workflow/{v2/parallel.py → parallel.py} +138 -108
- agno/workflow/{v2/router.py → router.py} +133 -32
- agno/workflow/{v2/step.py → step.py} +207 -49
- agno/workflow/{v2/steps.py → steps.py} +147 -66
- agno/workflow/types.py +482 -0
- agno/workflow/workflow.py +2410 -696
- agno-2.0.0.dist-info/METADATA +494 -0
- agno-2.0.0.dist-info/RECORD +515 -0
- agno-2.0.0.dist-info/licenses/LICENSE +201 -0
- agno/agent/metrics.py +0 -107
- agno/api/app.py +0 -35
- agno/api/playground.py +0 -92
- agno/api/schemas/app.py +0 -12
- agno/api/schemas/playground.py +0 -22
- agno/api/schemas/user.py +0 -35
- agno/api/schemas/workspace.py +0 -46
- agno/api/user.py +0 -160
- agno/api/workflows.py +0 -33
- agno/api/workspace.py +0 -175
- agno/app/agui/__init__.py +0 -3
- agno/app/agui/app.py +0 -17
- agno/app/agui/sync_router.py +0 -120
- agno/app/base.py +0 -186
- agno/app/discord/__init__.py +0 -3
- agno/app/fastapi/__init__.py +0 -3
- agno/app/fastapi/app.py +0 -107
- agno/app/fastapi/async_router.py +0 -457
- agno/app/fastapi/sync_router.py +0 -448
- agno/app/playground/app.py +0 -228
- agno/app/playground/async_router.py +0 -1050
- agno/app/playground/deploy.py +0 -249
- agno/app/playground/operator.py +0 -183
- agno/app/playground/schemas.py +0 -220
- agno/app/playground/serve.py +0 -55
- agno/app/playground/sync_router.py +0 -1042
- agno/app/playground/utils.py +0 -46
- agno/app/settings.py +0 -15
- agno/app/slack/__init__.py +0 -3
- agno/app/slack/app.py +0 -19
- agno/app/slack/sync_router.py +0 -92
- agno/app/utils.py +0 -54
- agno/app/whatsapp/__init__.py +0 -3
- agno/app/whatsapp/app.py +0 -15
- agno/app/whatsapp/sync_router.py +0 -197
- agno/cli/auth_server.py +0 -249
- agno/cli/config.py +0 -274
- agno/cli/console.py +0 -88
- agno/cli/credentials.py +0 -23
- agno/cli/entrypoint.py +0 -571
- agno/cli/operator.py +0 -357
- agno/cli/settings.py +0 -96
- agno/cli/ws/ws_cli.py +0 -817
- agno/constants.py +0 -13
- agno/document/__init__.py +0 -5
- agno/document/chunking/semantic.py +0 -45
- agno/document/chunking/strategy.py +0 -31
- agno/document/reader/__init__.py +0 -5
- agno/document/reader/base.py +0 -47
- agno/document/reader/docx_reader.py +0 -60
- agno/document/reader/gcs/pdf_reader.py +0 -44
- agno/document/reader/s3/pdf_reader.py +0 -59
- agno/document/reader/s3/text_reader.py +0 -63
- agno/document/reader/url_reader.py +0 -59
- agno/document/reader/youtube_reader.py +0 -58
- agno/embedder/__init__.py +0 -5
- agno/embedder/langdb.py +0 -80
- agno/embedder/mistral.py +0 -82
- agno/embedder/openai.py +0 -78
- agno/file/__init__.py +0 -5
- agno/file/file.py +0 -16
- agno/file/local/csv.py +0 -32
- agno/file/local/txt.py +0 -19
- agno/infra/app.py +0 -240
- agno/infra/base.py +0 -144
- agno/infra/context.py +0 -20
- agno/infra/db_app.py +0 -52
- agno/infra/resource.py +0 -205
- agno/infra/resources.py +0 -55
- agno/knowledge/agent.py +0 -702
- agno/knowledge/arxiv.py +0 -33
- agno/knowledge/combined.py +0 -36
- agno/knowledge/csv.py +0 -144
- agno/knowledge/csv_url.py +0 -124
- agno/knowledge/document.py +0 -223
- agno/knowledge/docx.py +0 -137
- agno/knowledge/firecrawl.py +0 -34
- agno/knowledge/gcs/__init__.py +0 -0
- agno/knowledge/gcs/base.py +0 -39
- agno/knowledge/gcs/pdf.py +0 -125
- agno/knowledge/json.py +0 -137
- agno/knowledge/langchain.py +0 -71
- agno/knowledge/light_rag.py +0 -273
- agno/knowledge/llamaindex.py +0 -66
- agno/knowledge/markdown.py +0 -154
- agno/knowledge/pdf.py +0 -164
- agno/knowledge/pdf_bytes.py +0 -42
- agno/knowledge/pdf_url.py +0 -148
- agno/knowledge/s3/__init__.py +0 -0
- agno/knowledge/s3/base.py +0 -64
- agno/knowledge/s3/pdf.py +0 -33
- agno/knowledge/s3/text.py +0 -34
- agno/knowledge/text.py +0 -141
- agno/knowledge/url.py +0 -46
- agno/knowledge/website.py +0 -179
- agno/knowledge/wikipedia.py +0 -32
- agno/knowledge/youtube.py +0 -35
- agno/memory/agent.py +0 -423
- agno/memory/classifier.py +0 -104
- agno/memory/db/__init__.py +0 -5
- agno/memory/db/base.py +0 -42
- agno/memory/db/mongodb.py +0 -189
- agno/memory/db/postgres.py +0 -203
- agno/memory/db/sqlite.py +0 -193
- agno/memory/memory.py +0 -22
- agno/memory/row.py +0 -36
- agno/memory/summarizer.py +0 -201
- agno/memory/summary.py +0 -19
- agno/memory/team.py +0 -415
- agno/memory/v2/__init__.py +0 -2
- agno/memory/v2/db/__init__.py +0 -1
- agno/memory/v2/db/base.py +0 -42
- agno/memory/v2/db/firestore.py +0 -339
- agno/memory/v2/db/mongodb.py +0 -196
- agno/memory/v2/db/postgres.py +0 -214
- agno/memory/v2/db/redis.py +0 -187
- agno/memory/v2/db/schema.py +0 -54
- agno/memory/v2/db/sqlite.py +0 -209
- agno/memory/v2/manager.py +0 -437
- agno/memory/v2/memory.py +0 -1097
- agno/memory/v2/schema.py +0 -55
- agno/memory/v2/summarizer.py +0 -215
- agno/memory/workflow.py +0 -38
- agno/models/ollama/tools.py +0 -430
- agno/models/qwen/__init__.py +0 -5
- agno/playground/__init__.py +0 -10
- agno/playground/deploy.py +0 -3
- agno/playground/playground.py +0 -3
- agno/playground/serve.py +0 -3
- agno/playground/settings.py +0 -3
- agno/reranker/__init__.py +0 -0
- agno/run/response.py +0 -467
- agno/run/v2/__init__.py +0 -0
- agno/run/v2/workflow.py +0 -567
- agno/storage/__init__.py +0 -0
- agno/storage/agent/__init__.py +0 -0
- agno/storage/agent/dynamodb.py +0 -1
- agno/storage/agent/json.py +0 -1
- agno/storage/agent/mongodb.py +0 -1
- agno/storage/agent/postgres.py +0 -1
- agno/storage/agent/singlestore.py +0 -1
- agno/storage/agent/sqlite.py +0 -1
- agno/storage/agent/yaml.py +0 -1
- agno/storage/base.py +0 -60
- agno/storage/dynamodb.py +0 -673
- agno/storage/firestore.py +0 -297
- agno/storage/gcs_json.py +0 -261
- agno/storage/in_memory.py +0 -234
- agno/storage/json.py +0 -237
- agno/storage/mongodb.py +0 -328
- agno/storage/mysql.py +0 -685
- agno/storage/postgres.py +0 -682
- agno/storage/redis.py +0 -336
- agno/storage/session/__init__.py +0 -16
- agno/storage/session/agent.py +0 -64
- agno/storage/session/team.py +0 -63
- agno/storage/session/v2/__init__.py +0 -5
- agno/storage/session/workflow.py +0 -61
- agno/storage/singlestore.py +0 -606
- agno/storage/sqlite.py +0 -646
- agno/storage/workflow/__init__.py +0 -0
- agno/storage/workflow/mongodb.py +0 -1
- agno/storage/workflow/postgres.py +0 -1
- agno/storage/workflow/sqlite.py +0 -1
- agno/storage/yaml.py +0 -241
- agno/tools/thinking.py +0 -73
- agno/utils/defaults.py +0 -57
- agno/utils/filesystem.py +0 -39
- agno/utils/git.py +0 -52
- agno/utils/json_io.py +0 -30
- agno/utils/load_env.py +0 -19
- agno/utils/py_io.py +0 -19
- agno/utils/pyproject.py +0 -18
- agno/utils/resource_filter.py +0 -31
- agno/workflow/v2/__init__.py +0 -21
- agno/workflow/v2/types.py +0 -357
- agno/workflow/v2/workflow.py +0 -3312
- agno/workspace/__init__.py +0 -0
- agno/workspace/config.py +0 -325
- agno/workspace/enums.py +0 -6
- agno/workspace/helpers.py +0 -52
- agno/workspace/operator.py +0 -757
- agno/workspace/settings.py +0 -158
- agno-1.8.1.dist-info/METADATA +0 -982
- agno-1.8.1.dist-info/RECORD +0 -566
- agno-1.8.1.dist-info/entry_points.txt +0 -3
- agno-1.8.1.dist-info/licenses/LICENSE +0 -375
- /agno/{app → db/migrations}/__init__.py +0 -0
- /agno/{app/playground/__init__.py → db/schemas/metrics.py} +0 -0
- /agno/{cli → integrations}/__init__.py +0 -0
- /agno/{cli/ws → knowledge/chunking}/__init__.py +0 -0
- /agno/{document/chunking → knowledge/remote_content}/__init__.py +0 -0
- /agno/{document/reader/gcs → knowledge/reranker}/__init__.py +0 -0
- /agno/{document/reader/s3 → os/interfaces}/__init__.py +0 -0
- /agno/{app → os/interfaces}/slack/security.py +0 -0
- /agno/{app → os/interfaces}/whatsapp/security.py +0 -0
- /agno/{file/local → utils/print_response}/__init__.py +0 -0
- /agno/{infra → vectordb/llamaindex}/__init__.py +0 -0
- {agno-1.8.1.dist-info → agno-2.0.0.dist-info}/WHEEL +0 -0
- {agno-1.8.1.dist-info → agno-2.0.0.dist-info}/top_level.txt +0 -0
|
@@ -1,27 +1,76 @@
|
|
|
1
1
|
import asyncio
|
|
2
2
|
import uuid
|
|
3
3
|
from pathlib import Path
|
|
4
|
-
from typing import IO, Any, List, Union
|
|
4
|
+
from typing import IO, Any, List, Optional, Union
|
|
5
5
|
|
|
6
|
-
from agno.
|
|
7
|
-
from agno.document.
|
|
6
|
+
from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
|
|
7
|
+
from agno.knowledge.document.base import Document
|
|
8
|
+
from agno.knowledge.reader.base import Reader
|
|
9
|
+
from agno.knowledge.types import ContentType
|
|
8
10
|
from agno.utils.log import log_info, logger
|
|
9
11
|
|
|
12
|
+
DEFAULT_CHUNKER_STRATEGY: ChunkingStrategy
|
|
13
|
+
|
|
14
|
+
# Try to import MarkdownChunking, fallback to FixedSizeChunking if not available
|
|
15
|
+
try:
|
|
16
|
+
from agno.knowledge.chunking.markdown import MarkdownChunking
|
|
17
|
+
|
|
18
|
+
DEFAULT_CHUNKER_STRATEGY = MarkdownChunking()
|
|
19
|
+
MARKDOWN_CHUNKER_AVAILABLE = True
|
|
20
|
+
except ImportError:
|
|
21
|
+
from agno.knowledge.chunking.fixed import FixedSizeChunking
|
|
22
|
+
|
|
23
|
+
DEFAULT_CHUNKER_STRATEGY = FixedSizeChunking()
|
|
24
|
+
MARKDOWN_CHUNKER_AVAILABLE = False
|
|
25
|
+
|
|
10
26
|
|
|
11
27
|
class MarkdownReader(Reader):
|
|
12
28
|
"""Reader for Markdown files"""
|
|
13
29
|
|
|
14
|
-
|
|
30
|
+
@classmethod
|
|
31
|
+
def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:
|
|
32
|
+
"""Get the list of supported chunking strategies for Markdown readers."""
|
|
33
|
+
strategies = [
|
|
34
|
+
ChunkingStrategyType.DOCUMENT_CHUNKER,
|
|
35
|
+
ChunkingStrategyType.AGENTIC_CHUNKER,
|
|
36
|
+
ChunkingStrategyType.RECURSIVE_CHUNKER,
|
|
37
|
+
ChunkingStrategyType.SEMANTIC_CHUNKER,
|
|
38
|
+
ChunkingStrategyType.FIXED_SIZE_CHUNKER,
|
|
39
|
+
]
|
|
40
|
+
|
|
41
|
+
# Only include MarkdownChunking if it's available
|
|
42
|
+
if MARKDOWN_CHUNKER_AVAILABLE:
|
|
43
|
+
strategies.insert(0, ChunkingStrategyType.MARKDOWN_CHUNKER)
|
|
44
|
+
|
|
45
|
+
return strategies
|
|
46
|
+
|
|
47
|
+
@classmethod
|
|
48
|
+
def get_supported_content_types(self) -> List[ContentType]:
|
|
49
|
+
return [ContentType.MARKDOWN]
|
|
50
|
+
|
|
51
|
+
def __init__(
|
|
52
|
+
self,
|
|
53
|
+
chunking_strategy: Optional[ChunkingStrategy] = None,
|
|
54
|
+
name: Optional[str] = None,
|
|
55
|
+
description: Optional[str] = None,
|
|
56
|
+
) -> None:
|
|
57
|
+
# Use the default chunking strategy if none provided
|
|
58
|
+
if chunking_strategy is None:
|
|
59
|
+
chunking_strategy = DEFAULT_CHUNKER_STRATEGY
|
|
60
|
+
|
|
61
|
+
super().__init__(chunking_strategy=chunking_strategy, name=name, description=description)
|
|
62
|
+
|
|
63
|
+
def read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
|
|
15
64
|
try:
|
|
16
65
|
if isinstance(file, Path):
|
|
17
66
|
if not file.exists():
|
|
18
67
|
raise FileNotFoundError(f"Could not find file: {file}")
|
|
19
68
|
log_info(f"Reading: {file}")
|
|
20
|
-
file_name = file.stem
|
|
69
|
+
file_name = name or file.stem
|
|
21
70
|
file_contents = file.read_text("utf-8")
|
|
22
71
|
else:
|
|
23
72
|
log_info(f"Reading uploaded file: {file.name}")
|
|
24
|
-
file_name = file.name.split(".")[0]
|
|
73
|
+
file_name = name or file.name.split(".")[0]
|
|
25
74
|
file.seek(0)
|
|
26
75
|
file_contents = file.read().decode("utf-8")
|
|
27
76
|
|
|
@@ -36,14 +85,14 @@ class MarkdownReader(Reader):
|
|
|
36
85
|
logger.error(f"Error reading: {file}: {e}")
|
|
37
86
|
return []
|
|
38
87
|
|
|
39
|
-
async def async_read(self, file: Union[Path, IO[Any]]) -> List[Document]:
|
|
88
|
+
async def async_read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
|
|
40
89
|
try:
|
|
41
90
|
if isinstance(file, Path):
|
|
42
91
|
if not file.exists():
|
|
43
92
|
raise FileNotFoundError(f"Could not find file: {file}")
|
|
44
93
|
|
|
45
94
|
log_info(f"Reading asynchronously: {file}")
|
|
46
|
-
file_name = file.stem
|
|
95
|
+
file_name = name or file.stem
|
|
47
96
|
|
|
48
97
|
try:
|
|
49
98
|
import aiofiles
|
|
@@ -55,7 +104,7 @@ class MarkdownReader(Reader):
|
|
|
55
104
|
file_contents = file.read_text("utf-8")
|
|
56
105
|
else:
|
|
57
106
|
log_info(f"Reading uploaded file asynchronously: {file.name}")
|
|
58
|
-
file_name = file.name.split(".")[0]
|
|
107
|
+
file_name = name or file.name.split(".")[0]
|
|
59
108
|
file.seek(0)
|
|
60
109
|
file_contents = file.read().decode("utf-8")
|
|
61
110
|
|
|
@@ -4,9 +4,10 @@ from pathlib import Path
|
|
|
4
4
|
from typing import IO, Any, List, Optional, Tuple, Union
|
|
5
5
|
from uuid import uuid4
|
|
6
6
|
|
|
7
|
-
from agno.
|
|
8
|
-
from agno.document.
|
|
9
|
-
from agno.
|
|
7
|
+
from agno.knowledge.chunking.strategy import ChunkingStrategyType
|
|
8
|
+
from agno.knowledge.document.base import Document
|
|
9
|
+
from agno.knowledge.reader.base import Reader
|
|
10
|
+
from agno.knowledge.types import ContentType
|
|
10
11
|
from agno.utils.log import log_error, log_info, logger
|
|
11
12
|
|
|
12
13
|
try:
|
|
@@ -95,9 +96,9 @@ def _clean_page_numbers(
|
|
|
95
96
|
Notes:
|
|
96
97
|
- The function scans for page numbers using a regular expression that matches digits at the start or end of a string.
|
|
97
98
|
- It evaluates several potential starting points for numbering (-2, -1, 0, 1, 2 shifts) to determine the most consistent sequence.
|
|
98
|
-
- If at least a specified ratio of pages (defined by `PAGE_NUMBERING_CORRECTNESS_RATIO_FOR_REMOVAL`) has correct sequential numbering,
|
|
99
|
+
- If at least a specified ratio of pages (defined by `PAGE_NUMBERING_CORRECTNESS_RATIO_FOR_REMOVAL`) has correct sequential numbering,
|
|
99
100
|
the page numbers are processed.
|
|
100
|
-
- If page numbers are found, the function will add formatted page numbers to each page's content if `page_start_numbering_format` or
|
|
101
|
+
- If page numbers are found, the function will add formatted page numbers to each page's content if `page_start_numbering_format` or
|
|
101
102
|
`page_end_numbering_format` is provided.
|
|
102
103
|
"""
|
|
103
104
|
assert len(extra_content) == 0 or len(extra_content) == len(page_content_list), (
|
|
@@ -190,8 +191,23 @@ class BasePDFReader(Reader):
|
|
|
190
191
|
self.page_end_numbering_format = page_end_numbering_format
|
|
191
192
|
self.password = password
|
|
192
193
|
|
|
194
|
+
if self.chunking_strategy is None:
|
|
195
|
+
from agno.knowledge.chunking.document import DocumentChunking
|
|
196
|
+
|
|
197
|
+
self.chunking_strategy = DocumentChunking(chunk_size=5000)
|
|
193
198
|
super().__init__(**kwargs)
|
|
194
199
|
|
|
200
|
+
@classmethod
|
|
201
|
+
def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:
|
|
202
|
+
"""Get the list of supported chunking strategies for PDF readers."""
|
|
203
|
+
return [
|
|
204
|
+
ChunkingStrategyType.DOCUMENT_CHUNKER,
|
|
205
|
+
ChunkingStrategyType.FIXED_SIZE_CHUNKER,
|
|
206
|
+
ChunkingStrategyType.AGENTIC_CHUNKER,
|
|
207
|
+
ChunkingStrategyType.SEMANTIC_CHUNKER,
|
|
208
|
+
ChunkingStrategyType.RECURSIVE_CHUNKER,
|
|
209
|
+
]
|
|
210
|
+
|
|
195
211
|
def _build_chunked_documents(self, documents: List[Document]) -> List[Document]:
|
|
196
212
|
chunked_documents: List[Document] = []
|
|
197
213
|
for document in documents:
|
|
@@ -205,19 +221,19 @@ class BasePDFReader(Reader):
|
|
|
205
221
|
# Use provided password or fall back to instance password
|
|
206
222
|
pdf_password = password or self.password
|
|
207
223
|
if not pdf_password:
|
|
208
|
-
logger.error(f
|
|
224
|
+
logger.error(f'PDF file "{doc_name}" is password protected but no password provided')
|
|
209
225
|
return False
|
|
210
226
|
|
|
211
227
|
try:
|
|
212
228
|
decrypted_pdf = doc_reader.decrypt(pdf_password)
|
|
213
229
|
if decrypted_pdf:
|
|
214
|
-
log_info(f
|
|
230
|
+
log_info(f'Successfully decrypted PDF file "{doc_name}" with user password')
|
|
215
231
|
return True
|
|
216
232
|
else:
|
|
217
|
-
log_error(f
|
|
233
|
+
log_error(f'Failed to decrypt PDF file "{doc_name}": incorrect password')
|
|
218
234
|
return False
|
|
219
235
|
except Exception as e:
|
|
220
|
-
log_error(f
|
|
236
|
+
log_error(f'Error decrypting PDF file "{doc_name}": {e}')
|
|
221
237
|
return False
|
|
222
238
|
|
|
223
239
|
def _create_documents(self, pdf_content: List[str], doc_name: str, use_uuid_for_id: bool, page_number_shift):
|
|
@@ -306,9 +322,17 @@ class BasePDFReader(Reader):
|
|
|
306
322
|
class PDFReader(BasePDFReader):
|
|
307
323
|
"""Reader for PDF files"""
|
|
308
324
|
|
|
309
|
-
|
|
325
|
+
@classmethod
|
|
326
|
+
def get_supported_content_types(self) -> List[ContentType]:
|
|
327
|
+
return [ContentType.PDF]
|
|
328
|
+
|
|
329
|
+
def read(
|
|
330
|
+
self, pdf: Union[str, Path, IO[Any]], name: Optional[str] = None, password: Optional[str] = None
|
|
331
|
+
) -> List[Document]:
|
|
310
332
|
try:
|
|
311
|
-
if
|
|
333
|
+
if name:
|
|
334
|
+
doc_name = name
|
|
335
|
+
elif isinstance(pdf, str):
|
|
312
336
|
doc_name = pdf.split("/")[-1].split(".")[0].replace(" ", "_")
|
|
313
337
|
else:
|
|
314
338
|
doc_name = pdf.name.split(".")[0]
|
|
@@ -318,26 +342,18 @@ class PDFReader(BasePDFReader):
|
|
|
318
342
|
log_info(f"Reading: {doc_name}")
|
|
319
343
|
|
|
320
344
|
try:
|
|
321
|
-
|
|
345
|
+
DocumentReader(pdf)
|
|
322
346
|
except PdfStreamError as e:
|
|
323
347
|
logger.error(f"Error reading PDF: {e}")
|
|
324
348
|
return []
|
|
325
349
|
|
|
326
|
-
# Handle PDF decryption
|
|
327
|
-
if not self._decrypt_pdf(pdf_reader, doc_name, password):
|
|
328
|
-
return []
|
|
329
|
-
|
|
330
|
-
# Read and chunk.
|
|
331
|
-
return self._pdf_reader_to_documents(pdf_reader, doc_name, use_uuid_for_id=True)
|
|
332
|
-
|
|
333
|
-
async def async_read(self, pdf: Union[str, Path, IO[Any]], password: Optional[str] = None) -> List[Document]:
|
|
334
350
|
try:
|
|
335
351
|
if isinstance(pdf, str):
|
|
336
|
-
doc_name = pdf.split("/")[-1].split(".")[0].replace(" ", "_")
|
|
352
|
+
doc_name = name or pdf.split("/")[-1].split(".")[0].replace(" ", "_")
|
|
337
353
|
else:
|
|
338
|
-
doc_name = pdf.name.split(".")[0]
|
|
354
|
+
doc_name = name or pdf.name.split(".")[0]
|
|
339
355
|
except Exception:
|
|
340
|
-
doc_name = "pdf"
|
|
356
|
+
doc_name = name or "pdf"
|
|
341
357
|
|
|
342
358
|
log_info(f"Reading: {doc_name}")
|
|
343
359
|
|
|
@@ -351,73 +367,55 @@ class PDFReader(BasePDFReader):
|
|
|
351
367
|
if not self._decrypt_pdf(pdf_reader, doc_name, password):
|
|
352
368
|
return []
|
|
353
369
|
|
|
354
|
-
# Read and chunk
|
|
355
|
-
return
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
class PDFUrlReader(BasePDFReader):
|
|
359
|
-
"""Reader for PDF files from URL"""
|
|
360
|
-
|
|
361
|
-
def __init__(self, proxy: Optional[str] = None, password: Optional[str] = None, **kwargs):
|
|
362
|
-
super().__init__(password=password, **kwargs)
|
|
363
|
-
self.proxy = proxy
|
|
364
|
-
|
|
365
|
-
def read(self, url: str, password: Optional[str] = None) -> List[Document]:
|
|
366
|
-
if not url:
|
|
367
|
-
raise ValueError("No url provided")
|
|
368
|
-
|
|
369
|
-
from io import BytesIO
|
|
370
|
-
|
|
371
|
-
log_info(f"Reading: {url}")
|
|
372
|
-
|
|
373
|
-
# Retry the request up to 3 times with exponential backoff
|
|
374
|
-
response = fetch_with_retry(url, proxy=self.proxy)
|
|
375
|
-
|
|
376
|
-
doc_name = url.split("/")[-1].split(".")[0].replace("/", "_").replace(" ", "_")
|
|
377
|
-
pdf_reader = DocumentReader(BytesIO(response.content))
|
|
370
|
+
# Read and chunk
|
|
371
|
+
return self._pdf_reader_to_documents(pdf_reader, doc_name, use_uuid_for_id=True)
|
|
378
372
|
|
|
379
|
-
|
|
380
|
-
|
|
373
|
+
async def async_read(
|
|
374
|
+
self,
|
|
375
|
+
pdf: Optional[Union[str, Path, IO[Any]]] = None,
|
|
376
|
+
name: Optional[str] = None,
|
|
377
|
+
password: Optional[str] = None,
|
|
378
|
+
) -> List[Document]:
|
|
379
|
+
if pdf is None:
|
|
380
|
+
log_error("No pdf provided")
|
|
381
381
|
return []
|
|
382
382
|
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
from io import BytesIO
|
|
391
|
-
|
|
392
|
-
import httpx
|
|
393
|
-
|
|
394
|
-
log_info(f"Reading: {url}")
|
|
383
|
+
try:
|
|
384
|
+
if isinstance(pdf, str):
|
|
385
|
+
doc_name = name or pdf.split("/")[-1].split(".")[0].replace(" ", "_")
|
|
386
|
+
else:
|
|
387
|
+
doc_name = pdf.name.split(".")[0]
|
|
388
|
+
except Exception:
|
|
389
|
+
doc_name = name or "pdf"
|
|
395
390
|
|
|
396
|
-
|
|
397
|
-
async with httpx.AsyncClient(**client_args) as client: # type: ignore
|
|
398
|
-
response = await async_fetch_with_retry(url, client=client)
|
|
391
|
+
log_info(f"Reading: {doc_name}")
|
|
399
392
|
|
|
400
|
-
|
|
401
|
-
|
|
393
|
+
try:
|
|
394
|
+
pdf_reader = DocumentReader(pdf)
|
|
395
|
+
except PdfStreamError as e:
|
|
396
|
+
logger.error(f"Error reading PDF: {e}")
|
|
397
|
+
return []
|
|
402
398
|
|
|
403
399
|
# Handle PDF decryption
|
|
404
400
|
if not self._decrypt_pdf(pdf_reader, doc_name, password):
|
|
405
401
|
return []
|
|
406
402
|
|
|
407
403
|
# Read and chunk.
|
|
408
|
-
return await self._async_pdf_reader_to_documents(pdf_reader, doc_name, use_uuid_for_id=
|
|
404
|
+
return await self._async_pdf_reader_to_documents(pdf_reader, doc_name, use_uuid_for_id=True)
|
|
409
405
|
|
|
410
406
|
|
|
411
407
|
class PDFImageReader(BasePDFReader):
|
|
412
408
|
"""Reader for PDF files with text and images extraction"""
|
|
413
409
|
|
|
414
|
-
def read(
|
|
410
|
+
def read(
|
|
411
|
+
self, pdf: Union[str, Path, IO[Any]], name: Optional[str] = None, password: Optional[str] = None
|
|
412
|
+
) -> List[Document]:
|
|
415
413
|
if not pdf:
|
|
416
414
|
raise ValueError("No pdf provided")
|
|
417
415
|
|
|
418
416
|
try:
|
|
419
417
|
if isinstance(pdf, str):
|
|
420
|
-
doc_name = pdf.split("/")[-1].split(".")[0].replace(" ", "_")
|
|
418
|
+
doc_name = name or pdf.split("/")[-1].split(".")[0].replace(" ", "_")
|
|
421
419
|
else:
|
|
422
420
|
doc_name = pdf.name.split(".")[0]
|
|
423
421
|
except Exception:
|
|
@@ -433,13 +431,15 @@ class PDFImageReader(BasePDFReader):
|
|
|
433
431
|
# Read and chunk.
|
|
434
432
|
return self._pdf_reader_to_documents(pdf_reader, doc_name, read_images=True, use_uuid_for_id=False)
|
|
435
433
|
|
|
436
|
-
async def async_read(
|
|
434
|
+
async def async_read(
|
|
435
|
+
self, pdf: Union[str, Path, IO[Any]], name: Optional[str] = None, password: Optional[str] = None
|
|
436
|
+
) -> List[Document]:
|
|
437
437
|
if not pdf:
|
|
438
438
|
raise ValueError("No pdf provided")
|
|
439
439
|
|
|
440
440
|
try:
|
|
441
441
|
if isinstance(pdf, str):
|
|
442
|
-
doc_name = pdf.split("/")[-1].split(".")[0].replace(" ", "_")
|
|
442
|
+
doc_name = name or pdf.split("/")[-1].split(".")[0].replace(" ", "_")
|
|
443
443
|
else:
|
|
444
444
|
doc_name = pdf.name.split(".")[0]
|
|
445
445
|
except Exception:
|
|
@@ -454,58 +454,3 @@ class PDFImageReader(BasePDFReader):
|
|
|
454
454
|
|
|
455
455
|
# Read and chunk.
|
|
456
456
|
return await self._async_pdf_reader_to_documents(pdf_reader, doc_name, read_images=True, use_uuid_for_id=False)
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
class PDFUrlImageReader(BasePDFReader):
|
|
460
|
-
"""Reader for PDF files from URL with text and images extraction"""
|
|
461
|
-
|
|
462
|
-
def __init__(self, proxy: Optional[str] = None, password: Optional[str] = None, **kwargs):
|
|
463
|
-
super().__init__(password=password, **kwargs)
|
|
464
|
-
self.proxy = proxy
|
|
465
|
-
|
|
466
|
-
def read(self, url: str, password: Optional[str] = None) -> List[Document]:
|
|
467
|
-
if not url:
|
|
468
|
-
raise ValueError("No url provided")
|
|
469
|
-
|
|
470
|
-
from io import BytesIO
|
|
471
|
-
|
|
472
|
-
import httpx
|
|
473
|
-
|
|
474
|
-
# Read the PDF from the URL
|
|
475
|
-
log_info(f"Reading: {url}")
|
|
476
|
-
response = httpx.get(url, proxy=self.proxy) if self.proxy else httpx.get(url)
|
|
477
|
-
|
|
478
|
-
doc_name = url.split("/")[-1].split(".")[0].replace(" ", "_")
|
|
479
|
-
pdf_reader = DocumentReader(BytesIO(response.content))
|
|
480
|
-
|
|
481
|
-
# Handle PDF decryption
|
|
482
|
-
if not self._decrypt_pdf(pdf_reader, doc_name, password):
|
|
483
|
-
return []
|
|
484
|
-
|
|
485
|
-
# Read and chunk.
|
|
486
|
-
return self._pdf_reader_to_documents(pdf_reader, doc_name, read_images=True, use_uuid_for_id=False)
|
|
487
|
-
|
|
488
|
-
async def async_read(self, url: str, password: Optional[str] = None) -> List[Document]:
|
|
489
|
-
if not url:
|
|
490
|
-
raise ValueError("No url provided")
|
|
491
|
-
|
|
492
|
-
from io import BytesIO
|
|
493
|
-
|
|
494
|
-
import httpx
|
|
495
|
-
|
|
496
|
-
log_info(f"Reading: {url}")
|
|
497
|
-
|
|
498
|
-
client_args = {"proxy": self.proxy} if self.proxy else {}
|
|
499
|
-
async with httpx.AsyncClient(**client_args) as client: # type: ignore
|
|
500
|
-
response = await client.get(url)
|
|
501
|
-
response.raise_for_status()
|
|
502
|
-
|
|
503
|
-
doc_name = url.split("/")[-1].split(".")[0].replace(" ", "_")
|
|
504
|
-
pdf_reader = DocumentReader(BytesIO(response.content))
|
|
505
|
-
|
|
506
|
-
# Handle PDF decryption
|
|
507
|
-
if not self._decrypt_pdf(pdf_reader, doc_name, password):
|
|
508
|
-
return []
|
|
509
|
-
|
|
510
|
-
# Read and chunk.
|
|
511
|
-
return await self._async_pdf_reader_to_documents(pdf_reader, doc_name, read_images=True, use_uuid_for_id=False)
|
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from typing import Any, Callable, Dict, List, Optional
|
|
3
|
+
|
|
4
|
+
from agno.knowledge.reader.base import Reader
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class ReaderFactory:
|
|
8
|
+
"""Factory for creating and managing document readers with lazy loading."""
|
|
9
|
+
|
|
10
|
+
# Cache for instantiated readers
|
|
11
|
+
_reader_cache: Dict[str, Reader] = {}
|
|
12
|
+
|
|
13
|
+
@classmethod
|
|
14
|
+
def _get_pdf_reader(cls, **kwargs) -> Reader:
|
|
15
|
+
"""Get PDF reader instance."""
|
|
16
|
+
from agno.knowledge.reader.pdf_reader import PDFReader
|
|
17
|
+
|
|
18
|
+
config: Dict[str, Any] = {
|
|
19
|
+
"chunk": True,
|
|
20
|
+
"chunk_size": 100,
|
|
21
|
+
"description": "Processes PDF documents with OCR support for images and text extraction",
|
|
22
|
+
}
|
|
23
|
+
config.update(kwargs)
|
|
24
|
+
return PDFReader(**config)
|
|
25
|
+
|
|
26
|
+
@classmethod
|
|
27
|
+
def _get_csv_reader(cls, **kwargs) -> Reader:
|
|
28
|
+
"""Get CSV reader instance."""
|
|
29
|
+
from agno.knowledge.reader.csv_reader import CSVReader
|
|
30
|
+
|
|
31
|
+
config: Dict[str, Any] = {
|
|
32
|
+
"name": "CSV Reader",
|
|
33
|
+
"description": "Parses CSV, XLSX, and XLS files with custom delimiter support",
|
|
34
|
+
}
|
|
35
|
+
config.update(kwargs)
|
|
36
|
+
return CSVReader(**config)
|
|
37
|
+
|
|
38
|
+
@classmethod
|
|
39
|
+
def _get_docx_reader(cls, **kwargs) -> Reader:
|
|
40
|
+
"""Get Docx reader instance."""
|
|
41
|
+
from agno.knowledge.reader.docx_reader import DocxReader
|
|
42
|
+
|
|
43
|
+
config: Dict[str, Any] = {
|
|
44
|
+
"name": "Docx Reader",
|
|
45
|
+
"description": "Extracts text content from Microsoft Word documents (.docx and .doc formats)",
|
|
46
|
+
}
|
|
47
|
+
config.update(kwargs)
|
|
48
|
+
return DocxReader(**config)
|
|
49
|
+
|
|
50
|
+
@classmethod
|
|
51
|
+
def _get_json_reader(cls, **kwargs) -> Reader:
|
|
52
|
+
"""Get JSON reader instance."""
|
|
53
|
+
from agno.knowledge.reader.json_reader import JSONReader
|
|
54
|
+
|
|
55
|
+
config: Dict[str, Any] = {
|
|
56
|
+
"name": "JSON Reader",
|
|
57
|
+
"description": "Processes JSON data structures and API responses with nested object handling",
|
|
58
|
+
}
|
|
59
|
+
config.update(kwargs)
|
|
60
|
+
return JSONReader(**config)
|
|
61
|
+
|
|
62
|
+
@classmethod
|
|
63
|
+
def _get_markdown_reader(cls, **kwargs) -> Reader:
|
|
64
|
+
"""Get Markdown reader instance."""
|
|
65
|
+
from agno.knowledge.reader.markdown_reader import MarkdownReader
|
|
66
|
+
|
|
67
|
+
config: Dict[str, Any] = {
|
|
68
|
+
"name": "Markdown Reader",
|
|
69
|
+
"description": "Processes Markdown documentation with header-aware chunking and formatting preservation",
|
|
70
|
+
}
|
|
71
|
+
config.update(kwargs)
|
|
72
|
+
return MarkdownReader(**config)
|
|
73
|
+
|
|
74
|
+
@classmethod
|
|
75
|
+
def _get_text_reader(cls, **kwargs) -> Reader:
|
|
76
|
+
"""Get Text reader instance."""
|
|
77
|
+
from agno.knowledge.reader.text_reader import TextReader
|
|
78
|
+
|
|
79
|
+
config: Dict[str, Any] = {
|
|
80
|
+
"name": "Text Reader",
|
|
81
|
+
"description": "Handles plain text files with customizable chunking strategies and encoding detection",
|
|
82
|
+
}
|
|
83
|
+
config.update(kwargs)
|
|
84
|
+
return TextReader(**config)
|
|
85
|
+
|
|
86
|
+
@classmethod
|
|
87
|
+
def _get_website_reader(cls, **kwargs) -> Reader:
|
|
88
|
+
"""Get Website reader instance."""
|
|
89
|
+
from agno.knowledge.reader.website_reader import WebsiteReader
|
|
90
|
+
|
|
91
|
+
config: Dict[str, Any] = {
|
|
92
|
+
"name": "Website Reader",
|
|
93
|
+
"description": "Scrapes and extracts content from web pages with HTML parsing and text cleaning",
|
|
94
|
+
}
|
|
95
|
+
config.update(kwargs)
|
|
96
|
+
return WebsiteReader(**config)
|
|
97
|
+
|
|
98
|
+
@classmethod
|
|
99
|
+
def _get_firecrawl_reader(cls, **kwargs) -> Reader:
|
|
100
|
+
"""Get Firecrawl reader instance."""
|
|
101
|
+
from agno.knowledge.reader.firecrawl_reader import FirecrawlReader
|
|
102
|
+
|
|
103
|
+
config: Dict[str, Any] = {
|
|
104
|
+
"api_key": kwargs.get("api_key") or os.getenv("FIRECRAWL_API_KEY"),
|
|
105
|
+
"mode": "crawl",
|
|
106
|
+
"name": "Firecrawl Reader",
|
|
107
|
+
"description": "Advanced web scraping and crawling with JavaScript rendering and structured data extraction",
|
|
108
|
+
}
|
|
109
|
+
config.update(kwargs)
|
|
110
|
+
return FirecrawlReader(**config)
|
|
111
|
+
|
|
112
|
+
@classmethod
|
|
113
|
+
def _get_youtube_reader(cls, **kwargs) -> Reader:
|
|
114
|
+
"""Get YouTube reader instance."""
|
|
115
|
+
from agno.knowledge.reader.youtube_reader import YouTubeReader
|
|
116
|
+
|
|
117
|
+
config: Dict[str, Any] = {
|
|
118
|
+
"name": "YouTube Reader",
|
|
119
|
+
"description": "Extracts transcripts and metadata from YouTube videos and playlists",
|
|
120
|
+
}
|
|
121
|
+
config.update(kwargs)
|
|
122
|
+
return YouTubeReader(**config)
|
|
123
|
+
|
|
124
|
+
@classmethod
|
|
125
|
+
def _get_arxiv_reader(cls, **kwargs) -> Reader:
|
|
126
|
+
"""Get Arxiv reader instance."""
|
|
127
|
+
from agno.knowledge.reader.arxiv_reader import ArxivReader
|
|
128
|
+
|
|
129
|
+
config: Dict[str, Any] = {
|
|
130
|
+
"name": "Arxiv Reader",
|
|
131
|
+
"description": "Downloads and processes academic papers from ArXiv with PDF parsing and metadata extraction",
|
|
132
|
+
}
|
|
133
|
+
config.update(kwargs)
|
|
134
|
+
return ArxivReader(**config)
|
|
135
|
+
|
|
136
|
+
@classmethod
|
|
137
|
+
def _get_wikipedia_reader(cls, **kwargs) -> Reader:
|
|
138
|
+
"""Get Wikipedia reader instance."""
|
|
139
|
+
from agno.knowledge.reader.wikipedia_reader import WikipediaReader
|
|
140
|
+
|
|
141
|
+
config: Dict[str, Any] = {
|
|
142
|
+
"name": "Wikipedia Reader",
|
|
143
|
+
"description": "Fetches and processes Wikipedia articles with section-aware chunking and link resolution",
|
|
144
|
+
}
|
|
145
|
+
config.update(kwargs)
|
|
146
|
+
return WikipediaReader(**config)
|
|
147
|
+
|
|
148
|
+
@classmethod
|
|
149
|
+
def _get_web_search_reader(cls, **kwargs) -> Reader:
|
|
150
|
+
"""Get Web Search reader instance."""
|
|
151
|
+
from agno.knowledge.reader.web_search_reader import WebSearchReader
|
|
152
|
+
|
|
153
|
+
config: Dict[str, Any] = {
|
|
154
|
+
"name": "Web Search Reader",
|
|
155
|
+
"description": "Executes web searches and processes results with relevance ranking and content extraction",
|
|
156
|
+
}
|
|
157
|
+
config.update(kwargs)
|
|
158
|
+
return WebSearchReader(**config)
|
|
159
|
+
|
|
160
|
+
@classmethod
|
|
161
|
+
def _get_reader_method(cls, reader_key: str) -> Callable[[], Reader]:
|
|
162
|
+
"""Get the appropriate reader method for the given key."""
|
|
163
|
+
method_name = f"_get_{reader_key}_reader"
|
|
164
|
+
if not hasattr(cls, method_name):
|
|
165
|
+
raise ValueError(f"Unknown reader: {reader_key}")
|
|
166
|
+
return getattr(cls, method_name)
|
|
167
|
+
|
|
168
|
+
@classmethod
|
|
169
|
+
def create_reader(cls, reader_key: str, **kwargs) -> Reader:
|
|
170
|
+
"""Create a reader instance with the given key and optional overrides."""
|
|
171
|
+
if reader_key in cls._reader_cache:
|
|
172
|
+
return cls._reader_cache[reader_key]
|
|
173
|
+
|
|
174
|
+
# Get the reader method and create the instance
|
|
175
|
+
reader_method = cls._get_reader_method(reader_key)
|
|
176
|
+
reader = reader_method(**kwargs)
|
|
177
|
+
|
|
178
|
+
# Cache the reader
|
|
179
|
+
cls._reader_cache[reader_key] = reader
|
|
180
|
+
|
|
181
|
+
return reader
|
|
182
|
+
|
|
183
|
+
@classmethod
|
|
184
|
+
def get_reader_for_extension(cls, extension: str) -> Reader:
|
|
185
|
+
"""Get the appropriate reader for a file extension."""
|
|
186
|
+
extension = extension.lower()
|
|
187
|
+
|
|
188
|
+
if extension in [".pdf", "application/pdf"]:
|
|
189
|
+
return cls.create_reader("pdf")
|
|
190
|
+
elif extension in [".csv", "text/csv"]:
|
|
191
|
+
return cls.create_reader("csv")
|
|
192
|
+
elif extension in [".docx", ".doc"]:
|
|
193
|
+
return cls.create_reader("docx")
|
|
194
|
+
elif extension == ".json":
|
|
195
|
+
return cls.create_reader("json")
|
|
196
|
+
elif extension in [".md", ".markdown"]:
|
|
197
|
+
return cls.create_reader("markdown")
|
|
198
|
+
elif extension in [".txt", ".text"]:
|
|
199
|
+
return cls.create_reader("text")
|
|
200
|
+
else:
|
|
201
|
+
# Default to text reader for unknown extensions
|
|
202
|
+
return cls.create_reader("text")
|
|
203
|
+
|
|
204
|
+
@classmethod
|
|
205
|
+
def get_reader_for_url(cls, url: str) -> Reader:
|
|
206
|
+
"""Get the appropriate reader for a URL."""
|
|
207
|
+
url_lower = url.lower()
|
|
208
|
+
|
|
209
|
+
# Check for YouTube URLs
|
|
210
|
+
if any(domain in url_lower for domain in ["youtube.com", "youtu.be"]):
|
|
211
|
+
return cls.create_reader("youtube")
|
|
212
|
+
|
|
213
|
+
# Default to URL reader
|
|
214
|
+
return cls.create_reader("url")
|
|
215
|
+
|
|
216
|
+
@classmethod
|
|
217
|
+
def get_all_reader_keys(cls) -> List[str]:
|
|
218
|
+
"""Get all available reader keys in priority order."""
|
|
219
|
+
# Extract reader keys from method names
|
|
220
|
+
|
|
221
|
+
PREFIX = "_get_"
|
|
222
|
+
SUFFIX = "_reader"
|
|
223
|
+
|
|
224
|
+
reader_keys = []
|
|
225
|
+
for attr_name in dir(cls):
|
|
226
|
+
if attr_name.startswith(PREFIX) and attr_name.endswith(SUFFIX):
|
|
227
|
+
reader_key = attr_name[len(PREFIX) : -len(SUFFIX)] # Remove "_get_" prefix and "_reader" suffix
|
|
228
|
+
reader_keys.append(reader_key)
|
|
229
|
+
|
|
230
|
+
# Define priority order for URL readers
|
|
231
|
+
url_reader_priority = ["url", "website", "firecrawl", "pdf_url", "csv_url", "youtube", "web_search"]
|
|
232
|
+
|
|
233
|
+
# Sort with URL readers in priority order, others alphabetically
|
|
234
|
+
def sort_key(reader_key):
|
|
235
|
+
if reader_key in url_reader_priority:
|
|
236
|
+
return (0, url_reader_priority.index(reader_key))
|
|
237
|
+
else:
|
|
238
|
+
return (1, reader_key)
|
|
239
|
+
|
|
240
|
+
reader_keys.sort(key=sort_key)
|
|
241
|
+
return reader_keys
|
|
242
|
+
|
|
243
|
+
@classmethod
|
|
244
|
+
def create_all_readers(cls) -> Dict[str, Reader]:
|
|
245
|
+
"""Create all readers and return them as a dictionary."""
|
|
246
|
+
readers = {}
|
|
247
|
+
for reader_key in cls.get_all_reader_keys():
|
|
248
|
+
readers[reader_key] = cls.create_reader(reader_key)
|
|
249
|
+
return readers
|
|
250
|
+
|
|
251
|
+
@classmethod
|
|
252
|
+
def clear_cache(cls):
|
|
253
|
+
"""Clear the reader cache."""
|
|
254
|
+
cls._reader_cache.clear()
|
|
255
|
+
|
|
256
|
+
@classmethod
|
|
257
|
+
def register_reader(
|
|
258
|
+
cls,
|
|
259
|
+
key: str,
|
|
260
|
+
reader_method,
|
|
261
|
+
name: str,
|
|
262
|
+
description: str,
|
|
263
|
+
extensions: Optional[List[str]] = None,
|
|
264
|
+
):
|
|
265
|
+
"""Register a new reader type."""
|
|
266
|
+
# Add the reader method to the class
|
|
267
|
+
method_name = f"_get_{key}_reader"
|
|
268
|
+
setattr(cls, method_name, classmethod(reader_method))
|