agno 1.8.2__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/agent/__init__.py +19 -27
- agno/agent/agent.py +3143 -4170
- agno/api/agent.py +11 -67
- agno/api/api.py +5 -46
- agno/api/evals.py +8 -19
- agno/api/os.py +17 -0
- agno/api/routes.py +6 -41
- agno/api/schemas/__init__.py +9 -0
- agno/api/schemas/agent.py +5 -21
- agno/api/schemas/evals.py +7 -16
- agno/api/schemas/os.py +14 -0
- agno/api/schemas/team.py +5 -21
- agno/api/schemas/utils.py +21 -0
- agno/api/schemas/workflows.py +11 -7
- agno/api/settings.py +53 -0
- agno/api/team.py +11 -66
- agno/api/workflow.py +28 -0
- agno/cloud/aws/base.py +214 -0
- agno/cloud/aws/s3/__init__.py +2 -0
- agno/cloud/aws/s3/api_client.py +43 -0
- agno/cloud/aws/s3/bucket.py +195 -0
- agno/cloud/aws/s3/object.py +57 -0
- agno/db/__init__.py +24 -0
- agno/db/base.py +245 -0
- agno/db/dynamo/__init__.py +3 -0
- agno/db/dynamo/dynamo.py +1743 -0
- agno/db/dynamo/schemas.py +278 -0
- agno/db/dynamo/utils.py +684 -0
- agno/db/firestore/__init__.py +3 -0
- agno/db/firestore/firestore.py +1432 -0
- agno/db/firestore/schemas.py +130 -0
- agno/db/firestore/utils.py +278 -0
- agno/db/gcs_json/__init__.py +3 -0
- agno/db/gcs_json/gcs_json_db.py +1001 -0
- agno/db/gcs_json/utils.py +194 -0
- agno/db/in_memory/__init__.py +3 -0
- agno/db/in_memory/in_memory_db.py +882 -0
- agno/db/in_memory/utils.py +172 -0
- agno/db/json/__init__.py +3 -0
- agno/db/json/json_db.py +1045 -0
- agno/db/json/utils.py +196 -0
- agno/db/migrations/v1_to_v2.py +162 -0
- agno/db/mongo/__init__.py +3 -0
- agno/db/mongo/mongo.py +1416 -0
- agno/db/mongo/schemas.py +77 -0
- agno/db/mongo/utils.py +204 -0
- agno/db/mysql/__init__.py +3 -0
- agno/db/mysql/mysql.py +1719 -0
- agno/db/mysql/schemas.py +124 -0
- agno/db/mysql/utils.py +297 -0
- agno/db/postgres/__init__.py +3 -0
- agno/db/postgres/postgres.py +1710 -0
- agno/db/postgres/schemas.py +124 -0
- agno/db/postgres/utils.py +280 -0
- agno/db/redis/__init__.py +3 -0
- agno/db/redis/redis.py +1367 -0
- agno/db/redis/schemas.py +109 -0
- agno/db/redis/utils.py +288 -0
- agno/db/schemas/__init__.py +3 -0
- agno/db/schemas/evals.py +33 -0
- agno/db/schemas/knowledge.py +40 -0
- agno/db/schemas/memory.py +46 -0
- agno/db/singlestore/__init__.py +3 -0
- agno/db/singlestore/schemas.py +116 -0
- agno/db/singlestore/singlestore.py +1712 -0
- agno/db/singlestore/utils.py +326 -0
- agno/db/sqlite/__init__.py +3 -0
- agno/db/sqlite/schemas.py +119 -0
- agno/db/sqlite/sqlite.py +1676 -0
- agno/db/sqlite/utils.py +268 -0
- agno/db/utils.py +88 -0
- agno/eval/__init__.py +14 -0
- agno/eval/accuracy.py +154 -48
- agno/eval/performance.py +88 -23
- agno/eval/reliability.py +73 -20
- agno/eval/utils.py +23 -13
- agno/integrations/discord/__init__.py +3 -0
- agno/{app → integrations}/discord/client.py +10 -10
- agno/knowledge/__init__.py +2 -2
- agno/{document → knowledge}/chunking/agentic.py +2 -2
- agno/{document → knowledge}/chunking/document.py +2 -2
- agno/{document → knowledge}/chunking/fixed.py +3 -3
- agno/{document → knowledge}/chunking/markdown.py +2 -2
- agno/{document → knowledge}/chunking/recursive.py +2 -2
- agno/{document → knowledge}/chunking/row.py +2 -2
- agno/knowledge/chunking/semantic.py +59 -0
- agno/knowledge/chunking/strategy.py +121 -0
- agno/knowledge/content.py +74 -0
- agno/knowledge/document/__init__.py +5 -0
- agno/{document → knowledge/document}/base.py +12 -2
- agno/knowledge/embedder/__init__.py +5 -0
- agno/{embedder → knowledge/embedder}/aws_bedrock.py +127 -1
- agno/{embedder → knowledge/embedder}/azure_openai.py +65 -1
- agno/{embedder → knowledge/embedder}/base.py +6 -0
- agno/{embedder → knowledge/embedder}/cohere.py +72 -1
- agno/{embedder → knowledge/embedder}/fastembed.py +17 -1
- agno/{embedder → knowledge/embedder}/fireworks.py +1 -1
- agno/{embedder → knowledge/embedder}/google.py +74 -1
- agno/{embedder → knowledge/embedder}/huggingface.py +36 -2
- agno/{embedder → knowledge/embedder}/jina.py +48 -2
- agno/knowledge/embedder/langdb.py +22 -0
- agno/knowledge/embedder/mistral.py +139 -0
- agno/{embedder → knowledge/embedder}/nebius.py +1 -1
- agno/{embedder → knowledge/embedder}/ollama.py +54 -3
- agno/knowledge/embedder/openai.py +223 -0
- agno/{embedder → knowledge/embedder}/sentence_transformer.py +16 -1
- agno/{embedder → knowledge/embedder}/together.py +1 -1
- agno/{embedder → knowledge/embedder}/voyageai.py +49 -1
- agno/knowledge/knowledge.py +1551 -0
- agno/knowledge/reader/__init__.py +7 -0
- agno/{document → knowledge}/reader/arxiv_reader.py +32 -4
- agno/knowledge/reader/base.py +88 -0
- agno/{document → knowledge}/reader/csv_reader.py +47 -65
- agno/knowledge/reader/docx_reader.py +83 -0
- agno/{document → knowledge}/reader/firecrawl_reader.py +42 -21
- agno/{document → knowledge}/reader/json_reader.py +30 -9
- agno/{document → knowledge}/reader/markdown_reader.py +58 -9
- agno/{document → knowledge}/reader/pdf_reader.py +71 -126
- agno/knowledge/reader/reader_factory.py +268 -0
- agno/knowledge/reader/s3_reader.py +101 -0
- agno/{document → knowledge}/reader/text_reader.py +31 -10
- agno/knowledge/reader/url_reader.py +128 -0
- agno/knowledge/reader/web_search_reader.py +366 -0
- agno/{document → knowledge}/reader/website_reader.py +37 -10
- agno/knowledge/reader/wikipedia_reader.py +59 -0
- agno/knowledge/reader/youtube_reader.py +78 -0
- agno/knowledge/remote_content/remote_content.py +88 -0
- agno/{reranker → knowledge/reranker}/base.py +1 -1
- agno/{reranker → knowledge/reranker}/cohere.py +2 -2
- agno/{reranker → knowledge/reranker}/infinity.py +2 -2
- agno/{reranker → knowledge/reranker}/sentence_transformer.py +2 -2
- agno/knowledge/types.py +30 -0
- agno/knowledge/utils.py +169 -0
- agno/media.py +269 -268
- agno/memory/__init__.py +2 -10
- agno/memory/manager.py +1003 -148
- agno/models/aimlapi/__init__.py +2 -2
- agno/models/aimlapi/aimlapi.py +6 -6
- agno/models/anthropic/claude.py +128 -72
- agno/models/aws/bedrock.py +107 -175
- agno/models/aws/claude.py +64 -18
- agno/models/azure/ai_foundry.py +73 -23
- agno/models/base.py +346 -290
- agno/models/cerebras/cerebras.py +84 -27
- agno/models/cohere/chat.py +106 -98
- agno/models/google/gemini.py +105 -46
- agno/models/groq/groq.py +97 -35
- agno/models/huggingface/huggingface.py +92 -27
- agno/models/ibm/watsonx.py +72 -13
- agno/models/litellm/chat.py +85 -13
- agno/models/message.py +46 -151
- agno/models/meta/llama.py +85 -49
- agno/models/metrics.py +120 -0
- agno/models/mistral/mistral.py +90 -21
- agno/models/ollama/__init__.py +0 -2
- agno/models/ollama/chat.py +85 -47
- agno/models/openai/chat.py +154 -37
- agno/models/openai/responses.py +178 -105
- agno/models/perplexity/perplexity.py +26 -2
- agno/models/portkey/portkey.py +0 -7
- agno/models/response.py +15 -9
- agno/models/utils.py +20 -0
- agno/models/vercel/__init__.py +2 -2
- agno/models/vercel/v0.py +1 -1
- agno/models/vllm/__init__.py +2 -2
- agno/models/vllm/vllm.py +3 -3
- agno/models/xai/xai.py +10 -10
- agno/os/__init__.py +3 -0
- agno/os/app.py +497 -0
- agno/os/auth.py +47 -0
- agno/os/config.py +103 -0
- agno/os/interfaces/agui/__init__.py +3 -0
- agno/os/interfaces/agui/agui.py +31 -0
- agno/{app/agui/async_router.py → os/interfaces/agui/router.py} +16 -16
- agno/{app → os/interfaces}/agui/utils.py +65 -28
- agno/os/interfaces/base.py +21 -0
- agno/os/interfaces/slack/__init__.py +3 -0
- agno/{app/slack/async_router.py → os/interfaces/slack/router.py} +3 -5
- agno/os/interfaces/slack/slack.py +32 -0
- agno/os/interfaces/whatsapp/__init__.py +3 -0
- agno/{app/whatsapp/async_router.py → os/interfaces/whatsapp/router.py} +4 -7
- agno/os/interfaces/whatsapp/whatsapp.py +29 -0
- agno/os/mcp.py +235 -0
- agno/os/router.py +1400 -0
- agno/os/routers/__init__.py +3 -0
- agno/os/routers/evals/__init__.py +3 -0
- agno/os/routers/evals/evals.py +393 -0
- agno/os/routers/evals/schemas.py +142 -0
- agno/os/routers/evals/utils.py +161 -0
- agno/os/routers/knowledge/__init__.py +3 -0
- agno/os/routers/knowledge/knowledge.py +850 -0
- agno/os/routers/knowledge/schemas.py +118 -0
- agno/os/routers/memory/__init__.py +3 -0
- agno/os/routers/memory/memory.py +410 -0
- agno/os/routers/memory/schemas.py +58 -0
- agno/os/routers/metrics/__init__.py +3 -0
- agno/os/routers/metrics/metrics.py +178 -0
- agno/os/routers/metrics/schemas.py +47 -0
- agno/os/routers/session/__init__.py +3 -0
- agno/os/routers/session/session.py +536 -0
- agno/os/schema.py +945 -0
- agno/{app/playground → os}/settings.py +7 -15
- agno/os/utils.py +270 -0
- agno/reasoning/azure_ai_foundry.py +4 -4
- agno/reasoning/deepseek.py +4 -4
- agno/reasoning/default.py +6 -11
- agno/reasoning/groq.py +4 -4
- agno/reasoning/helpers.py +4 -6
- agno/reasoning/ollama.py +4 -4
- agno/reasoning/openai.py +4 -4
- agno/run/agent.py +633 -0
- agno/run/base.py +53 -77
- agno/run/cancel.py +81 -0
- agno/run/team.py +243 -96
- agno/run/workflow.py +550 -12
- agno/session/__init__.py +10 -0
- agno/session/agent.py +244 -0
- agno/session/summary.py +225 -0
- agno/session/team.py +262 -0
- agno/{storage/session/v2 → session}/workflow.py +47 -24
- agno/team/__init__.py +15 -16
- agno/team/team.py +3260 -4824
- agno/tools/agentql.py +14 -5
- agno/tools/airflow.py +9 -4
- agno/tools/api.py +7 -3
- agno/tools/apify.py +2 -46
- agno/tools/arxiv.py +8 -3
- agno/tools/aws_lambda.py +7 -5
- agno/tools/aws_ses.py +7 -1
- agno/tools/baidusearch.py +4 -1
- agno/tools/bitbucket.py +4 -4
- agno/tools/brandfetch.py +14 -11
- agno/tools/bravesearch.py +4 -1
- agno/tools/brightdata.py +43 -23
- agno/tools/browserbase.py +13 -4
- agno/tools/calcom.py +12 -10
- agno/tools/calculator.py +10 -27
- agno/tools/cartesia.py +20 -17
- agno/tools/{clickup_tool.py → clickup.py} +12 -25
- agno/tools/confluence.py +8 -8
- agno/tools/crawl4ai.py +7 -1
- agno/tools/csv_toolkit.py +9 -8
- agno/tools/dalle.py +22 -12
- agno/tools/daytona.py +13 -16
- agno/tools/decorator.py +6 -3
- agno/tools/desi_vocal.py +17 -8
- agno/tools/discord.py +11 -8
- agno/tools/docker.py +30 -42
- agno/tools/duckdb.py +34 -53
- agno/tools/duckduckgo.py +8 -7
- agno/tools/e2b.py +62 -62
- agno/tools/eleven_labs.py +36 -29
- agno/tools/email.py +4 -1
- agno/tools/evm.py +7 -1
- agno/tools/exa.py +19 -14
- agno/tools/fal.py +30 -30
- agno/tools/file.py +9 -8
- agno/tools/financial_datasets.py +25 -44
- agno/tools/firecrawl.py +17 -18
- agno/tools/function.py +127 -18
- agno/tools/giphy.py +23 -11
- agno/tools/github.py +48 -126
- agno/tools/gmail.py +45 -61
- agno/tools/google_bigquery.py +7 -6
- agno/tools/google_maps.py +11 -26
- agno/tools/googlesearch.py +7 -2
- agno/tools/googlesheets.py +21 -17
- agno/tools/hackernews.py +9 -5
- agno/tools/jina.py +5 -4
- agno/tools/jira.py +18 -9
- agno/tools/knowledge.py +31 -32
- agno/tools/linear.py +18 -33
- agno/tools/linkup.py +5 -1
- agno/tools/local_file_system.py +8 -5
- agno/tools/lumalab.py +32 -20
- agno/tools/mcp.py +1 -2
- agno/tools/mem0.py +18 -12
- agno/tools/memori.py +14 -10
- agno/tools/mlx_transcribe.py +3 -2
- agno/tools/models/azure_openai.py +33 -15
- agno/tools/models/gemini.py +59 -32
- agno/tools/models/groq.py +30 -23
- agno/tools/models/nebius.py +28 -12
- agno/tools/models_labs.py +40 -16
- agno/tools/moviepy_video.py +7 -6
- agno/tools/neo4j.py +10 -8
- agno/tools/newspaper.py +7 -2
- agno/tools/newspaper4k.py +8 -3
- agno/tools/openai.py +58 -32
- agno/tools/openbb.py +12 -11
- agno/tools/opencv.py +63 -47
- agno/tools/openweather.py +14 -12
- agno/tools/pandas.py +11 -3
- agno/tools/postgres.py +4 -12
- agno/tools/pubmed.py +4 -1
- agno/tools/python.py +9 -22
- agno/tools/reasoning.py +35 -27
- agno/tools/reddit.py +11 -26
- agno/tools/replicate.py +55 -42
- agno/tools/resend.py +4 -1
- agno/tools/scrapegraph.py +15 -14
- agno/tools/searxng.py +10 -23
- agno/tools/serpapi.py +6 -3
- agno/tools/serper.py +13 -4
- agno/tools/shell.py +9 -2
- agno/tools/slack.py +12 -11
- agno/tools/sleep.py +3 -2
- agno/tools/spider.py +24 -4
- agno/tools/sql.py +7 -6
- agno/tools/tavily.py +6 -4
- agno/tools/telegram.py +12 -4
- agno/tools/todoist.py +11 -31
- agno/tools/toolkit.py +1 -1
- agno/tools/trafilatura.py +22 -6
- agno/tools/trello.py +9 -22
- agno/tools/twilio.py +10 -3
- agno/tools/user_control_flow.py +6 -1
- agno/tools/valyu.py +34 -5
- agno/tools/visualization.py +19 -28
- agno/tools/webbrowser.py +4 -3
- agno/tools/webex.py +11 -7
- agno/tools/website.py +15 -46
- agno/tools/webtools.py +12 -4
- agno/tools/whatsapp.py +5 -9
- agno/tools/wikipedia.py +20 -13
- agno/tools/x.py +14 -13
- agno/tools/yfinance.py +13 -40
- agno/tools/youtube.py +26 -20
- agno/tools/zendesk.py +7 -2
- agno/tools/zep.py +10 -7
- agno/tools/zoom.py +10 -9
- agno/utils/common.py +1 -19
- agno/utils/events.py +100 -123
- agno/utils/gemini.py +1 -1
- agno/utils/knowledge.py +29 -0
- agno/utils/log.py +54 -4
- agno/utils/mcp.py +68 -10
- agno/utils/media.py +39 -0
- agno/utils/message.py +12 -1
- agno/utils/models/aws_claude.py +1 -1
- agno/utils/models/claude.py +6 -12
- agno/utils/models/cohere.py +1 -1
- agno/utils/models/mistral.py +8 -7
- agno/utils/models/schema_utils.py +3 -3
- agno/utils/models/watsonx.py +1 -1
- agno/utils/openai.py +1 -1
- agno/utils/pprint.py +33 -32
- agno/utils/print_response/agent.py +779 -0
- agno/utils/print_response/team.py +1669 -0
- agno/utils/print_response/workflow.py +1451 -0
- agno/utils/prompts.py +14 -14
- agno/utils/reasoning.py +87 -0
- agno/utils/response.py +42 -42
- agno/utils/streamlit.py +481 -0
- agno/utils/string.py +8 -22
- agno/utils/team.py +50 -0
- agno/utils/timer.py +2 -2
- agno/vectordb/base.py +33 -21
- agno/vectordb/cassandra/cassandra.py +287 -23
- agno/vectordb/chroma/chromadb.py +482 -59
- agno/vectordb/clickhouse/clickhousedb.py +270 -63
- agno/vectordb/couchbase/couchbase.py +309 -29
- agno/vectordb/lancedb/lance_db.py +360 -21
- agno/vectordb/langchaindb/__init__.py +5 -0
- agno/vectordb/langchaindb/langchaindb.py +145 -0
- agno/vectordb/lightrag/__init__.py +5 -0
- agno/vectordb/lightrag/lightrag.py +374 -0
- agno/vectordb/llamaindex/llamaindexdb.py +127 -0
- agno/vectordb/milvus/milvus.py +242 -32
- agno/vectordb/mongodb/mongodb.py +200 -24
- agno/vectordb/pgvector/pgvector.py +319 -37
- agno/vectordb/pineconedb/pineconedb.py +221 -27
- agno/vectordb/qdrant/qdrant.py +334 -14
- agno/vectordb/singlestore/singlestore.py +286 -29
- agno/vectordb/surrealdb/surrealdb.py +187 -7
- agno/vectordb/upstashdb/upstashdb.py +342 -26
- agno/vectordb/weaviate/weaviate.py +227 -165
- agno/workflow/__init__.py +17 -13
- agno/workflow/{v2/condition.py → condition.py} +135 -32
- agno/workflow/{v2/loop.py → loop.py} +115 -28
- agno/workflow/{v2/parallel.py → parallel.py} +138 -108
- agno/workflow/{v2/router.py → router.py} +133 -32
- agno/workflow/{v2/step.py → step.py} +207 -49
- agno/workflow/{v2/steps.py → steps.py} +147 -66
- agno/workflow/types.py +482 -0
- agno/workflow/workflow.py +2410 -696
- agno-2.0.0.dist-info/METADATA +494 -0
- agno-2.0.0.dist-info/RECORD +515 -0
- agno-2.0.0.dist-info/licenses/LICENSE +201 -0
- agno/agent/metrics.py +0 -110
- agno/api/app.py +0 -35
- agno/api/playground.py +0 -92
- agno/api/schemas/app.py +0 -12
- agno/api/schemas/playground.py +0 -22
- agno/api/schemas/user.py +0 -35
- agno/api/schemas/workspace.py +0 -46
- agno/api/user.py +0 -160
- agno/api/workflows.py +0 -33
- agno/api/workspace.py +0 -175
- agno/app/agui/__init__.py +0 -3
- agno/app/agui/app.py +0 -17
- agno/app/agui/sync_router.py +0 -120
- agno/app/base.py +0 -186
- agno/app/discord/__init__.py +0 -3
- agno/app/fastapi/__init__.py +0 -3
- agno/app/fastapi/app.py +0 -107
- agno/app/fastapi/async_router.py +0 -457
- agno/app/fastapi/sync_router.py +0 -448
- agno/app/playground/app.py +0 -228
- agno/app/playground/async_router.py +0 -1053
- agno/app/playground/deploy.py +0 -249
- agno/app/playground/operator.py +0 -183
- agno/app/playground/schemas.py +0 -223
- agno/app/playground/serve.py +0 -55
- agno/app/playground/sync_router.py +0 -1045
- agno/app/playground/utils.py +0 -46
- agno/app/settings.py +0 -15
- agno/app/slack/__init__.py +0 -3
- agno/app/slack/app.py +0 -19
- agno/app/slack/sync_router.py +0 -92
- agno/app/utils.py +0 -54
- agno/app/whatsapp/__init__.py +0 -3
- agno/app/whatsapp/app.py +0 -15
- agno/app/whatsapp/sync_router.py +0 -197
- agno/cli/auth_server.py +0 -249
- agno/cli/config.py +0 -274
- agno/cli/console.py +0 -88
- agno/cli/credentials.py +0 -23
- agno/cli/entrypoint.py +0 -571
- agno/cli/operator.py +0 -357
- agno/cli/settings.py +0 -96
- agno/cli/ws/ws_cli.py +0 -817
- agno/constants.py +0 -13
- agno/document/__init__.py +0 -5
- agno/document/chunking/semantic.py +0 -45
- agno/document/chunking/strategy.py +0 -31
- agno/document/reader/__init__.py +0 -5
- agno/document/reader/base.py +0 -47
- agno/document/reader/docx_reader.py +0 -60
- agno/document/reader/gcs/pdf_reader.py +0 -44
- agno/document/reader/s3/pdf_reader.py +0 -59
- agno/document/reader/s3/text_reader.py +0 -63
- agno/document/reader/url_reader.py +0 -59
- agno/document/reader/youtube_reader.py +0 -58
- agno/embedder/__init__.py +0 -5
- agno/embedder/langdb.py +0 -80
- agno/embedder/mistral.py +0 -82
- agno/embedder/openai.py +0 -78
- agno/file/__init__.py +0 -5
- agno/file/file.py +0 -16
- agno/file/local/csv.py +0 -32
- agno/file/local/txt.py +0 -19
- agno/infra/app.py +0 -240
- agno/infra/base.py +0 -144
- agno/infra/context.py +0 -20
- agno/infra/db_app.py +0 -52
- agno/infra/resource.py +0 -205
- agno/infra/resources.py +0 -55
- agno/knowledge/agent.py +0 -702
- agno/knowledge/arxiv.py +0 -33
- agno/knowledge/combined.py +0 -36
- agno/knowledge/csv.py +0 -144
- agno/knowledge/csv_url.py +0 -124
- agno/knowledge/document.py +0 -223
- agno/knowledge/docx.py +0 -137
- agno/knowledge/firecrawl.py +0 -34
- agno/knowledge/gcs/__init__.py +0 -0
- agno/knowledge/gcs/base.py +0 -39
- agno/knowledge/gcs/pdf.py +0 -125
- agno/knowledge/json.py +0 -137
- agno/knowledge/langchain.py +0 -71
- agno/knowledge/light_rag.py +0 -273
- agno/knowledge/llamaindex.py +0 -66
- agno/knowledge/markdown.py +0 -154
- agno/knowledge/pdf.py +0 -164
- agno/knowledge/pdf_bytes.py +0 -42
- agno/knowledge/pdf_url.py +0 -148
- agno/knowledge/s3/__init__.py +0 -0
- agno/knowledge/s3/base.py +0 -64
- agno/knowledge/s3/pdf.py +0 -33
- agno/knowledge/s3/text.py +0 -34
- agno/knowledge/text.py +0 -141
- agno/knowledge/url.py +0 -46
- agno/knowledge/website.py +0 -179
- agno/knowledge/wikipedia.py +0 -32
- agno/knowledge/youtube.py +0 -35
- agno/memory/agent.py +0 -423
- agno/memory/classifier.py +0 -104
- agno/memory/db/__init__.py +0 -5
- agno/memory/db/base.py +0 -42
- agno/memory/db/mongodb.py +0 -189
- agno/memory/db/postgres.py +0 -203
- agno/memory/db/sqlite.py +0 -193
- agno/memory/memory.py +0 -22
- agno/memory/row.py +0 -36
- agno/memory/summarizer.py +0 -201
- agno/memory/summary.py +0 -19
- agno/memory/team.py +0 -415
- agno/memory/v2/__init__.py +0 -2
- agno/memory/v2/db/__init__.py +0 -1
- agno/memory/v2/db/base.py +0 -42
- agno/memory/v2/db/firestore.py +0 -339
- agno/memory/v2/db/mongodb.py +0 -196
- agno/memory/v2/db/postgres.py +0 -214
- agno/memory/v2/db/redis.py +0 -187
- agno/memory/v2/db/schema.py +0 -54
- agno/memory/v2/db/sqlite.py +0 -209
- agno/memory/v2/manager.py +0 -437
- agno/memory/v2/memory.py +0 -1097
- agno/memory/v2/schema.py +0 -55
- agno/memory/v2/summarizer.py +0 -215
- agno/memory/workflow.py +0 -38
- agno/models/ollama/tools.py +0 -430
- agno/models/qwen/__init__.py +0 -5
- agno/playground/__init__.py +0 -10
- agno/playground/deploy.py +0 -3
- agno/playground/playground.py +0 -3
- agno/playground/serve.py +0 -3
- agno/playground/settings.py +0 -3
- agno/reranker/__init__.py +0 -0
- agno/run/response.py +0 -467
- agno/run/v2/__init__.py +0 -0
- agno/run/v2/workflow.py +0 -567
- agno/storage/__init__.py +0 -0
- agno/storage/agent/__init__.py +0 -0
- agno/storage/agent/dynamodb.py +0 -1
- agno/storage/agent/json.py +0 -1
- agno/storage/agent/mongodb.py +0 -1
- agno/storage/agent/postgres.py +0 -1
- agno/storage/agent/singlestore.py +0 -1
- agno/storage/agent/sqlite.py +0 -1
- agno/storage/agent/yaml.py +0 -1
- agno/storage/base.py +0 -60
- agno/storage/dynamodb.py +0 -673
- agno/storage/firestore.py +0 -297
- agno/storage/gcs_json.py +0 -261
- agno/storage/in_memory.py +0 -234
- agno/storage/json.py +0 -237
- agno/storage/mongodb.py +0 -328
- agno/storage/mysql.py +0 -685
- agno/storage/postgres.py +0 -682
- agno/storage/redis.py +0 -336
- agno/storage/session/__init__.py +0 -16
- agno/storage/session/agent.py +0 -64
- agno/storage/session/team.py +0 -63
- agno/storage/session/v2/__init__.py +0 -5
- agno/storage/session/workflow.py +0 -61
- agno/storage/singlestore.py +0 -606
- agno/storage/sqlite.py +0 -646
- agno/storage/workflow/__init__.py +0 -0
- agno/storage/workflow/mongodb.py +0 -1
- agno/storage/workflow/postgres.py +0 -1
- agno/storage/workflow/sqlite.py +0 -1
- agno/storage/yaml.py +0 -241
- agno/tools/thinking.py +0 -73
- agno/utils/defaults.py +0 -57
- agno/utils/filesystem.py +0 -39
- agno/utils/git.py +0 -52
- agno/utils/json_io.py +0 -30
- agno/utils/load_env.py +0 -19
- agno/utils/py_io.py +0 -19
- agno/utils/pyproject.py +0 -18
- agno/utils/resource_filter.py +0 -31
- agno/workflow/v2/__init__.py +0 -21
- agno/workflow/v2/types.py +0 -357
- agno/workflow/v2/workflow.py +0 -3313
- agno/workspace/__init__.py +0 -0
- agno/workspace/config.py +0 -325
- agno/workspace/enums.py +0 -6
- agno/workspace/helpers.py +0 -52
- agno/workspace/operator.py +0 -757
- agno/workspace/settings.py +0 -158
- agno-1.8.2.dist-info/METADATA +0 -982
- agno-1.8.2.dist-info/RECORD +0 -566
- agno-1.8.2.dist-info/entry_points.txt +0 -3
- agno-1.8.2.dist-info/licenses/LICENSE +0 -375
- /agno/{app → db/migrations}/__init__.py +0 -0
- /agno/{app/playground/__init__.py → db/schemas/metrics.py} +0 -0
- /agno/{cli → integrations}/__init__.py +0 -0
- /agno/{cli/ws → knowledge/chunking}/__init__.py +0 -0
- /agno/{document/chunking → knowledge/remote_content}/__init__.py +0 -0
- /agno/{document/reader/gcs → knowledge/reranker}/__init__.py +0 -0
- /agno/{document/reader/s3 → os/interfaces}/__init__.py +0 -0
- /agno/{app → os/interfaces}/slack/security.py +0 -0
- /agno/{app → os/interfaces}/whatsapp/security.py +0 -0
- /agno/{file/local → utils/print_response}/__init__.py +0 -0
- /agno/{infra → vectordb/llamaindex}/__init__.py +0 -0
- {agno-1.8.2.dist-info → agno-2.0.0.dist-info}/WHEEL +0 -0
- {agno-1.8.2.dist-info → agno-2.0.0.dist-info}/top_level.txt +0 -0
|
@@ -1,8 +1,11 @@
|
|
|
1
1
|
import asyncio
|
|
2
|
-
from typing import List
|
|
2
|
+
from typing import List, Optional
|
|
3
3
|
|
|
4
|
-
from agno.
|
|
5
|
-
from agno.
|
|
4
|
+
from agno.knowledge.chunking.fixed import FixedSizeChunking
|
|
5
|
+
from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
|
|
6
|
+
from agno.knowledge.document.base import Document
|
|
7
|
+
from agno.knowledge.reader.base import Reader
|
|
8
|
+
from agno.knowledge.types import ContentType
|
|
6
9
|
|
|
7
10
|
try:
|
|
8
11
|
import arxiv # noqa: F401
|
|
@@ -11,9 +14,34 @@ except ImportError:
|
|
|
11
14
|
|
|
12
15
|
|
|
13
16
|
class ArxivReader(Reader):
|
|
14
|
-
max_results: int = 5 # Top articles
|
|
15
17
|
sort_by: arxiv.SortCriterion = arxiv.SortCriterion.Relevance
|
|
16
18
|
|
|
19
|
+
@classmethod
|
|
20
|
+
def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:
|
|
21
|
+
"""Get the list of supported chunking strategies for Arxiv readers."""
|
|
22
|
+
return [
|
|
23
|
+
ChunkingStrategyType.FIXED_SIZE_CHUNKER,
|
|
24
|
+
ChunkingStrategyType.AGENTIC_CHUNKER,
|
|
25
|
+
ChunkingStrategyType.DOCUMENT_CHUNKER,
|
|
26
|
+
ChunkingStrategyType.RECURSIVE_CHUNKER,
|
|
27
|
+
ChunkingStrategyType.SEMANTIC_CHUNKER,
|
|
28
|
+
]
|
|
29
|
+
|
|
30
|
+
@classmethod
|
|
31
|
+
def get_supported_content_types(self) -> List[ContentType]:
|
|
32
|
+
return [ContentType.TOPIC]
|
|
33
|
+
|
|
34
|
+
def __init__(
|
|
35
|
+
self,
|
|
36
|
+
chunking_strategy: Optional[ChunkingStrategy] = FixedSizeChunking(),
|
|
37
|
+
sort_by: arxiv.SortCriterion = arxiv.SortCriterion.Relevance,
|
|
38
|
+
**kwargs,
|
|
39
|
+
) -> None:
|
|
40
|
+
super().__init__(chunking_strategy=chunking_strategy, **kwargs)
|
|
41
|
+
|
|
42
|
+
# ArxivReader-specific attributes
|
|
43
|
+
self.sort_by = sort_by
|
|
44
|
+
|
|
17
45
|
def read(self, query: str) -> List[Document]:
|
|
18
46
|
"""
|
|
19
47
|
Search a query from arXiv database
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from dataclasses import dataclass, field
|
|
3
|
+
from typing import Any, List, Optional
|
|
4
|
+
|
|
5
|
+
from agno.knowledge.chunking.fixed import FixedSizeChunking
|
|
6
|
+
from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyFactory, ChunkingStrategyType
|
|
7
|
+
from agno.knowledge.document.base import Document
|
|
8
|
+
from agno.knowledge.types import ContentType
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
|
|
12
|
+
class Reader:
|
|
13
|
+
"""Base class for reading documents"""
|
|
14
|
+
|
|
15
|
+
chunk: bool = True
|
|
16
|
+
chunk_size: int = 5000
|
|
17
|
+
separators: List[str] = field(default_factory=lambda: ["\n", "\n\n", "\r", "\r\n", "\n\r", "\t", " ", " "])
|
|
18
|
+
chunking_strategy: Optional[ChunkingStrategy] = None
|
|
19
|
+
name: Optional[str] = None
|
|
20
|
+
description: Optional[str] = None
|
|
21
|
+
max_results: int = 5 # Maximum number of results to return (useful for search-based readers)
|
|
22
|
+
|
|
23
|
+
def __init__(
|
|
24
|
+
self,
|
|
25
|
+
chunk: bool = True,
|
|
26
|
+
chunk_size: int = 5000,
|
|
27
|
+
separators: Optional[List[str]] = None,
|
|
28
|
+
chunking_strategy: Optional[ChunkingStrategy] = None,
|
|
29
|
+
name: Optional[str] = None,
|
|
30
|
+
description: Optional[str] = None,
|
|
31
|
+
max_results: int = 5,
|
|
32
|
+
**kwargs,
|
|
33
|
+
) -> None:
|
|
34
|
+
self.chunk = chunk
|
|
35
|
+
self.chunk_size = chunk_size
|
|
36
|
+
self.separators = (
|
|
37
|
+
separators if separators is not None else ["\n", "\n\n", "\r", "\r\n", "\n\r", "\t", " ", " "]
|
|
38
|
+
)
|
|
39
|
+
self.chunking_strategy = chunking_strategy
|
|
40
|
+
self.name = name
|
|
41
|
+
self.description = description
|
|
42
|
+
self.max_results = max_results
|
|
43
|
+
|
|
44
|
+
def set_chunking_strategy_from_string(self, strategy_name: str, **kwargs) -> None:
|
|
45
|
+
"""Set the chunking strategy from a string name."""
|
|
46
|
+
try:
|
|
47
|
+
strategy_type = ChunkingStrategyType.from_string(strategy_name)
|
|
48
|
+
self.chunking_strategy = ChunkingStrategyFactory.create_strategy(strategy_type, **kwargs)
|
|
49
|
+
except ValueError as e:
|
|
50
|
+
raise ValueError(f"Failed to set chunking strategy: {e}")
|
|
51
|
+
|
|
52
|
+
def read(self, obj: Any, name: Optional[str] = None, password: Optional[str] = None) -> List[Document]:
|
|
53
|
+
raise NotImplementedError
|
|
54
|
+
|
|
55
|
+
async def async_read(self, obj: Any, name: Optional[str] = None, password: Optional[str] = None) -> List[Document]:
|
|
56
|
+
raise NotImplementedError
|
|
57
|
+
|
|
58
|
+
@classmethod
|
|
59
|
+
def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
|
|
60
|
+
raise NotImplementedError
|
|
61
|
+
|
|
62
|
+
@classmethod
|
|
63
|
+
def get_supported_content_types(cls) -> List[ContentType]:
|
|
64
|
+
raise NotImplementedError
|
|
65
|
+
|
|
66
|
+
def chunk_document(self, document: Document) -> List[Document]:
|
|
67
|
+
if self.chunking_strategy is None:
|
|
68
|
+
self.chunking_strategy = FixedSizeChunking(chunk_size=self.chunk_size)
|
|
69
|
+
return self.chunking_strategy.chunk(document) # type: ignore
|
|
70
|
+
|
|
71
|
+
async def chunk_documents_async(self, documents: List[Document]) -> List[Document]:
|
|
72
|
+
"""
|
|
73
|
+
Asynchronously chunk a list of documents using the instance's chunk_document method.
|
|
74
|
+
|
|
75
|
+
Args:
|
|
76
|
+
documents: List of documents to be chunked.
|
|
77
|
+
|
|
78
|
+
Returns:
|
|
79
|
+
A flattened list of chunked documents.
|
|
80
|
+
"""
|
|
81
|
+
|
|
82
|
+
async def _chunk_document_async(doc: Document) -> List[Document]:
|
|
83
|
+
return await asyncio.to_thread(self.chunk_document, doc)
|
|
84
|
+
|
|
85
|
+
# Process chunking in parallel for all documents
|
|
86
|
+
chunked_lists = await asyncio.gather(*[_chunk_document_async(doc) for doc in documents])
|
|
87
|
+
# Flatten the result
|
|
88
|
+
return [chunk for sublist in chunked_lists for chunk in sublist]
|
|
@@ -1,28 +1,47 @@
|
|
|
1
1
|
import asyncio
|
|
2
2
|
import csv
|
|
3
3
|
import io
|
|
4
|
-
import os
|
|
5
4
|
from pathlib import Path
|
|
6
5
|
from typing import IO, Any, List, Optional, Union
|
|
7
|
-
from urllib.parse import urlparse
|
|
8
6
|
from uuid import uuid4
|
|
9
7
|
|
|
10
|
-
from agno.utils.http import async_fetch_with_retry, fetch_with_retry
|
|
11
|
-
|
|
12
8
|
try:
|
|
13
9
|
import aiofiles
|
|
14
10
|
except ImportError:
|
|
15
11
|
raise ImportError("`aiofiles` not installed. Please install it with `pip install aiofiles`")
|
|
16
12
|
|
|
17
|
-
from agno.
|
|
18
|
-
from agno.
|
|
13
|
+
from agno.knowledge.chunking.row import RowChunking
|
|
14
|
+
from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
|
|
15
|
+
from agno.knowledge.document.base import Document
|
|
16
|
+
from agno.knowledge.reader.base import Reader
|
|
17
|
+
from agno.knowledge.types import ContentType
|
|
19
18
|
from agno.utils.log import logger
|
|
20
19
|
|
|
21
20
|
|
|
22
21
|
class CSVReader(Reader):
|
|
23
22
|
"""Reader for CSV files"""
|
|
24
23
|
|
|
25
|
-
def
|
|
24
|
+
def __init__(self, chunking_strategy: Optional[ChunkingStrategy] = RowChunking(), **kwargs):
|
|
25
|
+
super().__init__(chunking_strategy=chunking_strategy, **kwargs)
|
|
26
|
+
|
|
27
|
+
@classmethod
|
|
28
|
+
def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:
|
|
29
|
+
"""Get the list of supported chunking strategies for CSV readers."""
|
|
30
|
+
return [
|
|
31
|
+
ChunkingStrategyType.ROW_CHUNKER,
|
|
32
|
+
ChunkingStrategyType.FIXED_SIZE_CHUNKER,
|
|
33
|
+
ChunkingStrategyType.AGENTIC_CHUNKER,
|
|
34
|
+
ChunkingStrategyType.DOCUMENT_CHUNKER,
|
|
35
|
+
ChunkingStrategyType.RECURSIVE_CHUNKER,
|
|
36
|
+
]
|
|
37
|
+
|
|
38
|
+
@classmethod
|
|
39
|
+
def get_supported_content_types(self) -> List[ContentType]:
|
|
40
|
+
return [ContentType.CSV, ContentType.XLSX, ContentType.XLS]
|
|
41
|
+
|
|
42
|
+
def read(
|
|
43
|
+
self, file: Union[Path, IO[Any]], delimiter: str = ",", quotechar: str = '"', name: Optional[str] = None
|
|
44
|
+
) -> List[Document]:
|
|
26
45
|
try:
|
|
27
46
|
if isinstance(file, Path):
|
|
28
47
|
if not file.exists():
|
|
@@ -30,11 +49,15 @@ class CSVReader(Reader):
|
|
|
30
49
|
logger.info(f"Reading: {file}")
|
|
31
50
|
file_content = file.open(newline="", mode="r", encoding="utf-8")
|
|
32
51
|
else:
|
|
33
|
-
logger.info(f"Reading retrieved file: {file.name}")
|
|
52
|
+
logger.info(f"Reading retrieved file: {name or file.name}")
|
|
34
53
|
file.seek(0)
|
|
35
54
|
file_content = io.StringIO(file.read().decode("utf-8")) # type: ignore
|
|
36
55
|
|
|
37
|
-
csv_name =
|
|
56
|
+
csv_name = name or (
|
|
57
|
+
Path(file.name).stem
|
|
58
|
+
if isinstance(file, Path)
|
|
59
|
+
else (getattr(file, "name", "csv_file").split(".")[0] if hasattr(file, "name") else "csv_file")
|
|
60
|
+
)
|
|
38
61
|
csv_content = ""
|
|
39
62
|
with file_content as csvfile:
|
|
40
63
|
csv_reader = csv.reader(csvfile, delimiter=delimiter, quotechar=quotechar)
|
|
@@ -55,11 +78,16 @@ class CSVReader(Reader):
|
|
|
55
78
|
return chunked_documents
|
|
56
79
|
return documents
|
|
57
80
|
except Exception as e:
|
|
58
|
-
logger.error(f"Error reading: {file
|
|
81
|
+
logger.error(f"Error reading: {getattr(file, 'name', str(file)) if isinstance(file, IO) else file}: {e}")
|
|
59
82
|
return []
|
|
60
83
|
|
|
61
84
|
async def async_read(
|
|
62
|
-
self,
|
|
85
|
+
self,
|
|
86
|
+
file: Union[Path, IO[Any]],
|
|
87
|
+
delimiter: str = ",",
|
|
88
|
+
quotechar: str = '"',
|
|
89
|
+
page_size: int = 1000,
|
|
90
|
+
name: Optional[str] = None,
|
|
63
91
|
) -> List[Document]:
|
|
64
92
|
"""
|
|
65
93
|
Read a CSV file asynchronously, processing batches of rows concurrently.
|
|
@@ -86,7 +114,11 @@ class CSVReader(Reader):
|
|
|
86
114
|
file.seek(0)
|
|
87
115
|
file_content_io = io.StringIO(file.read().decode("utf-8")) # type: ignore
|
|
88
116
|
|
|
89
|
-
csv_name =
|
|
117
|
+
csv_name = name or (
|
|
118
|
+
Path(file.name).stem
|
|
119
|
+
if isinstance(file, Path)
|
|
120
|
+
else (getattr(file, "name", "csv_file").split(".")[0] if hasattr(file, "name") else "csv_file")
|
|
121
|
+
)
|
|
90
122
|
|
|
91
123
|
file_content_io.seek(0)
|
|
92
124
|
csv_reader = csv.reader(file_content_io, delimiter=delimiter, quotechar=quotechar)
|
|
@@ -128,57 +160,7 @@ class CSVReader(Reader):
|
|
|
128
160
|
|
|
129
161
|
return documents
|
|
130
162
|
except Exception as e:
|
|
131
|
-
logger.error(
|
|
163
|
+
logger.error(
|
|
164
|
+
f"Error reading async: {getattr(file, 'name', str(file)) if isinstance(file, IO) else file}: {e}"
|
|
165
|
+
)
|
|
132
166
|
return []
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
class CSVUrlReader(Reader):
|
|
136
|
-
"""Reader for CSV files"""
|
|
137
|
-
|
|
138
|
-
def __init__(self, proxy: Optional[str] = None, **kwargs):
|
|
139
|
-
super().__init__(**kwargs)
|
|
140
|
-
self.proxy = proxy
|
|
141
|
-
|
|
142
|
-
def read(self, url: str) -> List[Document]:
|
|
143
|
-
if not url:
|
|
144
|
-
raise ValueError("No URL provided")
|
|
145
|
-
|
|
146
|
-
logger.info(f"Reading: {url}")
|
|
147
|
-
# Retry the request up to 3 times with exponential backoff
|
|
148
|
-
response = fetch_with_retry(url, proxy=self.proxy)
|
|
149
|
-
|
|
150
|
-
parsed_url = urlparse(url)
|
|
151
|
-
filename = os.path.basename(parsed_url.path) or "data.csv"
|
|
152
|
-
|
|
153
|
-
file_obj = io.BytesIO(response.content)
|
|
154
|
-
file_obj.name = filename
|
|
155
|
-
documents = CSVReader().read(file=file_obj)
|
|
156
|
-
|
|
157
|
-
file_obj.close()
|
|
158
|
-
|
|
159
|
-
return documents
|
|
160
|
-
|
|
161
|
-
async def async_read(self, url: str) -> List[Document]:
|
|
162
|
-
if not url:
|
|
163
|
-
raise ValueError("No URL provided")
|
|
164
|
-
|
|
165
|
-
import httpx
|
|
166
|
-
|
|
167
|
-
logger.info(f"Reading async: {url}")
|
|
168
|
-
|
|
169
|
-
client_args = {"proxy": self.proxy} if self.proxy else {}
|
|
170
|
-
async with httpx.AsyncClient(**client_args) as client: # type: ignore
|
|
171
|
-
response = await async_fetch_with_retry(url, client=client)
|
|
172
|
-
|
|
173
|
-
parsed_url = urlparse(url)
|
|
174
|
-
filename = os.path.basename(parsed_url.path) or "data.csv"
|
|
175
|
-
|
|
176
|
-
file_obj = io.BytesIO(response.content)
|
|
177
|
-
file_obj.name = filename
|
|
178
|
-
|
|
179
|
-
# Use the async version of CSVReader
|
|
180
|
-
documents = await CSVReader().async_read(file=file_obj)
|
|
181
|
-
|
|
182
|
-
file_obj.close()
|
|
183
|
-
|
|
184
|
-
return documents
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import IO, Any, List, Optional, Union
|
|
4
|
+
from uuid import uuid4
|
|
5
|
+
|
|
6
|
+
from agno.knowledge.chunking.document import DocumentChunking
|
|
7
|
+
from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
|
|
8
|
+
from agno.knowledge.document.base import Document
|
|
9
|
+
from agno.knowledge.reader.base import Reader
|
|
10
|
+
from agno.knowledge.types import ContentType
|
|
11
|
+
from agno.utils.log import log_info, logger
|
|
12
|
+
|
|
13
|
+
try:
|
|
14
|
+
from docx import Document as DocxDocument # type: ignore
|
|
15
|
+
except ImportError:
|
|
16
|
+
raise ImportError("The `python-docx` package is not installed. Please install it via `pip install python-docx`.")
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class DocxReader(Reader):
    """Reader for Doc/Docx files.

    Opens the file with python-docx, joins the paragraph texts into a single
    ``Document`` and, when chunking is enabled on the reader, returns that
    document's chunks instead.
    """

    def __init__(self, chunking_strategy: Optional[ChunkingStrategy] = DocumentChunking(), **kwargs):
        """Create the reader.

        Args:
            chunking_strategy: Strategy forwarded to the base ``Reader``.
            **kwargs: Forwarded unchanged to the base ``Reader`` initializer.
        """
        super().__init__(chunking_strategy=chunking_strategy, **kwargs)

    @classmethod
    def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
        """Get the list of supported chunking strategies for DOCX readers."""
        return [
            ChunkingStrategyType.DOCUMENT_CHUNKER,
            ChunkingStrategyType.FIXED_SIZE_CHUNKER,
            ChunkingStrategyType.SEMANTIC_CHUNKER,
            ChunkingStrategyType.AGENTIC_CHUNKER,
            ChunkingStrategyType.RECURSIVE_CHUNKER,
        ]

    @classmethod
    def get_supported_content_types(cls) -> List[ContentType]:
        """Get the content types this reader accepts: DOCX and DOC."""
        return [ContentType.DOCX, ContentType.DOC]

    @staticmethod
    def _stream_name(file: Any, default: str = "docx_file") -> str:
        """Return the stream's ``name`` when it is a string, otherwise ``default``."""
        # A file-like object is not guaranteed a usable name: in-memory buffers
        # have none, and some file objects expose ``None`` or an integer file
        # descriptor. Calling ``.split`` on such a value raised AttributeError,
        # which the broad handler in ``read`` turned into an empty result.
        stream_name = getattr(file, "name", None)
        return stream_name if isinstance(stream_name, str) else default

    def read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
        """Read a docx file and return a list of documents.

        Args:
            file: Path to the document, or a file-like object holding it.
            name: Optional document name. When omitted, the name is derived
                from the path's stem or from the stream's ``name`` attribute,
                falling back to ``"docx_file"``.

        Returns:
            A single-item list with the document, or its chunks when
            ``self.chunk`` is truthy. Any failure is logged and yields an
            empty list instead of raising.
        """
        try:
            if isinstance(file, Path):
                if not file.exists():
                    raise FileNotFoundError(f"Could not find file: {file}")
                log_info(f"Reading: {file}")
                docx_document = DocxDocument(str(file))
                doc_name = name or file.stem
            else:
                stream_name = self._stream_name(file)
                log_info(f"Reading uploaded file: {stream_name}")
                docx_document = DocxDocument(file)
                # Keep the historical naming rule: everything before the first dot.
                doc_name = name or stream_name.split(".")[0]

            doc_content = "\n\n".join([para.text for para in docx_document.paragraphs])

            documents = [
                Document(
                    name=doc_name,
                    id=str(uuid4()),
                    content=doc_content,
                )
            ]

            if self.chunk:
                chunked_documents = []
                for document in documents:
                    chunked_documents.extend(self.chunk_document(document))
                return chunked_documents
            return documents

        except Exception as e:
            # Deliberate best-effort: callers receive an empty list rather than an exception.
            logger.error(f"Error reading file: {e}")
            return []

    async def async_read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
        """Asynchronously read a docx file and return a list of documents.

        Runs the synchronous ``read`` in a worker thread via
        ``asyncio.to_thread``; arguments and return value match ``read``.
        """
        try:
            return await asyncio.to_thread(self.read, file, name)
        except Exception as e:
            logger.error(f"Error reading file asynchronously: {e}")
            return []
|
|
@@ -2,9 +2,11 @@ import asyncio
|
|
|
2
2
|
from dataclasses import dataclass
|
|
3
3
|
from typing import Dict, List, Literal, Optional
|
|
4
4
|
|
|
5
|
-
from agno.
|
|
6
|
-
from agno.
|
|
7
|
-
from agno.document.
|
|
5
|
+
from agno.knowledge.chunking.semantic import SemanticChunking
|
|
6
|
+
from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
|
|
7
|
+
from agno.knowledge.document.base import Document
|
|
8
|
+
from agno.knowledge.reader.base import Reader
|
|
9
|
+
from agno.knowledge.types import ContentType
|
|
8
10
|
from agno.utils.log import log_debug, logger
|
|
9
11
|
|
|
10
12
|
try:
|
|
@@ -26,17 +28,36 @@ class FirecrawlReader(Reader):
|
|
|
26
28
|
mode: Literal["scrape", "crawl"] = "scrape",
|
|
27
29
|
chunk: bool = True,
|
|
28
30
|
chunk_size: int = 5000,
|
|
29
|
-
chunking_strategy: Optional[ChunkingStrategy] =
|
|
31
|
+
chunking_strategy: Optional[ChunkingStrategy] = SemanticChunking(),
|
|
32
|
+
name: Optional[str] = None,
|
|
33
|
+
description: Optional[str] = None,
|
|
30
34
|
) -> None:
|
|
31
35
|
# Initialise base Reader (handles chunk_size / strategy)
|
|
32
|
-
super().__init__(
|
|
36
|
+
super().__init__(
|
|
37
|
+
chunk=chunk, chunk_size=chunk_size, chunking_strategy=chunking_strategy, name=name, description=description
|
|
38
|
+
)
|
|
33
39
|
|
|
34
40
|
# Firecrawl-specific attributes
|
|
35
41
|
self.api_key = api_key
|
|
36
42
|
self.params = params
|
|
37
43
|
self.mode = mode
|
|
38
44
|
|
|
39
|
-
|
|
45
|
+
@classmethod
def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
    """Get the list of supported chunking strategies for Firecrawl readers.

    Returns:
        A new list of strategy types on every call.
    """
    # The implicit first argument of a classmethod is the class, so it is named ``cls`` (PEP 8).
    return [
        ChunkingStrategyType.SEMANTIC_CHUNKER,
        ChunkingStrategyType.FIXED_SIZE_CHUNKER,
        ChunkingStrategyType.AGENTIC_CHUNKER,
        ChunkingStrategyType.DOCUMENT_CHUNKER,
        ChunkingStrategyType.RECURSIVE_CHUNKER,
    ]
|
|
55
|
+
|
|
56
|
+
@classmethod
def get_supported_content_types(cls) -> List[ContentType]:
    """Get the content types this reader accepts: URL only.

    Returns:
        A new single-item list on every call.
    """
    # The implicit first argument of a classmethod is the class, so it is named ``cls`` (PEP 8).
    return [ContentType.URL]
|
|
59
|
+
|
|
60
|
+
def scrape(self, url: str, name: Optional[str] = None) -> List[Document]:
|
|
40
61
|
"""
|
|
41
62
|
Scrapes a website and returns a list of documents.
|
|
42
63
|
|
|
@@ -71,12 +92,12 @@ class FirecrawlReader(Reader):
|
|
|
71
92
|
|
|
72
93
|
documents = []
|
|
73
94
|
if self.chunk and content: # Only chunk if there's content
|
|
74
|
-
documents.extend(self.chunk_document(Document(name=url, id=url, content=content)))
|
|
95
|
+
documents.extend(self.chunk_document(Document(name=name or url, id=url, content=content)))
|
|
75
96
|
else:
|
|
76
|
-
documents.append(Document(name=url, id=url, content=content))
|
|
97
|
+
documents.append(Document(name=name or url, id=url, content=content))
|
|
77
98
|
return documents
|
|
78
99
|
|
|
79
|
-
async def async_scrape(self, url: str) -> List[Document]:
|
|
100
|
+
async def async_scrape(self, url: str, name: Optional[str] = None) -> List[Document]:
|
|
80
101
|
"""
|
|
81
102
|
Asynchronously scrapes a website and returns a list of documents.
|
|
82
103
|
|
|
@@ -89,9 +110,9 @@ class FirecrawlReader(Reader):
|
|
|
89
110
|
log_debug(f"Async scraping: {url}")
|
|
90
111
|
|
|
91
112
|
# Use asyncio.to_thread to run the synchronous scrape in a thread
|
|
92
|
-
return await asyncio.to_thread(self.scrape, url)
|
|
113
|
+
return await asyncio.to_thread(self.scrape, url, name)
|
|
93
114
|
|
|
94
|
-
def crawl(self, url: str) -> List[Document]:
|
|
115
|
+
def crawl(self, url: str, name: Optional[str] = None) -> List[Document]:
|
|
95
116
|
"""
|
|
96
117
|
Crawls a website and returns a list of documents.
|
|
97
118
|
|
|
@@ -124,13 +145,13 @@ class FirecrawlReader(Reader):
|
|
|
124
145
|
|
|
125
146
|
if content: # Only create document if content exists
|
|
126
147
|
if self.chunk:
|
|
127
|
-
documents.extend(self.chunk_document(Document(name=url, id=url, content=content)))
|
|
148
|
+
documents.extend(self.chunk_document(Document(name=name or url, id=url, content=content)))
|
|
128
149
|
else:
|
|
129
|
-
documents.append(Document(name=url, id=url, content=content))
|
|
150
|
+
documents.append(Document(name=name or url, id=url, content=content))
|
|
130
151
|
|
|
131
152
|
return documents
|
|
132
153
|
|
|
133
|
-
async def async_crawl(self, url: str) -> List[Document]:
|
|
154
|
+
async def async_crawl(self, url: str, name: Optional[str] = None) -> List[Document]:
|
|
134
155
|
"""
|
|
135
156
|
Asynchronously crawls a website and returns a list of documents.
|
|
136
157
|
|
|
@@ -143,9 +164,9 @@ class FirecrawlReader(Reader):
|
|
|
143
164
|
log_debug(f"Async crawling: {url}")
|
|
144
165
|
|
|
145
166
|
# Use asyncio.to_thread to run the synchronous crawl in a thread
|
|
146
|
-
return await asyncio.to_thread(self.crawl, url)
|
|
167
|
+
return await asyncio.to_thread(self.crawl, url, name)
|
|
147
168
|
|
|
148
|
-
def read(self, url: str) -> List[Document]:
|
|
169
|
+
def read(self, url: str, name: Optional[str] = None) -> List[Document]:
|
|
149
170
|
"""
|
|
150
171
|
Reads from a URL based on the mode setting.
|
|
151
172
|
|
|
@@ -156,13 +177,13 @@ class FirecrawlReader(Reader):
|
|
|
156
177
|
A list of documents
|
|
157
178
|
"""
|
|
158
179
|
if self.mode == "scrape":
|
|
159
|
-
return self.scrape(url)
|
|
180
|
+
return self.scrape(url, name)
|
|
160
181
|
elif self.mode == "crawl":
|
|
161
|
-
return self.crawl(url)
|
|
182
|
+
return self.crawl(url, name)
|
|
162
183
|
else:
|
|
163
184
|
raise NotImplementedError(f"Mode {self.mode} not implemented")
|
|
164
185
|
|
|
165
|
-
async def async_read(self, url: str) -> List[Document]:
|
|
186
|
+
async def async_read(self, url: str, name: Optional[str] = None) -> List[Document]:
|
|
166
187
|
"""
|
|
167
188
|
Asynchronously reads from a URL based on the mode setting.
|
|
168
189
|
|
|
@@ -173,8 +194,8 @@ class FirecrawlReader(Reader):
|
|
|
173
194
|
A list of documents
|
|
174
195
|
"""
|
|
175
196
|
if self.mode == "scrape":
|
|
176
|
-
return await self.async_scrape(url)
|
|
197
|
+
return await self.async_scrape(url, name)
|
|
177
198
|
elif self.mode == "crawl":
|
|
178
|
-
return await self.async_crawl(url)
|
|
199
|
+
return await self.async_crawl(url, name)
|
|
179
200
|
else:
|
|
180
201
|
raise NotImplementedError(f"Mode {self.mode} not implemented")
|
|
@@ -2,11 +2,14 @@ import asyncio
|
|
|
2
2
|
import json
|
|
3
3
|
from io import BytesIO
|
|
4
4
|
from pathlib import Path
|
|
5
|
-
from typing import IO, Any, List, Union
|
|
5
|
+
from typing import IO, Any, List, Optional, Union
|
|
6
6
|
from uuid import uuid4
|
|
7
7
|
|
|
8
|
-
from agno.
|
|
9
|
-
from agno.
|
|
8
|
+
from agno.knowledge.chunking.fixed import FixedSizeChunking
|
|
9
|
+
from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
|
|
10
|
+
from agno.knowledge.document.base import Document
|
|
11
|
+
from agno.knowledge.reader.base import Reader
|
|
12
|
+
from agno.knowledge.types import ContentType
|
|
10
13
|
from agno.utils.log import log_info
|
|
11
14
|
|
|
12
15
|
|
|
@@ -15,18 +18,36 @@ class JSONReader(Reader):
|
|
|
15
18
|
|
|
16
19
|
chunk: bool = False
|
|
17
20
|
|
|
18
|
-
def
|
|
21
|
+
def __init__(self, chunking_strategy: Optional[ChunkingStrategy] = FixedSizeChunking(), **kwargs):
    """Set up the JSON reader.

    Args:
        chunking_strategy: Strategy handed to the base reader. The default
            ``FixedSizeChunking`` instance is created once, when this method
            is defined, not on every call.
        **kwargs: Passed through unchanged to the base reader's initializer.
    """
    # Fold the strategy into one keyword mapping before delegating to the base class.
    base_kwargs = {"chunking_strategy": chunking_strategy, **kwargs}
    super().__init__(**base_kwargs)
|
|
23
|
+
|
|
24
|
+
@classmethod
def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
    """Get the list of supported chunking strategies for JSON readers.

    Returns:
        A new list of strategy types on every call.
    """
    # The implicit first argument of a classmethod is the class, so it is named ``cls`` (PEP 8).
    return [
        ChunkingStrategyType.FIXED_SIZE_CHUNKER,
        ChunkingStrategyType.AGENTIC_CHUNKER,
        ChunkingStrategyType.DOCUMENT_CHUNKER,
        ChunkingStrategyType.RECURSIVE_CHUNKER,
        ChunkingStrategyType.SEMANTIC_CHUNKER,
    ]
|
|
34
|
+
|
|
35
|
+
@classmethod
def get_supported_content_types(cls) -> List[ContentType]:
    """Get the content types this reader accepts: JSON only.

    Returns:
        A new single-item list on every call.
    """
    # The implicit first argument of a classmethod is the class, so it is named ``cls`` (PEP 8).
    return [ContentType.JSON]
|
|
38
|
+
|
|
39
|
+
def read(self, path: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
|
|
19
40
|
try:
|
|
20
41
|
if isinstance(path, Path):
|
|
21
42
|
if not path.exists():
|
|
22
43
|
raise FileNotFoundError(f"Could not find file: {path}")
|
|
23
44
|
log_info(f"Reading: {path}")
|
|
24
|
-
json_name = path.name.split(".")[0]
|
|
45
|
+
json_name = name or path.name.split(".")[0]
|
|
25
46
|
json_contents = json.loads(path.read_text("utf-8"))
|
|
26
47
|
|
|
27
48
|
elif isinstance(path, BytesIO):
|
|
28
|
-
|
|
29
|
-
|
|
49
|
+
json_name = name or path.name.split(".")[0]
|
|
50
|
+
log_info(f"Reading uploaded file: {json_name}")
|
|
30
51
|
path.seek(0)
|
|
31
52
|
json_contents = json.load(path)
|
|
32
53
|
|
|
@@ -54,7 +75,7 @@ class JSONReader(Reader):
|
|
|
54
75
|
except Exception:
|
|
55
76
|
raise
|
|
56
77
|
|
|
57
|
-
async def async_read(self, path: Union[Path, IO[Any]]) -> List[Document]:
|
|
78
|
+
async def async_read(self, path: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
|
|
58
79
|
"""Asynchronously read JSON files.
|
|
59
80
|
|
|
60
81
|
Args:
|
|
@@ -63,4 +84,4 @@ class JSONReader(Reader):
|
|
|
63
84
|
Returns:
|
|
64
85
|
List[Document]: List of documents from the JSON file
|
|
65
86
|
"""
|
|
66
|
-
return await asyncio.to_thread(self.read, path)
|
|
87
|
+
return await asyncio.to_thread(self.read, path, name)
|