agno 1.8.1__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/__init__.py +8 -0
- agno/agent/__init__.py +19 -27
- agno/agent/agent.py +3143 -4170
- agno/api/agent.py +11 -67
- agno/api/api.py +5 -46
- agno/api/evals.py +8 -19
- agno/api/os.py +17 -0
- agno/api/routes.py +6 -41
- agno/api/schemas/__init__.py +9 -0
- agno/api/schemas/agent.py +5 -21
- agno/api/schemas/evals.py +7 -16
- agno/api/schemas/os.py +14 -0
- agno/api/schemas/team.py +5 -21
- agno/api/schemas/utils.py +21 -0
- agno/api/schemas/workflows.py +11 -7
- agno/api/settings.py +53 -0
- agno/api/team.py +11 -66
- agno/api/workflow.py +28 -0
- agno/cloud/aws/base.py +214 -0
- agno/cloud/aws/s3/__init__.py +2 -0
- agno/cloud/aws/s3/api_client.py +43 -0
- agno/cloud/aws/s3/bucket.py +195 -0
- agno/cloud/aws/s3/object.py +57 -0
- agno/db/__init__.py +24 -0
- agno/db/base.py +245 -0
- agno/db/dynamo/__init__.py +3 -0
- agno/db/dynamo/dynamo.py +1743 -0
- agno/db/dynamo/schemas.py +278 -0
- agno/db/dynamo/utils.py +684 -0
- agno/db/firestore/__init__.py +3 -0
- agno/db/firestore/firestore.py +1432 -0
- agno/db/firestore/schemas.py +130 -0
- agno/db/firestore/utils.py +278 -0
- agno/db/gcs_json/__init__.py +3 -0
- agno/db/gcs_json/gcs_json_db.py +1001 -0
- agno/db/gcs_json/utils.py +194 -0
- agno/db/in_memory/__init__.py +3 -0
- agno/db/in_memory/in_memory_db.py +882 -0
- agno/db/in_memory/utils.py +172 -0
- agno/db/json/__init__.py +3 -0
- agno/db/json/json_db.py +1045 -0
- agno/db/json/utils.py +196 -0
- agno/db/migrations/v1_to_v2.py +162 -0
- agno/db/mongo/__init__.py +3 -0
- agno/db/mongo/mongo.py +1416 -0
- agno/db/mongo/schemas.py +77 -0
- agno/db/mongo/utils.py +204 -0
- agno/db/mysql/__init__.py +3 -0
- agno/db/mysql/mysql.py +1719 -0
- agno/db/mysql/schemas.py +124 -0
- agno/db/mysql/utils.py +297 -0
- agno/db/postgres/__init__.py +3 -0
- agno/db/postgres/postgres.py +1710 -0
- agno/db/postgres/schemas.py +124 -0
- agno/db/postgres/utils.py +280 -0
- agno/db/redis/__init__.py +3 -0
- agno/db/redis/redis.py +1367 -0
- agno/db/redis/schemas.py +109 -0
- agno/db/redis/utils.py +288 -0
- agno/db/schemas/__init__.py +3 -0
- agno/db/schemas/evals.py +33 -0
- agno/db/schemas/knowledge.py +40 -0
- agno/db/schemas/memory.py +46 -0
- agno/db/singlestore/__init__.py +3 -0
- agno/db/singlestore/schemas.py +116 -0
- agno/db/singlestore/singlestore.py +1712 -0
- agno/db/singlestore/utils.py +326 -0
- agno/db/sqlite/__init__.py +3 -0
- agno/db/sqlite/schemas.py +119 -0
- agno/db/sqlite/sqlite.py +1676 -0
- agno/db/sqlite/utils.py +268 -0
- agno/db/utils.py +88 -0
- agno/eval/__init__.py +14 -0
- agno/eval/accuracy.py +154 -48
- agno/eval/performance.py +88 -23
- agno/eval/reliability.py +73 -20
- agno/eval/utils.py +23 -13
- agno/integrations/discord/__init__.py +3 -0
- agno/{app → integrations}/discord/client.py +15 -11
- agno/knowledge/__init__.py +2 -2
- agno/{document → knowledge}/chunking/agentic.py +2 -2
- agno/{document → knowledge}/chunking/document.py +2 -2
- agno/{document → knowledge}/chunking/fixed.py +3 -3
- agno/{document → knowledge}/chunking/markdown.py +2 -2
- agno/{document → knowledge}/chunking/recursive.py +2 -2
- agno/{document → knowledge}/chunking/row.py +2 -2
- agno/knowledge/chunking/semantic.py +59 -0
- agno/knowledge/chunking/strategy.py +121 -0
- agno/knowledge/content.py +74 -0
- agno/knowledge/document/__init__.py +5 -0
- agno/{document → knowledge/document}/base.py +12 -2
- agno/knowledge/embedder/__init__.py +5 -0
- agno/{embedder → knowledge/embedder}/aws_bedrock.py +127 -1
- agno/{embedder → knowledge/embedder}/azure_openai.py +65 -1
- agno/{embedder → knowledge/embedder}/base.py +6 -0
- agno/{embedder → knowledge/embedder}/cohere.py +72 -1
- agno/{embedder → knowledge/embedder}/fastembed.py +17 -1
- agno/{embedder → knowledge/embedder}/fireworks.py +1 -1
- agno/{embedder → knowledge/embedder}/google.py +74 -1
- agno/{embedder → knowledge/embedder}/huggingface.py +36 -2
- agno/{embedder → knowledge/embedder}/jina.py +48 -2
- agno/knowledge/embedder/langdb.py +22 -0
- agno/knowledge/embedder/mistral.py +139 -0
- agno/{embedder → knowledge/embedder}/nebius.py +1 -1
- agno/{embedder → knowledge/embedder}/ollama.py +54 -3
- agno/knowledge/embedder/openai.py +223 -0
- agno/{embedder → knowledge/embedder}/sentence_transformer.py +16 -1
- agno/{embedder → knowledge/embedder}/together.py +1 -1
- agno/{embedder → knowledge/embedder}/voyageai.py +49 -1
- agno/knowledge/knowledge.py +1551 -0
- agno/knowledge/reader/__init__.py +7 -0
- agno/{document → knowledge}/reader/arxiv_reader.py +32 -4
- agno/knowledge/reader/base.py +88 -0
- agno/{document → knowledge}/reader/csv_reader.py +47 -65
- agno/knowledge/reader/docx_reader.py +83 -0
- agno/{document → knowledge}/reader/firecrawl_reader.py +42 -21
- agno/{document → knowledge}/reader/json_reader.py +30 -9
- agno/{document → knowledge}/reader/markdown_reader.py +58 -9
- agno/{document → knowledge}/reader/pdf_reader.py +71 -126
- agno/knowledge/reader/reader_factory.py +268 -0
- agno/knowledge/reader/s3_reader.py +101 -0
- agno/{document → knowledge}/reader/text_reader.py +31 -10
- agno/knowledge/reader/url_reader.py +128 -0
- agno/knowledge/reader/web_search_reader.py +366 -0
- agno/{document → knowledge}/reader/website_reader.py +37 -10
- agno/knowledge/reader/wikipedia_reader.py +59 -0
- agno/knowledge/reader/youtube_reader.py +78 -0
- agno/knowledge/remote_content/remote_content.py +88 -0
- agno/{reranker → knowledge/reranker}/base.py +1 -1
- agno/{reranker → knowledge/reranker}/cohere.py +2 -2
- agno/{reranker → knowledge/reranker}/infinity.py +2 -2
- agno/{reranker → knowledge/reranker}/sentence_transformer.py +2 -2
- agno/knowledge/types.py +30 -0
- agno/knowledge/utils.py +169 -0
- agno/media.py +269 -268
- agno/memory/__init__.py +2 -10
- agno/memory/manager.py +1003 -148
- agno/models/aimlapi/__init__.py +2 -2
- agno/models/aimlapi/aimlapi.py +6 -6
- agno/models/anthropic/claude.py +131 -131
- agno/models/aws/bedrock.py +110 -182
- agno/models/aws/claude.py +64 -18
- agno/models/azure/ai_foundry.py +73 -23
- agno/models/base.py +346 -290
- agno/models/cerebras/cerebras.py +84 -27
- agno/models/cohere/chat.py +106 -98
- agno/models/google/gemini.py +105 -46
- agno/models/groq/groq.py +97 -35
- agno/models/huggingface/huggingface.py +92 -27
- agno/models/ibm/watsonx.py +72 -13
- agno/models/litellm/chat.py +85 -13
- agno/models/message.py +46 -151
- agno/models/meta/llama.py +85 -49
- agno/models/metrics.py +120 -0
- agno/models/mistral/mistral.py +90 -21
- agno/models/ollama/__init__.py +0 -2
- agno/models/ollama/chat.py +85 -47
- agno/models/openai/chat.py +154 -37
- agno/models/openai/responses.py +178 -105
- agno/models/perplexity/perplexity.py +26 -2
- agno/models/portkey/portkey.py +0 -7
- agno/models/response.py +15 -9
- agno/models/utils.py +20 -0
- agno/models/vercel/__init__.py +2 -2
- agno/models/vercel/v0.py +1 -1
- agno/models/vllm/__init__.py +2 -2
- agno/models/vllm/vllm.py +3 -3
- agno/models/xai/xai.py +10 -10
- agno/os/__init__.py +3 -0
- agno/os/app.py +497 -0
- agno/os/auth.py +47 -0
- agno/os/config.py +103 -0
- agno/os/interfaces/agui/__init__.py +3 -0
- agno/os/interfaces/agui/agui.py +31 -0
- agno/{app/agui/async_router.py → os/interfaces/agui/router.py} +16 -16
- agno/{app → os/interfaces}/agui/utils.py +77 -33
- agno/os/interfaces/base.py +21 -0
- agno/os/interfaces/slack/__init__.py +3 -0
- agno/{app/slack/async_router.py → os/interfaces/slack/router.py} +3 -5
- agno/os/interfaces/slack/slack.py +32 -0
- agno/os/interfaces/whatsapp/__init__.py +3 -0
- agno/{app/whatsapp/async_router.py → os/interfaces/whatsapp/router.py} +4 -7
- agno/os/interfaces/whatsapp/whatsapp.py +29 -0
- agno/os/mcp.py +235 -0
- agno/os/router.py +1400 -0
- agno/os/routers/__init__.py +3 -0
- agno/os/routers/evals/__init__.py +3 -0
- agno/os/routers/evals/evals.py +393 -0
- agno/os/routers/evals/schemas.py +142 -0
- agno/os/routers/evals/utils.py +161 -0
- agno/os/routers/knowledge/__init__.py +3 -0
- agno/os/routers/knowledge/knowledge.py +850 -0
- agno/os/routers/knowledge/schemas.py +118 -0
- agno/os/routers/memory/__init__.py +3 -0
- agno/os/routers/memory/memory.py +410 -0
- agno/os/routers/memory/schemas.py +58 -0
- agno/os/routers/metrics/__init__.py +3 -0
- agno/os/routers/metrics/metrics.py +178 -0
- agno/os/routers/metrics/schemas.py +47 -0
- agno/os/routers/session/__init__.py +3 -0
- agno/os/routers/session/session.py +536 -0
- agno/os/schema.py +945 -0
- agno/{app/playground → os}/settings.py +7 -15
- agno/os/utils.py +270 -0
- agno/reasoning/azure_ai_foundry.py +4 -4
- agno/reasoning/deepseek.py +4 -4
- agno/reasoning/default.py +6 -11
- agno/reasoning/groq.py +4 -4
- agno/reasoning/helpers.py +4 -6
- agno/reasoning/ollama.py +4 -4
- agno/reasoning/openai.py +4 -4
- agno/run/agent.py +633 -0
- agno/run/base.py +53 -77
- agno/run/cancel.py +81 -0
- agno/run/team.py +243 -96
- agno/run/workflow.py +550 -12
- agno/session/__init__.py +10 -0
- agno/session/agent.py +244 -0
- agno/session/summary.py +225 -0
- agno/session/team.py +262 -0
- agno/{storage/session/v2 → session}/workflow.py +47 -24
- agno/team/__init__.py +15 -16
- agno/team/team.py +3260 -4824
- agno/tools/agentql.py +14 -5
- agno/tools/airflow.py +9 -4
- agno/tools/api.py +7 -3
- agno/tools/apify.py +2 -46
- agno/tools/arxiv.py +8 -3
- agno/tools/aws_lambda.py +7 -5
- agno/tools/aws_ses.py +7 -1
- agno/tools/baidusearch.py +4 -1
- agno/tools/bitbucket.py +4 -4
- agno/tools/brandfetch.py +14 -11
- agno/tools/bravesearch.py +4 -1
- agno/tools/brightdata.py +43 -23
- agno/tools/browserbase.py +13 -4
- agno/tools/calcom.py +12 -10
- agno/tools/calculator.py +10 -27
- agno/tools/cartesia.py +20 -17
- agno/tools/{clickup_tool.py → clickup.py} +12 -25
- agno/tools/confluence.py +8 -8
- agno/tools/crawl4ai.py +7 -1
- agno/tools/csv_toolkit.py +9 -8
- agno/tools/dalle.py +22 -12
- agno/tools/daytona.py +13 -16
- agno/tools/decorator.py +6 -3
- agno/tools/desi_vocal.py +17 -8
- agno/tools/discord.py +11 -8
- agno/tools/docker.py +30 -42
- agno/tools/duckdb.py +34 -53
- agno/tools/duckduckgo.py +8 -7
- agno/tools/e2b.py +62 -62
- agno/tools/eleven_labs.py +36 -29
- agno/tools/email.py +4 -1
- agno/tools/evm.py +7 -1
- agno/tools/exa.py +19 -14
- agno/tools/fal.py +30 -30
- agno/tools/file.py +9 -8
- agno/tools/financial_datasets.py +25 -44
- agno/tools/firecrawl.py +22 -22
- agno/tools/function.py +127 -18
- agno/tools/giphy.py +23 -11
- agno/tools/github.py +48 -126
- agno/tools/gmail.py +45 -61
- agno/tools/google_bigquery.py +7 -6
- agno/tools/google_maps.py +11 -26
- agno/tools/googlesearch.py +7 -2
- agno/tools/googlesheets.py +21 -17
- agno/tools/hackernews.py +9 -5
- agno/tools/jina.py +5 -4
- agno/tools/jira.py +18 -9
- agno/tools/knowledge.py +31 -32
- agno/tools/linear.py +19 -34
- agno/tools/linkup.py +5 -1
- agno/tools/local_file_system.py +8 -5
- agno/tools/lumalab.py +32 -20
- agno/tools/mcp.py +1 -2
- agno/tools/mem0.py +18 -12
- agno/tools/memori.py +14 -10
- agno/tools/mlx_transcribe.py +3 -2
- agno/tools/models/azure_openai.py +33 -15
- agno/tools/models/gemini.py +59 -32
- agno/tools/models/groq.py +30 -23
- agno/tools/models/nebius.py +28 -12
- agno/tools/models_labs.py +40 -16
- agno/tools/moviepy_video.py +7 -6
- agno/tools/neo4j.py +10 -8
- agno/tools/newspaper.py +7 -2
- agno/tools/newspaper4k.py +8 -3
- agno/tools/openai.py +58 -32
- agno/tools/openbb.py +12 -11
- agno/tools/opencv.py +63 -47
- agno/tools/openweather.py +14 -12
- agno/tools/pandas.py +11 -3
- agno/tools/postgres.py +4 -12
- agno/tools/pubmed.py +4 -1
- agno/tools/python.py +9 -22
- agno/tools/reasoning.py +35 -27
- agno/tools/reddit.py +11 -26
- agno/tools/replicate.py +55 -42
- agno/tools/resend.py +4 -1
- agno/tools/scrapegraph.py +15 -14
- agno/tools/searxng.py +10 -23
- agno/tools/serpapi.py +6 -3
- agno/tools/serper.py +13 -4
- agno/tools/shell.py +9 -2
- agno/tools/slack.py +12 -11
- agno/tools/sleep.py +3 -2
- agno/tools/spider.py +24 -4
- agno/tools/sql.py +7 -6
- agno/tools/tavily.py +6 -4
- agno/tools/telegram.py +12 -4
- agno/tools/todoist.py +11 -31
- agno/tools/toolkit.py +1 -1
- agno/tools/trafilatura.py +22 -6
- agno/tools/trello.py +9 -22
- agno/tools/twilio.py +10 -3
- agno/tools/user_control_flow.py +6 -1
- agno/tools/valyu.py +34 -5
- agno/tools/visualization.py +19 -28
- agno/tools/webbrowser.py +4 -3
- agno/tools/webex.py +11 -7
- agno/tools/website.py +15 -46
- agno/tools/webtools.py +12 -4
- agno/tools/whatsapp.py +5 -9
- agno/tools/wikipedia.py +20 -13
- agno/tools/x.py +14 -13
- agno/tools/yfinance.py +13 -40
- agno/tools/youtube.py +26 -20
- agno/tools/zendesk.py +7 -2
- agno/tools/zep.py +10 -7
- agno/tools/zoom.py +10 -9
- agno/utils/common.py +1 -19
- agno/utils/events.py +100 -123
- agno/utils/gemini.py +32 -2
- agno/utils/knowledge.py +29 -0
- agno/utils/log.py +54 -4
- agno/utils/mcp.py +68 -10
- agno/utils/media.py +39 -0
- agno/utils/message.py +12 -1
- agno/utils/models/aws_claude.py +1 -1
- agno/utils/models/claude.py +47 -4
- agno/utils/models/cohere.py +1 -1
- agno/utils/models/mistral.py +8 -7
- agno/utils/models/schema_utils.py +3 -3
- agno/utils/models/watsonx.py +1 -1
- agno/utils/openai.py +1 -1
- agno/utils/pprint.py +33 -32
- agno/utils/print_response/agent.py +779 -0
- agno/utils/print_response/team.py +1669 -0
- agno/utils/print_response/workflow.py +1451 -0
- agno/utils/prompts.py +14 -14
- agno/utils/reasoning.py +87 -0
- agno/utils/response.py +42 -42
- agno/utils/streamlit.py +481 -0
- agno/utils/string.py +8 -22
- agno/utils/team.py +50 -0
- agno/utils/timer.py +2 -2
- agno/vectordb/base.py +33 -21
- agno/vectordb/cassandra/cassandra.py +287 -23
- agno/vectordb/chroma/chromadb.py +482 -59
- agno/vectordb/clickhouse/clickhousedb.py +270 -63
- agno/vectordb/couchbase/couchbase.py +309 -29
- agno/vectordb/lancedb/lance_db.py +360 -21
- agno/vectordb/langchaindb/__init__.py +5 -0
- agno/vectordb/langchaindb/langchaindb.py +145 -0
- agno/vectordb/lightrag/__init__.py +5 -0
- agno/vectordb/lightrag/lightrag.py +374 -0
- agno/vectordb/llamaindex/llamaindexdb.py +127 -0
- agno/vectordb/milvus/milvus.py +242 -32
- agno/vectordb/mongodb/mongodb.py +200 -24
- agno/vectordb/pgvector/pgvector.py +319 -37
- agno/vectordb/pineconedb/pineconedb.py +221 -27
- agno/vectordb/qdrant/qdrant.py +334 -14
- agno/vectordb/singlestore/singlestore.py +286 -29
- agno/vectordb/surrealdb/surrealdb.py +187 -7
- agno/vectordb/upstashdb/upstashdb.py +342 -26
- agno/vectordb/weaviate/weaviate.py +227 -165
- agno/workflow/__init__.py +17 -13
- agno/workflow/{v2/condition.py → condition.py} +135 -32
- agno/workflow/{v2/loop.py → loop.py} +115 -28
- agno/workflow/{v2/parallel.py → parallel.py} +138 -108
- agno/workflow/{v2/router.py → router.py} +133 -32
- agno/workflow/{v2/step.py → step.py} +207 -49
- agno/workflow/{v2/steps.py → steps.py} +147 -66
- agno/workflow/types.py +482 -0
- agno/workflow/workflow.py +2410 -696
- agno-2.0.0.dist-info/METADATA +494 -0
- agno-2.0.0.dist-info/RECORD +515 -0
- agno-2.0.0.dist-info/licenses/LICENSE +201 -0
- agno/agent/metrics.py +0 -107
- agno/api/app.py +0 -35
- agno/api/playground.py +0 -92
- agno/api/schemas/app.py +0 -12
- agno/api/schemas/playground.py +0 -22
- agno/api/schemas/user.py +0 -35
- agno/api/schemas/workspace.py +0 -46
- agno/api/user.py +0 -160
- agno/api/workflows.py +0 -33
- agno/api/workspace.py +0 -175
- agno/app/agui/__init__.py +0 -3
- agno/app/agui/app.py +0 -17
- agno/app/agui/sync_router.py +0 -120
- agno/app/base.py +0 -186
- agno/app/discord/__init__.py +0 -3
- agno/app/fastapi/__init__.py +0 -3
- agno/app/fastapi/app.py +0 -107
- agno/app/fastapi/async_router.py +0 -457
- agno/app/fastapi/sync_router.py +0 -448
- agno/app/playground/app.py +0 -228
- agno/app/playground/async_router.py +0 -1050
- agno/app/playground/deploy.py +0 -249
- agno/app/playground/operator.py +0 -183
- agno/app/playground/schemas.py +0 -220
- agno/app/playground/serve.py +0 -55
- agno/app/playground/sync_router.py +0 -1042
- agno/app/playground/utils.py +0 -46
- agno/app/settings.py +0 -15
- agno/app/slack/__init__.py +0 -3
- agno/app/slack/app.py +0 -19
- agno/app/slack/sync_router.py +0 -92
- agno/app/utils.py +0 -54
- agno/app/whatsapp/__init__.py +0 -3
- agno/app/whatsapp/app.py +0 -15
- agno/app/whatsapp/sync_router.py +0 -197
- agno/cli/auth_server.py +0 -249
- agno/cli/config.py +0 -274
- agno/cli/console.py +0 -88
- agno/cli/credentials.py +0 -23
- agno/cli/entrypoint.py +0 -571
- agno/cli/operator.py +0 -357
- agno/cli/settings.py +0 -96
- agno/cli/ws/ws_cli.py +0 -817
- agno/constants.py +0 -13
- agno/document/__init__.py +0 -5
- agno/document/chunking/semantic.py +0 -45
- agno/document/chunking/strategy.py +0 -31
- agno/document/reader/__init__.py +0 -5
- agno/document/reader/base.py +0 -47
- agno/document/reader/docx_reader.py +0 -60
- agno/document/reader/gcs/pdf_reader.py +0 -44
- agno/document/reader/s3/pdf_reader.py +0 -59
- agno/document/reader/s3/text_reader.py +0 -63
- agno/document/reader/url_reader.py +0 -59
- agno/document/reader/youtube_reader.py +0 -58
- agno/embedder/__init__.py +0 -5
- agno/embedder/langdb.py +0 -80
- agno/embedder/mistral.py +0 -82
- agno/embedder/openai.py +0 -78
- agno/file/__init__.py +0 -5
- agno/file/file.py +0 -16
- agno/file/local/csv.py +0 -32
- agno/file/local/txt.py +0 -19
- agno/infra/app.py +0 -240
- agno/infra/base.py +0 -144
- agno/infra/context.py +0 -20
- agno/infra/db_app.py +0 -52
- agno/infra/resource.py +0 -205
- agno/infra/resources.py +0 -55
- agno/knowledge/agent.py +0 -702
- agno/knowledge/arxiv.py +0 -33
- agno/knowledge/combined.py +0 -36
- agno/knowledge/csv.py +0 -144
- agno/knowledge/csv_url.py +0 -124
- agno/knowledge/document.py +0 -223
- agno/knowledge/docx.py +0 -137
- agno/knowledge/firecrawl.py +0 -34
- agno/knowledge/gcs/__init__.py +0 -0
- agno/knowledge/gcs/base.py +0 -39
- agno/knowledge/gcs/pdf.py +0 -125
- agno/knowledge/json.py +0 -137
- agno/knowledge/langchain.py +0 -71
- agno/knowledge/light_rag.py +0 -273
- agno/knowledge/llamaindex.py +0 -66
- agno/knowledge/markdown.py +0 -154
- agno/knowledge/pdf.py +0 -164
- agno/knowledge/pdf_bytes.py +0 -42
- agno/knowledge/pdf_url.py +0 -148
- agno/knowledge/s3/__init__.py +0 -0
- agno/knowledge/s3/base.py +0 -64
- agno/knowledge/s3/pdf.py +0 -33
- agno/knowledge/s3/text.py +0 -34
- agno/knowledge/text.py +0 -141
- agno/knowledge/url.py +0 -46
- agno/knowledge/website.py +0 -179
- agno/knowledge/wikipedia.py +0 -32
- agno/knowledge/youtube.py +0 -35
- agno/memory/agent.py +0 -423
- agno/memory/classifier.py +0 -104
- agno/memory/db/__init__.py +0 -5
- agno/memory/db/base.py +0 -42
- agno/memory/db/mongodb.py +0 -189
- agno/memory/db/postgres.py +0 -203
- agno/memory/db/sqlite.py +0 -193
- agno/memory/memory.py +0 -22
- agno/memory/row.py +0 -36
- agno/memory/summarizer.py +0 -201
- agno/memory/summary.py +0 -19
- agno/memory/team.py +0 -415
- agno/memory/v2/__init__.py +0 -2
- agno/memory/v2/db/__init__.py +0 -1
- agno/memory/v2/db/base.py +0 -42
- agno/memory/v2/db/firestore.py +0 -339
- agno/memory/v2/db/mongodb.py +0 -196
- agno/memory/v2/db/postgres.py +0 -214
- agno/memory/v2/db/redis.py +0 -187
- agno/memory/v2/db/schema.py +0 -54
- agno/memory/v2/db/sqlite.py +0 -209
- agno/memory/v2/manager.py +0 -437
- agno/memory/v2/memory.py +0 -1097
- agno/memory/v2/schema.py +0 -55
- agno/memory/v2/summarizer.py +0 -215
- agno/memory/workflow.py +0 -38
- agno/models/ollama/tools.py +0 -430
- agno/models/qwen/__init__.py +0 -5
- agno/playground/__init__.py +0 -10
- agno/playground/deploy.py +0 -3
- agno/playground/playground.py +0 -3
- agno/playground/serve.py +0 -3
- agno/playground/settings.py +0 -3
- agno/reranker/__init__.py +0 -0
- agno/run/response.py +0 -467
- agno/run/v2/__init__.py +0 -0
- agno/run/v2/workflow.py +0 -567
- agno/storage/__init__.py +0 -0
- agno/storage/agent/__init__.py +0 -0
- agno/storage/agent/dynamodb.py +0 -1
- agno/storage/agent/json.py +0 -1
- agno/storage/agent/mongodb.py +0 -1
- agno/storage/agent/postgres.py +0 -1
- agno/storage/agent/singlestore.py +0 -1
- agno/storage/agent/sqlite.py +0 -1
- agno/storage/agent/yaml.py +0 -1
- agno/storage/base.py +0 -60
- agno/storage/dynamodb.py +0 -673
- agno/storage/firestore.py +0 -297
- agno/storage/gcs_json.py +0 -261
- agno/storage/in_memory.py +0 -234
- agno/storage/json.py +0 -237
- agno/storage/mongodb.py +0 -328
- agno/storage/mysql.py +0 -685
- agno/storage/postgres.py +0 -682
- agno/storage/redis.py +0 -336
- agno/storage/session/__init__.py +0 -16
- agno/storage/session/agent.py +0 -64
- agno/storage/session/team.py +0 -63
- agno/storage/session/v2/__init__.py +0 -5
- agno/storage/session/workflow.py +0 -61
- agno/storage/singlestore.py +0 -606
- agno/storage/sqlite.py +0 -646
- agno/storage/workflow/__init__.py +0 -0
- agno/storage/workflow/mongodb.py +0 -1
- agno/storage/workflow/postgres.py +0 -1
- agno/storage/workflow/sqlite.py +0 -1
- agno/storage/yaml.py +0 -241
- agno/tools/thinking.py +0 -73
- agno/utils/defaults.py +0 -57
- agno/utils/filesystem.py +0 -39
- agno/utils/git.py +0 -52
- agno/utils/json_io.py +0 -30
- agno/utils/load_env.py +0 -19
- agno/utils/py_io.py +0 -19
- agno/utils/pyproject.py +0 -18
- agno/utils/resource_filter.py +0 -31
- agno/workflow/v2/__init__.py +0 -21
- agno/workflow/v2/types.py +0 -357
- agno/workflow/v2/workflow.py +0 -3312
- agno/workspace/__init__.py +0 -0
- agno/workspace/config.py +0 -325
- agno/workspace/enums.py +0 -6
- agno/workspace/helpers.py +0 -52
- agno/workspace/operator.py +0 -757
- agno/workspace/settings.py +0 -158
- agno-1.8.1.dist-info/METADATA +0 -982
- agno-1.8.1.dist-info/RECORD +0 -566
- agno-1.8.1.dist-info/entry_points.txt +0 -3
- agno-1.8.1.dist-info/licenses/LICENSE +0 -375
- /agno/{app → db/migrations}/__init__.py +0 -0
- /agno/{app/playground/__init__.py → db/schemas/metrics.py} +0 -0
- /agno/{cli → integrations}/__init__.py +0 -0
- /agno/{cli/ws → knowledge/chunking}/__init__.py +0 -0
- /agno/{document/chunking → knowledge/remote_content}/__init__.py +0 -0
- /agno/{document/reader/gcs → knowledge/reranker}/__init__.py +0 -0
- /agno/{document/reader/s3 → os/interfaces}/__init__.py +0 -0
- /agno/{app → os/interfaces}/slack/security.py +0 -0
- /agno/{app → os/interfaces}/whatsapp/security.py +0 -0
- /agno/{file/local → utils/print_response}/__init__.py +0 -0
- /agno/{infra → vectordb/llamaindex}/__init__.py +0 -0
- {agno-1.8.1.dist-info → agno-2.0.0.dist-info}/WHEEL +0 -0
- {agno-1.8.1.dist-info → agno-2.0.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from io import BytesIO
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import List, Optional
|
|
5
|
+
|
|
6
|
+
from agno.knowledge.chunking.fixed import FixedSizeChunking
|
|
7
|
+
from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
|
|
8
|
+
from agno.knowledge.document.base import Document
|
|
9
|
+
from agno.knowledge.reader.base import Reader
|
|
10
|
+
from agno.knowledge.reader.pdf_reader import PDFReader
|
|
11
|
+
from agno.knowledge.reader.text_reader import TextReader
|
|
12
|
+
from agno.knowledge.types import ContentType
|
|
13
|
+
from agno.utils.log import log_info, logger
|
|
14
|
+
|
|
15
|
+
try:
|
|
16
|
+
from agno.aws.resource.s3.object import S3Object # type: ignore
|
|
17
|
+
except (ModuleNotFoundError, ImportError):
|
|
18
|
+
raise ImportError("`agno-aws` not installed. Please install using `pip install agno-aws`")
|
|
19
|
+
|
|
20
|
+
try:
|
|
21
|
+
import textract # noqa: F401
|
|
22
|
+
except ImportError:
|
|
23
|
+
raise ImportError("`textract` not installed. Please install it via `pip install textract`.")
|
|
24
|
+
|
|
25
|
+
try:
|
|
26
|
+
from pypdf import PdfReader as DocumentReader # noqa: F401
|
|
27
|
+
except ImportError:
|
|
28
|
+
raise ImportError("`pypdf` not installed. Please install it via `pip install pypdf`.")
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class S3Reader(Reader):
    """Reader for files stored in S3.

    Objects whose URI ends in ``.pdf`` (case-insensitive) are parsed in memory
    with ``PDFReader``. Every other object is downloaded to a local temporary
    file under ``storage/`` and parsed with ``TextReader``; the temporary file
    is always removed afterwards.
    """

    def __init__(self, chunking_strategy: Optional[ChunkingStrategy] = FixedSizeChunking(), **kwargs):
        """Initialise the reader.

        Args:
            chunking_strategy: Strategy forwarded to the ``Reader`` base class.
            **kwargs: Extra keyword arguments forwarded to the ``Reader`` base class.
        """
        # NOTE(review): the default ``FixedSizeChunking()`` is evaluated once, when
        # this ``def`` is executed, so readers built without an explicit strategy
        # share that single instance. Kept as-is to preserve the signature.
        super().__init__(chunking_strategy=chunking_strategy, **kwargs)

    @classmethod
    def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
        """Get the list of supported chunking strategies for S3 readers."""
        return [
            ChunkingStrategyType.FIXED_SIZE_CHUNKER,
            ChunkingStrategyType.AGENTIC_CHUNKER,
            ChunkingStrategyType.DOCUMENT_CHUNKER,
            ChunkingStrategyType.RECURSIVE_CHUNKER,
            ChunkingStrategyType.SEMANTIC_CHUNKER,
        ]

    @classmethod
    def get_supported_content_types(cls) -> List[ContentType]:
        """Get the list of content types this reader declares support for."""
        return [ContentType.FILE, ContentType.URL, ContentType.TEXT]

    @staticmethod
    def _resolve_doc_name(name: Optional[str], s3_object: S3Object) -> str:
        """Return ``name`` if given, else a document name derived from the object key.

        The derived name is the last path segment of ``s3_object.name``, cut at its
        first ``.``, with spaces replaced by underscores.
        """
        if name is not None:
            return name
        # The last "/"-separated segment cannot contain "/", so only spaces need replacing.
        return s3_object.name.split("/")[-1].split(".")[0].replace(" ", "_")

    def read(self, name: Optional[str], s3_object: S3Object) -> List[Document]:
        """Read an S3 object and return the documents parsed from it.

        Args:
            name: Name to give the resulting documents. When ``None`` a name is
                derived from the object key.
            s3_object: The S3 object to read.

        Returns:
            The documents produced by ``PDFReader`` or ``TextReader``; an empty
            list if anything goes wrong (the error is logged, not raised).
        """
        try:
            log_info(f"Reading S3 file: {s3_object.uri}")
            doc_name = self._resolve_doc_name(name, s3_object)

            # PDF files are parsed straight from the in-memory body.
            if s3_object.uri.lower().endswith(".pdf"):
                object_body = s3_object.get_resource().get()["Body"]
                return PDFReader().read(pdf=BytesIO(object_body.read()), name=doc_name)

            # Everything else is treated as text and goes through a local file.
            # TODO: textract was used here previously; confirm whether it is still needed.
            temporary_file = Path("storage").joinpath(s3_object.name.split("/")[-1])
            temporary_file.parent.mkdir(parents=True, exist_ok=True)
            try:
                s3_object.download(temporary_file)
                return TextReader().read(file=temporary_file, name=doc_name)
            finally:
                # Remove the download even when the download or the parse fails.
                temporary_file.unlink(missing_ok=True)

        except Exception as e:
            logger.error(f"Error reading: {s3_object.uri}: {e}")

        return []

    async def async_read(self, name: Optional[str], s3_object: S3Object) -> List[Document]:
        """Asynchronously read S3 files by running the synchronous read operation in a thread."""
        return await asyncio.to_thread(self.read, name, s3_object)
|
|
@@ -1,27 +1,48 @@
|
|
|
1
1
|
import asyncio
|
|
2
2
|
import uuid
|
|
3
3
|
from pathlib import Path
|
|
4
|
-
from typing import IO, Any, List, Union
|
|
4
|
+
from typing import IO, Any, List, Optional, Union
|
|
5
5
|
|
|
6
|
-
from agno.
|
|
7
|
-
from agno.
|
|
6
|
+
from agno.knowledge.chunking.fixed import FixedSizeChunking
|
|
7
|
+
from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
|
|
8
|
+
from agno.knowledge.document.base import Document
|
|
9
|
+
from agno.knowledge.reader.base import Reader
|
|
10
|
+
from agno.knowledge.types import ContentType
|
|
8
11
|
from agno.utils.log import log_info, logger
|
|
9
12
|
|
|
10
13
|
|
|
11
14
|
class TextReader(Reader):
|
|
12
15
|
"""Reader for Text files"""
|
|
13
16
|
|
|
14
|
-
def
|
|
17
|
+
def __init__(self, chunking_strategy: Optional[ChunkingStrategy] = FixedSizeChunking(), **kwargs):
|
|
18
|
+
super().__init__(chunking_strategy=chunking_strategy, **kwargs)
|
|
19
|
+
|
|
20
|
+
@classmethod
|
|
21
|
+
def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:
|
|
22
|
+
"""Get the list of supported chunking strategies for Text readers."""
|
|
23
|
+
return [
|
|
24
|
+
ChunkingStrategyType.FIXED_SIZE_CHUNKER,
|
|
25
|
+
ChunkingStrategyType.AGENTIC_CHUNKER,
|
|
26
|
+
ChunkingStrategyType.DOCUMENT_CHUNKER,
|
|
27
|
+
ChunkingStrategyType.RECURSIVE_CHUNKER,
|
|
28
|
+
ChunkingStrategyType.SEMANTIC_CHUNKER,
|
|
29
|
+
]
|
|
30
|
+
|
|
31
|
+
@classmethod
|
|
32
|
+
def get_supported_content_types(self) -> List[ContentType]:
|
|
33
|
+
return [ContentType.TXT]
|
|
34
|
+
|
|
35
|
+
def read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
|
|
15
36
|
try:
|
|
16
37
|
if isinstance(file, Path):
|
|
17
38
|
if not file.exists():
|
|
18
39
|
raise FileNotFoundError(f"Could not find file: {file}")
|
|
19
40
|
log_info(f"Reading: {file}")
|
|
20
|
-
file_name = file.stem
|
|
41
|
+
file_name = name or file.stem
|
|
21
42
|
file_contents = file.read_text("utf-8")
|
|
22
43
|
else:
|
|
23
|
-
|
|
24
|
-
|
|
44
|
+
file_name = name or file.name.split(".")[0]
|
|
45
|
+
log_info(f"Reading uploaded file: {file_name}")
|
|
25
46
|
file.seek(0)
|
|
26
47
|
file_contents = file.read().decode("utf-8")
|
|
27
48
|
|
|
@@ -42,14 +63,14 @@ class TextReader(Reader):
|
|
|
42
63
|
logger.error(f"Error reading: {file}: {e}")
|
|
43
64
|
return []
|
|
44
65
|
|
|
45
|
-
async def async_read(self, file: Union[Path, IO[Any]]) -> List[Document]:
|
|
66
|
+
async def async_read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
|
|
46
67
|
try:
|
|
47
68
|
if isinstance(file, Path):
|
|
48
69
|
if not file.exists():
|
|
49
70
|
raise FileNotFoundError(f"Could not find file: {file}")
|
|
50
71
|
|
|
51
72
|
log_info(f"Reading asynchronously: {file}")
|
|
52
|
-
file_name = file.stem
|
|
73
|
+
file_name = name or file.stem
|
|
53
74
|
|
|
54
75
|
try:
|
|
55
76
|
import aiofiles
|
|
@@ -61,7 +82,7 @@ class TextReader(Reader):
|
|
|
61
82
|
file_contents = file.read_text("utf-8")
|
|
62
83
|
else:
|
|
63
84
|
log_info(f"Reading uploaded file asynchronously: {file.name}")
|
|
64
|
-
file_name = file.name.split(".")[0]
|
|
85
|
+
file_name = name or file.name.split(".")[0]
|
|
65
86
|
file.seek(0)
|
|
66
87
|
file_contents = file.read().decode("utf-8")
|
|
67
88
|
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
from io import BytesIO
|
|
2
|
+
from os.path import basename
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import List, Optional
|
|
5
|
+
from urllib.parse import urlparse
|
|
6
|
+
|
|
7
|
+
import httpx
|
|
8
|
+
|
|
9
|
+
from agno.knowledge.chunking.fixed import FixedSizeChunking
|
|
10
|
+
from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
|
|
11
|
+
from agno.knowledge.document.base import Document
|
|
12
|
+
from agno.knowledge.reader.base import Reader
|
|
13
|
+
from agno.knowledge.reader.csv_reader import CSVReader
|
|
14
|
+
from agno.knowledge.reader.pdf_reader import PDFReader
|
|
15
|
+
from agno.knowledge.types import ContentType
|
|
16
|
+
from agno.utils.http import async_fetch_with_retry, fetch_with_retry
|
|
17
|
+
from agno.utils.log import log_debug
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class URLReader(Reader):
|
|
21
|
+
"""Reader for general URL content"""
|
|
22
|
+
|
|
23
|
+
def __init__(
    self,
    chunking_strategy: Optional[ChunkingStrategy] = FixedSizeChunking(),
    proxy: Optional[str] = None,
    **kwargs,
):
    """Set up the URL reader.

    Args:
        chunking_strategy: Strategy forwarded to the ``Reader`` base class.
        proxy: Optional proxy stored on the instance and used when fetching URLs.
        **kwargs: Extra keyword arguments forwarded to the ``Reader`` base class.
    """
    super().__init__(chunking_strategy=chunking_strategy, **kwargs)
    self.proxy = proxy
|
|
28
|
+
|
|
29
|
+
@classmethod
def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
    """Get the list of supported chunking strategies for URL readers."""
    return [
        ChunkingStrategyType.FIXED_SIZE_CHUNKER,
        ChunkingStrategyType.AGENTIC_CHUNKER,
        ChunkingStrategyType.DOCUMENT_CHUNKER,
        ChunkingStrategyType.RECURSIVE_CHUNKER,
        ChunkingStrategyType.SEMANTIC_CHUNKER,
    ]
|
|
39
|
+
|
|
40
|
+
@classmethod
|
|
41
|
+
def get_supported_content_types(self) -> List[ContentType]:
|
|
42
|
+
return [ContentType.URL]
|
|
43
|
+
|
|
44
|
+
def read(
|
|
45
|
+
self, url: str, id: Optional[str] = None, name: Optional[str] = None, password: Optional[str] = None
|
|
46
|
+
) -> List[Document]:
|
|
47
|
+
if not url:
|
|
48
|
+
raise ValueError("No url provided")
|
|
49
|
+
|
|
50
|
+
log_debug(f"Reading: {url}")
|
|
51
|
+
|
|
52
|
+
# Retry the request up to 3 times with exponential backoff
|
|
53
|
+
response = fetch_with_retry(url, proxy=self.proxy)
|
|
54
|
+
|
|
55
|
+
documents = self._create_documents(
|
|
56
|
+
url=url, text=response.text, content=response.content, id=id, name=name, password=password
|
|
57
|
+
)
|
|
58
|
+
|
|
59
|
+
if not self.chunk:
|
|
60
|
+
return documents
|
|
61
|
+
|
|
62
|
+
chunked_documents = []
|
|
63
|
+
for document in documents:
|
|
64
|
+
chunked_documents.append(self.chunk_document(document))
|
|
65
|
+
return [doc for sublist in chunked_documents for doc in sublist]
|
|
66
|
+
|
|
67
|
+
async def async_read(
|
|
68
|
+
self, url: str, id: Optional[str] = None, name: Optional[str] = None, password: Optional[str] = None
|
|
69
|
+
) -> List[Document]:
|
|
70
|
+
"""Async version of read method"""
|
|
71
|
+
if not url:
|
|
72
|
+
raise ValueError("No url provided")
|
|
73
|
+
|
|
74
|
+
log_debug(f"Reading async: {url}")
|
|
75
|
+
client_args = {"proxy": self.proxy} if self.proxy else {}
|
|
76
|
+
async with httpx.AsyncClient(**client_args) as client: # type: ignore
|
|
77
|
+
response = await async_fetch_with_retry(url, client=client)
|
|
78
|
+
|
|
79
|
+
documents = self._create_documents(
|
|
80
|
+
url=url, text=response.text, content=response.content, id=id, name=name, password=password
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
if not self.chunk:
|
|
84
|
+
return documents
|
|
85
|
+
|
|
86
|
+
return await self.chunk_documents_async(documents)
|
|
87
|
+
|
|
88
|
+
def _create_documents(
|
|
89
|
+
self,
|
|
90
|
+
url: str,
|
|
91
|
+
text: str,
|
|
92
|
+
content: bytes,
|
|
93
|
+
id: Optional[str] = None,
|
|
94
|
+
name: Optional[str] = None,
|
|
95
|
+
password: Optional[str] = None,
|
|
96
|
+
) -> List[Document]:
|
|
97
|
+
"""Helper method to create a document from URL content"""
|
|
98
|
+
|
|
99
|
+
# Determine file extension from URL
|
|
100
|
+
parsed_url = urlparse(url)
|
|
101
|
+
url_path = Path(parsed_url.path) # type: ignore
|
|
102
|
+
file_extension = url_path.suffix.lower()
|
|
103
|
+
|
|
104
|
+
# Read the document using the appropriate reader
|
|
105
|
+
if file_extension == ".csv":
|
|
106
|
+
filename = basename(parsed_url.path) or "data.csv"
|
|
107
|
+
return CSVReader().read(file=BytesIO(content), name=filename)
|
|
108
|
+
elif file_extension == ".pdf":
|
|
109
|
+
if password:
|
|
110
|
+
return PDFReader().read(pdf=BytesIO(content), name=name, password=password)
|
|
111
|
+
else:
|
|
112
|
+
return PDFReader().read(pdf=BytesIO(content), name=name)
|
|
113
|
+
else:
|
|
114
|
+
doc_name = name or parsed_url.path.strip("/").replace("/", "_").replace(" ", "_")
|
|
115
|
+
if not doc_name:
|
|
116
|
+
doc_name = parsed_url.netloc
|
|
117
|
+
if not doc_name:
|
|
118
|
+
doc_name = url
|
|
119
|
+
|
|
120
|
+
return [
|
|
121
|
+
Document(
|
|
122
|
+
name=doc_name,
|
|
123
|
+
id=id or doc_name,
|
|
124
|
+
meta_data={"url": url},
|
|
125
|
+
content=text,
|
|
126
|
+
size=len(text),
|
|
127
|
+
)
|
|
128
|
+
]
|
|
@@ -0,0 +1,366 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import random
|
|
3
|
+
import time
|
|
4
|
+
from dataclasses import dataclass, field
|
|
5
|
+
from typing import Dict, List, Literal, Optional, Set
|
|
6
|
+
from urllib.parse import urlparse
|
|
7
|
+
|
|
8
|
+
import httpx
|
|
9
|
+
|
|
10
|
+
from agno.knowledge.chunking.semantic import SemanticChunking
|
|
11
|
+
from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
|
|
12
|
+
from agno.knowledge.document.base import Document
|
|
13
|
+
from agno.knowledge.reader.base import Reader
|
|
14
|
+
from agno.knowledge.types import ContentType
|
|
15
|
+
from agno.utils.log import log_debug, logger
|
|
16
|
+
|
|
17
|
+
try:
|
|
18
|
+
from bs4 import BeautifulSoup, Tag # noqa: F401
|
|
19
|
+
except ImportError:
|
|
20
|
+
raise ImportError("The `bs4` package is not installed. Please install it via `pip install beautifulsoup4`.")
|
|
21
|
+
|
|
22
|
+
try:
|
|
23
|
+
from ddgs import DDGS
|
|
24
|
+
except ImportError:
|
|
25
|
+
raise ImportError("The `ddgs` package is not installed. Please install it via `pip install ddgs`.")
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass
class WebSearchReader(Reader):
    """Reader that uses web search to find content for a given query.

    A query is sent to the configured search engine, each result URL is
    fetched, HTML is reduced to plain text, and the pages are returned as
    ``Document`` objects (chunked when ``self.chunk`` is truthy).

    NOTE(review): ``max_results``, ``chunk``, ``chunk_document`` and
    ``chunk_documents_async`` are not defined in this class; presumably they
    come from the ``Reader`` base class - confirm.
    """

    search_timeout: int = 10  # Timeout passed to the DuckDuckGo client

    request_timeout: int = 30  # Timeout for fetching each result page
    delay_between_requests: float = 2.0  # Increased default delay
    max_retries: int = 3  # Attempts per page fetch (sync path)
    user_agent: str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"

    # Search engine configuration
    search_engine: Literal["duckduckgo", "google"] = "duckduckgo"
    search_delay: float = 3.0  # Delay between search requests
    max_search_retries: int = 2  # Retries for search operations

    # Rate limiting
    rate_limit_delay: float = 5.0  # Delay when rate limited
    exponential_backoff: bool = True

    # Internal state
    _visited_urls: Set[str] = field(default_factory=set)
    _last_search_time: float = field(default=0.0, init=False)

    # Override default chunking strategy
    # NOTE(review): this instance is created once at class-definition time and
    # is shared by every reader that does not pass its own strategy.
    chunking_strategy: Optional[ChunkingStrategy] = SemanticChunking()

    @classmethod
    def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
        """Get the list of supported chunking strategies for Web Search readers."""
        return [
            ChunkingStrategyType.AGENTIC_CHUNKER,
            ChunkingStrategyType.DOCUMENT_CHUNKER,
            ChunkingStrategyType.RECURSIVE_CHUNKER,
            ChunkingStrategyType.SEMANTIC_CHUNKER,
            ChunkingStrategyType.FIXED_SIZE_CHUNKER,
        ]

    @classmethod
    def get_supported_content_types(cls) -> List[ContentType]:
        """Get the list of content types this reader accepts (topics only)."""
        return [ContentType.TOPIC]

    def _respect_rate_limits(self):
        """Sleep so consecutive searches are at least ``search_delay`` seconds apart."""
        current_time = time.time()
        time_since_last_search = current_time - self._last_search_time

        if time_since_last_search < self.search_delay:
            sleep_time = self.search_delay - time_since_last_search
            log_debug(f"Rate limiting: sleeping for {sleep_time:.2f} seconds")
            time.sleep(sleep_time)

        self._last_search_time = time.time()

    def _perform_duckduckgo_search(self, query: str) -> List[Dict[str, str]]:
        """Perform web search using DuckDuckGo with rate limiting.

        Returns:
            A list of ``{"title", "url", "description"}`` dicts, or an empty
            list when every attempt failed.
        """
        log_debug(f"Performing DuckDuckGo search for: {query}")

        for attempt in range(self.max_search_retries):
            try:
                self._respect_rate_limits()

                ddgs = DDGS(timeout=self.search_timeout)
                search_results = ddgs.text(query=query, max_results=self.max_results)

                # ddgs text results expose the page address under "href";
                # "link" is kept as a fallback for result dicts that use it.
                results = [
                    {
                        "title": result.get("title", ""),
                        "url": result.get("href") or result.get("link", ""),
                        "description": result.get("body", ""),
                    }
                    for result in (search_results or [])
                ]

                log_debug(f"Found {len(results)} search results")
                return results

            except Exception as e:
                logger.warning(f"DuckDuckGo search attempt {attempt + 1} failed: {e}")

                # Nothing left to retry: report and stop without sleeping.
                if attempt >= self.max_search_retries - 1:
                    logger.error(f"All DuckDuckGo search attempts failed: {e}")
                    return []

                if "rate limit" in str(e).lower() or "429" in str(e):
                    # Rate limited - wait longer
                    wait_time = (
                        self.rate_limit_delay * (2**attempt) if self.exponential_backoff else self.rate_limit_delay
                    )
                    logger.info(f"Rate limited, waiting {wait_time} seconds before retry")
                    time.sleep(wait_time)
                else:
                    # Other error - shorter wait
                    time.sleep(self.search_delay)

        return []

    def _perform_google_search(self, query: str) -> List[Dict[str, str]]:
        """Perform web search using Google (requires googlesearch-python).

        Returns:
            A list of ``{"title", "url", "description"}`` dicts, or an empty
            list when the package is missing or every attempt failed.
        """
        log_debug(f"Performing Google search for: {query}")

        try:
            from googlesearch import search
        except ImportError:
            logger.error("Google search requires 'googlesearch-python'. Install with: pip install googlesearch-python")
            return []

        for attempt in range(self.max_search_retries):
            try:
                self._respect_rate_limits()

                results = []
                # advanced=True yields result objects carrying title, url and
                # description; without it only bare URL strings are returned.
                search_results = search(query, num_results=self.max_results, advanced=True)

                for result in search_results:
                    if isinstance(result, str):
                        # Bare URL result: no title/description available.
                        results.append({"title": "", "url": result, "description": ""})
                        continue
                    results.append(
                        {
                            "title": getattr(result, "title", ""),
                            "url": getattr(result, "url", ""),
                            "description": getattr(result, "description", ""),
                        }
                    )

                log_debug(f"Found {len(results)} Google search results")
                return results

            except Exception as e:
                logger.warning(f"Google search attempt {attempt + 1} failed: {e}")
                if attempt < self.max_search_retries - 1:
                    time.sleep(self.search_delay)
                else:
                    logger.error(f"All Google search attempts failed: {e}")
                    return []

        return []

    def _perform_web_search(self, query: str) -> List[Dict[str, str]]:
        """Perform web search using the configured search engine."""
        if self.search_engine == "duckduckgo":
            return self._perform_duckduckgo_search(query)
        elif self.search_engine == "google":
            return self._perform_google_search(query)
        else:
            logger.error(f"Unsupported search engine: {self.search_engine}")
            return []

    def _is_valid_url(self, url: str) -> bool:
        """Check if URL is an http(s) URL with a host that was not already visited."""
        try:
            parsed = urlparse(url)
            return bool(parsed.scheme in ["http", "https"] and parsed.netloc and url not in self._visited_urls)
        except Exception:
            return False

    def _extract_text_from_html(self, html_content: str, url: str) -> str:
        """Extract clean text content from HTML.

        Args:
            html_content: The raw HTML.
            url: Source URL, used only in the warning message.

        Returns:
            The whitespace-normalised text, or ``html_content`` unchanged when
            parsing fails.
        """
        try:
            soup = BeautifulSoup(html_content, "html.parser")

            # Remove script and style elements
            for script in soup(["script", "style"]):
                script.decompose()

            # Get text content
            text = soup.get_text()

            # Clean up whitespace: drop empty pieces and join with single spaces
            lines = (line.strip() for line in text.splitlines())
            chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
            text = " ".join(chunk for chunk in chunks if chunk)

            return text

        except Exception as e:
            logger.warning(f"Error extracting text from {url}: {e}")
            return html_content

    def _fetch_url_content(self, url: str) -> Optional[str]:
        """Fetch content from a URL with retry logic.

        Returns:
            Extracted text for HTML responses, the raw body for anything else,
            or ``None`` when all ``max_retries`` attempts failed.
        """
        headers = {"User-Agent": self.user_agent}

        for attempt in range(self.max_retries):
            try:
                response = httpx.get(url, headers=headers, timeout=self.request_timeout, follow_redirects=True)
                response.raise_for_status()

                # Check if it's HTML content
                content_type = response.headers.get("content-type", "").lower()
                if "text/html" in content_type:
                    return self._extract_text_from_html(response.text, url)

                # For non-HTML content, return as-is
                return response.text

            except Exception as e:
                logger.warning(f"Attempt {attempt + 1} failed for {url}: {e}")
                if attempt < self.max_retries - 1:
                    time.sleep(random.uniform(1, 3))  # Random delay between retries

        logger.error(f"Failed to fetch content from {url} after {self.max_retries} attempts")
        return None

    def _create_document_from_url(self, url: str, content: str, search_result: Dict[str, str]) -> Document:
        """Create a Document object from URL content and search result metadata."""
        # Use the URL as the document ID
        doc_id = url

        # Use the search result title as the document name. The search helpers
        # always set "title" (possibly to ""), so fall back on emptiness, not
        # on a missing key.
        doc_name = search_result.get("title") or urlparse(url).netloc

        # Create metadata with search information
        meta_data = {
            "url": url,
            "search_title": search_result.get("title", ""),
            "search_description": search_result.get("description", ""),
            "source": "web_search",
            "search_engine": self.search_engine,
        }

        return Document(id=doc_id, name=doc_name, content=content, meta_data=meta_data)

    def read(self, query: str) -> List[Document]:
        """Read content for a given query by performing web search and fetching content.

        Args:
            query: The search query.

        Returns:
            Documents (or chunks) built from the fetched result pages; empty
            when the search produced nothing.

        Raises:
            ValueError: If ``query`` is empty.
        """
        if not query:
            raise ValueError("Query cannot be empty")

        log_debug(f"Starting web search reader for query: {query}")

        # Perform web search
        search_results = self._perform_web_search(query)
        if not search_results:
            logger.warning(f"No search results found for query: {query}")
            return []

        documents: List[Document] = []
        requests_made = 0

        for result in search_results:
            url = result.get("url", "")

            # Skip if URL is invalid or already visited
            if not self._is_valid_url(url):
                continue

            # Mark URL as visited
            self._visited_urls.add(url)

            # Add delay between requests to be respectful (not before the first)
            if requests_made > 0:
                time.sleep(self.delay_between_requests)
            requests_made += 1

            # Fetch content from URL
            content = self._fetch_url_content(url)
            if content is None:
                continue

            # Create document
            document = self._create_document_from_url(url, content, result)

            # Apply chunking if enabled
            if self.chunk:
                documents.extend(self.chunk_document(document))
            else:
                documents.append(document)

            # Stop if we've reached max_results
            # NOTE(review): with chunking enabled this counts chunks, not pages.
            if len(documents) >= self.max_results:
                break

        log_debug(f"Created {len(documents)} documents from web search")
        return documents

    async def async_read(self, query: str) -> List[Document]:
        """Asynchronously read content for a given query.

        Pages are fetched one after another, with ``delay_between_requests``
        seconds between requests.

        Args:
            query: The search query.

        Returns:
            Documents (or chunks) built from the fetched result pages; empty
            when the search produced nothing.

        Raises:
            ValueError: If ``query`` is empty.
        """
        if not query:
            raise ValueError("Query cannot be empty")

        log_debug(f"Starting async web search reader for query: {query}")

        # Perform web search (synchronous operation)
        # NOTE(review): this call may time.sleep() and therefore blocks the
        # event loop while the search runs.
        search_results = self._perform_web_search(query)
        if not search_results:
            logger.warning(f"No search results found for query: {query}")
            return []

        headers = {"User-Agent": self.user_agent}

        async def fetch_url_async(url: str, result: Dict[str, str]) -> Optional[Document]:
            """Fetch one result page; return None when the request fails."""
            try:
                async with httpx.AsyncClient(timeout=self.request_timeout) as client:
                    response = await client.get(url, headers=headers, follow_redirects=True)
                    response.raise_for_status()

                    content_type = response.headers.get("content-type", "").lower()
                    if "text/html" in content_type:
                        content = self._extract_text_from_html(response.text, url)
                    else:
                        content = response.text

                    return self._create_document_from_url(url, content, result)

            except Exception as e:
                logger.warning(f"Error fetching {url}: {e}")
                return None

        documents: List[Document] = []
        requests_made = 0

        # Coroutines are created only when they are about to be awaited, so an
        # early ``break`` never leaves an un-awaited coroutine behind.
        for result in search_results:
            url = result.get("url", "")

            # Skip if URL is invalid or already visited
            if not self._is_valid_url(url):
                continue

            # Mark URL as visited
            self._visited_urls.add(url)

            # Add delay between requests (except for the first one)
            if requests_made > 0:
                await asyncio.sleep(self.delay_between_requests)
            requests_made += 1

            doc = await fetch_url_async(url, result)
            if doc is None:
                continue

            # Apply chunking if enabled
            if self.chunk:
                documents.extend(await self.chunk_documents_async([doc]))
            else:
                documents.append(doc)

            # Stop if we've reached max_results
            if len(documents) >= self.max_results:
                break

        log_debug(f"Created {len(documents)} documents from async web search")
        return documents
|