agno 1.8.0__py3-none-any.whl → 2.0.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/__init__.py +8 -0
- agno/agent/__init__.py +19 -27
- agno/agent/agent.py +2781 -4126
- agno/api/agent.py +9 -65
- agno/api/api.py +5 -46
- agno/api/evals.py +6 -17
- agno/api/os.py +17 -0
- agno/api/routes.py +6 -41
- agno/api/schemas/__init__.py +9 -0
- agno/api/schemas/agent.py +5 -21
- agno/api/schemas/evals.py +7 -16
- agno/api/schemas/os.py +14 -0
- agno/api/schemas/team.py +5 -21
- agno/api/schemas/utils.py +21 -0
- agno/api/schemas/workflows.py +11 -7
- agno/api/settings.py +53 -0
- agno/api/team.py +9 -64
- agno/api/workflow.py +28 -0
- agno/cloud/aws/base.py +214 -0
- agno/cloud/aws/s3/__init__.py +2 -0
- agno/cloud/aws/s3/api_client.py +43 -0
- agno/cloud/aws/s3/bucket.py +195 -0
- agno/cloud/aws/s3/object.py +57 -0
- agno/db/__init__.py +24 -0
- agno/db/base.py +245 -0
- agno/db/dynamo/__init__.py +3 -0
- agno/db/dynamo/dynamo.py +1749 -0
- agno/db/dynamo/schemas.py +278 -0
- agno/db/dynamo/utils.py +684 -0
- agno/db/firestore/__init__.py +3 -0
- agno/db/firestore/firestore.py +1438 -0
- agno/db/firestore/schemas.py +130 -0
- agno/db/firestore/utils.py +278 -0
- agno/db/gcs_json/__init__.py +3 -0
- agno/db/gcs_json/gcs_json_db.py +1001 -0
- agno/db/gcs_json/utils.py +194 -0
- agno/db/in_memory/__init__.py +3 -0
- agno/db/in_memory/in_memory_db.py +888 -0
- agno/db/in_memory/utils.py +172 -0
- agno/db/json/__init__.py +3 -0
- agno/db/json/json_db.py +1051 -0
- agno/db/json/utils.py +196 -0
- agno/db/migrations/v1_to_v2.py +162 -0
- agno/db/mongo/__init__.py +3 -0
- agno/db/mongo/mongo.py +1417 -0
- agno/db/mongo/schemas.py +77 -0
- agno/db/mongo/utils.py +204 -0
- agno/db/mysql/__init__.py +3 -0
- agno/db/mysql/mysql.py +1719 -0
- agno/db/mysql/schemas.py +124 -0
- agno/db/mysql/utils.py +298 -0
- agno/db/postgres/__init__.py +3 -0
- agno/db/postgres/postgres.py +1720 -0
- agno/db/postgres/schemas.py +124 -0
- agno/db/postgres/utils.py +281 -0
- agno/db/redis/__init__.py +3 -0
- agno/db/redis/redis.py +1371 -0
- agno/db/redis/schemas.py +109 -0
- agno/db/redis/utils.py +288 -0
- agno/db/schemas/__init__.py +3 -0
- agno/db/schemas/evals.py +33 -0
- agno/db/schemas/knowledge.py +40 -0
- agno/db/schemas/memory.py +46 -0
- agno/db/singlestore/__init__.py +3 -0
- agno/db/singlestore/schemas.py +116 -0
- agno/db/singlestore/singlestore.py +1722 -0
- agno/db/singlestore/utils.py +327 -0
- agno/db/sqlite/__init__.py +3 -0
- agno/db/sqlite/schemas.py +119 -0
- agno/db/sqlite/sqlite.py +1680 -0
- agno/db/sqlite/utils.py +269 -0
- agno/db/utils.py +88 -0
- agno/eval/__init__.py +14 -0
- agno/eval/accuracy.py +142 -43
- agno/eval/performance.py +88 -23
- agno/eval/reliability.py +73 -20
- agno/eval/utils.py +23 -13
- agno/integrations/discord/__init__.py +3 -0
- agno/{app → integrations}/discord/client.py +10 -10
- agno/knowledge/__init__.py +2 -2
- agno/{document → knowledge}/chunking/agentic.py +2 -2
- agno/{document → knowledge}/chunking/document.py +2 -2
- agno/{document → knowledge}/chunking/fixed.py +3 -3
- agno/{document → knowledge}/chunking/markdown.py +2 -2
- agno/{document → knowledge}/chunking/recursive.py +2 -2
- agno/{document → knowledge}/chunking/row.py +2 -2
- agno/knowledge/chunking/semantic.py +59 -0
- agno/knowledge/chunking/strategy.py +121 -0
- agno/knowledge/content.py +74 -0
- agno/knowledge/document/__init__.py +5 -0
- agno/{document → knowledge/document}/base.py +12 -2
- agno/knowledge/embedder/__init__.py +5 -0
- agno/{embedder → knowledge/embedder}/aws_bedrock.py +127 -1
- agno/{embedder → knowledge/embedder}/azure_openai.py +65 -1
- agno/{embedder → knowledge/embedder}/base.py +6 -0
- agno/{embedder → knowledge/embedder}/cohere.py +72 -1
- agno/{embedder → knowledge/embedder}/fastembed.py +17 -1
- agno/{embedder → knowledge/embedder}/fireworks.py +1 -1
- agno/{embedder → knowledge/embedder}/google.py +74 -1
- agno/{embedder → knowledge/embedder}/huggingface.py +36 -2
- agno/{embedder → knowledge/embedder}/jina.py +48 -2
- agno/knowledge/embedder/langdb.py +22 -0
- agno/knowledge/embedder/mistral.py +139 -0
- agno/{embedder → knowledge/embedder}/nebius.py +1 -1
- agno/{embedder → knowledge/embedder}/ollama.py +54 -3
- agno/knowledge/embedder/openai.py +223 -0
- agno/{embedder → knowledge/embedder}/sentence_transformer.py +16 -1
- agno/{embedder → knowledge/embedder}/together.py +1 -1
- agno/{embedder → knowledge/embedder}/voyageai.py +49 -1
- agno/knowledge/knowledge.py +1515 -0
- agno/knowledge/reader/__init__.py +7 -0
- agno/{document → knowledge}/reader/arxiv_reader.py +32 -4
- agno/knowledge/reader/base.py +88 -0
- agno/{document → knowledge}/reader/csv_reader.py +68 -15
- agno/knowledge/reader/docx_reader.py +83 -0
- agno/{document → knowledge}/reader/firecrawl_reader.py +42 -21
- agno/knowledge/reader/gcs_reader.py +67 -0
- agno/{document → knowledge}/reader/json_reader.py +30 -9
- agno/{document → knowledge}/reader/markdown_reader.py +36 -9
- agno/{document → knowledge}/reader/pdf_reader.py +79 -21
- agno/knowledge/reader/reader_factory.py +275 -0
- agno/knowledge/reader/s3_reader.py +171 -0
- agno/{document → knowledge}/reader/text_reader.py +31 -10
- agno/knowledge/reader/url_reader.py +84 -0
- agno/knowledge/reader/web_search_reader.py +389 -0
- agno/{document → knowledge}/reader/website_reader.py +37 -10
- agno/knowledge/reader/wikipedia_reader.py +59 -0
- agno/knowledge/reader/youtube_reader.py +78 -0
- agno/knowledge/remote_content/remote_content.py +88 -0
- agno/{reranker → knowledge/reranker}/base.py +1 -1
- agno/{reranker → knowledge/reranker}/cohere.py +2 -2
- agno/{reranker → knowledge/reranker}/infinity.py +2 -2
- agno/{reranker → knowledge/reranker}/sentence_transformer.py +2 -2
- agno/knowledge/types.py +30 -0
- agno/knowledge/utils.py +169 -0
- agno/media.py +2 -2
- agno/memory/__init__.py +2 -10
- agno/memory/manager.py +1003 -148
- agno/models/aimlapi/__init__.py +2 -2
- agno/models/aimlapi/aimlapi.py +6 -6
- agno/models/anthropic/claude.py +129 -82
- agno/models/aws/bedrock.py +107 -175
- agno/models/aws/claude.py +64 -18
- agno/models/azure/ai_foundry.py +73 -23
- agno/models/base.py +347 -287
- agno/models/cerebras/cerebras.py +84 -27
- agno/models/cohere/chat.py +106 -98
- agno/models/dashscope/dashscope.py +14 -5
- agno/models/google/gemini.py +123 -53
- agno/models/groq/groq.py +97 -35
- agno/models/huggingface/huggingface.py +92 -27
- agno/models/ibm/watsonx.py +72 -13
- agno/models/litellm/chat.py +85 -13
- agno/models/message.py +38 -144
- agno/models/meta/llama.py +85 -49
- agno/models/metrics.py +120 -0
- agno/models/mistral/mistral.py +90 -21
- agno/models/ollama/__init__.py +0 -2
- agno/models/ollama/chat.py +84 -46
- agno/models/openai/chat.py +135 -27
- agno/models/openai/responses.py +233 -115
- agno/models/perplexity/perplexity.py +26 -2
- agno/models/portkey/portkey.py +0 -7
- agno/models/response.py +14 -8
- agno/models/utils.py +20 -0
- agno/models/vercel/__init__.py +2 -2
- agno/models/vercel/v0.py +1 -1
- agno/models/vllm/__init__.py +2 -2
- agno/models/vllm/vllm.py +3 -3
- agno/models/xai/xai.py +10 -10
- agno/os/__init__.py +3 -0
- agno/os/app.py +393 -0
- agno/os/auth.py +47 -0
- agno/os/config.py +103 -0
- agno/os/interfaces/agui/__init__.py +3 -0
- agno/os/interfaces/agui/agui.py +31 -0
- agno/{app/agui/async_router.py → os/interfaces/agui/router.py} +16 -16
- agno/{app → os/interfaces}/agui/utils.py +65 -28
- agno/os/interfaces/base.py +21 -0
- agno/os/interfaces/slack/__init__.py +3 -0
- agno/{app/slack/async_router.py → os/interfaces/slack/router.py} +3 -5
- agno/os/interfaces/slack/slack.py +33 -0
- agno/os/interfaces/whatsapp/__init__.py +3 -0
- agno/{app/whatsapp/async_router.py → os/interfaces/whatsapp/router.py} +4 -7
- agno/os/interfaces/whatsapp/whatsapp.py +30 -0
- agno/os/router.py +843 -0
- agno/os/routers/__init__.py +3 -0
- agno/os/routers/evals/__init__.py +3 -0
- agno/os/routers/evals/evals.py +204 -0
- agno/os/routers/evals/schemas.py +142 -0
- agno/os/routers/evals/utils.py +161 -0
- agno/os/routers/knowledge/__init__.py +3 -0
- agno/os/routers/knowledge/knowledge.py +413 -0
- agno/os/routers/knowledge/schemas.py +118 -0
- agno/os/routers/memory/__init__.py +3 -0
- agno/os/routers/memory/memory.py +179 -0
- agno/os/routers/memory/schemas.py +58 -0
- agno/os/routers/metrics/__init__.py +3 -0
- agno/os/routers/metrics/metrics.py +58 -0
- agno/os/routers/metrics/schemas.py +47 -0
- agno/os/routers/session/__init__.py +3 -0
- agno/os/routers/session/session.py +163 -0
- agno/os/schema.py +892 -0
- agno/{app/playground → os}/settings.py +8 -15
- agno/os/utils.py +270 -0
- agno/reasoning/azure_ai_foundry.py +4 -4
- agno/reasoning/deepseek.py +4 -4
- agno/reasoning/default.py +6 -11
- agno/reasoning/groq.py +4 -4
- agno/reasoning/helpers.py +4 -6
- agno/reasoning/ollama.py +4 -4
- agno/reasoning/openai.py +4 -4
- agno/run/{response.py → agent.py} +144 -72
- agno/run/base.py +44 -58
- agno/run/cancel.py +83 -0
- agno/run/team.py +133 -77
- agno/run/workflow.py +537 -12
- agno/session/__init__.py +10 -0
- agno/session/agent.py +244 -0
- agno/session/summary.py +225 -0
- agno/session/team.py +262 -0
- agno/{storage/session/v2 → session}/workflow.py +47 -24
- agno/team/__init__.py +15 -16
- agno/team/team.py +2967 -4243
- agno/tools/agentql.py +14 -5
- agno/tools/airflow.py +9 -4
- agno/tools/api.py +7 -3
- agno/tools/apify.py +2 -46
- agno/tools/arxiv.py +8 -3
- agno/tools/aws_lambda.py +7 -5
- agno/tools/aws_ses.py +7 -1
- agno/tools/baidusearch.py +4 -1
- agno/tools/bitbucket.py +4 -4
- agno/tools/brandfetch.py +14 -11
- agno/tools/bravesearch.py +4 -1
- agno/tools/brightdata.py +42 -22
- agno/tools/browserbase.py +13 -4
- agno/tools/calcom.py +12 -10
- agno/tools/calculator.py +10 -27
- agno/tools/cartesia.py +18 -13
- agno/tools/{clickup_tool.py → clickup.py} +12 -25
- agno/tools/confluence.py +71 -18
- agno/tools/crawl4ai.py +7 -1
- agno/tools/csv_toolkit.py +9 -8
- agno/tools/dalle.py +18 -11
- agno/tools/daytona.py +13 -16
- agno/tools/decorator.py +6 -3
- agno/tools/desi_vocal.py +16 -7
- agno/tools/discord.py +11 -8
- agno/tools/docker.py +30 -42
- agno/tools/duckdb.py +34 -53
- agno/tools/duckduckgo.py +8 -7
- agno/tools/e2b.py +62 -62
- agno/tools/eleven_labs.py +35 -28
- agno/tools/email.py +4 -1
- agno/tools/evm.py +7 -1
- agno/tools/exa.py +19 -14
- agno/tools/fal.py +29 -29
- agno/tools/file.py +9 -8
- agno/tools/financial_datasets.py +25 -44
- agno/tools/firecrawl.py +22 -22
- agno/tools/function.py +68 -17
- agno/tools/giphy.py +22 -10
- agno/tools/github.py +48 -126
- agno/tools/gmail.py +46 -62
- agno/tools/google_bigquery.py +7 -6
- agno/tools/google_maps.py +11 -26
- agno/tools/googlesearch.py +7 -2
- agno/tools/googlesheets.py +21 -17
- agno/tools/hackernews.py +9 -5
- agno/tools/jina.py +5 -4
- agno/tools/jira.py +18 -9
- agno/tools/knowledge.py +31 -32
- agno/tools/linear.py +18 -33
- agno/tools/linkup.py +5 -1
- agno/tools/local_file_system.py +8 -5
- agno/tools/lumalab.py +31 -19
- agno/tools/mem0.py +18 -12
- agno/tools/memori.py +14 -10
- agno/tools/mlx_transcribe.py +3 -2
- agno/tools/models/azure_openai.py +32 -14
- agno/tools/models/gemini.py +58 -31
- agno/tools/models/groq.py +29 -20
- agno/tools/models/nebius.py +27 -11
- agno/tools/models_labs.py +39 -15
- agno/tools/moviepy_video.py +7 -6
- agno/tools/neo4j.py +134 -0
- agno/tools/newspaper.py +7 -2
- agno/tools/newspaper4k.py +8 -3
- agno/tools/openai.py +57 -26
- agno/tools/openbb.py +12 -11
- agno/tools/opencv.py +62 -46
- agno/tools/openweather.py +14 -12
- agno/tools/pandas.py +11 -3
- agno/tools/postgres.py +4 -12
- agno/tools/pubmed.py +4 -1
- agno/tools/python.py +9 -22
- agno/tools/reasoning.py +35 -27
- agno/tools/reddit.py +11 -26
- agno/tools/replicate.py +54 -41
- agno/tools/resend.py +4 -1
- agno/tools/scrapegraph.py +15 -14
- agno/tools/searxng.py +10 -23
- agno/tools/serpapi.py +6 -3
- agno/tools/serper.py +13 -4
- agno/tools/shell.py +9 -2
- agno/tools/slack.py +12 -11
- agno/tools/sleep.py +3 -2
- agno/tools/spider.py +24 -4
- agno/tools/sql.py +7 -6
- agno/tools/tavily.py +6 -4
- agno/tools/telegram.py +12 -4
- agno/tools/todoist.py +11 -31
- agno/tools/toolkit.py +1 -1
- agno/tools/trafilatura.py +22 -6
- agno/tools/trello.py +9 -22
- agno/tools/twilio.py +10 -3
- agno/tools/user_control_flow.py +6 -1
- agno/tools/valyu.py +34 -5
- agno/tools/visualization.py +19 -28
- agno/tools/webbrowser.py +4 -3
- agno/tools/webex.py +11 -7
- agno/tools/website.py +15 -46
- agno/tools/webtools.py +12 -4
- agno/tools/whatsapp.py +5 -9
- agno/tools/wikipedia.py +20 -13
- agno/tools/x.py +14 -13
- agno/tools/yfinance.py +13 -40
- agno/tools/youtube.py +26 -20
- agno/tools/zendesk.py +7 -2
- agno/tools/zep.py +10 -7
- agno/tools/zoom.py +10 -9
- agno/utils/common.py +1 -19
- agno/utils/events.py +95 -118
- agno/utils/knowledge.py +29 -0
- agno/utils/location.py +2 -2
- agno/utils/log.py +2 -2
- agno/utils/mcp.py +11 -5
- agno/utils/media.py +39 -0
- agno/utils/message.py +12 -1
- agno/utils/models/claude.py +6 -4
- agno/utils/models/mistral.py +8 -7
- agno/utils/models/schema_utils.py +3 -3
- agno/utils/pprint.py +33 -32
- agno/utils/print_response/agent.py +779 -0
- agno/utils/print_response/team.py +1565 -0
- agno/utils/print_response/workflow.py +1451 -0
- agno/utils/prompts.py +14 -14
- agno/utils/reasoning.py +87 -0
- agno/utils/response.py +42 -42
- agno/utils/string.py +8 -22
- agno/utils/team.py +50 -0
- agno/utils/timer.py +2 -2
- agno/vectordb/base.py +33 -21
- agno/vectordb/cassandra/cassandra.py +287 -23
- agno/vectordb/chroma/chromadb.py +482 -59
- agno/vectordb/clickhouse/clickhousedb.py +270 -63
- agno/vectordb/couchbase/couchbase.py +309 -29
- agno/vectordb/lancedb/lance_db.py +360 -21
- agno/vectordb/langchaindb/__init__.py +5 -0
- agno/vectordb/langchaindb/langchaindb.py +145 -0
- agno/vectordb/lightrag/__init__.py +5 -0
- agno/vectordb/lightrag/lightrag.py +374 -0
- agno/vectordb/llamaindex/llamaindexdb.py +127 -0
- agno/vectordb/milvus/milvus.py +242 -32
- agno/vectordb/mongodb/mongodb.py +200 -24
- agno/vectordb/pgvector/pgvector.py +319 -37
- agno/vectordb/pineconedb/pineconedb.py +221 -27
- agno/vectordb/qdrant/qdrant.py +356 -14
- agno/vectordb/singlestore/singlestore.py +286 -29
- agno/vectordb/surrealdb/surrealdb.py +187 -7
- agno/vectordb/upstashdb/upstashdb.py +342 -26
- agno/vectordb/weaviate/weaviate.py +227 -165
- agno/workflow/__init__.py +17 -13
- agno/workflow/{v2/condition.py → condition.py} +135 -32
- agno/workflow/{v2/loop.py → loop.py} +115 -28
- agno/workflow/{v2/parallel.py → parallel.py} +138 -108
- agno/workflow/{v2/router.py → router.py} +133 -32
- agno/workflow/{v2/step.py → step.py} +200 -42
- agno/workflow/{v2/steps.py → steps.py} +147 -66
- agno/workflow/types.py +482 -0
- agno/workflow/workflow.py +2394 -696
- agno-2.0.0a1.dist-info/METADATA +355 -0
- agno-2.0.0a1.dist-info/RECORD +514 -0
- agno/agent/metrics.py +0 -107
- agno/api/app.py +0 -35
- agno/api/playground.py +0 -92
- agno/api/schemas/app.py +0 -12
- agno/api/schemas/playground.py +0 -22
- agno/api/schemas/user.py +0 -35
- agno/api/schemas/workspace.py +0 -46
- agno/api/user.py +0 -160
- agno/api/workflows.py +0 -33
- agno/api/workspace.py +0 -175
- agno/app/agui/__init__.py +0 -3
- agno/app/agui/app.py +0 -17
- agno/app/agui/sync_router.py +0 -120
- agno/app/base.py +0 -186
- agno/app/discord/__init__.py +0 -3
- agno/app/fastapi/__init__.py +0 -3
- agno/app/fastapi/app.py +0 -107
- agno/app/fastapi/async_router.py +0 -457
- agno/app/fastapi/sync_router.py +0 -448
- agno/app/playground/app.py +0 -228
- agno/app/playground/async_router.py +0 -1050
- agno/app/playground/deploy.py +0 -249
- agno/app/playground/operator.py +0 -183
- agno/app/playground/schemas.py +0 -220
- agno/app/playground/serve.py +0 -55
- agno/app/playground/sync_router.py +0 -1042
- agno/app/playground/utils.py +0 -46
- agno/app/settings.py +0 -15
- agno/app/slack/__init__.py +0 -3
- agno/app/slack/app.py +0 -19
- agno/app/slack/sync_router.py +0 -92
- agno/app/utils.py +0 -54
- agno/app/whatsapp/__init__.py +0 -3
- agno/app/whatsapp/app.py +0 -15
- agno/app/whatsapp/sync_router.py +0 -197
- agno/cli/auth_server.py +0 -249
- agno/cli/config.py +0 -274
- agno/cli/console.py +0 -88
- agno/cli/credentials.py +0 -23
- agno/cli/entrypoint.py +0 -571
- agno/cli/operator.py +0 -357
- agno/cli/settings.py +0 -96
- agno/cli/ws/ws_cli.py +0 -817
- agno/constants.py +0 -13
- agno/document/__init__.py +0 -5
- agno/document/chunking/semantic.py +0 -45
- agno/document/chunking/strategy.py +0 -31
- agno/document/reader/__init__.py +0 -5
- agno/document/reader/base.py +0 -47
- agno/document/reader/docx_reader.py +0 -60
- agno/document/reader/gcs/pdf_reader.py +0 -44
- agno/document/reader/s3/pdf_reader.py +0 -59
- agno/document/reader/s3/text_reader.py +0 -63
- agno/document/reader/url_reader.py +0 -59
- agno/document/reader/youtube_reader.py +0 -58
- agno/embedder/__init__.py +0 -5
- agno/embedder/langdb.py +0 -80
- agno/embedder/mistral.py +0 -82
- agno/embedder/openai.py +0 -78
- agno/file/__init__.py +0 -5
- agno/file/file.py +0 -16
- agno/file/local/csv.py +0 -32
- agno/file/local/txt.py +0 -19
- agno/infra/app.py +0 -240
- agno/infra/base.py +0 -144
- agno/infra/context.py +0 -20
- agno/infra/db_app.py +0 -52
- agno/infra/resource.py +0 -205
- agno/infra/resources.py +0 -55
- agno/knowledge/agent.py +0 -698
- agno/knowledge/arxiv.py +0 -33
- agno/knowledge/combined.py +0 -36
- agno/knowledge/csv.py +0 -144
- agno/knowledge/csv_url.py +0 -124
- agno/knowledge/document.py +0 -223
- agno/knowledge/docx.py +0 -137
- agno/knowledge/firecrawl.py +0 -34
- agno/knowledge/gcs/__init__.py +0 -0
- agno/knowledge/gcs/base.py +0 -39
- agno/knowledge/gcs/pdf.py +0 -125
- agno/knowledge/json.py +0 -137
- agno/knowledge/langchain.py +0 -71
- agno/knowledge/light_rag.py +0 -273
- agno/knowledge/llamaindex.py +0 -66
- agno/knowledge/markdown.py +0 -154
- agno/knowledge/pdf.py +0 -164
- agno/knowledge/pdf_bytes.py +0 -42
- agno/knowledge/pdf_url.py +0 -148
- agno/knowledge/s3/__init__.py +0 -0
- agno/knowledge/s3/base.py +0 -64
- agno/knowledge/s3/pdf.py +0 -33
- agno/knowledge/s3/text.py +0 -34
- agno/knowledge/text.py +0 -141
- agno/knowledge/url.py +0 -46
- agno/knowledge/website.py +0 -179
- agno/knowledge/wikipedia.py +0 -32
- agno/knowledge/youtube.py +0 -35
- agno/memory/agent.py +0 -423
- agno/memory/classifier.py +0 -104
- agno/memory/db/__init__.py +0 -5
- agno/memory/db/base.py +0 -42
- agno/memory/db/mongodb.py +0 -189
- agno/memory/db/postgres.py +0 -203
- agno/memory/db/sqlite.py +0 -193
- agno/memory/memory.py +0 -22
- agno/memory/row.py +0 -36
- agno/memory/summarizer.py +0 -201
- agno/memory/summary.py +0 -19
- agno/memory/team.py +0 -415
- agno/memory/v2/__init__.py +0 -2
- agno/memory/v2/db/__init__.py +0 -1
- agno/memory/v2/db/base.py +0 -42
- agno/memory/v2/db/firestore.py +0 -339
- agno/memory/v2/db/mongodb.py +0 -196
- agno/memory/v2/db/postgres.py +0 -214
- agno/memory/v2/db/redis.py +0 -187
- agno/memory/v2/db/schema.py +0 -54
- agno/memory/v2/db/sqlite.py +0 -209
- agno/memory/v2/manager.py +0 -437
- agno/memory/v2/memory.py +0 -1097
- agno/memory/v2/schema.py +0 -55
- agno/memory/v2/summarizer.py +0 -215
- agno/memory/workflow.py +0 -38
- agno/models/ollama/tools.py +0 -430
- agno/models/qwen/__init__.py +0 -5
- agno/playground/__init__.py +0 -10
- agno/playground/deploy.py +0 -3
- agno/playground/playground.py +0 -3
- agno/playground/serve.py +0 -3
- agno/playground/settings.py +0 -3
- agno/reranker/__init__.py +0 -0
- agno/run/v2/__init__.py +0 -0
- agno/run/v2/workflow.py +0 -567
- agno/storage/__init__.py +0 -0
- agno/storage/agent/__init__.py +0 -0
- agno/storage/agent/dynamodb.py +0 -1
- agno/storage/agent/json.py +0 -1
- agno/storage/agent/mongodb.py +0 -1
- agno/storage/agent/postgres.py +0 -1
- agno/storage/agent/singlestore.py +0 -1
- agno/storage/agent/sqlite.py +0 -1
- agno/storage/agent/yaml.py +0 -1
- agno/storage/base.py +0 -60
- agno/storage/dynamodb.py +0 -673
- agno/storage/firestore.py +0 -297
- agno/storage/gcs_json.py +0 -261
- agno/storage/in_memory.py +0 -234
- agno/storage/json.py +0 -237
- agno/storage/mongodb.py +0 -328
- agno/storage/mysql.py +0 -685
- agno/storage/postgres.py +0 -682
- agno/storage/redis.py +0 -336
- agno/storage/session/__init__.py +0 -16
- agno/storage/session/agent.py +0 -64
- agno/storage/session/team.py +0 -63
- agno/storage/session/v2/__init__.py +0 -5
- agno/storage/session/workflow.py +0 -61
- agno/storage/singlestore.py +0 -606
- agno/storage/sqlite.py +0 -646
- agno/storage/workflow/__init__.py +0 -0
- agno/storage/workflow/mongodb.py +0 -1
- agno/storage/workflow/postgres.py +0 -1
- agno/storage/workflow/sqlite.py +0 -1
- agno/storage/yaml.py +0 -241
- agno/tools/thinking.py +0 -73
- agno/utils/defaults.py +0 -57
- agno/utils/filesystem.py +0 -39
- agno/utils/git.py +0 -52
- agno/utils/json_io.py +0 -30
- agno/utils/load_env.py +0 -19
- agno/utils/py_io.py +0 -19
- agno/utils/pyproject.py +0 -18
- agno/utils/resource_filter.py +0 -31
- agno/workflow/v2/__init__.py +0 -21
- agno/workflow/v2/types.py +0 -357
- agno/workflow/v2/workflow.py +0 -3312
- agno/workspace/__init__.py +0 -0
- agno/workspace/config.py +0 -325
- agno/workspace/enums.py +0 -6
- agno/workspace/helpers.py +0 -52
- agno/workspace/operator.py +0 -757
- agno/workspace/settings.py +0 -158
- agno-1.8.0.dist-info/METADATA +0 -979
- agno-1.8.0.dist-info/RECORD +0 -565
- agno-1.8.0.dist-info/entry_points.txt +0 -3
- /agno/{app → db/migrations}/__init__.py +0 -0
- /agno/{app/playground/__init__.py → db/schemas/metrics.py} +0 -0
- /agno/{cli → integrations}/__init__.py +0 -0
- /agno/{cli/ws → knowledge/chunking}/__init__.py +0 -0
- /agno/{document/chunking → knowledge/remote_content}/__init__.py +0 -0
- /agno/{document/reader/gcs → knowledge/reranker}/__init__.py +0 -0
- /agno/{document/reader/s3 → os/interfaces}/__init__.py +0 -0
- /agno/{app → os/interfaces}/slack/security.py +0 -0
- /agno/{app → os/interfaces}/whatsapp/security.py +0 -0
- /agno/{file/local → utils/print_response}/__init__.py +0 -0
- /agno/{infra → vectordb/llamaindex}/__init__.py +0 -0
- {agno-1.8.0.dist-info → agno-2.0.0a1.dist-info}/WHEEL +0 -0
- {agno-1.8.0.dist-info → agno-2.0.0a1.dist-info}/licenses/LICENSE +0 -0
- {agno-1.8.0.dist-info → agno-2.0.0a1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,389 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import random
|
|
3
|
+
import time
|
|
4
|
+
from dataclasses import dataclass, field
|
|
5
|
+
from typing import Dict, List, Literal, Optional, Set
|
|
6
|
+
from urllib.parse import urlparse
|
|
7
|
+
|
|
8
|
+
import httpx
|
|
9
|
+
|
|
10
|
+
from agno.knowledge.chunking.semantic import SemanticChunking
|
|
11
|
+
from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
|
|
12
|
+
from agno.knowledge.document.base import Document
|
|
13
|
+
from agno.knowledge.reader.base import Reader
|
|
14
|
+
from agno.knowledge.reader.url_reader import URLReader
|
|
15
|
+
from agno.knowledge.types import ContentType
|
|
16
|
+
from agno.utils.log import log_debug, logger
|
|
17
|
+
|
|
18
|
+
try:
|
|
19
|
+
from bs4 import BeautifulSoup, Tag # noqa: F401
|
|
20
|
+
except ImportError:
|
|
21
|
+
raise ImportError("The `bs4` package is not installed. Please install it via `pip install beautifulsoup4`.")
|
|
22
|
+
|
|
23
|
+
try:
|
|
24
|
+
from ddgs import DDGS
|
|
25
|
+
except ImportError:
|
|
26
|
+
raise ImportError("The `ddgs` package is not installed. Please install it via `pip install ddgs`.")
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass
|
|
30
|
+
class WebSearchReader(Reader):
|
|
31
|
+
"""Reader that uses web search to find content for a given query"""
|
|
32
|
+
|
|
33
|
+
search_timeout: int = 10
|
|
34
|
+
|
|
35
|
+
request_timeout: int = 30
|
|
36
|
+
delay_between_requests: float = 2.0 # Increased default delay
|
|
37
|
+
max_retries: int = 3
|
|
38
|
+
user_agent: str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
|
|
39
|
+
|
|
40
|
+
# Search engine configuration
|
|
41
|
+
search_engine: Literal["duckduckgo", "google"] = "duckduckgo"
|
|
42
|
+
search_delay: float = 3.0 # Delay between search requests
|
|
43
|
+
max_search_retries: int = 2 # Retries for search operations
|
|
44
|
+
|
|
45
|
+
# Rate limiting
|
|
46
|
+
rate_limit_delay: float = 5.0 # Delay when rate limited
|
|
47
|
+
exponential_backoff: bool = True
|
|
48
|
+
|
|
49
|
+
# Internal state
|
|
50
|
+
_visited_urls: Set[str] = field(default_factory=set)
|
|
51
|
+
_url_reader: Optional[URLReader] = None
|
|
52
|
+
_last_search_time: float = field(default=0.0, init=False)
|
|
53
|
+
|
|
54
|
+
# Override default chunking strategy
|
|
55
|
+
chunking_strategy: Optional[ChunkingStrategy] = SemanticChunking()
|
|
56
|
+
|
|
57
|
+
def __post_init__(self):
    """Attach a fresh ``URLReader`` to the instance once the dataclass fields are set."""
    url_reader = URLReader()
    self._url_reader = url_reader
|
|
60
|
+
|
|
61
|
+
@classmethod
def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:
    """Return the chunking strategy types that Web Search readers support."""
    strategy = ChunkingStrategyType
    supported = [
        strategy.AGENTIC_CHUNKING,
        strategy.DOCUMENT_CHUNKING,
        strategy.RECURSIVE_CHUNKING,
        strategy.SEMANTIC_CHUNKING,
        strategy.FIXED_SIZE_CHUNKING,
    ]
    return supported
|
|
71
|
+
|
|
72
|
+
@classmethod
def get_supported_content_types(self) -> List[ContentType]:
    """Return the content types this reader accepts: URL and plain text."""
    supported = [ContentType.URL, ContentType.TEXT]
    return supported
|
|
75
|
+
|
|
76
|
+
def _respect_rate_limits(self):
|
|
77
|
+
"""Ensure we don't exceed rate limits"""
|
|
78
|
+
current_time = time.time()
|
|
79
|
+
time_since_last_search = current_time - self._last_search_time
|
|
80
|
+
|
|
81
|
+
if time_since_last_search < self.search_delay:
|
|
82
|
+
sleep_time = self.search_delay - time_since_last_search
|
|
83
|
+
log_debug(f"Rate limiting: sleeping for {sleep_time:.2f} seconds")
|
|
84
|
+
time.sleep(sleep_time)
|
|
85
|
+
|
|
86
|
+
self._last_search_time = time.time()
|
|
87
|
+
|
|
88
|
+
def _perform_duckduckgo_search(self, query: str) -> List[Dict[str, str]]:
    """Search DuckDuckGo for ``query`` and return normalized result dicts.

    Up to ``max_search_retries`` attempts are made, and each attempt first
    waits on ``_respect_rate_limits``. Failures are logged and never raised.

    Args:
        query: The search query.

    Returns:
        A list of dicts with ``title``, ``url`` and ``description`` keys, or
        an empty list when every attempt failed.
    """
    log_debug(f"Performing DuckDuckGo search for: {query}")

    for attempt in range(self.max_search_retries):
        try:
            self._respect_rate_limits()

            ddgs = DDGS(timeout=self.search_timeout)
            search_results = ddgs.text(query=query, max_results=self.max_results)

            # Normalize to the title/url/description shape used by the rest of the reader.
            results = [
                {
                    "title": result.get("title", ""),
                    # ddgs text results expose the target URL as "href"; "link" is kept
                    # only as a fallback so the previous lookup still works if present.
                    "url": result.get("href") or result.get("link", ""),
                    "description": result.get("body", ""),
                }
                for result in search_results
            ]

            log_debug(f"Found {len(results)} search results")
            return results

        except Exception as e:
            logger.warning(f"DuckDuckGo search attempt {attempt + 1} failed: {e}")

            # Out of attempts: report and stop instead of sleeping for nothing.
            if attempt >= self.max_search_retries - 1:
                logger.error(f"All DuckDuckGo search attempts failed: {e}")
                return []

            message = str(e)
            lowered = message.lower()
            # "ratelimit" (no space) is also matched — presumably how ddgs spells it; TODO confirm.
            if "rate limit" in lowered or "ratelimit" in lowered or "429" in message:
                # Rate limited - wait longer
                wait_time = (
                    self.rate_limit_delay * (2**attempt) if self.exponential_backoff else self.rate_limit_delay
                )
                logger.info(f"Rate limited, waiting {wait_time} seconds before retry")
                time.sleep(wait_time)
            else:
                # Other error - shorter wait
                time.sleep(self.search_delay)

    # Only reached when max_search_retries is zero or negative.
    return []
|
|
129
|
+
|
|
130
|
+
def _perform_google_search(self, query: str) -> List[Dict[str, str]]:
    """Search Google for ``query`` via the optional ``googlesearch-python`` package.

    Up to ``max_search_retries`` attempts are made, and each attempt first
    waits on ``_respect_rate_limits``. Failures are logged and never raised.

    Args:
        query: The search query.

    Returns:
        A list of dicts with ``title``, ``url`` and ``description`` keys, or
        an empty list when the package is missing or every attempt failed.
    """
    log_debug(f"Performing Google search for: {query}")

    try:
        from googlesearch import search
    except ImportError:
        logger.error("Google search requires 'googlesearch-python'. Install with: pip install googlesearch-python")
        return []

    for attempt in range(self.max_search_retries):
        try:
            self._respect_rate_limits()

            results = []
            # `advanced=True` yields result objects carrying title/url/description;
            # the default mode yields bare URL strings. The former `stop=` keyword
            # is not accepted by googlesearch-python and raised TypeError on every call.
            search_results = search(query, num_results=self.max_results, advanced=True)

            for result in search_results:
                if isinstance(result, str):
                    # Defensive: a bare URL string carries no title or description.
                    results.append({"title": "", "url": result, "description": ""})
                    continue
                results.append(
                    {
                        "title": getattr(result, "title", "") or "",
                        "url": getattr(result, "url", "") or "",
                        "description": getattr(result, "description", "") or "",
                    }
                )

            log_debug(f"Found {len(results)} Google search results")
            return results

        except Exception as e:
            logger.warning(f"Google search attempt {attempt + 1} failed: {e}")
            if attempt < self.max_search_retries - 1:
                time.sleep(self.search_delay)
            else:
                logger.error(f"All Google search attempts failed: {e}")
                return []

    # Only reached when max_search_retries is zero or negative.
    return []
|
|
168
|
+
|
|
169
|
+
def _perform_web_search(self, query: str) -> List[Dict[str, str]]:
|
|
170
|
+
"""Perform web search using the configured search engine"""
|
|
171
|
+
if self.search_engine == "duckduckgo":
|
|
172
|
+
return self._perform_duckduckgo_search(query)
|
|
173
|
+
elif self.search_engine == "google":
|
|
174
|
+
return self._perform_google_search(query)
|
|
175
|
+
else:
|
|
176
|
+
logger.error(f"Unsupported search engine: {self.search_engine}")
|
|
177
|
+
return []
|
|
178
|
+
|
|
179
|
+
def _is_valid_url(self, url: str) -> bool:
|
|
180
|
+
"""Check if URL is valid and not already visited"""
|
|
181
|
+
try:
|
|
182
|
+
parsed = urlparse(url)
|
|
183
|
+
return bool(parsed.scheme in ["http", "https"] and parsed.netloc and url not in self._visited_urls)
|
|
184
|
+
except Exception:
|
|
185
|
+
return False
|
|
186
|
+
|
|
187
|
+
def _extract_text_from_html(self, html_content: str, url: str) -> str:
    """Strip markup from ``html_content`` and return whitespace-normalized text.

    ``url`` is used only in the warning logged when extraction fails; in that
    case the original ``html_content`` is returned unchanged.
    """
    try:
        document = BeautifulSoup(html_content, "html.parser")

        # Script and style bodies are not readable content; drop them first.
        for element in document(["script", "style"]):
            element.decompose()

        raw_text = document.get_text()

        # Keep every non-empty, space-separated piece of every line.
        pieces = []
        for raw_line in raw_text.splitlines():
            for piece in raw_line.strip().split(" "):
                piece = piece.strip()
                if piece:
                    pieces.append(piece)

        return " ".join(pieces)

    except Exception as e:
        logger.warning(f"Error extracting text from {url}: {e}")
        return html_content
|
|
209
|
+
|
|
210
|
+
def _fetch_url_content(self, url: str) -> Optional[str]:
    """Download ``url`` and return its text, trying up to ``self.max_retries`` times.

    Responses whose ``content-type`` header contains ``text/html`` are passed
    through ``_extract_text_from_html``; any other body is returned as-is.

    Args:
        url: The address to fetch.

    Returns:
        The (possibly cleaned) response text, or ``None`` once every attempt
        has failed.
    """
    request_headers = {"User-Agent": self.user_agent}

    for attempt in range(self.max_retries):
        try:
            response = httpx.get(
                url,
                headers=request_headers,
                timeout=self.request_timeout,
                follow_redirects=True,
            )
            response.raise_for_status()

            is_html = "text/html" in response.headers.get("content-type", "").lower()
            body = response.text
            return self._extract_text_from_html(body, url) if is_html else body

        except Exception as e:
            logger.warning(f"Attempt {attempt + 1} failed for {url}: {e}")
            is_last_attempt = attempt >= self.max_retries - 1
            if not is_last_attempt:
                # Random back-off so retries are not fired in lock-step.
                time.sleep(random.uniform(1, 3))

    logger.error(f"Failed to fetch content from {url} after {self.max_retries} attempts")
    return None
|
|
235
|
+
|
|
236
|
+
def _create_document_from_url(self, url: str, content: str, search_result: Dict[str, str]) -> Document:
    """Create a Document object from URL content and search result metadata.

    Args:
        url: The page URL; also used as the document id.
        content: The text fetched from ``url``.
        search_result: The search hit for this URL; its ``title`` and
            ``description`` entries are copied into the metadata when present.

    Returns:
        A ``Document`` named after the search title, or after the URL's host
        when the title is missing or empty.
    """
    title = search_result.get("title", "")

    # ``dict.get(key, default)`` only falls back when the key is absent, so an
    # empty (or None) title would yield an unnamed document. ``or`` covers both.
    doc_name = title or urlparse(url).netloc

    meta_data = {
        "url": url,
        "search_title": title,
        "search_description": search_result.get("description", ""),
        "source": "web_search",
        "search_engine": self.search_engine,
    }

    return Document(id=url, name=doc_name, content=content, meta_data=meta_data)
|
|
254
|
+
|
|
255
|
+
def read(self, query: str) -> List[Document]:
    """Read content for a given query by performing web search and fetching content.

    Args:
        query: The search string; must be non-empty.

    Returns:
        The documents built from the fetched pages (chunked when
        ``self.chunk`` is set). Empty when the search returns nothing.
        Collection stops once at least ``self.max_results`` documents exist.

    Raises:
        ValueError: If ``query`` is empty.
    """
    if not query:
        raise ValueError("Query cannot be empty")

    log_debug(f"Starting web search reader for query: {query}")

    # Perform web search
    search_results = self._perform_web_search(query)
    if not search_results:
        logger.warning(f"No search results found for query: {query}")
        return []

    documents: List[Document] = []
    requests_made = 0

    for result in search_results:
        url = result.get("url", "")

        # Skip if URL is invalid or already visited
        if not self._is_valid_url(url):
            continue

        # Mark URL as visited
        self._visited_urls.add(url)

        # Pause before every request after the first. Counting requests rather
        # than collected documents keeps the delay in place when earlier
        # fetches failed and produced no document.
        if requests_made > 0:
            time.sleep(self.delay_between_requests)
        requests_made += 1

        # Fetch content from URL
        content = self._fetch_url_content(url)
        if content is None:
            continue

        document = self._create_document_from_url(url, content, result)

        # Apply chunking if enabled
        if self.chunk:
            documents.extend(self.chunk_document(document))
        else:
            documents.append(document)

        # Stop if we've reached max_results
        if len(documents) >= self.max_results:
            break

    log_debug(f"Created {len(documents)} documents from web search")
    return documents
|
|
305
|
+
|
|
306
|
+
async def async_read(self, query: str) -> List[Document]:
    """Asynchronously read content for a given query.

    The web search itself is a plain synchronous call. Each result URL is then
    fetched one after another, waiting ``self.delay_between_requests`` seconds
    before every result except the first.

    Args:
        query: The search string; must be non-empty.

    Returns:
        The documents built from the fetched pages (chunked when
        ``self.chunk`` is set). Empty when the search returns nothing.
        Collection stops once at least ``self.max_results`` documents exist.

    Raises:
        ValueError: If ``query`` is empty.
    """
    if not query:
        raise ValueError("Query cannot be empty")

    log_debug(f"Starting async web search reader for query: {query}")

    # Perform web search (synchronous operation)
    search_results = self._perform_web_search(query)
    if not search_results:
        logger.warning(f"No search results found for query: {query}")
        return []

    async def fetch_url_async(result: Dict[str, str]) -> Optional[Document]:
        """Fetch one search result and return its Document, or None on skip/failure."""
        url = result.get("url", "")

        # Skip if URL is invalid or already visited
        if not self._is_valid_url(url):
            return None

        # Mark URL as visited
        self._visited_urls.add(url)

        try:
            # Use the URL reader for async fetching
            if self._url_reader:
                docs = await self._url_reader.async_read(url)
                if docs:
                    # Use the first document and add search metadata
                    doc = docs[0]
                    doc.meta_data.update(
                        {
                            "search_title": result.get("title", ""),
                            "search_description": result.get("description", ""),
                            "source": "web_search",
                            "search_engine": self.search_engine,
                        }
                    )
                    return doc

            # Fallback to manual async fetching
            headers = {"User-Agent": self.user_agent}
            async with httpx.AsyncClient(timeout=self.request_timeout) as client:
                response = await client.get(url, headers=headers, follow_redirects=True)
                response.raise_for_status()

                content_type = response.headers.get("content-type", "").lower()
                if "text/html" in content_type:
                    content = self._extract_text_from_html(response.text, url)
                else:
                    content = response.text

                return self._create_document_from_url(url, content, result)

        except Exception as e:
            logger.warning(f"Error fetching {url}: {e}")
            return None

    # Create each coroutine only when it is about to be awaited. Building them
    # all up front left the remaining ones un-awaited after the early ``break``
    # below, which triggers "coroutine ... was never awaited" warnings.
    documents: List[Document] = []
    for i, result in enumerate(search_results):
        if i > 0:  # Add delay between requests (except for the first one)
            await asyncio.sleep(self.delay_between_requests)

        doc = await fetch_url_async(result)
        if doc is None:
            continue

        # Apply chunking if enabled
        if self.chunk:
            documents.extend(await self.chunk_documents_async([doc]))
        else:
            documents.append(doc)

        # Stop if we've reached max_results
        if len(documents) >= self.max_results:
            break

    log_debug(f"Created {len(documents)} documents from async web search")
    return documents
|
|
@@ -7,8 +7,11 @@ from urllib.parse import urljoin, urlparse
|
|
|
7
7
|
|
|
8
8
|
import httpx
|
|
9
9
|
|
|
10
|
-
from agno.
|
|
11
|
-
from agno.
|
|
10
|
+
from agno.knowledge.chunking.semantic import SemanticChunking
|
|
11
|
+
from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
|
|
12
|
+
from agno.knowledge.document.base import Document
|
|
13
|
+
from agno.knowledge.reader.base import Reader
|
|
14
|
+
from agno.knowledge.types import ContentType
|
|
12
15
|
from agno.utils.log import log_debug, logger
|
|
13
16
|
|
|
14
17
|
try:
|
|
@@ -28,9 +31,15 @@ class WebsiteReader(Reader):
|
|
|
28
31
|
_urls_to_crawl: List[Tuple[str, int]] = field(default_factory=list)
|
|
29
32
|
|
|
30
33
|
def __init__(
|
|
31
|
-
self,
|
|
34
|
+
self,
|
|
35
|
+
chunking_strategy: Optional[ChunkingStrategy] = SemanticChunking(),
|
|
36
|
+
max_depth: int = 3,
|
|
37
|
+
max_links: int = 10,
|
|
38
|
+
timeout: int = 10,
|
|
39
|
+
proxy: Optional[str] = None,
|
|
40
|
+
**kwargs,
|
|
32
41
|
):
|
|
33
|
-
super().__init__(**kwargs)
|
|
42
|
+
super().__init__(chunking_strategy=chunking_strategy, **kwargs)
|
|
34
43
|
self.max_depth = max_depth
|
|
35
44
|
self.max_links = max_links
|
|
36
45
|
self.proxy = proxy
|
|
@@ -39,6 +48,21 @@ class WebsiteReader(Reader):
|
|
|
39
48
|
self._visited = set()
|
|
40
49
|
self._urls_to_crawl = []
|
|
41
50
|
|
|
51
|
+
@classmethod
def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
    """Get the list of supported chunking strategies for Website readers.

    Returns:
        The ``ChunkingStrategyType`` members this reader accepts.
    """
    # First parameter is named ``cls`` because this is a classmethod and
    # receives the class, not an instance.
    return [
        ChunkingStrategyType.AGENTIC_CHUNKING,
        ChunkingStrategyType.DOCUMENT_CHUNKING,
        ChunkingStrategyType.RECURSIVE_CHUNKING,
        ChunkingStrategyType.SEMANTIC_CHUNKING,
        ChunkingStrategyType.FIXED_SIZE_CHUNKING,
    ]
|
|
61
|
+
|
|
62
|
+
@classmethod
def get_supported_content_types(cls) -> List[ContentType]:
    """Get the list of content types this reader accepts (URLs only)."""
    # First parameter is named ``cls`` because this is a classmethod and
    # receives the class, not an instance.
    return [ContentType.URL]
|
|
65
|
+
|
|
42
66
|
def delay(self, min_seconds=1, max_seconds=3):
|
|
43
67
|
"""
|
|
44
68
|
Introduce a random delay.
|
|
@@ -316,7 +340,7 @@ class WebsiteReader(Reader):
|
|
|
316
340
|
|
|
317
341
|
return crawler_result
|
|
318
342
|
|
|
319
|
-
def read(self, url: str) -> List[Document]:
|
|
343
|
+
def read(self, url: str, name: Optional[str] = None) -> List[Document]:
|
|
320
344
|
"""
|
|
321
345
|
Reads a website and returns a list of documents.
|
|
322
346
|
|
|
@@ -338,7 +362,7 @@ class WebsiteReader(Reader):
|
|
|
338
362
|
documents.extend(
|
|
339
363
|
self.chunk_document(
|
|
340
364
|
Document(
|
|
341
|
-
name=url,
|
|
365
|
+
name=name or url,
|
|
342
366
|
id=str(crawled_url),
|
|
343
367
|
meta_data={"url": str(crawled_url)},
|
|
344
368
|
content=crawled_content,
|
|
@@ -348,7 +372,7 @@ class WebsiteReader(Reader):
|
|
|
348
372
|
else:
|
|
349
373
|
documents.append(
|
|
350
374
|
Document(
|
|
351
|
-
name=url,
|
|
375
|
+
name=name or url,
|
|
352
376
|
id=str(crawled_url),
|
|
353
377
|
meta_data={"url": str(crawled_url)},
|
|
354
378
|
content=crawled_content,
|
|
@@ -359,7 +383,7 @@ class WebsiteReader(Reader):
|
|
|
359
383
|
logger.error(f"Error reading website {url}: {e}")
|
|
360
384
|
raise
|
|
361
385
|
|
|
362
|
-
async def async_read(self, url: str) -> List[Document]:
|
|
386
|
+
async def async_read(self, url: str, name: Optional[str] = None) -> List[Document]:
|
|
363
387
|
"""
|
|
364
388
|
Asynchronously reads a website and returns a list of documents.
|
|
365
389
|
|
|
@@ -380,13 +404,16 @@ class WebsiteReader(Reader):
|
|
|
380
404
|
async def process_document(crawled_url, crawled_content):
|
|
381
405
|
if self.chunk:
|
|
382
406
|
doc = Document(
|
|
383
|
-
name=
|
|
407
|
+
name=name or url,
|
|
408
|
+
id=str(crawled_url),
|
|
409
|
+
meta_data={"url": str(crawled_url)},
|
|
410
|
+
content=crawled_content,
|
|
384
411
|
)
|
|
385
412
|
return self.chunk_document(doc)
|
|
386
413
|
else:
|
|
387
414
|
return [
|
|
388
415
|
Document(
|
|
389
|
-
name=url,
|
|
416
|
+
name=name or url,
|
|
390
417
|
id=str(crawled_url),
|
|
391
418
|
meta_data={"url": str(crawled_url)},
|
|
392
419
|
content=crawled_content,
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
from typing import List, Optional
|
|
2
|
+
|
|
3
|
+
from agno.knowledge.chunking.fixed import FixedSizeChunking
|
|
4
|
+
from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
|
|
5
|
+
from agno.knowledge.document import Document
|
|
6
|
+
from agno.knowledge.reader.base import Reader
|
|
7
|
+
from agno.knowledge.types import ContentType
|
|
8
|
+
from agno.utils.log import log_debug, log_info
|
|
9
|
+
|
|
10
|
+
try:
|
|
11
|
+
import wikipedia # noqa: F401
|
|
12
|
+
except ImportError:
|
|
13
|
+
raise ImportError("The `wikipedia` package is not installed. Please install it via `pip install wikipedia`.")
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class WikipediaReader(Reader):
    """Reader that turns the Wikipedia summary of a topic into a Document."""

    # Forwarded to ``wikipedia.summary`` as its ``auto_suggest`` argument.
    auto_suggest: bool = True

    def __init__(
        self, chunking_strategy: Optional[ChunkingStrategy] = FixedSizeChunking(), auto_suggest: bool = True, **kwargs
    ):
        """Create the reader.

        Args:
            chunking_strategy: Strategy handed to the base ``Reader``.
            auto_suggest: Stored on the instance and forwarded to
                ``wikipedia.summary`` by ``read``.
            **kwargs: Passed through to the base ``Reader``.
        """
        # NOTE(review): the default ``FixedSizeChunking()`` is built once, when
        # this function is defined, so readers created without an explicit
        # strategy share that single instance.
        super().__init__(chunking_strategy=chunking_strategy, **kwargs)
        self.auto_suggest = auto_suggest

    @classmethod
    def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
        """Get the list of supported chunking strategies for Wikipedia readers."""
        return [
            ChunkingStrategyType.FIXED_SIZE_CHUNKING,
            ChunkingStrategyType.AGENTIC_CHUNKING,
            ChunkingStrategyType.DOCUMENT_CHUNKING,
            ChunkingStrategyType.RECURSIVE_CHUNKING,
            ChunkingStrategyType.SEMANTIC_CHUNKING,
        ]

    @classmethod
    def get_supported_content_types(cls) -> List[ContentType]:
        """Get the list of content types this reader accepts (topics only)."""
        return [ContentType.TOPIC]

    def read(self, topic: str) -> List[Document]:
        """Fetch the Wikipedia summary for ``topic``.

        Args:
            topic: The page title or search term to summarise.

        Returns:
            A single-element list holding a ``Document`` whose content is the
            summary, or an empty list when the page is not found or the
            summary is empty. Only ``PageError`` is handled here; any other
            exception raised by ``wikipedia.summary`` propagates to the caller.
        """
        log_debug(f"Reading Wikipedia topic: {topic}")
        try:
            summary = wikipedia.summary(topic, auto_suggest=self.auto_suggest)
        except wikipedia.exceptions.PageError:
            log_info("PageError: Page not found.")
            return []

        # Only create a Document if we actually got summary text.
        if not summary:
            return []

        return [
            Document(
                name=topic,
                meta_data={"topic": topic},
                content=summary,
            )
        ]
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from typing import List, Optional
|
|
3
|
+
|
|
4
|
+
from agno.knowledge.chunking.recursive import RecursiveChunking
|
|
5
|
+
from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
|
|
6
|
+
from agno.knowledge.document.base import Document
|
|
7
|
+
from agno.knowledge.reader.base import Reader
|
|
8
|
+
from agno.knowledge.types import ContentType
|
|
9
|
+
from agno.utils.log import log_debug, log_error, log_info
|
|
10
|
+
|
|
11
|
+
try:
|
|
12
|
+
from youtube_transcript_api import YouTubeTranscriptApi
|
|
13
|
+
except ImportError:
|
|
14
|
+
raise ImportError(
|
|
15
|
+
"`youtube_transcript_api` not installed. Please install it via `pip install youtube_transcript_api`."
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class YouTubeReader(Reader):
    """Reader for YouTube video transcripts"""

    def __init__(self, chunking_strategy: Optional[ChunkingStrategy] = RecursiveChunking(), **kwargs):
        """Create the reader.

        Args:
            chunking_strategy: Strategy handed to the base ``Reader``.
            **kwargs: Passed through to the base ``Reader``.
        """
        # NOTE(review): the default ``RecursiveChunking()`` is built once, when
        # this function is defined, so readers created without an explicit
        # strategy share that single instance.
        super().__init__(chunking_strategy=chunking_strategy, **kwargs)

    @classmethod
    def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
        """Get the list of supported chunking strategies for YouTube readers."""
        return [
            ChunkingStrategyType.RECURSIVE_CHUNKING,
            ChunkingStrategyType.AGENTIC_CHUNKING,
            ChunkingStrategyType.DOCUMENT_CHUNKING,
            ChunkingStrategyType.SEMANTIC_CHUNKING,
            ChunkingStrategyType.FIXED_SIZE_CHUNKING,
        ]

    @classmethod
    def get_supported_content_types(cls) -> List[ContentType]:
        """Get the list of content types this reader accepts."""
        return [ContentType.URL, ContentType.YOUTUBE]

    @staticmethod
    def _extract_video_id(url: str) -> str:
        """Pull the video id out of the common YouTube URL shapes.

        Handles ``watch?v=<id>``, ``youtu.be/<id>`` and the
        ``/embed/<id>``, ``/shorts/<id>``, ``/live/<id>``, ``/v/<id>`` paths.
        Anything else falls back to the previous ``split("v=")`` behaviour so
        inputs that worked before (including a bare id) keep working.
        """
        from urllib.parse import parse_qs, urlparse

        try:
            parsed = urlparse(url)
        except ValueError:
            parsed = None

        if parsed is not None:
            query_ids = parse_qs(parsed.query).get("v")
            if query_ids and query_ids[0]:
                return query_ids[0]

            host = (parsed.hostname or "").lower()
            segments = [segment for segment in parsed.path.split("/") if segment]
            if host in ("youtu.be", "www.youtu.be") and segments:
                return segments[0]
            if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
                return segments[1]

        # Legacy extraction, kept as the last resort.
        return url.split("v=")[-1].split("&")[0]

    def read(self, url: str, name: Optional[str] = None) -> List[Document]:
        """Fetch the transcript of the video at ``url`` as documents.

        Args:
            url: The YouTube video URL.
            name: Optional document name; defaults to ``youtube_<video_id>``.

        Returns:
            The transcript as one document, or its chunks when ``self.chunk``
            is set. Any error is logged and an empty list is returned.
        """
        try:
            # Extract video ID from URL
            video_id = self._extract_video_id(url)
            log_info(f"Reading transcript for video: {video_id}")

            # Get transcript
            log_debug(f"Fetching transcript for video: {video_id}")
            ytt_api = YouTubeTranscriptApi()
            transcript_data = ytt_api.fetch(video_id)

            # Combine transcript segments into full text
            transcript_text = " ".join(segment.text for segment in transcript_data)

            documents = [
                Document(
                    name=name or f"youtube_{video_id}",
                    id=f"youtube_{video_id}",
                    meta_data={"video_url": url, "video_id": video_id},
                    content=transcript_text.strip(),
                )
            ]

            if self.chunk:
                chunked_documents = []
                for document in documents:
                    chunked_documents.extend(self.chunk_document(document))
                return chunked_documents
            return documents

        except Exception as e:
            log_error(f"Error reading transcript for {url}: {e}")
            return []

    async def async_read(self, url: str, name: Optional[str] = None) -> List[Document]:
        """Run ``read`` in the default executor so it does not block the event loop.

        Args:
            url: The YouTube video URL.
            name: Optional document name, forwarded to ``read``.

        Returns:
            Whatever ``read`` returns for the same arguments.
        """
        # ``get_running_loop`` is the supported way to obtain the loop from
        # inside a coroutine; ``get_event_loop`` is deprecated for this use.
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, self.read, url, name)
|