agno 0.1.2__py3-none-any.whl → 2.3.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/__init__.py +8 -0
- agno/agent/__init__.py +44 -5
- agno/agent/agent.py +10531 -2975
- agno/api/agent.py +14 -53
- agno/api/api.py +7 -46
- agno/api/evals.py +22 -0
- agno/api/os.py +17 -0
- agno/api/routes.py +6 -25
- agno/api/schemas/__init__.py +9 -0
- agno/api/schemas/agent.py +6 -9
- agno/api/schemas/evals.py +16 -0
- agno/api/schemas/os.py +14 -0
- agno/api/schemas/team.py +10 -10
- agno/api/schemas/utils.py +21 -0
- agno/api/schemas/workflows.py +16 -0
- agno/api/settings.py +53 -0
- agno/api/team.py +22 -26
- agno/api/workflow.py +28 -0
- agno/cloud/aws/base.py +214 -0
- agno/cloud/aws/s3/__init__.py +2 -0
- agno/cloud/aws/s3/api_client.py +43 -0
- agno/cloud/aws/s3/bucket.py +195 -0
- agno/cloud/aws/s3/object.py +57 -0
- agno/compression/__init__.py +3 -0
- agno/compression/manager.py +247 -0
- agno/culture/__init__.py +3 -0
- agno/culture/manager.py +956 -0
- agno/db/__init__.py +24 -0
- agno/db/async_postgres/__init__.py +3 -0
- agno/db/base.py +946 -0
- agno/db/dynamo/__init__.py +3 -0
- agno/db/dynamo/dynamo.py +2781 -0
- agno/db/dynamo/schemas.py +442 -0
- agno/db/dynamo/utils.py +743 -0
- agno/db/firestore/__init__.py +3 -0
- agno/db/firestore/firestore.py +2379 -0
- agno/db/firestore/schemas.py +181 -0
- agno/db/firestore/utils.py +376 -0
- agno/db/gcs_json/__init__.py +3 -0
- agno/db/gcs_json/gcs_json_db.py +1791 -0
- agno/db/gcs_json/utils.py +228 -0
- agno/db/in_memory/__init__.py +3 -0
- agno/db/in_memory/in_memory_db.py +1312 -0
- agno/db/in_memory/utils.py +230 -0
- agno/db/json/__init__.py +3 -0
- agno/db/json/json_db.py +1777 -0
- agno/db/json/utils.py +230 -0
- agno/db/migrations/manager.py +199 -0
- agno/db/migrations/v1_to_v2.py +635 -0
- agno/db/migrations/versions/v2_3_0.py +938 -0
- agno/db/mongo/__init__.py +17 -0
- agno/db/mongo/async_mongo.py +2760 -0
- agno/db/mongo/mongo.py +2597 -0
- agno/db/mongo/schemas.py +119 -0
- agno/db/mongo/utils.py +276 -0
- agno/db/mysql/__init__.py +4 -0
- agno/db/mysql/async_mysql.py +2912 -0
- agno/db/mysql/mysql.py +2923 -0
- agno/db/mysql/schemas.py +186 -0
- agno/db/mysql/utils.py +488 -0
- agno/db/postgres/__init__.py +4 -0
- agno/db/postgres/async_postgres.py +2579 -0
- agno/db/postgres/postgres.py +2870 -0
- agno/db/postgres/schemas.py +187 -0
- agno/db/postgres/utils.py +442 -0
- agno/db/redis/__init__.py +3 -0
- agno/db/redis/redis.py +2141 -0
- agno/db/redis/schemas.py +159 -0
- agno/db/redis/utils.py +346 -0
- agno/db/schemas/__init__.py +4 -0
- agno/db/schemas/culture.py +120 -0
- agno/db/schemas/evals.py +34 -0
- agno/db/schemas/knowledge.py +40 -0
- agno/db/schemas/memory.py +61 -0
- agno/db/singlestore/__init__.py +3 -0
- agno/db/singlestore/schemas.py +179 -0
- agno/db/singlestore/singlestore.py +2877 -0
- agno/db/singlestore/utils.py +384 -0
- agno/db/sqlite/__init__.py +4 -0
- agno/db/sqlite/async_sqlite.py +2911 -0
- agno/db/sqlite/schemas.py +181 -0
- agno/db/sqlite/sqlite.py +2908 -0
- agno/db/sqlite/utils.py +429 -0
- agno/db/surrealdb/__init__.py +3 -0
- agno/db/surrealdb/metrics.py +292 -0
- agno/db/surrealdb/models.py +334 -0
- agno/db/surrealdb/queries.py +71 -0
- agno/db/surrealdb/surrealdb.py +1908 -0
- agno/db/surrealdb/utils.py +147 -0
- agno/db/utils.py +118 -0
- agno/eval/__init__.py +24 -0
- agno/eval/accuracy.py +666 -276
- agno/eval/agent_as_judge.py +861 -0
- agno/eval/base.py +29 -0
- agno/eval/performance.py +779 -0
- agno/eval/reliability.py +241 -62
- agno/eval/utils.py +120 -0
- agno/exceptions.py +143 -1
- agno/filters.py +354 -0
- agno/guardrails/__init__.py +6 -0
- agno/guardrails/base.py +19 -0
- agno/guardrails/openai.py +144 -0
- agno/guardrails/pii.py +94 -0
- agno/guardrails/prompt_injection.py +52 -0
- agno/hooks/__init__.py +3 -0
- agno/hooks/decorator.py +164 -0
- agno/integrations/discord/__init__.py +3 -0
- agno/integrations/discord/client.py +203 -0
- agno/knowledge/__init__.py +5 -1
- agno/{document → knowledge}/chunking/agentic.py +22 -14
- agno/{document → knowledge}/chunking/document.py +2 -2
- agno/{document → knowledge}/chunking/fixed.py +7 -6
- agno/knowledge/chunking/markdown.py +151 -0
- agno/{document → knowledge}/chunking/recursive.py +15 -3
- agno/knowledge/chunking/row.py +39 -0
- agno/knowledge/chunking/semantic.py +91 -0
- agno/knowledge/chunking/strategy.py +165 -0
- agno/knowledge/content.py +74 -0
- agno/knowledge/document/__init__.py +5 -0
- agno/{document → knowledge/document}/base.py +12 -2
- agno/knowledge/embedder/__init__.py +5 -0
- agno/knowledge/embedder/aws_bedrock.py +343 -0
- agno/knowledge/embedder/azure_openai.py +210 -0
- agno/{embedder → knowledge/embedder}/base.py +8 -0
- agno/knowledge/embedder/cohere.py +323 -0
- agno/knowledge/embedder/fastembed.py +62 -0
- agno/{embedder → knowledge/embedder}/fireworks.py +1 -1
- agno/knowledge/embedder/google.py +258 -0
- agno/knowledge/embedder/huggingface.py +94 -0
- agno/knowledge/embedder/jina.py +182 -0
- agno/knowledge/embedder/langdb.py +22 -0
- agno/knowledge/embedder/mistral.py +206 -0
- agno/knowledge/embedder/nebius.py +13 -0
- agno/knowledge/embedder/ollama.py +154 -0
- agno/knowledge/embedder/openai.py +195 -0
- agno/knowledge/embedder/sentence_transformer.py +63 -0
- agno/{embedder → knowledge/embedder}/together.py +1 -1
- agno/knowledge/embedder/vllm.py +262 -0
- agno/knowledge/embedder/voyageai.py +165 -0
- agno/knowledge/knowledge.py +3006 -0
- agno/knowledge/reader/__init__.py +7 -0
- agno/knowledge/reader/arxiv_reader.py +81 -0
- agno/knowledge/reader/base.py +95 -0
- agno/knowledge/reader/csv_reader.py +164 -0
- agno/knowledge/reader/docx_reader.py +82 -0
- agno/knowledge/reader/field_labeled_csv_reader.py +290 -0
- agno/knowledge/reader/firecrawl_reader.py +201 -0
- agno/knowledge/reader/json_reader.py +88 -0
- agno/knowledge/reader/markdown_reader.py +137 -0
- agno/knowledge/reader/pdf_reader.py +431 -0
- agno/knowledge/reader/pptx_reader.py +101 -0
- agno/knowledge/reader/reader_factory.py +313 -0
- agno/knowledge/reader/s3_reader.py +89 -0
- agno/knowledge/reader/tavily_reader.py +193 -0
- agno/knowledge/reader/text_reader.py +127 -0
- agno/knowledge/reader/web_search_reader.py +325 -0
- agno/knowledge/reader/website_reader.py +455 -0
- agno/knowledge/reader/wikipedia_reader.py +91 -0
- agno/knowledge/reader/youtube_reader.py +78 -0
- agno/knowledge/remote_content/remote_content.py +88 -0
- agno/knowledge/reranker/__init__.py +3 -0
- agno/{reranker → knowledge/reranker}/base.py +1 -1
- agno/{reranker → knowledge/reranker}/cohere.py +2 -2
- agno/knowledge/reranker/infinity.py +195 -0
- agno/knowledge/reranker/sentence_transformer.py +54 -0
- agno/knowledge/types.py +39 -0
- agno/knowledge/utils.py +234 -0
- agno/media.py +439 -95
- agno/memory/__init__.py +16 -3
- agno/memory/manager.py +1474 -123
- agno/memory/strategies/__init__.py +15 -0
- agno/memory/strategies/base.py +66 -0
- agno/memory/strategies/summarize.py +196 -0
- agno/memory/strategies/types.py +37 -0
- agno/models/aimlapi/__init__.py +5 -0
- agno/models/aimlapi/aimlapi.py +62 -0
- agno/models/anthropic/__init__.py +4 -0
- agno/models/anthropic/claude.py +960 -496
- agno/models/aws/__init__.py +15 -0
- agno/models/aws/bedrock.py +686 -451
- agno/models/aws/claude.py +190 -183
- agno/models/azure/__init__.py +18 -1
- agno/models/azure/ai_foundry.py +489 -0
- agno/models/azure/openai_chat.py +89 -40
- agno/models/base.py +2477 -550
- agno/models/cerebras/__init__.py +12 -0
- agno/models/cerebras/cerebras.py +565 -0
- agno/models/cerebras/cerebras_openai.py +131 -0
- agno/models/cohere/__init__.py +4 -0
- agno/models/cohere/chat.py +306 -492
- agno/models/cometapi/__init__.py +5 -0
- agno/models/cometapi/cometapi.py +74 -0
- agno/models/dashscope/__init__.py +5 -0
- agno/models/dashscope/dashscope.py +90 -0
- agno/models/deepinfra/__init__.py +5 -0
- agno/models/deepinfra/deepinfra.py +45 -0
- agno/models/deepseek/__init__.py +4 -0
- agno/models/deepseek/deepseek.py +110 -9
- agno/models/fireworks/__init__.py +4 -0
- agno/models/fireworks/fireworks.py +19 -22
- agno/models/google/__init__.py +3 -7
- agno/models/google/gemini.py +1717 -662
- agno/models/google/utils.py +22 -0
- agno/models/groq/__init__.py +4 -0
- agno/models/groq/groq.py +391 -666
- agno/models/huggingface/__init__.py +4 -0
- agno/models/huggingface/huggingface.py +266 -538
- agno/models/ibm/__init__.py +5 -0
- agno/models/ibm/watsonx.py +432 -0
- agno/models/internlm/__init__.py +3 -0
- agno/models/internlm/internlm.py +20 -3
- agno/models/langdb/__init__.py +1 -0
- agno/models/langdb/langdb.py +60 -0
- agno/models/litellm/__init__.py +14 -0
- agno/models/litellm/chat.py +503 -0
- agno/models/litellm/litellm_openai.py +42 -0
- agno/models/llama_cpp/__init__.py +5 -0
- agno/models/llama_cpp/llama_cpp.py +22 -0
- agno/models/lmstudio/__init__.py +5 -0
- agno/models/lmstudio/lmstudio.py +25 -0
- agno/models/message.py +361 -39
- agno/models/meta/__init__.py +12 -0
- agno/models/meta/llama.py +502 -0
- agno/models/meta/llama_openai.py +79 -0
- agno/models/metrics.py +120 -0
- agno/models/mistral/__init__.py +4 -0
- agno/models/mistral/mistral.py +293 -393
- agno/models/nebius/__init__.py +3 -0
- agno/models/nebius/nebius.py +53 -0
- agno/models/nexus/__init__.py +3 -0
- agno/models/nexus/nexus.py +22 -0
- agno/models/nvidia/__init__.py +4 -0
- agno/models/nvidia/nvidia.py +22 -3
- agno/models/ollama/__init__.py +4 -2
- agno/models/ollama/chat.py +257 -492
- agno/models/openai/__init__.py +7 -0
- agno/models/openai/chat.py +725 -770
- agno/models/openai/like.py +16 -2
- agno/models/openai/responses.py +1121 -0
- agno/models/openrouter/__init__.py +4 -0
- agno/models/openrouter/openrouter.py +62 -5
- agno/models/perplexity/__init__.py +5 -0
- agno/models/perplexity/perplexity.py +203 -0
- agno/models/portkey/__init__.py +3 -0
- agno/models/portkey/portkey.py +82 -0
- agno/models/requesty/__init__.py +5 -0
- agno/models/requesty/requesty.py +69 -0
- agno/models/response.py +177 -7
- agno/models/sambanova/__init__.py +4 -0
- agno/models/sambanova/sambanova.py +23 -4
- agno/models/siliconflow/__init__.py +5 -0
- agno/models/siliconflow/siliconflow.py +42 -0
- agno/models/together/__init__.py +4 -0
- agno/models/together/together.py +21 -164
- agno/models/utils.py +266 -0
- agno/models/vercel/__init__.py +3 -0
- agno/models/vercel/v0.py +43 -0
- agno/models/vertexai/__init__.py +0 -1
- agno/models/vertexai/claude.py +190 -0
- agno/models/vllm/__init__.py +3 -0
- agno/models/vllm/vllm.py +83 -0
- agno/models/xai/__init__.py +2 -0
- agno/models/xai/xai.py +111 -7
- agno/os/__init__.py +3 -0
- agno/os/app.py +1027 -0
- agno/os/auth.py +244 -0
- agno/os/config.py +126 -0
- agno/os/interfaces/__init__.py +1 -0
- agno/os/interfaces/a2a/__init__.py +3 -0
- agno/os/interfaces/a2a/a2a.py +42 -0
- agno/os/interfaces/a2a/router.py +249 -0
- agno/os/interfaces/a2a/utils.py +924 -0
- agno/os/interfaces/agui/__init__.py +3 -0
- agno/os/interfaces/agui/agui.py +47 -0
- agno/os/interfaces/agui/router.py +147 -0
- agno/os/interfaces/agui/utils.py +574 -0
- agno/os/interfaces/base.py +25 -0
- agno/os/interfaces/slack/__init__.py +3 -0
- agno/os/interfaces/slack/router.py +148 -0
- agno/os/interfaces/slack/security.py +30 -0
- agno/os/interfaces/slack/slack.py +47 -0
- agno/os/interfaces/whatsapp/__init__.py +3 -0
- agno/os/interfaces/whatsapp/router.py +210 -0
- agno/os/interfaces/whatsapp/security.py +55 -0
- agno/os/interfaces/whatsapp/whatsapp.py +36 -0
- agno/os/mcp.py +293 -0
- agno/os/middleware/__init__.py +9 -0
- agno/os/middleware/jwt.py +797 -0
- agno/os/router.py +258 -0
- agno/os/routers/__init__.py +3 -0
- agno/os/routers/agents/__init__.py +3 -0
- agno/os/routers/agents/router.py +599 -0
- agno/os/routers/agents/schema.py +261 -0
- agno/os/routers/evals/__init__.py +3 -0
- agno/os/routers/evals/evals.py +450 -0
- agno/os/routers/evals/schemas.py +174 -0
- agno/os/routers/evals/utils.py +231 -0
- agno/os/routers/health.py +31 -0
- agno/os/routers/home.py +52 -0
- agno/os/routers/knowledge/__init__.py +3 -0
- agno/os/routers/knowledge/knowledge.py +1008 -0
- agno/os/routers/knowledge/schemas.py +178 -0
- agno/os/routers/memory/__init__.py +3 -0
- agno/os/routers/memory/memory.py +661 -0
- agno/os/routers/memory/schemas.py +88 -0
- agno/os/routers/metrics/__init__.py +3 -0
- agno/os/routers/metrics/metrics.py +190 -0
- agno/os/routers/metrics/schemas.py +47 -0
- agno/os/routers/session/__init__.py +3 -0
- agno/os/routers/session/session.py +997 -0
- agno/os/routers/teams/__init__.py +3 -0
- agno/os/routers/teams/router.py +512 -0
- agno/os/routers/teams/schema.py +257 -0
- agno/os/routers/traces/__init__.py +3 -0
- agno/os/routers/traces/schemas.py +414 -0
- agno/os/routers/traces/traces.py +499 -0
- agno/os/routers/workflows/__init__.py +3 -0
- agno/os/routers/workflows/router.py +624 -0
- agno/os/routers/workflows/schema.py +75 -0
- agno/os/schema.py +534 -0
- agno/os/scopes.py +469 -0
- agno/{playground → os}/settings.py +7 -15
- agno/os/utils.py +973 -0
- agno/reasoning/anthropic.py +80 -0
- agno/reasoning/azure_ai_foundry.py +67 -0
- agno/reasoning/deepseek.py +63 -0
- agno/reasoning/default.py +97 -0
- agno/reasoning/gemini.py +73 -0
- agno/reasoning/groq.py +71 -0
- agno/reasoning/helpers.py +24 -1
- agno/reasoning/ollama.py +67 -0
- agno/reasoning/openai.py +86 -0
- agno/reasoning/step.py +2 -1
- agno/reasoning/vertexai.py +76 -0
- agno/run/__init__.py +6 -0
- agno/run/agent.py +822 -0
- agno/run/base.py +247 -0
- agno/run/cancel.py +81 -0
- agno/run/requirement.py +181 -0
- agno/run/team.py +767 -0
- agno/run/workflow.py +708 -0
- agno/session/__init__.py +10 -0
- agno/session/agent.py +260 -0
- agno/session/summary.py +265 -0
- agno/session/team.py +342 -0
- agno/session/workflow.py +501 -0
- agno/table.py +10 -0
- agno/team/__init__.py +37 -0
- agno/team/team.py +9536 -0
- agno/tools/__init__.py +7 -0
- agno/tools/agentql.py +120 -0
- agno/tools/airflow.py +22 -12
- agno/tools/api.py +122 -0
- agno/tools/apify.py +276 -83
- agno/tools/{arxiv_toolkit.py → arxiv.py} +20 -12
- agno/tools/aws_lambda.py +28 -7
- agno/tools/aws_ses.py +66 -0
- agno/tools/baidusearch.py +11 -4
- agno/tools/bitbucket.py +292 -0
- agno/tools/brandfetch.py +213 -0
- agno/tools/bravesearch.py +106 -0
- agno/tools/brightdata.py +367 -0
- agno/tools/browserbase.py +209 -0
- agno/tools/calcom.py +32 -23
- agno/tools/calculator.py +24 -37
- agno/tools/cartesia.py +187 -0
- agno/tools/{clickup_tool.py → clickup.py} +17 -28
- agno/tools/confluence.py +91 -26
- agno/tools/crawl4ai.py +139 -43
- agno/tools/csv_toolkit.py +28 -22
- agno/tools/dalle.py +36 -22
- agno/tools/daytona.py +475 -0
- agno/tools/decorator.py +169 -14
- agno/tools/desi_vocal.py +23 -11
- agno/tools/discord.py +32 -29
- agno/tools/docker.py +716 -0
- agno/tools/duckdb.py +76 -81
- agno/tools/duckduckgo.py +43 -40
- agno/tools/e2b.py +703 -0
- agno/tools/eleven_labs.py +65 -54
- agno/tools/email.py +13 -5
- agno/tools/evm.py +129 -0
- agno/tools/exa.py +324 -42
- agno/tools/fal.py +39 -35
- agno/tools/file.py +196 -30
- agno/tools/file_generation.py +356 -0
- agno/tools/financial_datasets.py +288 -0
- agno/tools/firecrawl.py +108 -33
- agno/tools/function.py +960 -122
- agno/tools/giphy.py +34 -12
- agno/tools/github.py +1294 -97
- agno/tools/gmail.py +922 -0
- agno/tools/google_bigquery.py +117 -0
- agno/tools/google_drive.py +271 -0
- agno/tools/google_maps.py +253 -0
- agno/tools/googlecalendar.py +607 -107
- agno/tools/googlesheets.py +377 -0
- agno/tools/hackernews.py +20 -12
- agno/tools/jina.py +24 -14
- agno/tools/jira.py +48 -19
- agno/tools/knowledge.py +218 -0
- agno/tools/linear.py +82 -43
- agno/tools/linkup.py +58 -0
- agno/tools/local_file_system.py +15 -7
- agno/tools/lumalab.py +41 -26
- agno/tools/mcp/__init__.py +10 -0
- agno/tools/mcp/mcp.py +331 -0
- agno/tools/mcp/multi_mcp.py +347 -0
- agno/tools/mcp/params.py +24 -0
- agno/tools/mcp_toolbox.py +284 -0
- agno/tools/mem0.py +193 -0
- agno/tools/memory.py +419 -0
- agno/tools/mlx_transcribe.py +11 -9
- agno/tools/models/azure_openai.py +190 -0
- agno/tools/models/gemini.py +203 -0
- agno/tools/models/groq.py +158 -0
- agno/tools/models/morph.py +186 -0
- agno/tools/models/nebius.py +124 -0
- agno/tools/models_labs.py +163 -82
- agno/tools/moviepy_video.py +18 -13
- agno/tools/nano_banana.py +151 -0
- agno/tools/neo4j.py +134 -0
- agno/tools/newspaper.py +15 -4
- agno/tools/newspaper4k.py +19 -6
- agno/tools/notion.py +204 -0
- agno/tools/openai.py +181 -17
- agno/tools/openbb.py +27 -20
- agno/tools/opencv.py +321 -0
- agno/tools/openweather.py +233 -0
- agno/tools/oxylabs.py +385 -0
- agno/tools/pandas.py +25 -15
- agno/tools/parallel.py +314 -0
- agno/tools/postgres.py +238 -185
- agno/tools/pubmed.py +125 -13
- agno/tools/python.py +48 -35
- agno/tools/reasoning.py +283 -0
- agno/tools/reddit.py +207 -29
- agno/tools/redshift.py +406 -0
- agno/tools/replicate.py +69 -26
- agno/tools/resend.py +11 -6
- agno/tools/scrapegraph.py +179 -19
- agno/tools/searxng.py +23 -31
- agno/tools/serpapi.py +15 -10
- agno/tools/serper.py +255 -0
- agno/tools/shell.py +23 -12
- agno/tools/shopify.py +1519 -0
- agno/tools/slack.py +56 -14
- agno/tools/sleep.py +8 -6
- agno/tools/spider.py +35 -11
- agno/tools/spotify.py +919 -0
- agno/tools/sql.py +34 -19
- agno/tools/tavily.py +158 -8
- agno/tools/telegram.py +18 -8
- agno/tools/todoist.py +218 -0
- agno/tools/toolkit.py +134 -9
- agno/tools/trafilatura.py +388 -0
- agno/tools/trello.py +25 -28
- agno/tools/twilio.py +18 -9
- agno/tools/user_control_flow.py +78 -0
- agno/tools/valyu.py +228 -0
- agno/tools/visualization.py +467 -0
- agno/tools/webbrowser.py +28 -0
- agno/tools/webex.py +76 -0
- agno/tools/website.py +23 -19
- agno/tools/webtools.py +45 -0
- agno/tools/whatsapp.py +286 -0
- agno/tools/wikipedia.py +28 -19
- agno/tools/workflow.py +285 -0
- agno/tools/{twitter.py → x.py} +142 -46
- agno/tools/yfinance.py +41 -39
- agno/tools/youtube.py +34 -17
- agno/tools/zendesk.py +15 -5
- agno/tools/zep.py +454 -0
- agno/tools/zoom.py +86 -37
- agno/tracing/__init__.py +12 -0
- agno/tracing/exporter.py +157 -0
- agno/tracing/schemas.py +276 -0
- agno/tracing/setup.py +111 -0
- agno/utils/agent.py +938 -0
- agno/utils/audio.py +37 -1
- agno/utils/certs.py +27 -0
- agno/utils/code_execution.py +11 -0
- agno/utils/common.py +103 -20
- agno/utils/cryptography.py +22 -0
- agno/utils/dttm.py +33 -0
- agno/utils/events.py +700 -0
- agno/utils/functions.py +107 -37
- agno/utils/gemini.py +426 -0
- agno/utils/hooks.py +171 -0
- agno/utils/http.py +185 -0
- agno/utils/json_schema.py +159 -37
- agno/utils/knowledge.py +36 -0
- agno/utils/location.py +19 -0
- agno/utils/log.py +221 -8
- agno/utils/mcp.py +214 -0
- agno/utils/media.py +335 -14
- agno/utils/merge_dict.py +22 -1
- agno/utils/message.py +77 -2
- agno/utils/models/ai_foundry.py +50 -0
- agno/utils/models/claude.py +373 -0
- agno/utils/models/cohere.py +94 -0
- agno/utils/models/llama.py +85 -0
- agno/utils/models/mistral.py +100 -0
- agno/utils/models/openai_responses.py +140 -0
- agno/utils/models/schema_utils.py +153 -0
- agno/utils/models/watsonx.py +41 -0
- agno/utils/openai.py +257 -0
- agno/utils/pickle.py +1 -1
- agno/utils/pprint.py +124 -8
- agno/utils/print_response/agent.py +930 -0
- agno/utils/print_response/team.py +1914 -0
- agno/utils/print_response/workflow.py +1668 -0
- agno/utils/prompts.py +111 -0
- agno/utils/reasoning.py +108 -0
- agno/utils/response.py +163 -0
- agno/utils/serialize.py +32 -0
- agno/utils/shell.py +4 -4
- agno/utils/streamlit.py +487 -0
- agno/utils/string.py +204 -51
- agno/utils/team.py +139 -0
- agno/utils/timer.py +9 -2
- agno/utils/tokens.py +657 -0
- agno/utils/tools.py +19 -1
- agno/utils/whatsapp.py +305 -0
- agno/utils/yaml_io.py +3 -3
- agno/vectordb/__init__.py +2 -0
- agno/vectordb/base.py +87 -9
- agno/vectordb/cassandra/__init__.py +5 -1
- agno/vectordb/cassandra/cassandra.py +383 -27
- agno/vectordb/chroma/__init__.py +4 -0
- agno/vectordb/chroma/chromadb.py +748 -83
- agno/vectordb/clickhouse/__init__.py +7 -1
- agno/vectordb/clickhouse/clickhousedb.py +554 -53
- agno/vectordb/couchbase/__init__.py +3 -0
- agno/vectordb/couchbase/couchbase.py +1446 -0
- agno/vectordb/lancedb/__init__.py +5 -0
- agno/vectordb/lancedb/lance_db.py +730 -98
- agno/vectordb/langchaindb/__init__.py +5 -0
- agno/vectordb/langchaindb/langchaindb.py +163 -0
- agno/vectordb/lightrag/__init__.py +5 -0
- agno/vectordb/lightrag/lightrag.py +388 -0
- agno/vectordb/llamaindex/__init__.py +3 -0
- agno/vectordb/llamaindex/llamaindexdb.py +166 -0
- agno/vectordb/milvus/__init__.py +3 -0
- agno/vectordb/milvus/milvus.py +966 -78
- agno/vectordb/mongodb/__init__.py +9 -1
- agno/vectordb/mongodb/mongodb.py +1175 -172
- agno/vectordb/pgvector/__init__.py +8 -0
- agno/vectordb/pgvector/pgvector.py +599 -115
- agno/vectordb/pineconedb/__init__.py +5 -1
- agno/vectordb/pineconedb/pineconedb.py +406 -43
- agno/vectordb/qdrant/__init__.py +4 -0
- agno/vectordb/qdrant/qdrant.py +914 -61
- agno/vectordb/redis/__init__.py +9 -0
- agno/vectordb/redis/redisdb.py +682 -0
- agno/vectordb/singlestore/__init__.py +8 -1
- agno/vectordb/singlestore/singlestore.py +771 -0
- agno/vectordb/surrealdb/__init__.py +3 -0
- agno/vectordb/surrealdb/surrealdb.py +663 -0
- agno/vectordb/upstashdb/__init__.py +5 -0
- agno/vectordb/upstashdb/upstashdb.py +718 -0
- agno/vectordb/weaviate/__init__.py +8 -0
- agno/vectordb/weaviate/index.py +15 -0
- agno/vectordb/weaviate/weaviate.py +1009 -0
- agno/workflow/__init__.py +23 -1
- agno/workflow/agent.py +299 -0
- agno/workflow/condition.py +759 -0
- agno/workflow/loop.py +756 -0
- agno/workflow/parallel.py +853 -0
- agno/workflow/router.py +723 -0
- agno/workflow/step.py +1564 -0
- agno/workflow/steps.py +613 -0
- agno/workflow/types.py +556 -0
- agno/workflow/workflow.py +4327 -514
- agno-2.3.13.dist-info/METADATA +639 -0
- agno-2.3.13.dist-info/RECORD +613 -0
- {agno-0.1.2.dist-info → agno-2.3.13.dist-info}/WHEEL +1 -1
- agno-2.3.13.dist-info/licenses/LICENSE +201 -0
- agno/api/playground.py +0 -91
- agno/api/schemas/playground.py +0 -22
- agno/api/schemas/user.py +0 -22
- agno/api/schemas/workspace.py +0 -46
- agno/api/user.py +0 -160
- agno/api/workspace.py +0 -151
- agno/cli/auth_server.py +0 -118
- agno/cli/config.py +0 -275
- agno/cli/console.py +0 -88
- agno/cli/credentials.py +0 -23
- agno/cli/entrypoint.py +0 -571
- agno/cli/operator.py +0 -355
- agno/cli/settings.py +0 -85
- agno/cli/ws/ws_cli.py +0 -817
- agno/constants.py +0 -13
- agno/document/__init__.py +0 -1
- agno/document/chunking/semantic.py +0 -47
- agno/document/chunking/strategy.py +0 -31
- agno/document/reader/__init__.py +0 -1
- agno/document/reader/arxiv_reader.py +0 -41
- agno/document/reader/base.py +0 -22
- agno/document/reader/csv_reader.py +0 -84
- agno/document/reader/docx_reader.py +0 -46
- agno/document/reader/firecrawl_reader.py +0 -99
- agno/document/reader/json_reader.py +0 -43
- agno/document/reader/pdf_reader.py +0 -219
- agno/document/reader/s3/pdf_reader.py +0 -46
- agno/document/reader/s3/text_reader.py +0 -51
- agno/document/reader/text_reader.py +0 -41
- agno/document/reader/website_reader.py +0 -175
- agno/document/reader/youtube_reader.py +0 -50
- agno/embedder/__init__.py +0 -1
- agno/embedder/azure_openai.py +0 -86
- agno/embedder/cohere.py +0 -72
- agno/embedder/fastembed.py +0 -37
- agno/embedder/google.py +0 -73
- agno/embedder/huggingface.py +0 -54
- agno/embedder/mistral.py +0 -80
- agno/embedder/ollama.py +0 -57
- agno/embedder/openai.py +0 -74
- agno/embedder/sentence_transformer.py +0 -38
- agno/embedder/voyageai.py +0 -64
- agno/eval/perf.py +0 -201
- agno/file/__init__.py +0 -1
- agno/file/file.py +0 -16
- agno/file/local/csv.py +0 -32
- agno/file/local/txt.py +0 -19
- agno/infra/app.py +0 -240
- agno/infra/base.py +0 -144
- agno/infra/context.py +0 -20
- agno/infra/db_app.py +0 -52
- agno/infra/resource.py +0 -205
- agno/infra/resources.py +0 -55
- agno/knowledge/agent.py +0 -230
- agno/knowledge/arxiv.py +0 -22
- agno/knowledge/combined.py +0 -22
- agno/knowledge/csv.py +0 -28
- agno/knowledge/csv_url.py +0 -19
- agno/knowledge/document.py +0 -20
- agno/knowledge/docx.py +0 -30
- agno/knowledge/json.py +0 -28
- agno/knowledge/langchain.py +0 -71
- agno/knowledge/llamaindex.py +0 -66
- agno/knowledge/pdf.py +0 -28
- agno/knowledge/pdf_url.py +0 -26
- agno/knowledge/s3/base.py +0 -60
- agno/knowledge/s3/pdf.py +0 -21
- agno/knowledge/s3/text.py +0 -23
- agno/knowledge/text.py +0 -30
- agno/knowledge/website.py +0 -88
- agno/knowledge/wikipedia.py +0 -31
- agno/knowledge/youtube.py +0 -22
- agno/memory/agent.py +0 -392
- agno/memory/classifier.py +0 -104
- agno/memory/db/__init__.py +0 -1
- agno/memory/db/base.py +0 -42
- agno/memory/db/mongodb.py +0 -189
- agno/memory/db/postgres.py +0 -203
- agno/memory/db/sqlite.py +0 -193
- agno/memory/memory.py +0 -15
- agno/memory/row.py +0 -36
- agno/memory/summarizer.py +0 -192
- agno/memory/summary.py +0 -19
- agno/memory/workflow.py +0 -38
- agno/models/google/gemini_openai.py +0 -26
- agno/models/ollama/hermes.py +0 -221
- agno/models/ollama/tools.py +0 -362
- agno/models/vertexai/gemini.py +0 -595
- agno/playground/__init__.py +0 -3
- agno/playground/async_router.py +0 -421
- agno/playground/deploy.py +0 -249
- agno/playground/operator.py +0 -92
- agno/playground/playground.py +0 -91
- agno/playground/schemas.py +0 -76
- agno/playground/serve.py +0 -55
- agno/playground/sync_router.py +0 -405
- agno/reasoning/agent.py +0 -68
- agno/run/response.py +0 -112
- agno/storage/agent/__init__.py +0 -0
- agno/storage/agent/base.py +0 -38
- agno/storage/agent/dynamodb.py +0 -350
- agno/storage/agent/json.py +0 -92
- agno/storage/agent/mongodb.py +0 -228
- agno/storage/agent/postgres.py +0 -367
- agno/storage/agent/session.py +0 -79
- agno/storage/agent/singlestore.py +0 -303
- agno/storage/agent/sqlite.py +0 -357
- agno/storage/agent/yaml.py +0 -93
- agno/storage/workflow/__init__.py +0 -0
- agno/storage/workflow/base.py +0 -40
- agno/storage/workflow/mongodb.py +0 -233
- agno/storage/workflow/postgres.py +0 -366
- agno/storage/workflow/session.py +0 -60
- agno/storage/workflow/sqlite.py +0 -359
- agno/tools/googlesearch.py +0 -88
- agno/utils/defaults.py +0 -57
- agno/utils/filesystem.py +0 -39
- agno/utils/git.py +0 -52
- agno/utils/json_io.py +0 -30
- agno/utils/load_env.py +0 -19
- agno/utils/py_io.py +0 -19
- agno/utils/pyproject.py +0 -18
- agno/utils/resource_filter.py +0 -31
- agno/vectordb/singlestore/s2vectordb.py +0 -390
- agno/vectordb/singlestore/s2vectordb2.py +0 -355
- agno/workspace/__init__.py +0 -0
- agno/workspace/config.py +0 -325
- agno/workspace/enums.py +0 -6
- agno/workspace/helpers.py +0 -48
- agno/workspace/operator.py +0 -758
- agno/workspace/settings.py +0 -63
- agno-0.1.2.dist-info/LICENSE +0 -375
- agno-0.1.2.dist-info/METADATA +0 -502
- agno-0.1.2.dist-info/RECORD +0 -352
- agno-0.1.2.dist-info/entry_points.txt +0 -3
- /agno/{cli → db/migrations}/__init__.py +0 -0
- /agno/{cli/ws → db/migrations/versions}/__init__.py +0 -0
- /agno/{document/chunking/__init__.py → db/schemas/metrics.py} +0 -0
- /agno/{document/reader/s3 → integrations}/__init__.py +0 -0
- /agno/{file/local → knowledge/chunking}/__init__.py +0 -0
- /agno/{infra → knowledge/remote_content}/__init__.py +0 -0
- /agno/{knowledge/s3 → tools/models}/__init__.py +0 -0
- /agno/{reranker → utils/models}/__init__.py +0 -0
- /agno/{storage → utils/print_response}/__init__.py +0 -0
- {agno-0.1.2.dist-info → agno-2.3.13.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,455 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import random
|
|
3
|
+
import time
|
|
4
|
+
from dataclasses import dataclass, field
|
|
5
|
+
from typing import Dict, List, Optional, Set, Tuple
|
|
6
|
+
from urllib.parse import urljoin, urlparse
|
|
7
|
+
|
|
8
|
+
import httpx
|
|
9
|
+
|
|
10
|
+
from agno.knowledge.chunking.semantic import SemanticChunking
|
|
11
|
+
from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
|
|
12
|
+
from agno.knowledge.document.base import Document
|
|
13
|
+
from agno.knowledge.reader.base import Reader
|
|
14
|
+
from agno.knowledge.types import ContentType
|
|
15
|
+
from agno.utils.log import log_debug, log_error, log_warning
|
|
16
|
+
|
|
17
|
+
# bs4 is a hard dependency of this reader: fail fast at import time with an
# actionable install hint instead of a NameError later during crawling.
try:
    from bs4 import BeautifulSoup, Tag  # noqa: F401
except ImportError:
    raise ImportError("The `bs4` package is not installed. Please install it via `pip install beautifulsoup4`.")
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@dataclass
class WebsiteReader(Reader):
    """Reader for Websites"""

    # Maximum link depth to follow from the starting URL.
    max_depth: int = 3
    # Maximum total number of links to crawl.
    max_links: int = 10

    # Crawl state: URLs already fetched, and the frontier of (url, depth)
    # pairs still to visit. Both are re-initialized in __init__ so that
    # instances never share crawl state.
    _visited: Set[str] = field(default_factory=set)
    _urls_to_crawl: List[Tuple[str, int]] = field(default_factory=list)
|
|
32
|
+
|
|
33
|
+
def __init__(
|
|
34
|
+
self,
|
|
35
|
+
chunking_strategy: Optional[ChunkingStrategy] = SemanticChunking(),
|
|
36
|
+
max_depth: int = 3,
|
|
37
|
+
max_links: int = 10,
|
|
38
|
+
timeout: int = 10,
|
|
39
|
+
proxy: Optional[str] = None,
|
|
40
|
+
**kwargs,
|
|
41
|
+
):
|
|
42
|
+
super().__init__(chunking_strategy=chunking_strategy, **kwargs)
|
|
43
|
+
self.max_depth = max_depth
|
|
44
|
+
self.max_links = max_links
|
|
45
|
+
self.proxy = proxy
|
|
46
|
+
self.timeout = timeout
|
|
47
|
+
|
|
48
|
+
self._visited = set()
|
|
49
|
+
self._urls_to_crawl = []
|
|
50
|
+
|
|
51
|
+
@classmethod
|
|
52
|
+
def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:
|
|
53
|
+
"""Get the list of supported chunking strategies for Website readers."""
|
|
54
|
+
return [
|
|
55
|
+
ChunkingStrategyType.AGENTIC_CHUNKER,
|
|
56
|
+
ChunkingStrategyType.DOCUMENT_CHUNKER,
|
|
57
|
+
ChunkingStrategyType.RECURSIVE_CHUNKER,
|
|
58
|
+
ChunkingStrategyType.SEMANTIC_CHUNKER,
|
|
59
|
+
ChunkingStrategyType.FIXED_SIZE_CHUNKER,
|
|
60
|
+
]
|
|
61
|
+
|
|
62
|
+
@classmethod
|
|
63
|
+
def get_supported_content_types(self) -> List[ContentType]:
|
|
64
|
+
return [ContentType.URL]
|
|
65
|
+
|
|
66
|
+
def delay(self, min_seconds=1, max_seconds=3):
|
|
67
|
+
"""
|
|
68
|
+
Introduce a random delay.
|
|
69
|
+
|
|
70
|
+
:param min_seconds: Minimum number of seconds to delay. Default is 1.
|
|
71
|
+
:param max_seconds: Maximum number of seconds to delay. Default is 3.
|
|
72
|
+
"""
|
|
73
|
+
sleep_time = random.uniform(min_seconds, max_seconds)
|
|
74
|
+
time.sleep(sleep_time)
|
|
75
|
+
|
|
76
|
+
async def async_delay(self, min_seconds=1, max_seconds=3):
|
|
77
|
+
"""
|
|
78
|
+
Introduce a random delay asynchronously.
|
|
79
|
+
|
|
80
|
+
:param min_seconds: Minimum number of seconds to delay. Default is 1.
|
|
81
|
+
:param max_seconds: Maximum number of seconds to delay. Default is 3.
|
|
82
|
+
"""
|
|
83
|
+
sleep_time = random.uniform(min_seconds, max_seconds)
|
|
84
|
+
await asyncio.sleep(sleep_time)
|
|
85
|
+
|
|
86
|
+
def _get_primary_domain(self, url: str) -> str:
|
|
87
|
+
"""
|
|
88
|
+
Extract primary domain from the given URL.
|
|
89
|
+
|
|
90
|
+
:param url: The URL to extract the primary domain from.
|
|
91
|
+
:return: The primary domain.
|
|
92
|
+
"""
|
|
93
|
+
domain_parts = urlparse(url).netloc.split(".")
|
|
94
|
+
# Return primary domain (excluding subdomains)
|
|
95
|
+
return ".".join(domain_parts[-2:])
|
|
96
|
+
|
|
97
|
+
def _extract_main_content(self, soup: BeautifulSoup) -> str:
    """
    Extract the main textual content from a parsed HTML page.

    Prefers a semantic container (<article>, <main>, <section>) or an
    element carrying a well-known content class/id; falls back to the
    whole page. Script/style tags and page chrome (nav/header/footer)
    are removed before the text is pulled out.

    :param soup: The BeautifulSoup object to extract the main content from.
    :return: The main content.
    """
    content_classes = ["content", "main-content", "post-content", "entry-content", "article-body"]
    content_ids = ["content", "main", "article"]

    def is_content_container(node: Tag) -> bool:
        """
        Check if the node matches any of the relevant tags, class names or ids
        """
        if not isinstance(node, Tag):
            return False

        if node.name in ["article", "main", "section"]:
            return True

        class_attr = node.get("class")
        node_classes: List[str] = class_attr if isinstance(class_attr, list) else []
        if any(cls in content_classes for cls in node_classes):
            return True

        # Common content IDs
        return node.get("id", "") in content_ids

    # Prefer a dedicated content element; otherwise use the whole page.
    target = soup.find(is_content_container)
    if not (target and hasattr(target, "find_all")):
        target = soup

    # Strip scripts, styles and page chrome before extracting text.
    for junk in target.find_all(["script", "style", "nav", "header", "footer"]):
        junk.decompose()
    return target.get_text(strip=True, separator=" ")
|
|
140
|
+
|
|
141
|
+
def crawl(self, url: str, starting_depth: int = 1) -> Dict[str, str]:
    """
    Crawls a website and returns a dictionary of URLs and their corresponding content.

    Parameters:
    - url (str): The starting URL to begin the crawl.
    - starting_depth (int, optional): The starting depth level for the crawl. Defaults to 1.

    Returns:
    - Dict[str, str]: A dictionary where each key is a URL and the corresponding value is the main
                      content extracted from that URL.

    Raises:
    - httpx.HTTPStatusError: If there's an HTTP status error.
    - httpx.RequestError: If there's a request-related error (connection, timeout, etc).

    Note:
        The function focuses on extracting the main content by prioritizing content inside common HTML tags
        like `<article>`, `<main>`, and `<div>` with class names such as "content", "main-content", etc.
        The crawler will also respect the `max_depth` attribute of the WebCrawler class, ensuring it does not
        crawl deeper than the specified depth.
    """
    num_links = 0
    crawler_result: Dict[str, str] = {}
    primary_domain = self._get_primary_domain(url)
    # Add starting URL with its depth to the global list
    # NOTE(review): unlike async_crawl, _visited/_urls_to_crawl are NOT reset
    # here, so state from a previous crawl() call carries over — confirm intended.
    self._urls_to_crawl.append((url, starting_depth))
    while self._urls_to_crawl:
        # Unpack URL and depth from the global list (pop(0) => breadth-first order)
        current_url, current_depth = self._urls_to_crawl.pop(0)

        # Skip if
        # - URL is already visited
        # - does not end with the primary domain,
        # - exceeds max depth
        # - exceeds max links
        if (
            current_url in self._visited
            or not urlparse(current_url).netloc.endswith(primary_domain)
            or (current_depth > self.max_depth and current_url != url)
            or num_links >= self.max_links
        ):
            continue

        self._visited.add(current_url)
        # Random pause between requests to avoid hammering the target server
        self.delay()

        try:
            log_debug(f"Crawling: {current_url}")

            # Only pass the proxy keyword when a proxy is actually configured
            response = (
                httpx.get(current_url, timeout=self.timeout, proxy=self.proxy, follow_redirects=True)
                if self.proxy
                else httpx.get(current_url, timeout=self.timeout, follow_redirects=True)
            )
            response.raise_for_status()

            soup = BeautifulSoup(response.content, "html.parser")

            # Extract main content
            main_content = self._extract_main_content(soup)
            if main_content:
                crawler_result[current_url] = main_content
                num_links += 1

            # Add found URLs to the global list, with incremented depth
            for link in soup.find_all("a", href=True):
                if not isinstance(link, Tag):
                    continue

                href_str = str(link["href"])
                full_url = urljoin(current_url, href_str)

                if not isinstance(full_url, str):
                    continue

                # Stay on the same primary domain and skip obvious binary links
                parsed_url = urlparse(full_url)
                if parsed_url.netloc.endswith(primary_domain) and not any(
                    parsed_url.path.endswith(ext) for ext in [".pdf", ".jpg", ".png"]
                ):
                    full_url_str = str(full_url)
                    if (
                        full_url_str not in self._visited
                        and (full_url_str, current_depth + 1) not in self._urls_to_crawl
                    ):
                        self._urls_to_crawl.append((full_url_str, current_depth + 1))

        except httpx.HTTPStatusError as e:
            # Log HTTP status errors but continue crawling other pages
            # Skip redirect errors (3xx) as they should be handled by follow_redirects
            if e.response.status_code >= 300 and e.response.status_code < 400:
                log_debug(f"Redirect encountered for {current_url}, skipping: {e}")
            else:
                log_warning(f"HTTP status error while crawling {current_url}: {e}")
            # For the initial URL, we should raise the error only if it's not a redirect
            if current_url == url and not crawler_result and not (300 <= e.response.status_code < 400):
                raise
        except httpx.RequestError as e:
            # Log request errors but continue crawling other pages
            log_warning(f"Request error while crawling {current_url}: {e}")
            # For the initial URL, we should raise the error
            if current_url == url and not crawler_result:
                raise
        except Exception as e:
            # Log other exceptions but continue crawling other pages
            log_warning(f"Failed to crawl {current_url}: {e}")
            # For the initial URL, we should raise the error
            if current_url == url and not crawler_result:
                # Wrap non-HTTP exceptions in a RequestError
                raise httpx.RequestError(f"Failed to crawl starting URL {url}: {str(e)}", request=None) from e

    # If we couldn't crawl any pages, raise an error
    if not crawler_result:
        raise httpx.RequestError(f"Failed to extract any content from {url}", request=None)

    return crawler_result
|
|
257
|
+
|
|
258
|
+
async def async_crawl(self, url: str, starting_depth: int = 1) -> Dict[str, str]:
    """
    Asynchronously crawls a website and returns a dictionary of URLs and their corresponding content.

    Parameters:
    - url (str): The starting URL to begin the crawl.
    - starting_depth (int, optional): The starting depth level for the crawl. Defaults to 1.

    Returns:
    - Dict[str, str]: A dictionary where each key is a URL and the corresponding value is the main
                      content extracted from that URL.

    Raises:
    - httpx.HTTPStatusError: If there's an HTTP status error.
    - httpx.RequestError: If there's a request-related error (connection, timeout, etc).
    """
    num_links = 0
    crawler_result: Dict[str, str] = {}
    primary_domain = self._get_primary_domain(url)

    # Clear previously visited URLs and URLs to crawl
    # (the sync crawl() does not do this — it accumulates state instead)
    self._visited = set()
    self._urls_to_crawl = [(url, starting_depth)]

    # Configure the shared client with a proxy only when one is set
    client_args = {"proxy": self.proxy} if self.proxy else {}
    async with httpx.AsyncClient(**client_args) as client:  # type: ignore
        # pop(0) => breadth-first traversal of discovered links
        while self._urls_to_crawl and num_links < self.max_links:
            current_url, current_depth = self._urls_to_crawl.pop(0)

            # Skip visited URLs, off-domain URLs, and depth/link-count overflow
            if (
                current_url in self._visited
                or not urlparse(current_url).netloc.endswith(primary_domain)
                or current_depth > self.max_depth
                or num_links >= self.max_links
            ):
                continue

            self._visited.add(current_url)
            # Random pause between requests to avoid hammering the target server
            await self.async_delay()

            try:
                log_debug(f"Crawling asynchronously: {current_url}")
                response = await client.get(current_url, timeout=self.timeout, follow_redirects=True)
                response.raise_for_status()

                soup = BeautifulSoup(response.content, "html.parser")

                # Extract main content
                main_content = self._extract_main_content(soup)
                if main_content:
                    crawler_result[current_url] = main_content
                    num_links += 1

                # Add found URLs to the list, with incremented depth
                for link in soup.find_all("a", href=True):
                    if not isinstance(link, Tag):
                        continue

                    href_str = str(link["href"])
                    full_url = urljoin(current_url, href_str)

                    if not isinstance(full_url, str):
                        continue

                    # Stay on the same primary domain and skip obvious binary links
                    parsed_url = urlparse(full_url)
                    if parsed_url.netloc.endswith(primary_domain) and not any(
                        parsed_url.path.endswith(ext) for ext in [".pdf", ".jpg", ".png"]
                    ):
                        full_url_str = str(full_url)
                        if (
                            full_url_str not in self._visited
                            and (full_url_str, current_depth + 1) not in self._urls_to_crawl
                        ):
                            self._urls_to_crawl.append((full_url_str, current_depth + 1))

            except httpx.HTTPStatusError as e:
                # Log HTTP status errors but continue crawling other pages
                log_warning(f"HTTP status error while crawling asynchronously {current_url}: {e}")
                # For the initial URL, we should raise the error
                if current_url == url and not crawler_result:
                    raise
            except httpx.RequestError as e:
                # Log request errors but continue crawling other pages
                log_warning(f"Request error while crawling asynchronously {current_url}: {e}")
                # For the initial URL, we should raise the error
                if current_url == url and not crawler_result:
                    raise
            except Exception as e:
                # Log other exceptions but continue crawling other pages
                log_warning(f"Failed to crawl asynchronously {current_url}: {e}")
                # For the initial URL, we should raise the error
                if current_url == url and not crawler_result:
                    # Wrap non-HTTP exceptions in a RequestError
                    raise httpx.RequestError(
                        f"Failed to crawl starting URL {url} asynchronously: {str(e)}", request=None
                    ) from e

    # If we couldn't crawl any pages, raise an error
    if not crawler_result:
        raise httpx.RequestError(f"Failed to extract any content from {url} asynchronously", request=None)

    return crawler_result
|
|
360
|
+
|
|
361
|
+
def read(self, url: str, name: Optional[str] = None) -> List[Document]:
    """
    Reads a website and returns a list of documents.

    This function first converts the website into a dictionary of URLs and their corresponding content.
    Then iterates through the dictionary and returns chunks of content.

    :param url: The URL of the website to read.
    :param name: Optional name for the resulting documents; defaults to the URL.
    :return: A list of documents.
    :raises httpx.HTTPStatusError: If there's an HTTP status error.
    :raises httpx.RequestError: If there's a request-related error.
    """
    log_debug(f"Reading: {url}")
    try:
        crawler_result = self.crawl(url)
        documents: List[Document] = []
        for crawled_url, crawled_content in crawler_result.items():
            # Build the Document once; only the chunking step differs between
            # the two modes (previously the constructor call was duplicated).
            document = Document(
                name=name or url,
                id=str(crawled_url),
                meta_data={"url": str(crawled_url)},
                content=crawled_content,
            )
            if self.chunk:
                documents.extend(self.chunk_document(document))
            else:
                documents.append(document)
        return documents
    except (httpx.HTTPStatusError, httpx.RequestError) as e:
        log_error(f"Error reading website {url}: {e}")
        raise
|
|
403
|
+
|
|
404
|
+
async def async_read(self, url: str, name: Optional[str] = None) -> List[Document]:
    """
    Asynchronously reads a website and returns a list of documents.

    This function first converts the website into a dictionary of URLs and their corresponding content.
    Then iterates through the dictionary and returns chunks of content.

    :param url: The URL of the website to read.
    :param name: Optional name for the resulting documents; defaults to the URL.
    :return: A list of documents.
    :raises httpx.HTTPStatusError: If there's an HTTP status error.
    :raises httpx.RequestError: If there's a request-related error.
    """
    log_debug(f"Reading asynchronously: {url}")
    try:
        crawler_result = await self.async_crawl(url)
        documents: List[Document] = []

        # Process each crawled page; documents are built once and chunked on
        # demand (previously the Document constructor call was duplicated in
        # both branches).
        async def process_document(crawled_url, crawled_content):
            document = Document(
                name=name or url,
                id=str(crawled_url),
                meta_data={"url": str(crawled_url)},
                content=crawled_content,
            )
            if self.chunk:
                return self.chunk_document(document)
            return [document]

        # Use asyncio.gather to process all documents in parallel
        tasks = [
            process_document(crawled_url, crawled_content)
            for crawled_url, crawled_content in crawler_result.items()
        ]
        results = await asyncio.gather(*tasks)

        # Flatten the per-page lists into one result list
        for doc_list in results:
            documents.extend(doc_list)

        return documents
    except (httpx.HTTPStatusError, httpx.RequestError) as e:
        log_error(f"Error reading website asynchronously {url}: {e}")
        raise
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from typing import List, Optional
|
|
3
|
+
|
|
4
|
+
from agno.knowledge.chunking.fixed import FixedSizeChunking
|
|
5
|
+
from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
|
|
6
|
+
from agno.knowledge.document import Document
|
|
7
|
+
from agno.knowledge.reader.base import Reader
|
|
8
|
+
from agno.knowledge.types import ContentType
|
|
9
|
+
from agno.utils.log import log_debug, log_info
|
|
10
|
+
|
|
11
|
+
try:
|
|
12
|
+
import wikipedia # noqa: F401
|
|
13
|
+
except ImportError:
|
|
14
|
+
raise ImportError("The `wikipedia` package is not installed. Please install it via `pip install wikipedia`.")
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class WikipediaReader(Reader):
    """Reader that fetches the Wikipedia summary for a given topic."""

    # When True, let the wikipedia package auto-correct the topic spelling.
    auto_suggest: bool = True

    def __init__(
        self, chunking_strategy: Optional[ChunkingStrategy] = FixedSizeChunking(), auto_suggest: bool = True, **kwargs
    ):
        # NOTE(review): the default FixedSizeChunking() instance is created once
        # at definition time and shared across readers — confirm it is stateless.
        super().__init__(chunking_strategy=chunking_strategy, **kwargs)
        self.auto_suggest = auto_suggest

    @classmethod
    def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
        """Get the list of supported chunking strategies for Wikipedia readers."""
        return [
            ChunkingStrategyType.FIXED_SIZE_CHUNKER,
            ChunkingStrategyType.AGENTIC_CHUNKER,
            ChunkingStrategyType.DOCUMENT_CHUNKER,
            ChunkingStrategyType.RECURSIVE_CHUNKER,
            ChunkingStrategyType.SEMANTIC_CHUNKER,
        ]

    @classmethod
    def get_supported_content_types(cls) -> List[ContentType]:
        """Get the list of content types Wikipedia readers accept."""
        return [ContentType.TOPIC]

    @staticmethod
    def _build_documents(topic: str, summary: Optional[str]) -> List[Document]:
        """Build the result list: one Document when a summary was found, else empty.

        Shared by read() and async_read() so the Document construction is not
        duplicated.
        """
        if summary:
            return [
                Document(
                    name=topic,
                    meta_data={"topic": topic},
                    content=summary,
                )
            ]
        return []

    def read(self, topic: str) -> List[Document]:
        """Read the Wikipedia summary for *topic*.

        :param topic: The Wikipedia topic to read.
        :return: A list with one summary document, or an empty list when the
            page was not found.
        """
        log_debug(f"Reading Wikipedia topic: {topic}")
        summary = None
        try:
            summary = wikipedia.summary(topic, auto_suggest=self.auto_suggest)
        except wikipedia.exceptions.PageError:
            # A missing page is not fatal for the caller: return no documents.
            summary = None
            log_info("Wikipedia Error: Page not found.")

        # Only create Document if we successfully got a summary
        return self._build_documents(topic, summary)

    async def async_read(self, topic: str) -> List[Document]:
        """
        Asynchronously read content from Wikipedia.

        Args:
            topic: The Wikipedia topic to read

        Returns:
            A list of documents containing the Wikipedia summary
        """
        log_debug(f"Async reading Wikipedia topic: {topic}")
        summary = None
        try:
            # Run the synchronous wikipedia API call in a thread pool
            summary = await asyncio.to_thread(wikipedia.summary, topic, auto_suggest=self.auto_suggest)
        except wikipedia.exceptions.PageError:
            # A missing page is not fatal for the caller: return no documents.
            summary = None
            log_info("Wikipedia Error: Page not found.")

        # Only create Document if we successfully got a summary
        return self._build_documents(topic, summary)
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from typing import List, Optional
|
|
3
|
+
|
|
4
|
+
from agno.knowledge.chunking.recursive import RecursiveChunking
|
|
5
|
+
from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
|
|
6
|
+
from agno.knowledge.document.base import Document
|
|
7
|
+
from agno.knowledge.reader.base import Reader
|
|
8
|
+
from agno.knowledge.types import ContentType
|
|
9
|
+
from agno.utils.log import log_debug, log_error, log_info
|
|
10
|
+
|
|
11
|
+
try:
|
|
12
|
+
from youtube_transcript_api import YouTubeTranscriptApi
|
|
13
|
+
except ImportError:
|
|
14
|
+
raise ImportError(
|
|
15
|
+
"`youtube_transcript_api` not installed. Please install it via `pip install youtube_transcript_api`."
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class YouTubeReader(Reader):
    """Reader for YouTube video transcripts"""

    def __init__(self, chunking_strategy: Optional[ChunkingStrategy] = RecursiveChunking(), **kwargs):
        super().__init__(chunking_strategy=chunking_strategy, **kwargs)

    @classmethod
    def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
        """Get the list of supported chunking strategies for YouTube readers."""
        return [
            ChunkingStrategyType.RECURSIVE_CHUNKER,
            ChunkingStrategyType.AGENTIC_CHUNKER,
            ChunkingStrategyType.DOCUMENT_CHUNKER,
            ChunkingStrategyType.SEMANTIC_CHUNKER,
            ChunkingStrategyType.FIXED_SIZE_CHUNKER,
        ]

    @classmethod
    def get_supported_content_types(cls) -> List[ContentType]:
        """Get the list of content types YouTube readers accept."""
        return [ContentType.YOUTUBE]

    def read(self, url: str, name: Optional[str] = None) -> List[Document]:
        """Read the transcript of a YouTube video and return it as documents.

        :param url: The YouTube video URL (expects a ``v=`` query parameter).
        :param name: Optional document name; defaults to ``youtube_<video_id>``.
        :return: A list of documents (chunked when ``self.chunk`` is set), or
            an empty list on any error.
        """
        try:
            # Extract video ID from URL
            # NOTE(review): assumes a "v=" query parameter; short youtu.be
            # links would need separate handling — confirm expected inputs.
            video_id = url.split("v=")[-1].split("&")[0]
            log_info(f"Reading transcript for video: {video_id}")

            # Get transcript
            log_debug(f"Fetching transcript for video: {video_id}")
            # Create an instance of YouTubeTranscriptApi
            ytt_api = YouTubeTranscriptApi()
            transcript_data = ytt_api.fetch(video_id)

            # Combine transcript segments into full text; join avoids the
            # quadratic cost of repeated string concatenation.
            transcript_text = " ".join(segment.text for segment in transcript_data)

            documents = [
                Document(
                    name=name or f"youtube_{video_id}",
                    id=f"youtube_{video_id}",
                    meta_data={"video_url": url, "video_id": video_id},
                    content=transcript_text.strip(),
                )
            ]

            if self.chunk:
                chunked_documents = []
                for document in documents:
                    chunked_documents.extend(self.chunk_document(document))
                return chunked_documents
            return documents

        except Exception as e:
            # Best-effort: any failure yields an empty result rather than raising.
            log_error(f"Error reading transcript for {url}: {e}")
            return []

    async def async_read(self, url: str, name: Optional[str] = None) -> List[Document]:
        """Asynchronously read a video transcript by running the sync read in a thread.

        The ``name`` parameter is now forwarded so the async path matches the
        sync ``read`` signature (it was previously dropped).
        """
        return await asyncio.to_thread(self.read, url, name)
|