agno 0.1.2__py3-none-any.whl → 2.3.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/__init__.py +8 -0
- agno/agent/__init__.py +44 -5
- agno/agent/agent.py +10531 -2975
- agno/api/agent.py +14 -53
- agno/api/api.py +7 -46
- agno/api/evals.py +22 -0
- agno/api/os.py +17 -0
- agno/api/routes.py +6 -25
- agno/api/schemas/__init__.py +9 -0
- agno/api/schemas/agent.py +6 -9
- agno/api/schemas/evals.py +16 -0
- agno/api/schemas/os.py +14 -0
- agno/api/schemas/team.py +10 -10
- agno/api/schemas/utils.py +21 -0
- agno/api/schemas/workflows.py +16 -0
- agno/api/settings.py +53 -0
- agno/api/team.py +22 -26
- agno/api/workflow.py +28 -0
- agno/cloud/aws/base.py +214 -0
- agno/cloud/aws/s3/__init__.py +2 -0
- agno/cloud/aws/s3/api_client.py +43 -0
- agno/cloud/aws/s3/bucket.py +195 -0
- agno/cloud/aws/s3/object.py +57 -0
- agno/compression/__init__.py +3 -0
- agno/compression/manager.py +247 -0
- agno/culture/__init__.py +3 -0
- agno/culture/manager.py +956 -0
- agno/db/__init__.py +24 -0
- agno/db/async_postgres/__init__.py +3 -0
- agno/db/base.py +946 -0
- agno/db/dynamo/__init__.py +3 -0
- agno/db/dynamo/dynamo.py +2781 -0
- agno/db/dynamo/schemas.py +442 -0
- agno/db/dynamo/utils.py +743 -0
- agno/db/firestore/__init__.py +3 -0
- agno/db/firestore/firestore.py +2379 -0
- agno/db/firestore/schemas.py +181 -0
- agno/db/firestore/utils.py +376 -0
- agno/db/gcs_json/__init__.py +3 -0
- agno/db/gcs_json/gcs_json_db.py +1791 -0
- agno/db/gcs_json/utils.py +228 -0
- agno/db/in_memory/__init__.py +3 -0
- agno/db/in_memory/in_memory_db.py +1312 -0
- agno/db/in_memory/utils.py +230 -0
- agno/db/json/__init__.py +3 -0
- agno/db/json/json_db.py +1777 -0
- agno/db/json/utils.py +230 -0
- agno/db/migrations/manager.py +199 -0
- agno/db/migrations/v1_to_v2.py +635 -0
- agno/db/migrations/versions/v2_3_0.py +938 -0
- agno/db/mongo/__init__.py +17 -0
- agno/db/mongo/async_mongo.py +2760 -0
- agno/db/mongo/mongo.py +2597 -0
- agno/db/mongo/schemas.py +119 -0
- agno/db/mongo/utils.py +276 -0
- agno/db/mysql/__init__.py +4 -0
- agno/db/mysql/async_mysql.py +2912 -0
- agno/db/mysql/mysql.py +2923 -0
- agno/db/mysql/schemas.py +186 -0
- agno/db/mysql/utils.py +488 -0
- agno/db/postgres/__init__.py +4 -0
- agno/db/postgres/async_postgres.py +2579 -0
- agno/db/postgres/postgres.py +2870 -0
- agno/db/postgres/schemas.py +187 -0
- agno/db/postgres/utils.py +442 -0
- agno/db/redis/__init__.py +3 -0
- agno/db/redis/redis.py +2141 -0
- agno/db/redis/schemas.py +159 -0
- agno/db/redis/utils.py +346 -0
- agno/db/schemas/__init__.py +4 -0
- agno/db/schemas/culture.py +120 -0
- agno/db/schemas/evals.py +34 -0
- agno/db/schemas/knowledge.py +40 -0
- agno/db/schemas/memory.py +61 -0
- agno/db/singlestore/__init__.py +3 -0
- agno/db/singlestore/schemas.py +179 -0
- agno/db/singlestore/singlestore.py +2877 -0
- agno/db/singlestore/utils.py +384 -0
- agno/db/sqlite/__init__.py +4 -0
- agno/db/sqlite/async_sqlite.py +2911 -0
- agno/db/sqlite/schemas.py +181 -0
- agno/db/sqlite/sqlite.py +2908 -0
- agno/db/sqlite/utils.py +429 -0
- agno/db/surrealdb/__init__.py +3 -0
- agno/db/surrealdb/metrics.py +292 -0
- agno/db/surrealdb/models.py +334 -0
- agno/db/surrealdb/queries.py +71 -0
- agno/db/surrealdb/surrealdb.py +1908 -0
- agno/db/surrealdb/utils.py +147 -0
- agno/db/utils.py +118 -0
- agno/eval/__init__.py +24 -0
- agno/eval/accuracy.py +666 -276
- agno/eval/agent_as_judge.py +861 -0
- agno/eval/base.py +29 -0
- agno/eval/performance.py +779 -0
- agno/eval/reliability.py +241 -62
- agno/eval/utils.py +120 -0
- agno/exceptions.py +143 -1
- agno/filters.py +354 -0
- agno/guardrails/__init__.py +6 -0
- agno/guardrails/base.py +19 -0
- agno/guardrails/openai.py +144 -0
- agno/guardrails/pii.py +94 -0
- agno/guardrails/prompt_injection.py +52 -0
- agno/hooks/__init__.py +3 -0
- agno/hooks/decorator.py +164 -0
- agno/integrations/discord/__init__.py +3 -0
- agno/integrations/discord/client.py +203 -0
- agno/knowledge/__init__.py +5 -1
- agno/{document → knowledge}/chunking/agentic.py +22 -14
- agno/{document → knowledge}/chunking/document.py +2 -2
- agno/{document → knowledge}/chunking/fixed.py +7 -6
- agno/knowledge/chunking/markdown.py +151 -0
- agno/{document → knowledge}/chunking/recursive.py +15 -3
- agno/knowledge/chunking/row.py +39 -0
- agno/knowledge/chunking/semantic.py +91 -0
- agno/knowledge/chunking/strategy.py +165 -0
- agno/knowledge/content.py +74 -0
- agno/knowledge/document/__init__.py +5 -0
- agno/{document → knowledge/document}/base.py +12 -2
- agno/knowledge/embedder/__init__.py +5 -0
- agno/knowledge/embedder/aws_bedrock.py +343 -0
- agno/knowledge/embedder/azure_openai.py +210 -0
- agno/{embedder → knowledge/embedder}/base.py +8 -0
- agno/knowledge/embedder/cohere.py +323 -0
- agno/knowledge/embedder/fastembed.py +62 -0
- agno/{embedder → knowledge/embedder}/fireworks.py +1 -1
- agno/knowledge/embedder/google.py +258 -0
- agno/knowledge/embedder/huggingface.py +94 -0
- agno/knowledge/embedder/jina.py +182 -0
- agno/knowledge/embedder/langdb.py +22 -0
- agno/knowledge/embedder/mistral.py +206 -0
- agno/knowledge/embedder/nebius.py +13 -0
- agno/knowledge/embedder/ollama.py +154 -0
- agno/knowledge/embedder/openai.py +195 -0
- agno/knowledge/embedder/sentence_transformer.py +63 -0
- agno/{embedder → knowledge/embedder}/together.py +1 -1
- agno/knowledge/embedder/vllm.py +262 -0
- agno/knowledge/embedder/voyageai.py +165 -0
- agno/knowledge/knowledge.py +3006 -0
- agno/knowledge/reader/__init__.py +7 -0
- agno/knowledge/reader/arxiv_reader.py +81 -0
- agno/knowledge/reader/base.py +95 -0
- agno/knowledge/reader/csv_reader.py +164 -0
- agno/knowledge/reader/docx_reader.py +82 -0
- agno/knowledge/reader/field_labeled_csv_reader.py +290 -0
- agno/knowledge/reader/firecrawl_reader.py +201 -0
- agno/knowledge/reader/json_reader.py +88 -0
- agno/knowledge/reader/markdown_reader.py +137 -0
- agno/knowledge/reader/pdf_reader.py +431 -0
- agno/knowledge/reader/pptx_reader.py +101 -0
- agno/knowledge/reader/reader_factory.py +313 -0
- agno/knowledge/reader/s3_reader.py +89 -0
- agno/knowledge/reader/tavily_reader.py +193 -0
- agno/knowledge/reader/text_reader.py +127 -0
- agno/knowledge/reader/web_search_reader.py +325 -0
- agno/knowledge/reader/website_reader.py +455 -0
- agno/knowledge/reader/wikipedia_reader.py +91 -0
- agno/knowledge/reader/youtube_reader.py +78 -0
- agno/knowledge/remote_content/remote_content.py +88 -0
- agno/knowledge/reranker/__init__.py +3 -0
- agno/{reranker → knowledge/reranker}/base.py +1 -1
- agno/{reranker → knowledge/reranker}/cohere.py +2 -2
- agno/knowledge/reranker/infinity.py +195 -0
- agno/knowledge/reranker/sentence_transformer.py +54 -0
- agno/knowledge/types.py +39 -0
- agno/knowledge/utils.py +234 -0
- agno/media.py +439 -95
- agno/memory/__init__.py +16 -3
- agno/memory/manager.py +1474 -123
- agno/memory/strategies/__init__.py +15 -0
- agno/memory/strategies/base.py +66 -0
- agno/memory/strategies/summarize.py +196 -0
- agno/memory/strategies/types.py +37 -0
- agno/models/aimlapi/__init__.py +5 -0
- agno/models/aimlapi/aimlapi.py +62 -0
- agno/models/anthropic/__init__.py +4 -0
- agno/models/anthropic/claude.py +960 -496
- agno/models/aws/__init__.py +15 -0
- agno/models/aws/bedrock.py +686 -451
- agno/models/aws/claude.py +190 -183
- agno/models/azure/__init__.py +18 -1
- agno/models/azure/ai_foundry.py +489 -0
- agno/models/azure/openai_chat.py +89 -40
- agno/models/base.py +2477 -550
- agno/models/cerebras/__init__.py +12 -0
- agno/models/cerebras/cerebras.py +565 -0
- agno/models/cerebras/cerebras_openai.py +131 -0
- agno/models/cohere/__init__.py +4 -0
- agno/models/cohere/chat.py +306 -492
- agno/models/cometapi/__init__.py +5 -0
- agno/models/cometapi/cometapi.py +74 -0
- agno/models/dashscope/__init__.py +5 -0
- agno/models/dashscope/dashscope.py +90 -0
- agno/models/deepinfra/__init__.py +5 -0
- agno/models/deepinfra/deepinfra.py +45 -0
- agno/models/deepseek/__init__.py +4 -0
- agno/models/deepseek/deepseek.py +110 -9
- agno/models/fireworks/__init__.py +4 -0
- agno/models/fireworks/fireworks.py +19 -22
- agno/models/google/__init__.py +3 -7
- agno/models/google/gemini.py +1717 -662
- agno/models/google/utils.py +22 -0
- agno/models/groq/__init__.py +4 -0
- agno/models/groq/groq.py +391 -666
- agno/models/huggingface/__init__.py +4 -0
- agno/models/huggingface/huggingface.py +266 -538
- agno/models/ibm/__init__.py +5 -0
- agno/models/ibm/watsonx.py +432 -0
- agno/models/internlm/__init__.py +3 -0
- agno/models/internlm/internlm.py +20 -3
- agno/models/langdb/__init__.py +1 -0
- agno/models/langdb/langdb.py +60 -0
- agno/models/litellm/__init__.py +14 -0
- agno/models/litellm/chat.py +503 -0
- agno/models/litellm/litellm_openai.py +42 -0
- agno/models/llama_cpp/__init__.py +5 -0
- agno/models/llama_cpp/llama_cpp.py +22 -0
- agno/models/lmstudio/__init__.py +5 -0
- agno/models/lmstudio/lmstudio.py +25 -0
- agno/models/message.py +361 -39
- agno/models/meta/__init__.py +12 -0
- agno/models/meta/llama.py +502 -0
- agno/models/meta/llama_openai.py +79 -0
- agno/models/metrics.py +120 -0
- agno/models/mistral/__init__.py +4 -0
- agno/models/mistral/mistral.py +293 -393
- agno/models/nebius/__init__.py +3 -0
- agno/models/nebius/nebius.py +53 -0
- agno/models/nexus/__init__.py +3 -0
- agno/models/nexus/nexus.py +22 -0
- agno/models/nvidia/__init__.py +4 -0
- agno/models/nvidia/nvidia.py +22 -3
- agno/models/ollama/__init__.py +4 -2
- agno/models/ollama/chat.py +257 -492
- agno/models/openai/__init__.py +7 -0
- agno/models/openai/chat.py +725 -770
- agno/models/openai/like.py +16 -2
- agno/models/openai/responses.py +1121 -0
- agno/models/openrouter/__init__.py +4 -0
- agno/models/openrouter/openrouter.py +62 -5
- agno/models/perplexity/__init__.py +5 -0
- agno/models/perplexity/perplexity.py +203 -0
- agno/models/portkey/__init__.py +3 -0
- agno/models/portkey/portkey.py +82 -0
- agno/models/requesty/__init__.py +5 -0
- agno/models/requesty/requesty.py +69 -0
- agno/models/response.py +177 -7
- agno/models/sambanova/__init__.py +4 -0
- agno/models/sambanova/sambanova.py +23 -4
- agno/models/siliconflow/__init__.py +5 -0
- agno/models/siliconflow/siliconflow.py +42 -0
- agno/models/together/__init__.py +4 -0
- agno/models/together/together.py +21 -164
- agno/models/utils.py +266 -0
- agno/models/vercel/__init__.py +3 -0
- agno/models/vercel/v0.py +43 -0
- agno/models/vertexai/__init__.py +0 -1
- agno/models/vertexai/claude.py +190 -0
- agno/models/vllm/__init__.py +3 -0
- agno/models/vllm/vllm.py +83 -0
- agno/models/xai/__init__.py +2 -0
- agno/models/xai/xai.py +111 -7
- agno/os/__init__.py +3 -0
- agno/os/app.py +1027 -0
- agno/os/auth.py +244 -0
- agno/os/config.py +126 -0
- agno/os/interfaces/__init__.py +1 -0
- agno/os/interfaces/a2a/__init__.py +3 -0
- agno/os/interfaces/a2a/a2a.py +42 -0
- agno/os/interfaces/a2a/router.py +249 -0
- agno/os/interfaces/a2a/utils.py +924 -0
- agno/os/interfaces/agui/__init__.py +3 -0
- agno/os/interfaces/agui/agui.py +47 -0
- agno/os/interfaces/agui/router.py +147 -0
- agno/os/interfaces/agui/utils.py +574 -0
- agno/os/interfaces/base.py +25 -0
- agno/os/interfaces/slack/__init__.py +3 -0
- agno/os/interfaces/slack/router.py +148 -0
- agno/os/interfaces/slack/security.py +30 -0
- agno/os/interfaces/slack/slack.py +47 -0
- agno/os/interfaces/whatsapp/__init__.py +3 -0
- agno/os/interfaces/whatsapp/router.py +210 -0
- agno/os/interfaces/whatsapp/security.py +55 -0
- agno/os/interfaces/whatsapp/whatsapp.py +36 -0
- agno/os/mcp.py +293 -0
- agno/os/middleware/__init__.py +9 -0
- agno/os/middleware/jwt.py +797 -0
- agno/os/router.py +258 -0
- agno/os/routers/__init__.py +3 -0
- agno/os/routers/agents/__init__.py +3 -0
- agno/os/routers/agents/router.py +599 -0
- agno/os/routers/agents/schema.py +261 -0
- agno/os/routers/evals/__init__.py +3 -0
- agno/os/routers/evals/evals.py +450 -0
- agno/os/routers/evals/schemas.py +174 -0
- agno/os/routers/evals/utils.py +231 -0
- agno/os/routers/health.py +31 -0
- agno/os/routers/home.py +52 -0
- agno/os/routers/knowledge/__init__.py +3 -0
- agno/os/routers/knowledge/knowledge.py +1008 -0
- agno/os/routers/knowledge/schemas.py +178 -0
- agno/os/routers/memory/__init__.py +3 -0
- agno/os/routers/memory/memory.py +661 -0
- agno/os/routers/memory/schemas.py +88 -0
- agno/os/routers/metrics/__init__.py +3 -0
- agno/os/routers/metrics/metrics.py +190 -0
- agno/os/routers/metrics/schemas.py +47 -0
- agno/os/routers/session/__init__.py +3 -0
- agno/os/routers/session/session.py +997 -0
- agno/os/routers/teams/__init__.py +3 -0
- agno/os/routers/teams/router.py +512 -0
- agno/os/routers/teams/schema.py +257 -0
- agno/os/routers/traces/__init__.py +3 -0
- agno/os/routers/traces/schemas.py +414 -0
- agno/os/routers/traces/traces.py +499 -0
- agno/os/routers/workflows/__init__.py +3 -0
- agno/os/routers/workflows/router.py +624 -0
- agno/os/routers/workflows/schema.py +75 -0
- agno/os/schema.py +534 -0
- agno/os/scopes.py +469 -0
- agno/{playground → os}/settings.py +7 -15
- agno/os/utils.py +973 -0
- agno/reasoning/anthropic.py +80 -0
- agno/reasoning/azure_ai_foundry.py +67 -0
- agno/reasoning/deepseek.py +63 -0
- agno/reasoning/default.py +97 -0
- agno/reasoning/gemini.py +73 -0
- agno/reasoning/groq.py +71 -0
- agno/reasoning/helpers.py +24 -1
- agno/reasoning/ollama.py +67 -0
- agno/reasoning/openai.py +86 -0
- agno/reasoning/step.py +2 -1
- agno/reasoning/vertexai.py +76 -0
- agno/run/__init__.py +6 -0
- agno/run/agent.py +822 -0
- agno/run/base.py +247 -0
- agno/run/cancel.py +81 -0
- agno/run/requirement.py +181 -0
- agno/run/team.py +767 -0
- agno/run/workflow.py +708 -0
- agno/session/__init__.py +10 -0
- agno/session/agent.py +260 -0
- agno/session/summary.py +265 -0
- agno/session/team.py +342 -0
- agno/session/workflow.py +501 -0
- agno/table.py +10 -0
- agno/team/__init__.py +37 -0
- agno/team/team.py +9536 -0
- agno/tools/__init__.py +7 -0
- agno/tools/agentql.py +120 -0
- agno/tools/airflow.py +22 -12
- agno/tools/api.py +122 -0
- agno/tools/apify.py +276 -83
- agno/tools/{arxiv_toolkit.py → arxiv.py} +20 -12
- agno/tools/aws_lambda.py +28 -7
- agno/tools/aws_ses.py +66 -0
- agno/tools/baidusearch.py +11 -4
- agno/tools/bitbucket.py +292 -0
- agno/tools/brandfetch.py +213 -0
- agno/tools/bravesearch.py +106 -0
- agno/tools/brightdata.py +367 -0
- agno/tools/browserbase.py +209 -0
- agno/tools/calcom.py +32 -23
- agno/tools/calculator.py +24 -37
- agno/tools/cartesia.py +187 -0
- agno/tools/{clickup_tool.py → clickup.py} +17 -28
- agno/tools/confluence.py +91 -26
- agno/tools/crawl4ai.py +139 -43
- agno/tools/csv_toolkit.py +28 -22
- agno/tools/dalle.py +36 -22
- agno/tools/daytona.py +475 -0
- agno/tools/decorator.py +169 -14
- agno/tools/desi_vocal.py +23 -11
- agno/tools/discord.py +32 -29
- agno/tools/docker.py +716 -0
- agno/tools/duckdb.py +76 -81
- agno/tools/duckduckgo.py +43 -40
- agno/tools/e2b.py +703 -0
- agno/tools/eleven_labs.py +65 -54
- agno/tools/email.py +13 -5
- agno/tools/evm.py +129 -0
- agno/tools/exa.py +324 -42
- agno/tools/fal.py +39 -35
- agno/tools/file.py +196 -30
- agno/tools/file_generation.py +356 -0
- agno/tools/financial_datasets.py +288 -0
- agno/tools/firecrawl.py +108 -33
- agno/tools/function.py +960 -122
- agno/tools/giphy.py +34 -12
- agno/tools/github.py +1294 -97
- agno/tools/gmail.py +922 -0
- agno/tools/google_bigquery.py +117 -0
- agno/tools/google_drive.py +271 -0
- agno/tools/google_maps.py +253 -0
- agno/tools/googlecalendar.py +607 -107
- agno/tools/googlesheets.py +377 -0
- agno/tools/hackernews.py +20 -12
- agno/tools/jina.py +24 -14
- agno/tools/jira.py +48 -19
- agno/tools/knowledge.py +218 -0
- agno/tools/linear.py +82 -43
- agno/tools/linkup.py +58 -0
- agno/tools/local_file_system.py +15 -7
- agno/tools/lumalab.py +41 -26
- agno/tools/mcp/__init__.py +10 -0
- agno/tools/mcp/mcp.py +331 -0
- agno/tools/mcp/multi_mcp.py +347 -0
- agno/tools/mcp/params.py +24 -0
- agno/tools/mcp_toolbox.py +284 -0
- agno/tools/mem0.py +193 -0
- agno/tools/memory.py +419 -0
- agno/tools/mlx_transcribe.py +11 -9
- agno/tools/models/azure_openai.py +190 -0
- agno/tools/models/gemini.py +203 -0
- agno/tools/models/groq.py +158 -0
- agno/tools/models/morph.py +186 -0
- agno/tools/models/nebius.py +124 -0
- agno/tools/models_labs.py +163 -82
- agno/tools/moviepy_video.py +18 -13
- agno/tools/nano_banana.py +151 -0
- agno/tools/neo4j.py +134 -0
- agno/tools/newspaper.py +15 -4
- agno/tools/newspaper4k.py +19 -6
- agno/tools/notion.py +204 -0
- agno/tools/openai.py +181 -17
- agno/tools/openbb.py +27 -20
- agno/tools/opencv.py +321 -0
- agno/tools/openweather.py +233 -0
- agno/tools/oxylabs.py +385 -0
- agno/tools/pandas.py +25 -15
- agno/tools/parallel.py +314 -0
- agno/tools/postgres.py +238 -185
- agno/tools/pubmed.py +125 -13
- agno/tools/python.py +48 -35
- agno/tools/reasoning.py +283 -0
- agno/tools/reddit.py +207 -29
- agno/tools/redshift.py +406 -0
- agno/tools/replicate.py +69 -26
- agno/tools/resend.py +11 -6
- agno/tools/scrapegraph.py +179 -19
- agno/tools/searxng.py +23 -31
- agno/tools/serpapi.py +15 -10
- agno/tools/serper.py +255 -0
- agno/tools/shell.py +23 -12
- agno/tools/shopify.py +1519 -0
- agno/tools/slack.py +56 -14
- agno/tools/sleep.py +8 -6
- agno/tools/spider.py +35 -11
- agno/tools/spotify.py +919 -0
- agno/tools/sql.py +34 -19
- agno/tools/tavily.py +158 -8
- agno/tools/telegram.py +18 -8
- agno/tools/todoist.py +218 -0
- agno/tools/toolkit.py +134 -9
- agno/tools/trafilatura.py +388 -0
- agno/tools/trello.py +25 -28
- agno/tools/twilio.py +18 -9
- agno/tools/user_control_flow.py +78 -0
- agno/tools/valyu.py +228 -0
- agno/tools/visualization.py +467 -0
- agno/tools/webbrowser.py +28 -0
- agno/tools/webex.py +76 -0
- agno/tools/website.py +23 -19
- agno/tools/webtools.py +45 -0
- agno/tools/whatsapp.py +286 -0
- agno/tools/wikipedia.py +28 -19
- agno/tools/workflow.py +285 -0
- agno/tools/{twitter.py → x.py} +142 -46
- agno/tools/yfinance.py +41 -39
- agno/tools/youtube.py +34 -17
- agno/tools/zendesk.py +15 -5
- agno/tools/zep.py +454 -0
- agno/tools/zoom.py +86 -37
- agno/tracing/__init__.py +12 -0
- agno/tracing/exporter.py +157 -0
- agno/tracing/schemas.py +276 -0
- agno/tracing/setup.py +111 -0
- agno/utils/agent.py +938 -0
- agno/utils/audio.py +37 -1
- agno/utils/certs.py +27 -0
- agno/utils/code_execution.py +11 -0
- agno/utils/common.py +103 -20
- agno/utils/cryptography.py +22 -0
- agno/utils/dttm.py +33 -0
- agno/utils/events.py +700 -0
- agno/utils/functions.py +107 -37
- agno/utils/gemini.py +426 -0
- agno/utils/hooks.py +171 -0
- agno/utils/http.py +185 -0
- agno/utils/json_schema.py +159 -37
- agno/utils/knowledge.py +36 -0
- agno/utils/location.py +19 -0
- agno/utils/log.py +221 -8
- agno/utils/mcp.py +214 -0
- agno/utils/media.py +335 -14
- agno/utils/merge_dict.py +22 -1
- agno/utils/message.py +77 -2
- agno/utils/models/ai_foundry.py +50 -0
- agno/utils/models/claude.py +373 -0
- agno/utils/models/cohere.py +94 -0
- agno/utils/models/llama.py +85 -0
- agno/utils/models/mistral.py +100 -0
- agno/utils/models/openai_responses.py +140 -0
- agno/utils/models/schema_utils.py +153 -0
- agno/utils/models/watsonx.py +41 -0
- agno/utils/openai.py +257 -0
- agno/utils/pickle.py +1 -1
- agno/utils/pprint.py +124 -8
- agno/utils/print_response/agent.py +930 -0
- agno/utils/print_response/team.py +1914 -0
- agno/utils/print_response/workflow.py +1668 -0
- agno/utils/prompts.py +111 -0
- agno/utils/reasoning.py +108 -0
- agno/utils/response.py +163 -0
- agno/utils/serialize.py +32 -0
- agno/utils/shell.py +4 -4
- agno/utils/streamlit.py +487 -0
- agno/utils/string.py +204 -51
- agno/utils/team.py +139 -0
- agno/utils/timer.py +9 -2
- agno/utils/tokens.py +657 -0
- agno/utils/tools.py +19 -1
- agno/utils/whatsapp.py +305 -0
- agno/utils/yaml_io.py +3 -3
- agno/vectordb/__init__.py +2 -0
- agno/vectordb/base.py +87 -9
- agno/vectordb/cassandra/__init__.py +5 -1
- agno/vectordb/cassandra/cassandra.py +383 -27
- agno/vectordb/chroma/__init__.py +4 -0
- agno/vectordb/chroma/chromadb.py +748 -83
- agno/vectordb/clickhouse/__init__.py +7 -1
- agno/vectordb/clickhouse/clickhousedb.py +554 -53
- agno/vectordb/couchbase/__init__.py +3 -0
- agno/vectordb/couchbase/couchbase.py +1446 -0
- agno/vectordb/lancedb/__init__.py +5 -0
- agno/vectordb/lancedb/lance_db.py +730 -98
- agno/vectordb/langchaindb/__init__.py +5 -0
- agno/vectordb/langchaindb/langchaindb.py +163 -0
- agno/vectordb/lightrag/__init__.py +5 -0
- agno/vectordb/lightrag/lightrag.py +388 -0
- agno/vectordb/llamaindex/__init__.py +3 -0
- agno/vectordb/llamaindex/llamaindexdb.py +166 -0
- agno/vectordb/milvus/__init__.py +3 -0
- agno/vectordb/milvus/milvus.py +966 -78
- agno/vectordb/mongodb/__init__.py +9 -1
- agno/vectordb/mongodb/mongodb.py +1175 -172
- agno/vectordb/pgvector/__init__.py +8 -0
- agno/vectordb/pgvector/pgvector.py +599 -115
- agno/vectordb/pineconedb/__init__.py +5 -1
- agno/vectordb/pineconedb/pineconedb.py +406 -43
- agno/vectordb/qdrant/__init__.py +4 -0
- agno/vectordb/qdrant/qdrant.py +914 -61
- agno/vectordb/redis/__init__.py +9 -0
- agno/vectordb/redis/redisdb.py +682 -0
- agno/vectordb/singlestore/__init__.py +8 -1
- agno/vectordb/singlestore/singlestore.py +771 -0
- agno/vectordb/surrealdb/__init__.py +3 -0
- agno/vectordb/surrealdb/surrealdb.py +663 -0
- agno/vectordb/upstashdb/__init__.py +5 -0
- agno/vectordb/upstashdb/upstashdb.py +718 -0
- agno/vectordb/weaviate/__init__.py +8 -0
- agno/vectordb/weaviate/index.py +15 -0
- agno/vectordb/weaviate/weaviate.py +1009 -0
- agno/workflow/__init__.py +23 -1
- agno/workflow/agent.py +299 -0
- agno/workflow/condition.py +759 -0
- agno/workflow/loop.py +756 -0
- agno/workflow/parallel.py +853 -0
- agno/workflow/router.py +723 -0
- agno/workflow/step.py +1564 -0
- agno/workflow/steps.py +613 -0
- agno/workflow/types.py +556 -0
- agno/workflow/workflow.py +4327 -514
- agno-2.3.13.dist-info/METADATA +639 -0
- agno-2.3.13.dist-info/RECORD +613 -0
- {agno-0.1.2.dist-info → agno-2.3.13.dist-info}/WHEEL +1 -1
- agno-2.3.13.dist-info/licenses/LICENSE +201 -0
- agno/api/playground.py +0 -91
- agno/api/schemas/playground.py +0 -22
- agno/api/schemas/user.py +0 -22
- agno/api/schemas/workspace.py +0 -46
- agno/api/user.py +0 -160
- agno/api/workspace.py +0 -151
- agno/cli/auth_server.py +0 -118
- agno/cli/config.py +0 -275
- agno/cli/console.py +0 -88
- agno/cli/credentials.py +0 -23
- agno/cli/entrypoint.py +0 -571
- agno/cli/operator.py +0 -355
- agno/cli/settings.py +0 -85
- agno/cli/ws/ws_cli.py +0 -817
- agno/constants.py +0 -13
- agno/document/__init__.py +0 -1
- agno/document/chunking/semantic.py +0 -47
- agno/document/chunking/strategy.py +0 -31
- agno/document/reader/__init__.py +0 -1
- agno/document/reader/arxiv_reader.py +0 -41
- agno/document/reader/base.py +0 -22
- agno/document/reader/csv_reader.py +0 -84
- agno/document/reader/docx_reader.py +0 -46
- agno/document/reader/firecrawl_reader.py +0 -99
- agno/document/reader/json_reader.py +0 -43
- agno/document/reader/pdf_reader.py +0 -219
- agno/document/reader/s3/pdf_reader.py +0 -46
- agno/document/reader/s3/text_reader.py +0 -51
- agno/document/reader/text_reader.py +0 -41
- agno/document/reader/website_reader.py +0 -175
- agno/document/reader/youtube_reader.py +0 -50
- agno/embedder/__init__.py +0 -1
- agno/embedder/azure_openai.py +0 -86
- agno/embedder/cohere.py +0 -72
- agno/embedder/fastembed.py +0 -37
- agno/embedder/google.py +0 -73
- agno/embedder/huggingface.py +0 -54
- agno/embedder/mistral.py +0 -80
- agno/embedder/ollama.py +0 -57
- agno/embedder/openai.py +0 -74
- agno/embedder/sentence_transformer.py +0 -38
- agno/embedder/voyageai.py +0 -64
- agno/eval/perf.py +0 -201
- agno/file/__init__.py +0 -1
- agno/file/file.py +0 -16
- agno/file/local/csv.py +0 -32
- agno/file/local/txt.py +0 -19
- agno/infra/app.py +0 -240
- agno/infra/base.py +0 -144
- agno/infra/context.py +0 -20
- agno/infra/db_app.py +0 -52
- agno/infra/resource.py +0 -205
- agno/infra/resources.py +0 -55
- agno/knowledge/agent.py +0 -230
- agno/knowledge/arxiv.py +0 -22
- agno/knowledge/combined.py +0 -22
- agno/knowledge/csv.py +0 -28
- agno/knowledge/csv_url.py +0 -19
- agno/knowledge/document.py +0 -20
- agno/knowledge/docx.py +0 -30
- agno/knowledge/json.py +0 -28
- agno/knowledge/langchain.py +0 -71
- agno/knowledge/llamaindex.py +0 -66
- agno/knowledge/pdf.py +0 -28
- agno/knowledge/pdf_url.py +0 -26
- agno/knowledge/s3/base.py +0 -60
- agno/knowledge/s3/pdf.py +0 -21
- agno/knowledge/s3/text.py +0 -23
- agno/knowledge/text.py +0 -30
- agno/knowledge/website.py +0 -88
- agno/knowledge/wikipedia.py +0 -31
- agno/knowledge/youtube.py +0 -22
- agno/memory/agent.py +0 -392
- agno/memory/classifier.py +0 -104
- agno/memory/db/__init__.py +0 -1
- agno/memory/db/base.py +0 -42
- agno/memory/db/mongodb.py +0 -189
- agno/memory/db/postgres.py +0 -203
- agno/memory/db/sqlite.py +0 -193
- agno/memory/memory.py +0 -15
- agno/memory/row.py +0 -36
- agno/memory/summarizer.py +0 -192
- agno/memory/summary.py +0 -19
- agno/memory/workflow.py +0 -38
- agno/models/google/gemini_openai.py +0 -26
- agno/models/ollama/hermes.py +0 -221
- agno/models/ollama/tools.py +0 -362
- agno/models/vertexai/gemini.py +0 -595
- agno/playground/__init__.py +0 -3
- agno/playground/async_router.py +0 -421
- agno/playground/deploy.py +0 -249
- agno/playground/operator.py +0 -92
- agno/playground/playground.py +0 -91
- agno/playground/schemas.py +0 -76
- agno/playground/serve.py +0 -55
- agno/playground/sync_router.py +0 -405
- agno/reasoning/agent.py +0 -68
- agno/run/response.py +0 -112
- agno/storage/agent/__init__.py +0 -0
- agno/storage/agent/base.py +0 -38
- agno/storage/agent/dynamodb.py +0 -350
- agno/storage/agent/json.py +0 -92
- agno/storage/agent/mongodb.py +0 -228
- agno/storage/agent/postgres.py +0 -367
- agno/storage/agent/session.py +0 -79
- agno/storage/agent/singlestore.py +0 -303
- agno/storage/agent/sqlite.py +0 -357
- agno/storage/agent/yaml.py +0 -93
- agno/storage/workflow/__init__.py +0 -0
- agno/storage/workflow/base.py +0 -40
- agno/storage/workflow/mongodb.py +0 -233
- agno/storage/workflow/postgres.py +0 -366
- agno/storage/workflow/session.py +0 -60
- agno/storage/workflow/sqlite.py +0 -359
- agno/tools/googlesearch.py +0 -88
- agno/utils/defaults.py +0 -57
- agno/utils/filesystem.py +0 -39
- agno/utils/git.py +0 -52
- agno/utils/json_io.py +0 -30
- agno/utils/load_env.py +0 -19
- agno/utils/py_io.py +0 -19
- agno/utils/pyproject.py +0 -18
- agno/utils/resource_filter.py +0 -31
- agno/vectordb/singlestore/s2vectordb.py +0 -390
- agno/vectordb/singlestore/s2vectordb2.py +0 -355
- agno/workspace/__init__.py +0 -0
- agno/workspace/config.py +0 -325
- agno/workspace/enums.py +0 -6
- agno/workspace/helpers.py +0 -48
- agno/workspace/operator.py +0 -758
- agno/workspace/settings.py +0 -63
- agno-0.1.2.dist-info/LICENSE +0 -375
- agno-0.1.2.dist-info/METADATA +0 -502
- agno-0.1.2.dist-info/RECORD +0 -352
- agno-0.1.2.dist-info/entry_points.txt +0 -3
- /agno/{cli → db/migrations}/__init__.py +0 -0
- /agno/{cli/ws → db/migrations/versions}/__init__.py +0 -0
- /agno/{document/chunking/__init__.py → db/schemas/metrics.py} +0 -0
- /agno/{document/reader/s3 → integrations}/__init__.py +0 -0
- /agno/{file/local → knowledge/chunking}/__init__.py +0 -0
- /agno/{infra → knowledge/remote_content}/__init__.py +0 -0
- /agno/{knowledge/s3 → tools/models}/__init__.py +0 -0
- /agno/{reranker → utils/models}/__init__.py +0 -0
- /agno/{storage → utils/print_response}/__init__.py +0 -0
- {agno-0.1.2.dist-info → agno-2.3.13.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import uuid
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import IO, Any, List, Optional, Union
|
|
5
|
+
|
|
6
|
+
from agno.knowledge.chunking.fixed import FixedSizeChunking
|
|
7
|
+
from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
|
|
8
|
+
from agno.knowledge.document.base import Document
|
|
9
|
+
from agno.knowledge.reader.base import Reader
|
|
10
|
+
from agno.knowledge.types import ContentType
|
|
11
|
+
from agno.utils.log import log_debug, log_error, log_warning
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class TextReader(Reader):
|
|
15
|
+
"""Reader for Text files"""
|
|
16
|
+
|
|
17
|
+
def __init__(self, chunking_strategy: Optional[ChunkingStrategy] = FixedSizeChunking(), **kwargs):
|
|
18
|
+
super().__init__(chunking_strategy=chunking_strategy, **kwargs)
|
|
19
|
+
|
|
20
|
+
@classmethod
|
|
21
|
+
def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:
|
|
22
|
+
"""Get the list of supported chunking strategies for Text readers."""
|
|
23
|
+
return [
|
|
24
|
+
ChunkingStrategyType.FIXED_SIZE_CHUNKER,
|
|
25
|
+
ChunkingStrategyType.AGENTIC_CHUNKER,
|
|
26
|
+
ChunkingStrategyType.DOCUMENT_CHUNKER,
|
|
27
|
+
ChunkingStrategyType.RECURSIVE_CHUNKER,
|
|
28
|
+
ChunkingStrategyType.SEMANTIC_CHUNKER,
|
|
29
|
+
]
|
|
30
|
+
|
|
31
|
+
@classmethod
|
|
32
|
+
def get_supported_content_types(self) -> List[ContentType]:
|
|
33
|
+
return [ContentType.TXT]
|
|
34
|
+
|
|
35
|
+
def read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
|
|
36
|
+
try:
|
|
37
|
+
if isinstance(file, Path):
|
|
38
|
+
if not file.exists():
|
|
39
|
+
raise FileNotFoundError(f"Could not find file: {file}")
|
|
40
|
+
log_debug(f"Reading: {file}")
|
|
41
|
+
file_name = name or file.stem
|
|
42
|
+
file_contents = file.read_text(self.encoding or "utf-8")
|
|
43
|
+
else:
|
|
44
|
+
# Handle BytesIO and other file-like objects that may not have a name attribute
|
|
45
|
+
if name:
|
|
46
|
+
file_name = name
|
|
47
|
+
elif hasattr(file, "name") and file.name is not None:
|
|
48
|
+
file_name = file.name.split(".")[0]
|
|
49
|
+
else:
|
|
50
|
+
file_name = "text_file"
|
|
51
|
+
log_debug(f"Reading uploaded file: {file_name}")
|
|
52
|
+
file.seek(0)
|
|
53
|
+
file_contents = file.read().decode(self.encoding or "utf-8")
|
|
54
|
+
|
|
55
|
+
documents = [
|
|
56
|
+
Document(
|
|
57
|
+
name=file_name,
|
|
58
|
+
id=str(uuid.uuid4()),
|
|
59
|
+
content=file_contents,
|
|
60
|
+
)
|
|
61
|
+
]
|
|
62
|
+
if self.chunk:
|
|
63
|
+
chunked_documents = []
|
|
64
|
+
for document in documents:
|
|
65
|
+
chunked_documents.extend(self.chunk_document(document))
|
|
66
|
+
return chunked_documents
|
|
67
|
+
return documents
|
|
68
|
+
except Exception as e:
|
|
69
|
+
log_error(f"Error reading: {file}: {e}")
|
|
70
|
+
return []
|
|
71
|
+
|
|
72
|
+
async def async_read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
|
|
73
|
+
try:
|
|
74
|
+
if isinstance(file, Path):
|
|
75
|
+
if not file.exists():
|
|
76
|
+
raise FileNotFoundError(f"Could not find file: {file}")
|
|
77
|
+
|
|
78
|
+
log_debug(f"Reading asynchronously: {file}")
|
|
79
|
+
file_name = name or file.stem
|
|
80
|
+
|
|
81
|
+
try:
|
|
82
|
+
import aiofiles
|
|
83
|
+
|
|
84
|
+
async with aiofiles.open(file, "r", encoding=self.encoding or "utf-8") as f:
|
|
85
|
+
file_contents = await f.read()
|
|
86
|
+
except ImportError:
|
|
87
|
+
log_warning("aiofiles not installed, using synchronous file I/O")
|
|
88
|
+
file_contents = file.read_text(self.encoding or "utf-8")
|
|
89
|
+
else:
|
|
90
|
+
# Handle BytesIO and other file-like objects that may not have a name attribute
|
|
91
|
+
if name:
|
|
92
|
+
file_name = name
|
|
93
|
+
elif hasattr(file, "name") and file.name is not None:
|
|
94
|
+
file_name = file.name.split(".")[0]
|
|
95
|
+
else:
|
|
96
|
+
file_name = "text_file"
|
|
97
|
+
log_debug(f"Reading uploaded file asynchronously: {file_name}")
|
|
98
|
+
file.seek(0)
|
|
99
|
+
file_contents = file.read().decode(self.encoding or "utf-8")
|
|
100
|
+
|
|
101
|
+
document = Document(
|
|
102
|
+
name=file_name,
|
|
103
|
+
id=str(uuid.uuid4()),
|
|
104
|
+
content=file_contents,
|
|
105
|
+
)
|
|
106
|
+
|
|
107
|
+
if self.chunk:
|
|
108
|
+
return await self._async_chunk_document(document)
|
|
109
|
+
return [document]
|
|
110
|
+
except Exception as e:
|
|
111
|
+
log_error(f"Error reading asynchronously: {file}: {e}")
|
|
112
|
+
return []
|
|
113
|
+
|
|
114
|
+
async def _async_chunk_document(self, document: Document) -> List[Document]:
    """Split ``document`` into chunks, returning them through ``asyncio.gather``.

    Returns ``[document]`` unchanged when chunking is disabled, when the
    document is falsy, or when ``chunk_document`` produces no chunks.
    Otherwise returns the chunks in the order ``chunk_document`` gave them.
    """
    chunking_disabled = not self.chunk
    if chunking_disabled or not document:
        return [document]

    pieces = self.chunk_document(document)
    if not pieces:
        return [document]

    async def _passthrough(piece: Document) -> Document:
        # No per-chunk work is done; each chunk is handed back as-is.
        return piece

    pending = [_passthrough(piece) for piece in pieces]
    return await asyncio.gather(*pending)
|
|
@@ -0,0 +1,325 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import random
|
|
3
|
+
import time
|
|
4
|
+
from dataclasses import dataclass, field
|
|
5
|
+
from typing import Dict, List, Literal, Optional, Set
|
|
6
|
+
from urllib.parse import urlparse
|
|
7
|
+
|
|
8
|
+
import httpx
|
|
9
|
+
|
|
10
|
+
from agno.knowledge.chunking.semantic import SemanticChunking
|
|
11
|
+
from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
|
|
12
|
+
from agno.knowledge.document.base import Document
|
|
13
|
+
from agno.knowledge.reader.base import Reader
|
|
14
|
+
from agno.knowledge.types import ContentType
|
|
15
|
+
from agno.utils.log import log_debug, logger
|
|
16
|
+
|
|
17
|
+
try:
|
|
18
|
+
from bs4 import BeautifulSoup, Tag # noqa: F401
|
|
19
|
+
except ImportError:
|
|
20
|
+
raise ImportError("The `bs4` package is not installed. Please install it via `pip install beautifulsoup4`.")
|
|
21
|
+
|
|
22
|
+
try:
|
|
23
|
+
from ddgs import DDGS
|
|
24
|
+
except ImportError:
|
|
25
|
+
raise ImportError("The `ddgs` package is not installed. Please install it via `pip install ddgs`.")
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass
class WebSearchReader(Reader):
    """Reader that uses web search to find content for a given query.

    The query is sent to the configured search engine (only DuckDuckGo is
    supported), every result URL is downloaded with ``httpx``, HTML responses
    are reduced to plain text, and each page becomes a ``Document`` that is
    chunked when ``self.chunk`` is truthy.
    """

    search_timeout: int = 10  # passed as ``timeout`` to the DDGS client

    request_timeout: int = 30  # passed as ``timeout`` to httpx
    delay_between_requests: float = 2.0  # Increased default delay
    max_retries: int = 3  # attempts per URL in the synchronous fetch path
    user_agent: str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"

    # Search engine configuration
    search_engine: Literal["duckduckgo"] = "duckduckgo"
    search_delay: float = 3.0  # Delay between search requests
    max_search_retries: int = 2  # Retries for search operations

    # Rate limiting
    rate_limit_delay: float = 5.0  # Delay when rate limited
    exponential_backoff: bool = True  # double rate_limit_delay on each retry

    # Internal state
    _visited_urls: Set[str] = field(default_factory=set)
    _last_search_time: float = field(default=0.0, init=False)

    # Override default chunking strategy
    # NOTE(review): this instance is created once, when the class body runs,
    # and is shared by every reader that does not pass its own strategy.
    chunking_strategy: Optional[ChunkingStrategy] = SemanticChunking()

    @classmethod
    def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
        """Get the list of supported chunking strategies for Web Search readers."""
        return [
            ChunkingStrategyType.AGENTIC_CHUNKER,
            ChunkingStrategyType.DOCUMENT_CHUNKER,
            ChunkingStrategyType.RECURSIVE_CHUNKER,
            ChunkingStrategyType.SEMANTIC_CHUNKER,
            ChunkingStrategyType.FIXED_SIZE_CHUNKER,
        ]

    @classmethod
    def get_supported_content_types(cls) -> List[ContentType]:
        """Get the content types this reader accepts (topics only)."""
        return [ContentType.TOPIC]

    def _respect_rate_limits(self):
        """Ensure we don't exceed rate limits.

        Blocks with ``time.sleep`` until at least ``search_delay`` seconds
        have passed since the previous search, then records the current time.
        """
        current_time = time.time()
        time_since_last_search = current_time - self._last_search_time

        if time_since_last_search < self.search_delay:
            sleep_time = self.search_delay - time_since_last_search
            log_debug(f"Rate limiting: sleeping for {sleep_time:.2f} seconds")
            time.sleep(sleep_time)

        self._last_search_time = time.time()

    def _perform_duckduckgo_search(self, query: str) -> List[Dict[str, str]]:
        """Perform web search using DuckDuckGo with rate limiting.

        Returns:
            A list of dicts with ``title``, ``url`` and ``description`` keys
            (each defaulting to ``""``), or an empty list once every attempt
            has failed.
        """
        log_debug(f"Performing DuckDuckGo search for: {query}")

        for attempt in range(self.max_search_retries):
            try:
                self._respect_rate_limits()

                ddgs = DDGS(timeout=self.search_timeout)
                search_results = ddgs.text(query=query, max_results=self.max_results)

                # Convert to list and extract relevant fields
                results = [
                    {
                        "title": result.get("title", ""),
                        "url": result.get("href", ""),
                        "description": result.get("body", ""),
                    }
                    for result in search_results
                ]

                log_debug(f"Found {len(results)} search results")
                return results

            except Exception as e:
                logger.warning(f"DuckDuckGo search attempt {attempt + 1} failed: {e}")

                # Nothing left to retry: report and stop instead of sleeping
                # through a back-off that no further attempt would follow.
                if attempt >= self.max_search_retries - 1:
                    logger.error(f"All DuckDuckGo search attempts failed: {e}")
                    return []

                error_text = str(e)
                if "rate limit" in error_text.lower() or "429" in error_text:
                    # Rate limited - wait longer
                    wait_time = (
                        self.rate_limit_delay * (2**attempt) if self.exponential_backoff else self.rate_limit_delay
                    )
                    logger.info(f"Rate limited, waiting {wait_time} seconds before retry")
                    time.sleep(wait_time)
                else:
                    # Other error - shorter wait
                    time.sleep(self.search_delay)

        # Reached only when max_search_retries is zero or negative.
        return []

    def _perform_web_search(self, query: str) -> List[Dict[str, str]]:
        """Perform web search using the configured search engine."""
        if self.search_engine == "duckduckgo":
            return self._perform_duckduckgo_search(query)
        else:
            logger.error(f"Unsupported search engine: {self.search_engine}")
            return []

    def _is_valid_url(self, url: str) -> bool:
        """Check if URL is an http(s) URL with a host and not already visited."""
        try:
            parsed = urlparse(url)
            return bool(parsed.scheme in ["http", "https"] and parsed.netloc and url not in self._visited_urls)
        except Exception:
            return False

    def _extract_text_from_html(self, html_content: str, url: str) -> str:
        """Extract clean text content from HTML.

        ``url`` is used only in the warning message. If parsing fails, the
        raw ``html_content`` is returned unchanged.
        """
        try:
            soup = BeautifulSoup(html_content, "html.parser")

            # Remove script and style elements
            for script in soup(["script", "style"]):
                script.decompose()

            # Get text content
            text = soup.get_text()

            # Clean up whitespace
            lines = (line.strip() for line in text.splitlines())
            chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
            text = " ".join(chunk for chunk in chunks if chunk)

            return text

        except Exception as e:
            logger.warning(f"Error extracting text from {url}: {e}")
            return html_content

    def _fetch_url_content(self, url: str) -> Optional[str]:
        """Fetch content from a URL with retry logic.

        Returns:
            Extracted text for ``text/html`` responses, the raw response text
            for any other content type, or ``None`` after ``max_retries``
            failed attempts.
        """
        headers = {"User-Agent": self.user_agent}

        for attempt in range(self.max_retries):
            try:
                response = httpx.get(url, headers=headers, timeout=self.request_timeout, follow_redirects=True)
                response.raise_for_status()

                # Check if it's HTML content
                content_type = response.headers.get("content-type", "").lower()
                if "text/html" in content_type:
                    return self._extract_text_from_html(response.text, url)
                else:
                    # For non-HTML content, return as-is
                    return response.text

            except Exception as e:
                logger.warning(f"Attempt {attempt + 1} failed for {url}: {e}")
                if attempt < self.max_retries - 1:
                    time.sleep(random.uniform(1, 3))  # Random delay between retries

        logger.error(f"Failed to fetch content from {url} after {self.max_retries} attempts")
        return None

    def _create_document_from_url(self, url: str, content: str, search_result: Dict[str, str]) -> Document:
        """Create a Document object from URL content and search result metadata."""
        # Use the URL as the document ID
        doc_id = url

        # Use the search result title as the document name, fallback to the
        # URL's host. `or` is required: the search step always stores a
        # "title" key, defaulting to "", so a dict default would never apply.
        doc_name = search_result.get("title") or urlparse(url).netloc

        # Create metadata with search information
        meta_data = {
            "url": url,
            "search_title": search_result.get("title", ""),
            "search_description": search_result.get("description", ""),
            "source": "web_search",
            "search_engine": self.search_engine,
        }

        return Document(id=doc_id, name=doc_name, content=content, meta_data=meta_data)

    def read(self, query: str) -> List[Document]:
        """Read content for a given query by performing web search and fetching content.

        Args:
            query: The search query; must be non-empty.

        Returns:
            Documents (or chunks, when chunking is enabled) built from the
            fetched result pages. Empty when the search yields nothing.

        Raises:
            ValueError: If ``query`` is empty.
        """
        if not query:
            raise ValueError("Query cannot be empty")

        log_debug(f"Starting web search reader for query: {query}")

        # Perform web search
        search_results = self._perform_web_search(query)
        if not search_results:
            logger.warning(f"No search results found for query: {query}")
            return []

        documents: List[Document] = []

        for result in search_results:
            url = result.get("url", "")

            # Skip if URL is invalid or already visited
            if not self._is_valid_url(url):
                continue

            # Mark URL as visited
            self._visited_urls.add(url)

            # Add delay between requests to be respectful
            if documents:
                time.sleep(self.delay_between_requests)

            # Fetch content from URL
            content = self._fetch_url_content(url)
            if content is None:
                continue

            # Create document
            document = self._create_document_from_url(url, content, result)

            # Apply chunking if enabled
            if self.chunk:
                documents.extend(self.chunk_document(document))
            else:
                documents.append(document)

            # Stop if we've reached max_results
            # NOTE(review): with chunking enabled this counts chunks, not
            # pages, so one long page can end the loop - confirm intent.
            if len(documents) >= self.max_results:
                break

        log_debug(f"Created {len(documents)} documents from web search")
        return documents

    async def async_read(self, query: str) -> List[Document]:
        """Asynchronously read content for a given query.

        Result pages are fetched one at a time with ``httpx.AsyncClient``,
        pausing ``delay_between_requests`` seconds between results.

        Args:
            query: The search query; must be non-empty.

        Returns:
            Documents (or chunks, when chunking is enabled) built from the
            fetched result pages. Empty when the search yields nothing.

        Raises:
            ValueError: If ``query`` is empty.
        """
        if not query:
            raise ValueError("Query cannot be empty")

        log_debug(f"Starting async web search reader for query: {query}")

        # Perform web search (synchronous operation)
        # NOTE(review): this call can block the event loop, since the search
        # path waits with time.sleep.
        search_results = self._perform_web_search(query)
        if not search_results:
            logger.warning(f"No search results found for query: {query}")
            return []

        async def fetch_url_async(result: Dict[str, str]) -> Optional[Document]:
            """Fetch one search result; return None if skipped or failed."""
            url = result.get("url", "")

            # Skip if URL is invalid or already visited
            if not self._is_valid_url(url):
                return None

            # Mark URL as visited
            self._visited_urls.add(url)

            try:
                headers = {"User-Agent": self.user_agent}
                async with httpx.AsyncClient(timeout=self.request_timeout) as client:
                    response = await client.get(url, headers=headers, follow_redirects=True)
                    response.raise_for_status()

                    content_type = response.headers.get("content-type", "").lower()
                    if "text/html" in content_type:
                        content = self._extract_text_from_html(response.text, url)
                    else:
                        content = response.text

                    return self._create_document_from_url(url, content, result)

            except Exception as e:
                logger.warning(f"Error fetching {url}: {e}")
                return None

        # Fetch results sequentially. Each coroutine is created only when it
        # is about to be awaited, so breaking out at max_results leaves no
        # un-awaited coroutine objects behind.
        documents: List[Document] = []
        for index, result in enumerate(search_results):
            if index > 0:  # Add delay between requests (except for the first one)
                await asyncio.sleep(self.delay_between_requests)

            doc = await fetch_url_async(result)
            if doc is None:
                continue

            # Apply chunking if enabled
            if self.chunk:
                chunked_docs = await self.chunk_documents_async([doc])
                documents.extend(chunked_docs)
            else:
                documents.append(doc)

            # Stop if we've reached max_results
            if len(documents) >= self.max_results:
                break

        log_debug(f"Created {len(documents)} documents from async web search")
        return documents
|