agno 2.2.13__py3-none-any.whl → 2.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/agent/__init__.py +6 -0
- agno/agent/agent.py +5252 -3145
- agno/agent/remote.py +525 -0
- agno/api/api.py +2 -0
- agno/client/__init__.py +3 -0
- agno/client/a2a/__init__.py +10 -0
- agno/client/a2a/client.py +554 -0
- agno/client/a2a/schemas.py +112 -0
- agno/client/a2a/utils.py +369 -0
- agno/client/os.py +2669 -0
- agno/compression/__init__.py +3 -0
- agno/compression/manager.py +247 -0
- agno/culture/manager.py +2 -2
- agno/db/base.py +927 -6
- agno/db/dynamo/dynamo.py +788 -2
- agno/db/dynamo/schemas.py +128 -0
- agno/db/dynamo/utils.py +26 -3
- agno/db/firestore/firestore.py +674 -50
- agno/db/firestore/schemas.py +41 -0
- agno/db/firestore/utils.py +25 -10
- agno/db/gcs_json/gcs_json_db.py +506 -3
- agno/db/gcs_json/utils.py +14 -2
- agno/db/in_memory/in_memory_db.py +203 -4
- agno/db/in_memory/utils.py +14 -2
- agno/db/json/json_db.py +498 -2
- agno/db/json/utils.py +14 -2
- agno/db/migrations/manager.py +199 -0
- agno/db/migrations/utils.py +19 -0
- agno/db/migrations/v1_to_v2.py +54 -16
- agno/db/migrations/versions/__init__.py +0 -0
- agno/db/migrations/versions/v2_3_0.py +977 -0
- agno/db/mongo/async_mongo.py +1013 -39
- agno/db/mongo/mongo.py +684 -4
- agno/db/mongo/schemas.py +48 -0
- agno/db/mongo/utils.py +17 -0
- agno/db/mysql/__init__.py +2 -1
- agno/db/mysql/async_mysql.py +2958 -0
- agno/db/mysql/mysql.py +722 -53
- agno/db/mysql/schemas.py +77 -11
- agno/db/mysql/utils.py +151 -8
- agno/db/postgres/async_postgres.py +1254 -137
- agno/db/postgres/postgres.py +2316 -93
- agno/db/postgres/schemas.py +153 -21
- agno/db/postgres/utils.py +22 -7
- agno/db/redis/redis.py +531 -3
- agno/db/redis/schemas.py +36 -0
- agno/db/redis/utils.py +31 -15
- agno/db/schemas/evals.py +1 -0
- agno/db/schemas/memory.py +20 -9
- agno/db/singlestore/schemas.py +70 -1
- agno/db/singlestore/singlestore.py +737 -74
- agno/db/singlestore/utils.py +13 -3
- agno/db/sqlite/async_sqlite.py +1069 -89
- agno/db/sqlite/schemas.py +133 -1
- agno/db/sqlite/sqlite.py +2203 -165
- agno/db/sqlite/utils.py +21 -11
- agno/db/surrealdb/models.py +25 -0
- agno/db/surrealdb/surrealdb.py +603 -1
- agno/db/utils.py +60 -0
- agno/eval/__init__.py +26 -3
- agno/eval/accuracy.py +25 -12
- agno/eval/agent_as_judge.py +871 -0
- agno/eval/base.py +29 -0
- agno/eval/performance.py +10 -4
- agno/eval/reliability.py +22 -13
- agno/eval/utils.py +2 -1
- agno/exceptions.py +42 -0
- agno/hooks/__init__.py +3 -0
- agno/hooks/decorator.py +164 -0
- agno/integrations/discord/client.py +13 -2
- agno/knowledge/__init__.py +4 -0
- agno/knowledge/chunking/code.py +90 -0
- agno/knowledge/chunking/document.py +65 -4
- agno/knowledge/chunking/fixed.py +4 -1
- agno/knowledge/chunking/markdown.py +102 -11
- agno/knowledge/chunking/recursive.py +2 -2
- agno/knowledge/chunking/semantic.py +130 -48
- agno/knowledge/chunking/strategy.py +18 -0
- agno/knowledge/embedder/azure_openai.py +0 -1
- agno/knowledge/embedder/google.py +1 -1
- agno/knowledge/embedder/mistral.py +1 -1
- agno/knowledge/embedder/nebius.py +1 -1
- agno/knowledge/embedder/openai.py +16 -12
- agno/knowledge/filesystem.py +412 -0
- agno/knowledge/knowledge.py +4261 -1199
- agno/knowledge/protocol.py +134 -0
- agno/knowledge/reader/arxiv_reader.py +3 -2
- agno/knowledge/reader/base.py +9 -7
- agno/knowledge/reader/csv_reader.py +91 -42
- agno/knowledge/reader/docx_reader.py +9 -10
- agno/knowledge/reader/excel_reader.py +225 -0
- agno/knowledge/reader/field_labeled_csv_reader.py +38 -48
- agno/knowledge/reader/firecrawl_reader.py +3 -2
- agno/knowledge/reader/json_reader.py +16 -22
- agno/knowledge/reader/markdown_reader.py +15 -14
- agno/knowledge/reader/pdf_reader.py +33 -28
- agno/knowledge/reader/pptx_reader.py +9 -10
- agno/knowledge/reader/reader_factory.py +135 -1
- agno/knowledge/reader/s3_reader.py +8 -16
- agno/knowledge/reader/tavily_reader.py +3 -3
- agno/knowledge/reader/text_reader.py +15 -14
- agno/knowledge/reader/utils/__init__.py +17 -0
- agno/knowledge/reader/utils/spreadsheet.py +114 -0
- agno/knowledge/reader/web_search_reader.py +8 -65
- agno/knowledge/reader/website_reader.py +16 -13
- agno/knowledge/reader/wikipedia_reader.py +36 -3
- agno/knowledge/reader/youtube_reader.py +3 -2
- agno/knowledge/remote_content/__init__.py +33 -0
- agno/knowledge/remote_content/config.py +266 -0
- agno/knowledge/remote_content/remote_content.py +105 -17
- agno/knowledge/utils.py +76 -22
- agno/learn/__init__.py +71 -0
- agno/learn/config.py +463 -0
- agno/learn/curate.py +185 -0
- agno/learn/machine.py +725 -0
- agno/learn/schemas.py +1114 -0
- agno/learn/stores/__init__.py +38 -0
- agno/learn/stores/decision_log.py +1156 -0
- agno/learn/stores/entity_memory.py +3275 -0
- agno/learn/stores/learned_knowledge.py +1583 -0
- agno/learn/stores/protocol.py +117 -0
- agno/learn/stores/session_context.py +1217 -0
- agno/learn/stores/user_memory.py +1495 -0
- agno/learn/stores/user_profile.py +1220 -0
- agno/learn/utils.py +209 -0
- agno/media.py +22 -6
- agno/memory/__init__.py +14 -1
- agno/memory/manager.py +223 -8
- agno/memory/strategies/__init__.py +15 -0
- agno/memory/strategies/base.py +66 -0
- agno/memory/strategies/summarize.py +196 -0
- agno/memory/strategies/types.py +37 -0
- agno/models/aimlapi/aimlapi.py +17 -0
- agno/models/anthropic/claude.py +434 -59
- agno/models/aws/bedrock.py +121 -20
- agno/models/aws/claude.py +131 -274
- agno/models/azure/ai_foundry.py +10 -6
- agno/models/azure/openai_chat.py +33 -10
- agno/models/base.py +1162 -561
- agno/models/cerebras/cerebras.py +120 -24
- agno/models/cerebras/cerebras_openai.py +21 -2
- agno/models/cohere/chat.py +65 -6
- agno/models/cometapi/cometapi.py +18 -1
- agno/models/dashscope/dashscope.py +2 -3
- agno/models/deepinfra/deepinfra.py +18 -1
- agno/models/deepseek/deepseek.py +69 -3
- agno/models/fireworks/fireworks.py +18 -1
- agno/models/google/gemini.py +959 -89
- agno/models/google/utils.py +22 -0
- agno/models/groq/groq.py +48 -18
- agno/models/huggingface/huggingface.py +17 -6
- agno/models/ibm/watsonx.py +16 -6
- agno/models/internlm/internlm.py +18 -1
- agno/models/langdb/langdb.py +13 -1
- agno/models/litellm/chat.py +88 -9
- agno/models/litellm/litellm_openai.py +18 -1
- agno/models/message.py +24 -5
- agno/models/meta/llama.py +40 -13
- agno/models/meta/llama_openai.py +22 -21
- agno/models/metrics.py +12 -0
- agno/models/mistral/mistral.py +8 -4
- agno/models/n1n/__init__.py +3 -0
- agno/models/n1n/n1n.py +57 -0
- agno/models/nebius/nebius.py +6 -7
- agno/models/nvidia/nvidia.py +20 -3
- agno/models/ollama/__init__.py +2 -0
- agno/models/ollama/chat.py +17 -6
- agno/models/ollama/responses.py +100 -0
- agno/models/openai/__init__.py +2 -0
- agno/models/openai/chat.py +117 -26
- agno/models/openai/open_responses.py +46 -0
- agno/models/openai/responses.py +110 -32
- agno/models/openrouter/__init__.py +2 -0
- agno/models/openrouter/openrouter.py +67 -2
- agno/models/openrouter/responses.py +146 -0
- agno/models/perplexity/perplexity.py +19 -1
- agno/models/portkey/portkey.py +7 -6
- agno/models/requesty/requesty.py +19 -2
- agno/models/response.py +20 -2
- agno/models/sambanova/sambanova.py +20 -3
- agno/models/siliconflow/siliconflow.py +19 -2
- agno/models/together/together.py +20 -3
- agno/models/vercel/v0.py +20 -3
- agno/models/vertexai/claude.py +124 -4
- agno/models/vllm/vllm.py +19 -14
- agno/models/xai/xai.py +19 -2
- agno/os/app.py +467 -137
- agno/os/auth.py +253 -5
- agno/os/config.py +22 -0
- agno/os/interfaces/a2a/a2a.py +7 -6
- agno/os/interfaces/a2a/router.py +635 -26
- agno/os/interfaces/a2a/utils.py +32 -33
- agno/os/interfaces/agui/agui.py +5 -3
- agno/os/interfaces/agui/router.py +26 -16
- agno/os/interfaces/agui/utils.py +97 -57
- agno/os/interfaces/base.py +7 -7
- agno/os/interfaces/slack/router.py +16 -7
- agno/os/interfaces/slack/slack.py +7 -7
- agno/os/interfaces/whatsapp/router.py +35 -7
- agno/os/interfaces/whatsapp/security.py +3 -1
- agno/os/interfaces/whatsapp/whatsapp.py +11 -8
- agno/os/managers.py +326 -0
- agno/os/mcp.py +652 -79
- agno/os/middleware/__init__.py +4 -0
- agno/os/middleware/jwt.py +718 -115
- agno/os/middleware/trailing_slash.py +27 -0
- agno/os/router.py +105 -1558
- agno/os/routers/agents/__init__.py +3 -0
- agno/os/routers/agents/router.py +655 -0
- agno/os/routers/agents/schema.py +288 -0
- agno/os/routers/components/__init__.py +3 -0
- agno/os/routers/components/components.py +475 -0
- agno/os/routers/database.py +155 -0
- agno/os/routers/evals/evals.py +111 -18
- agno/os/routers/evals/schemas.py +38 -5
- agno/os/routers/evals/utils.py +80 -11
- agno/os/routers/health.py +3 -3
- agno/os/routers/knowledge/knowledge.py +284 -35
- agno/os/routers/knowledge/schemas.py +14 -2
- agno/os/routers/memory/memory.py +274 -11
- agno/os/routers/memory/schemas.py +44 -3
- agno/os/routers/metrics/metrics.py +30 -15
- agno/os/routers/metrics/schemas.py +10 -6
- agno/os/routers/registry/__init__.py +3 -0
- agno/os/routers/registry/registry.py +337 -0
- agno/os/routers/session/session.py +143 -14
- agno/os/routers/teams/__init__.py +3 -0
- agno/os/routers/teams/router.py +550 -0
- agno/os/routers/teams/schema.py +280 -0
- agno/os/routers/traces/__init__.py +3 -0
- agno/os/routers/traces/schemas.py +414 -0
- agno/os/routers/traces/traces.py +549 -0
- agno/os/routers/workflows/__init__.py +3 -0
- agno/os/routers/workflows/router.py +757 -0
- agno/os/routers/workflows/schema.py +139 -0
- agno/os/schema.py +157 -584
- agno/os/scopes.py +469 -0
- agno/os/settings.py +3 -0
- agno/os/utils.py +574 -185
- agno/reasoning/anthropic.py +85 -1
- agno/reasoning/azure_ai_foundry.py +93 -1
- agno/reasoning/deepseek.py +102 -2
- agno/reasoning/default.py +6 -7
- agno/reasoning/gemini.py +87 -3
- agno/reasoning/groq.py +109 -2
- agno/reasoning/helpers.py +6 -7
- agno/reasoning/manager.py +1238 -0
- agno/reasoning/ollama.py +93 -1
- agno/reasoning/openai.py +115 -1
- agno/reasoning/vertexai.py +85 -1
- agno/registry/__init__.py +3 -0
- agno/registry/registry.py +68 -0
- agno/remote/__init__.py +3 -0
- agno/remote/base.py +581 -0
- agno/run/__init__.py +2 -4
- agno/run/agent.py +134 -19
- agno/run/base.py +49 -1
- agno/run/cancel.py +65 -52
- agno/run/cancellation_management/__init__.py +9 -0
- agno/run/cancellation_management/base.py +78 -0
- agno/run/cancellation_management/in_memory_cancellation_manager.py +100 -0
- agno/run/cancellation_management/redis_cancellation_manager.py +236 -0
- agno/run/requirement.py +181 -0
- agno/run/team.py +111 -19
- agno/run/workflow.py +2 -1
- agno/session/agent.py +57 -92
- agno/session/summary.py +1 -1
- agno/session/team.py +62 -115
- agno/session/workflow.py +353 -57
- agno/skills/__init__.py +17 -0
- agno/skills/agent_skills.py +377 -0
- agno/skills/errors.py +32 -0
- agno/skills/loaders/__init__.py +4 -0
- agno/skills/loaders/base.py +27 -0
- agno/skills/loaders/local.py +216 -0
- agno/skills/skill.py +65 -0
- agno/skills/utils.py +107 -0
- agno/skills/validator.py +277 -0
- agno/table.py +10 -0
- agno/team/__init__.py +5 -1
- agno/team/remote.py +447 -0
- agno/team/team.py +3769 -2202
- agno/tools/brandfetch.py +27 -18
- agno/tools/browserbase.py +225 -16
- agno/tools/crawl4ai.py +3 -0
- agno/tools/duckduckgo.py +25 -71
- agno/tools/exa.py +0 -21
- agno/tools/file.py +14 -13
- agno/tools/file_generation.py +12 -6
- agno/tools/firecrawl.py +15 -7
- agno/tools/function.py +94 -113
- agno/tools/google_bigquery.py +11 -2
- agno/tools/google_drive.py +4 -3
- agno/tools/knowledge.py +9 -4
- agno/tools/mcp/mcp.py +301 -18
- agno/tools/mcp/multi_mcp.py +269 -14
- agno/tools/mem0.py +11 -10
- agno/tools/memory.py +47 -46
- agno/tools/mlx_transcribe.py +10 -7
- agno/tools/models/nebius.py +5 -5
- agno/tools/models_labs.py +20 -10
- agno/tools/nano_banana.py +151 -0
- agno/tools/parallel.py +0 -7
- agno/tools/postgres.py +76 -36
- agno/tools/python.py +14 -6
- agno/tools/reasoning.py +30 -23
- agno/tools/redshift.py +406 -0
- agno/tools/shopify.py +1519 -0
- agno/tools/spotify.py +919 -0
- agno/tools/tavily.py +4 -1
- agno/tools/toolkit.py +253 -18
- agno/tools/websearch.py +93 -0
- agno/tools/website.py +1 -1
- agno/tools/wikipedia.py +1 -1
- agno/tools/workflow.py +56 -48
- agno/tools/yfinance.py +12 -11
- agno/tracing/__init__.py +12 -0
- agno/tracing/exporter.py +161 -0
- agno/tracing/schemas.py +276 -0
- agno/tracing/setup.py +112 -0
- agno/utils/agent.py +251 -10
- agno/utils/cryptography.py +22 -0
- agno/utils/dttm.py +33 -0
- agno/utils/events.py +264 -7
- agno/utils/hooks.py +111 -3
- agno/utils/http.py +161 -2
- agno/utils/mcp.py +49 -8
- agno/utils/media.py +22 -1
- agno/utils/models/ai_foundry.py +9 -2
- agno/utils/models/claude.py +20 -5
- agno/utils/models/cohere.py +9 -2
- agno/utils/models/llama.py +9 -2
- agno/utils/models/mistral.py +4 -2
- agno/utils/os.py +0 -0
- agno/utils/print_response/agent.py +99 -16
- agno/utils/print_response/team.py +223 -24
- agno/utils/print_response/workflow.py +0 -2
- agno/utils/prompts.py +8 -6
- agno/utils/remote.py +23 -0
- agno/utils/response.py +1 -13
- agno/utils/string.py +91 -2
- agno/utils/team.py +62 -12
- agno/utils/tokens.py +657 -0
- agno/vectordb/base.py +15 -2
- agno/vectordb/cassandra/cassandra.py +1 -1
- agno/vectordb/chroma/__init__.py +2 -1
- agno/vectordb/chroma/chromadb.py +468 -23
- agno/vectordb/clickhouse/clickhousedb.py +1 -1
- agno/vectordb/couchbase/couchbase.py +6 -2
- agno/vectordb/lancedb/lance_db.py +7 -38
- agno/vectordb/lightrag/lightrag.py +7 -6
- agno/vectordb/milvus/milvus.py +118 -84
- agno/vectordb/mongodb/__init__.py +2 -1
- agno/vectordb/mongodb/mongodb.py +14 -31
- agno/vectordb/pgvector/pgvector.py +120 -66
- agno/vectordb/pineconedb/pineconedb.py +2 -19
- agno/vectordb/qdrant/__init__.py +2 -1
- agno/vectordb/qdrant/qdrant.py +33 -56
- agno/vectordb/redis/__init__.py +2 -1
- agno/vectordb/redis/redisdb.py +19 -31
- agno/vectordb/singlestore/singlestore.py +17 -9
- agno/vectordb/surrealdb/surrealdb.py +2 -38
- agno/vectordb/weaviate/__init__.py +2 -1
- agno/vectordb/weaviate/weaviate.py +7 -3
- agno/workflow/__init__.py +5 -1
- agno/workflow/agent.py +2 -2
- agno/workflow/condition.py +12 -10
- agno/workflow/loop.py +28 -9
- agno/workflow/parallel.py +21 -13
- agno/workflow/remote.py +362 -0
- agno/workflow/router.py +12 -9
- agno/workflow/step.py +261 -36
- agno/workflow/steps.py +12 -8
- agno/workflow/types.py +40 -77
- agno/workflow/workflow.py +939 -213
- {agno-2.2.13.dist-info → agno-2.4.3.dist-info}/METADATA +134 -181
- agno-2.4.3.dist-info/RECORD +677 -0
- {agno-2.2.13.dist-info → agno-2.4.3.dist-info}/WHEEL +1 -1
- agno/tools/googlesearch.py +0 -98
- agno/tools/memori.py +0 -339
- agno-2.2.13.dist-info/RECORD +0 -575
- {agno-2.2.13.dist-info → agno-2.4.3.dist-info}/licenses/LICENSE +0 -0
- {agno-2.2.13.dist-info → agno-2.4.3.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
from datetime import date, datetime
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import IO, Any, Iterable, List, Optional, Sequence, Tuple, Union
|
|
4
|
+
from uuid import uuid4
|
|
5
|
+
|
|
6
|
+
from agno.knowledge.document.base import Document
|
|
7
|
+
from agno.utils.log import log_debug
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def stringify_cell_value(value: Any) -> str:
    """Render a spreadsheet cell as single-line text.

    ``None`` becomes an empty string, dates and datetimes are emitted via
    ``isoformat()``, floats holding a whole number are printed without a
    fractional part, and every other value goes through ``str()`` with any
    embedded line breaks flattened to single spaces.
    """
    if value is None:
        return ""

    # datetime is a subclass of date, so one check covers both kinds of value
    if isinstance(value, date):
        return value.isoformat()

    if isinstance(value, float) and value.is_integer():
        return str(int(value))

    text = str(value)
    # Keep each cell on one line so a row stays a single line of CSV-like output.
    # CRLF must be collapsed before lone CR / LF, otherwise it would yield two spaces.
    for line_break in ("\r\n", "\r", "\n"):
        text = text.replace(line_break, " ")
    return text
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def get_workbook_name(file: Union[Path, IO[Any]], name: Optional[str]) -> str:
    """Derive a workbook name (a file stem, without extension) for a spreadsheet source.

    Resolution order:
        1. The stem of ``name`` when a non-empty name is supplied.
        2. The stem of ``file`` when it is a ``Path``.
        3. The stem of ``file.name`` when the IO object exposes a usable one.
        4. The literal ``"workbook"``.

    Args:
        file: Path to the workbook, or an open file-like object.
        name: Optional explicit name; takes precedence over anything read from ``file``.

    Returns:
        The workbook name.
    """
    if name:
        return Path(name).stem
    if isinstance(file, Path):
        return file.stem

    # The attribute may be missing, None, or empty; all of those fall through to the default.
    file_name = getattr(file, "name", None)
    if file_name:
        try:
            return Path(file_name).stem
        except TypeError:
            # ``name`` is not path-like (for example the integer file descriptor
            # exposed by files opened from a descriptor), so it cannot name a workbook.
            pass
    return "workbook"
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def infer_file_extension(file: Union[Path, IO[Any]], name: Optional[str]) -> str:
    """Infer the lower-cased file extension (including the leading dot) of a source.

    The extension is taken from the first of these that provides one:
        1. ``file`` itself when it is a ``Path``.
        2. ``file.name`` when the IO object exposes a non-empty string name.
        3. The explicit ``name``.

    A file whose own name carries no suffix (for example an extension-less
    temporary path) no longer hides the extension of an explicit ``name``.

    Args:
        file: Path to the file, or an open file-like object.
        name: Optional explicit file name used as a fallback.

    Returns:
        The extension such as ``".xlsx"``, or ``""`` when none can be determined.
    """
    if isinstance(file, Path):
        suffix = file.suffix.lower()
    else:
        file_name = getattr(file, "name", None)
        # Only string names are usable; ``name`` may be missing, None, or non-string.
        if isinstance(file_name, str) and file_name:
            suffix = Path(file_name).suffix.lower()
        else:
            suffix = ""

    # Fall back to the explicit name only when the file itself yields no extension.
    if not suffix and name:
        suffix = Path(name).suffix.lower()
    return suffix
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def convert_xls_cell_value(cell_value: Any, cell_type: int, datemode: int) -> Any:
    """Translate a raw xlrd cell value into a native Python value.

    Only date and boolean cells are converted; every other cell type is
    returned untouched. When ``xlrd`` cannot be imported the value is
    returned as-is.

    Args:
        cell_value: The raw value read from the cell.
        cell_type: The xlrd cell type code for the cell.
        datemode: The workbook datemode passed through to ``xlrd.xldate_as_tuple``.

    Returns:
        A ``datetime`` for date cells that convert cleanly, a ``bool`` for
        boolean cells, otherwise ``cell_value`` unchanged.
    """
    try:
        import xlrd
    except ImportError:
        return cell_value

    if cell_type == xlrd.XL_CELL_BOOLEAN:
        return bool(cell_value)

    if cell_type != xlrd.XL_CELL_DATE:
        return cell_value

    # Best effort: if the date cannot be converted, hand back the raw value
    try:
        return datetime(*xlrd.xldate_as_tuple(cell_value, datemode))
    except Exception:
        return cell_value
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def row_to_csv_line(row_values: Sequence[Any]) -> str:
    """Join a row's cells into one CSV-like line.

    Each cell is rendered with ``stringify_cell_value``; empty cells at the end
    of the row are dropped, and the remaining cells are joined with ``", "``.
    A row with no non-empty cells yields an empty string.
    """
    cells = [stringify_cell_value(cell) for cell in row_values]

    # Walk back from the end to find where the trailing run of empty cells starts
    keep = len(cells)
    while keep > 0 and cells[keep - 1] == "":
        keep -= 1

    return ", ".join(cells[:keep])
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def excel_rows_to_documents(
    *,
    workbook_name: str,
    sheets: Iterable[Tuple[str, int, Iterable[Sequence[Any]]]],
) -> List[Document]:
    """Build one Document per non-empty sheet of a workbook.

    Args:
        workbook_name: Name assigned to every Document that is produced.
        sheets: Iterable of ``(sheet_name, sheet_index, rows)`` tuples, where
            ``rows`` yields the cell values of each row.

    Returns:
        The Documents in sheet order. Each Document's content is the sheet's
        non-empty rows rendered by ``row_to_csv_line`` and joined with newlines;
        its metadata records the sheet name and index. Sheets with no
        non-empty rows are skipped.
    """
    result = []
    for sheet_name, sheet_index, rows in sheets:
        # Rows that render to an empty line carry no content and are dropped
        rendered = [text for text in map(row_to_csv_line, rows) if text]

        if rendered:
            sheet_document = Document(
                name=workbook_name,
                id=str(uuid4()),
                meta_data={"sheet_name": sheet_name, "sheet_index": sheet_index},
                content="\n".join(rendered),
            )
            result.append(sheet_document)
        else:
            log_debug(f"Sheet '{sheet_name}' is empty, skipping")

    return result
|
|
@@ -37,7 +37,7 @@ class WebSearchReader(Reader):
|
|
|
37
37
|
user_agent: str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
|
|
38
38
|
|
|
39
39
|
# Search engine configuration
|
|
40
|
-
search_engine: Literal["duckduckgo"
|
|
40
|
+
search_engine: Literal["duckduckgo"] = "duckduckgo"
|
|
41
41
|
search_delay: float = 3.0 # Delay between search requests
|
|
42
42
|
max_search_retries: int = 2 # Retries for search operations
|
|
43
43
|
|
|
@@ -53,9 +53,10 @@ class WebSearchReader(Reader):
|
|
|
53
53
|
chunking_strategy: Optional[ChunkingStrategy] = SemanticChunking()
|
|
54
54
|
|
|
55
55
|
@classmethod
|
|
56
|
-
def get_supported_chunking_strategies(
|
|
56
|
+
def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
|
|
57
57
|
"""Get the list of supported chunking strategies for Web Search readers."""
|
|
58
58
|
return [
|
|
59
|
+
ChunkingStrategyType.CODE_CHUNKER,
|
|
59
60
|
ChunkingStrategyType.AGENTIC_CHUNKER,
|
|
60
61
|
ChunkingStrategyType.DOCUMENT_CHUNKER,
|
|
61
62
|
ChunkingStrategyType.RECURSIVE_CHUNKER,
|
|
@@ -64,7 +65,7 @@ class WebSearchReader(Reader):
|
|
|
64
65
|
]
|
|
65
66
|
|
|
66
67
|
@classmethod
|
|
67
|
-
def get_supported_content_types(
|
|
68
|
+
def get_supported_content_types(cls) -> List[ContentType]:
|
|
68
69
|
return [ContentType.TOPIC]
|
|
69
70
|
|
|
70
71
|
def _respect_rate_limits(self):
|
|
@@ -121,57 +122,10 @@ class WebSearchReader(Reader):
|
|
|
121
122
|
return []
|
|
122
123
|
return []
|
|
123
124
|
|
|
124
|
-
def _perform_google_search(self, query: str) -> List[Dict[str, str]]:
|
|
125
|
-
"""Perform web search using Google (requires googlesearch-python)"""
|
|
126
|
-
log_debug(f"Performing Google search for: {query}")
|
|
127
|
-
|
|
128
|
-
try:
|
|
129
|
-
from googlesearch import search
|
|
130
|
-
except ImportError:
|
|
131
|
-
logger.error("Google search requires 'googlesearch-python'. Install with: pip install googlesearch-python")
|
|
132
|
-
return []
|
|
133
|
-
|
|
134
|
-
for attempt in range(self.max_search_retries):
|
|
135
|
-
try:
|
|
136
|
-
self._respect_rate_limits()
|
|
137
|
-
|
|
138
|
-
results = []
|
|
139
|
-
# Use the basic search function without unsupported parameters
|
|
140
|
-
# The googlesearch-python library's search function only accepts basic parameters
|
|
141
|
-
search_results = search(query)
|
|
142
|
-
|
|
143
|
-
# Convert iterator to list and limit results
|
|
144
|
-
result_list = list(search_results)[: self.max_results]
|
|
145
|
-
|
|
146
|
-
for result in result_list:
|
|
147
|
-
# The search function returns URLs as strings
|
|
148
|
-
results.append(
|
|
149
|
-
{
|
|
150
|
-
"title": "", # Google search doesn't provide titles directly
|
|
151
|
-
"url": result,
|
|
152
|
-
"description": "", # Google search doesn't provide descriptions directly
|
|
153
|
-
}
|
|
154
|
-
)
|
|
155
|
-
|
|
156
|
-
log_debug(f"Found {len(results)} Google search results")
|
|
157
|
-
return results
|
|
158
|
-
|
|
159
|
-
except Exception as e:
|
|
160
|
-
logger.warning(f"Google search attempt {attempt + 1} failed: {e}")
|
|
161
|
-
if attempt < self.max_search_retries - 1:
|
|
162
|
-
time.sleep(self.search_delay)
|
|
163
|
-
else:
|
|
164
|
-
logger.error(f"All Google search attempts failed: {e}")
|
|
165
|
-
return []
|
|
166
|
-
|
|
167
|
-
return []
|
|
168
|
-
|
|
169
125
|
def _perform_web_search(self, query: str) -> List[Dict[str, str]]:
|
|
170
126
|
"""Perform web search using the configured search engine"""
|
|
171
127
|
if self.search_engine == "duckduckgo":
|
|
172
128
|
return self._perform_duckduckgo_search(query)
|
|
173
|
-
elif self.search_engine == "google":
|
|
174
|
-
return self._perform_google_search(query)
|
|
175
129
|
else:
|
|
176
130
|
logger.error(f"Unsupported search engine: {self.search_engine}")
|
|
177
131
|
return []
|
|
@@ -310,21 +264,17 @@ class WebSearchReader(Reader):
|
|
|
310
264
|
|
|
311
265
|
log_debug(f"Starting async web search reader for query: {query}")
|
|
312
266
|
|
|
313
|
-
# Perform web search (synchronous operation)
|
|
314
267
|
search_results = self._perform_web_search(query)
|
|
315
268
|
if not search_results:
|
|
316
269
|
logger.warning(f"No search results found for query: {query}")
|
|
317
270
|
return []
|
|
318
271
|
|
|
319
|
-
# Create tasks for fetching content from each URL
|
|
320
272
|
async def fetch_url_async(result: Dict[str, str]) -> Optional[Document]:
|
|
321
273
|
url = result.get("url", "")
|
|
322
274
|
|
|
323
|
-
# Skip if URL is invalid or already visited
|
|
324
275
|
if not self._is_valid_url(url):
|
|
325
276
|
return None
|
|
326
277
|
|
|
327
|
-
# Mark URL as visited
|
|
328
278
|
self._visited_urls.add(url)
|
|
329
279
|
|
|
330
280
|
try:
|
|
@@ -339,32 +289,25 @@ class WebSearchReader(Reader):
|
|
|
339
289
|
else:
|
|
340
290
|
content = response.text
|
|
341
291
|
|
|
342
|
-
|
|
343
|
-
return document
|
|
292
|
+
return self._create_document_from_url(url, content, result)
|
|
344
293
|
|
|
345
294
|
except Exception as e:
|
|
346
295
|
logger.warning(f"Error fetching {url}: {e}")
|
|
347
296
|
return None
|
|
348
297
|
|
|
349
|
-
# Create tasks for all URLs
|
|
350
|
-
tasks = [fetch_url_async(result) for result in search_results]
|
|
351
|
-
|
|
352
|
-
# Execute all tasks concurrently with delays
|
|
353
298
|
documents = []
|
|
354
|
-
for i,
|
|
355
|
-
if i > 0:
|
|
299
|
+
for i, result in enumerate(search_results):
|
|
300
|
+
if i > 0:
|
|
356
301
|
await asyncio.sleep(self.delay_between_requests)
|
|
357
302
|
|
|
358
|
-
doc = await
|
|
303
|
+
doc = await fetch_url_async(result)
|
|
359
304
|
if doc is not None:
|
|
360
|
-
# Apply chunking if enabled
|
|
361
305
|
if self.chunk:
|
|
362
306
|
chunked_docs = await self.chunk_documents_async([doc])
|
|
363
307
|
documents.extend(chunked_docs)
|
|
364
308
|
else:
|
|
365
309
|
documents.append(doc)
|
|
366
310
|
|
|
367
|
-
# Stop if we've reached max_results
|
|
368
311
|
if len(documents) >= self.max_results:
|
|
369
312
|
break
|
|
370
313
|
|
|
@@ -12,7 +12,7 @@ from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyT
|
|
|
12
12
|
from agno.knowledge.document.base import Document
|
|
13
13
|
from agno.knowledge.reader.base import Reader
|
|
14
14
|
from agno.knowledge.types import ContentType
|
|
15
|
-
from agno.utils.log import log_debug,
|
|
15
|
+
from agno.utils.log import log_debug, log_error, log_warning
|
|
16
16
|
|
|
17
17
|
try:
|
|
18
18
|
from bs4 import BeautifulSoup, Tag # noqa: F401
|
|
@@ -49,9 +49,10 @@ class WebsiteReader(Reader):
|
|
|
49
49
|
self._urls_to_crawl = []
|
|
50
50
|
|
|
51
51
|
@classmethod
|
|
52
|
-
def get_supported_chunking_strategies(
|
|
52
|
+
def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
|
|
53
53
|
"""Get the list of supported chunking strategies for Website readers."""
|
|
54
54
|
return [
|
|
55
|
+
ChunkingStrategyType.CODE_CHUNKER,
|
|
55
56
|
ChunkingStrategyType.AGENTIC_CHUNKER,
|
|
56
57
|
ChunkingStrategyType.DOCUMENT_CHUNKER,
|
|
57
58
|
ChunkingStrategyType.RECURSIVE_CHUNKER,
|
|
@@ -60,7 +61,7 @@ class WebsiteReader(Reader):
|
|
|
60
61
|
]
|
|
61
62
|
|
|
62
63
|
@classmethod
|
|
63
|
-
def get_supported_content_types(
|
|
64
|
+
def get_supported_content_types(cls) -> List[ContentType]:
|
|
64
65
|
return [ContentType.URL]
|
|
65
66
|
|
|
66
67
|
def delay(self, min_seconds=1, max_seconds=3):
|
|
@@ -229,21 +230,21 @@ class WebsiteReader(Reader):
|
|
|
229
230
|
# Log HTTP status errors but continue crawling other pages
|
|
230
231
|
# Skip redirect errors (3xx) as they should be handled by follow_redirects
|
|
231
232
|
if e.response.status_code >= 300 and e.response.status_code < 400:
|
|
232
|
-
|
|
233
|
+
log_debug(f"Redirect encountered for {current_url}, skipping: {e}")
|
|
233
234
|
else:
|
|
234
|
-
|
|
235
|
+
log_warning(f"HTTP status error while crawling {current_url}: {e}")
|
|
235
236
|
# For the initial URL, we should raise the error only if it's not a redirect
|
|
236
237
|
if current_url == url and not crawler_result and not (300 <= e.response.status_code < 400):
|
|
237
238
|
raise
|
|
238
239
|
except httpx.RequestError as e:
|
|
239
240
|
# Log request errors but continue crawling other pages
|
|
240
|
-
|
|
241
|
+
log_warning(f"Request error while crawling {current_url}: {e}")
|
|
241
242
|
# For the initial URL, we should raise the error
|
|
242
243
|
if current_url == url and not crawler_result:
|
|
243
244
|
raise
|
|
244
245
|
except Exception as e:
|
|
245
246
|
# Log other exceptions but continue crawling other pages
|
|
246
|
-
|
|
247
|
+
log_warning(f"Failed to crawl {current_url}: {e}")
|
|
247
248
|
# For the initial URL, we should raise the error
|
|
248
249
|
if current_url == url and not crawler_result:
|
|
249
250
|
# Wrap non-HTTP exceptions in a RequestError
|
|
@@ -332,19 +333,19 @@ class WebsiteReader(Reader):
|
|
|
332
333
|
|
|
333
334
|
except httpx.HTTPStatusError as e:
|
|
334
335
|
# Log HTTP status errors but continue crawling other pages
|
|
335
|
-
|
|
336
|
+
log_warning(f"HTTP status error while crawling asynchronously {current_url}: {e}")
|
|
336
337
|
# For the initial URL, we should raise the error
|
|
337
338
|
if current_url == url and not crawler_result:
|
|
338
339
|
raise
|
|
339
340
|
except httpx.RequestError as e:
|
|
340
341
|
# Log request errors but continue crawling other pages
|
|
341
|
-
|
|
342
|
+
log_warning(f"Request error while crawling asynchronously {current_url}: {e}")
|
|
342
343
|
# For the initial URL, we should raise the error
|
|
343
344
|
if current_url == url and not crawler_result:
|
|
344
345
|
raise
|
|
345
346
|
except Exception as e:
|
|
346
347
|
# Log other exceptions but continue crawling other pages
|
|
347
|
-
|
|
348
|
+
log_warning(f"Failed to crawl asynchronously {current_url}: {e}")
|
|
348
349
|
# For the initial URL, we should raise the error
|
|
349
350
|
if current_url == url and not crawler_result:
|
|
350
351
|
# Wrap non-HTTP exceptions in a RequestError
|
|
@@ -398,7 +399,7 @@ class WebsiteReader(Reader):
|
|
|
398
399
|
)
|
|
399
400
|
return documents
|
|
400
401
|
except (httpx.HTTPStatusError, httpx.RequestError) as e:
|
|
401
|
-
|
|
402
|
+
log_error(f"Error reading website {url}: {e}")
|
|
402
403
|
raise
|
|
403
404
|
|
|
404
405
|
async def async_read(self, url: str, name: Optional[str] = None) -> List[Document]:
|
|
@@ -427,7 +428,8 @@ class WebsiteReader(Reader):
|
|
|
427
428
|
meta_data={"url": str(crawled_url)},
|
|
428
429
|
content=crawled_content,
|
|
429
430
|
)
|
|
430
|
-
|
|
431
|
+
chunks = self.chunk_document(doc)
|
|
432
|
+
return chunks
|
|
431
433
|
else:
|
|
432
434
|
return [
|
|
433
435
|
Document(
|
|
@@ -443,6 +445,7 @@ class WebsiteReader(Reader):
|
|
|
443
445
|
process_document(crawled_url, crawled_content)
|
|
444
446
|
for crawled_url, crawled_content in crawler_result.items()
|
|
445
447
|
]
|
|
448
|
+
|
|
446
449
|
results = await asyncio.gather(*tasks)
|
|
447
450
|
|
|
448
451
|
# Flatten the results
|
|
@@ -451,5 +454,5 @@ class WebsiteReader(Reader):
|
|
|
451
454
|
|
|
452
455
|
return documents
|
|
453
456
|
except (httpx.HTTPStatusError, httpx.RequestError) as e:
|
|
454
|
-
|
|
457
|
+
log_error(f"Error reading website asynchronously {url}: {e}")
|
|
455
458
|
raise
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import asyncio
|
|
1
2
|
from typing import List, Optional
|
|
2
3
|
|
|
3
4
|
from agno.knowledge.chunking.fixed import FixedSizeChunking
|
|
@@ -23,9 +24,10 @@ class WikipediaReader(Reader):
|
|
|
23
24
|
self.auto_suggest = auto_suggest
|
|
24
25
|
|
|
25
26
|
@classmethod
|
|
26
|
-
def get_supported_chunking_strategies(
|
|
27
|
+
def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
|
|
27
28
|
"""Get the list of supported chunking strategies for Wikipedia readers."""
|
|
28
29
|
return [
|
|
30
|
+
ChunkingStrategyType.CODE_CHUNKER,
|
|
29
31
|
ChunkingStrategyType.FIXED_SIZE_CHUNKER,
|
|
30
32
|
ChunkingStrategyType.AGENTIC_CHUNKER,
|
|
31
33
|
ChunkingStrategyType.DOCUMENT_CHUNKER,
|
|
@@ -34,7 +36,7 @@ class WikipediaReader(Reader):
|
|
|
34
36
|
]
|
|
35
37
|
|
|
36
38
|
@classmethod
|
|
37
|
-
def get_supported_content_types(
|
|
39
|
+
def get_supported_content_types(cls) -> List[ContentType]:
|
|
38
40
|
return [ContentType.TOPIC]
|
|
39
41
|
|
|
40
42
|
def read(self, topic: str) -> List[Document]:
|
|
@@ -45,7 +47,38 @@ class WikipediaReader(Reader):
|
|
|
45
47
|
|
|
46
48
|
except wikipedia.exceptions.PageError:
|
|
47
49
|
summary = None
|
|
48
|
-
log_info("
|
|
50
|
+
log_info("Wikipedia Error: Page not found.")
|
|
51
|
+
|
|
52
|
+
# Only create Document if we successfully got a summary
|
|
53
|
+
if summary:
|
|
54
|
+
return [
|
|
55
|
+
Document(
|
|
56
|
+
name=topic,
|
|
57
|
+
meta_data={"topic": topic},
|
|
58
|
+
content=summary,
|
|
59
|
+
)
|
|
60
|
+
]
|
|
61
|
+
return []
|
|
62
|
+
|
|
63
|
+
async def async_read(self, topic: str) -> List[Document]:
|
|
64
|
+
"""
|
|
65
|
+
Asynchronously read content from Wikipedia.
|
|
66
|
+
|
|
67
|
+
Args:
|
|
68
|
+
topic: The Wikipedia topic to read
|
|
69
|
+
|
|
70
|
+
Returns:
|
|
71
|
+
A list of documents containing the Wikipedia summary
|
|
72
|
+
"""
|
|
73
|
+
log_debug(f"Async reading Wikipedia topic: {topic}")
|
|
74
|
+
summary = None
|
|
75
|
+
try:
|
|
76
|
+
# Run the synchronous wikipedia API call in a thread pool
|
|
77
|
+
summary = await asyncio.to_thread(wikipedia.summary, topic, auto_suggest=self.auto_suggest)
|
|
78
|
+
|
|
79
|
+
except wikipedia.exceptions.PageError:
|
|
80
|
+
summary = None
|
|
81
|
+
log_info("Wikipedia Error: Page not found.")
|
|
49
82
|
|
|
50
83
|
# Only create Document if we successfully got a summary
|
|
51
84
|
if summary:
|
|
@@ -23,10 +23,11 @@ class YouTubeReader(Reader):
|
|
|
23
23
|
super().__init__(chunking_strategy=chunking_strategy, **kwargs)
|
|
24
24
|
|
|
25
25
|
@classmethod
|
|
26
|
-
def get_supported_chunking_strategies(
|
|
26
|
+
def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
|
|
27
27
|
"""Get the list of supported chunking strategies for YouTube readers."""
|
|
28
28
|
return [
|
|
29
29
|
ChunkingStrategyType.RECURSIVE_CHUNKER,
|
|
30
|
+
ChunkingStrategyType.CODE_CHUNKER,
|
|
30
31
|
ChunkingStrategyType.AGENTIC_CHUNKER,
|
|
31
32
|
ChunkingStrategyType.DOCUMENT_CHUNKER,
|
|
32
33
|
ChunkingStrategyType.SEMANTIC_CHUNKER,
|
|
@@ -34,7 +35,7 @@ class YouTubeReader(Reader):
|
|
|
34
35
|
]
|
|
35
36
|
|
|
36
37
|
@classmethod
|
|
37
|
-
def get_supported_content_types(
|
|
38
|
+
def get_supported_content_types(cls) -> List[ContentType]:
|
|
38
39
|
return [ContentType.YOUTUBE]
|
|
39
40
|
|
|
40
41
|
def read(self, url: str, name: Optional[str] = None) -> List[Document]:
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
from agno.knowledge.remote_content.config import (
|
|
2
|
+
AzureBlobConfig,
|
|
3
|
+
GcsConfig,
|
|
4
|
+
GitHubConfig,
|
|
5
|
+
RemoteContentConfig,
|
|
6
|
+
S3Config,
|
|
7
|
+
SharePointConfig,
|
|
8
|
+
)
|
|
9
|
+
from agno.knowledge.remote_content.remote_content import (
|
|
10
|
+
AzureBlobContent,
|
|
11
|
+
GCSContent,
|
|
12
|
+
GitHubContent,
|
|
13
|
+
RemoteContent,
|
|
14
|
+
S3Content,
|
|
15
|
+
SharePointContent,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
__all__ = [
|
|
19
|
+
# Config classes
|
|
20
|
+
"RemoteContentConfig",
|
|
21
|
+
"S3Config",
|
|
22
|
+
"GcsConfig",
|
|
23
|
+
"SharePointConfig",
|
|
24
|
+
"GitHubConfig",
|
|
25
|
+
"AzureBlobConfig",
|
|
26
|
+
# Content classes
|
|
27
|
+
"RemoteContent",
|
|
28
|
+
"S3Content",
|
|
29
|
+
"GCSContent",
|
|
30
|
+
"SharePointContent",
|
|
31
|
+
"GitHubContent",
|
|
32
|
+
"AzureBlobContent",
|
|
33
|
+
]
|