agno 2.2.13__py3-none-any.whl → 2.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/agent/__init__.py +6 -0
- agno/agent/agent.py +5252 -3145
- agno/agent/remote.py +525 -0
- agno/api/api.py +2 -0
- agno/client/__init__.py +3 -0
- agno/client/a2a/__init__.py +10 -0
- agno/client/a2a/client.py +554 -0
- agno/client/a2a/schemas.py +112 -0
- agno/client/a2a/utils.py +369 -0
- agno/client/os.py +2669 -0
- agno/compression/__init__.py +3 -0
- agno/compression/manager.py +247 -0
- agno/culture/manager.py +2 -2
- agno/db/base.py +927 -6
- agno/db/dynamo/dynamo.py +788 -2
- agno/db/dynamo/schemas.py +128 -0
- agno/db/dynamo/utils.py +26 -3
- agno/db/firestore/firestore.py +674 -50
- agno/db/firestore/schemas.py +41 -0
- agno/db/firestore/utils.py +25 -10
- agno/db/gcs_json/gcs_json_db.py +506 -3
- agno/db/gcs_json/utils.py +14 -2
- agno/db/in_memory/in_memory_db.py +203 -4
- agno/db/in_memory/utils.py +14 -2
- agno/db/json/json_db.py +498 -2
- agno/db/json/utils.py +14 -2
- agno/db/migrations/manager.py +199 -0
- agno/db/migrations/utils.py +19 -0
- agno/db/migrations/v1_to_v2.py +54 -16
- agno/db/migrations/versions/__init__.py +0 -0
- agno/db/migrations/versions/v2_3_0.py +977 -0
- agno/db/mongo/async_mongo.py +1013 -39
- agno/db/mongo/mongo.py +684 -4
- agno/db/mongo/schemas.py +48 -0
- agno/db/mongo/utils.py +17 -0
- agno/db/mysql/__init__.py +2 -1
- agno/db/mysql/async_mysql.py +2958 -0
- agno/db/mysql/mysql.py +722 -53
- agno/db/mysql/schemas.py +77 -11
- agno/db/mysql/utils.py +151 -8
- agno/db/postgres/async_postgres.py +1254 -137
- agno/db/postgres/postgres.py +2316 -93
- agno/db/postgres/schemas.py +153 -21
- agno/db/postgres/utils.py +22 -7
- agno/db/redis/redis.py +531 -3
- agno/db/redis/schemas.py +36 -0
- agno/db/redis/utils.py +31 -15
- agno/db/schemas/evals.py +1 -0
- agno/db/schemas/memory.py +20 -9
- agno/db/singlestore/schemas.py +70 -1
- agno/db/singlestore/singlestore.py +737 -74
- agno/db/singlestore/utils.py +13 -3
- agno/db/sqlite/async_sqlite.py +1069 -89
- agno/db/sqlite/schemas.py +133 -1
- agno/db/sqlite/sqlite.py +2203 -165
- agno/db/sqlite/utils.py +21 -11
- agno/db/surrealdb/models.py +25 -0
- agno/db/surrealdb/surrealdb.py +603 -1
- agno/db/utils.py +60 -0
- agno/eval/__init__.py +26 -3
- agno/eval/accuracy.py +25 -12
- agno/eval/agent_as_judge.py +871 -0
- agno/eval/base.py +29 -0
- agno/eval/performance.py +10 -4
- agno/eval/reliability.py +22 -13
- agno/eval/utils.py +2 -1
- agno/exceptions.py +42 -0
- agno/hooks/__init__.py +3 -0
- agno/hooks/decorator.py +164 -0
- agno/integrations/discord/client.py +13 -2
- agno/knowledge/__init__.py +4 -0
- agno/knowledge/chunking/code.py +90 -0
- agno/knowledge/chunking/document.py +65 -4
- agno/knowledge/chunking/fixed.py +4 -1
- agno/knowledge/chunking/markdown.py +102 -11
- agno/knowledge/chunking/recursive.py +2 -2
- agno/knowledge/chunking/semantic.py +130 -48
- agno/knowledge/chunking/strategy.py +18 -0
- agno/knowledge/embedder/azure_openai.py +0 -1
- agno/knowledge/embedder/google.py +1 -1
- agno/knowledge/embedder/mistral.py +1 -1
- agno/knowledge/embedder/nebius.py +1 -1
- agno/knowledge/embedder/openai.py +16 -12
- agno/knowledge/filesystem.py +412 -0
- agno/knowledge/knowledge.py +4261 -1199
- agno/knowledge/protocol.py +134 -0
- agno/knowledge/reader/arxiv_reader.py +3 -2
- agno/knowledge/reader/base.py +9 -7
- agno/knowledge/reader/csv_reader.py +91 -42
- agno/knowledge/reader/docx_reader.py +9 -10
- agno/knowledge/reader/excel_reader.py +225 -0
- agno/knowledge/reader/field_labeled_csv_reader.py +38 -48
- agno/knowledge/reader/firecrawl_reader.py +3 -2
- agno/knowledge/reader/json_reader.py +16 -22
- agno/knowledge/reader/markdown_reader.py +15 -14
- agno/knowledge/reader/pdf_reader.py +33 -28
- agno/knowledge/reader/pptx_reader.py +9 -10
- agno/knowledge/reader/reader_factory.py +135 -1
- agno/knowledge/reader/s3_reader.py +8 -16
- agno/knowledge/reader/tavily_reader.py +3 -3
- agno/knowledge/reader/text_reader.py +15 -14
- agno/knowledge/reader/utils/__init__.py +17 -0
- agno/knowledge/reader/utils/spreadsheet.py +114 -0
- agno/knowledge/reader/web_search_reader.py +8 -65
- agno/knowledge/reader/website_reader.py +16 -13
- agno/knowledge/reader/wikipedia_reader.py +36 -3
- agno/knowledge/reader/youtube_reader.py +3 -2
- agno/knowledge/remote_content/__init__.py +33 -0
- agno/knowledge/remote_content/config.py +266 -0
- agno/knowledge/remote_content/remote_content.py +105 -17
- agno/knowledge/utils.py +76 -22
- agno/learn/__init__.py +71 -0
- agno/learn/config.py +463 -0
- agno/learn/curate.py +185 -0
- agno/learn/machine.py +725 -0
- agno/learn/schemas.py +1114 -0
- agno/learn/stores/__init__.py +38 -0
- agno/learn/stores/decision_log.py +1156 -0
- agno/learn/stores/entity_memory.py +3275 -0
- agno/learn/stores/learned_knowledge.py +1583 -0
- agno/learn/stores/protocol.py +117 -0
- agno/learn/stores/session_context.py +1217 -0
- agno/learn/stores/user_memory.py +1495 -0
- agno/learn/stores/user_profile.py +1220 -0
- agno/learn/utils.py +209 -0
- agno/media.py +22 -6
- agno/memory/__init__.py +14 -1
- agno/memory/manager.py +223 -8
- agno/memory/strategies/__init__.py +15 -0
- agno/memory/strategies/base.py +66 -0
- agno/memory/strategies/summarize.py +196 -0
- agno/memory/strategies/types.py +37 -0
- agno/models/aimlapi/aimlapi.py +17 -0
- agno/models/anthropic/claude.py +434 -59
- agno/models/aws/bedrock.py +121 -20
- agno/models/aws/claude.py +131 -274
- agno/models/azure/ai_foundry.py +10 -6
- agno/models/azure/openai_chat.py +33 -10
- agno/models/base.py +1162 -561
- agno/models/cerebras/cerebras.py +120 -24
- agno/models/cerebras/cerebras_openai.py +21 -2
- agno/models/cohere/chat.py +65 -6
- agno/models/cometapi/cometapi.py +18 -1
- agno/models/dashscope/dashscope.py +2 -3
- agno/models/deepinfra/deepinfra.py +18 -1
- agno/models/deepseek/deepseek.py +69 -3
- agno/models/fireworks/fireworks.py +18 -1
- agno/models/google/gemini.py +959 -89
- agno/models/google/utils.py +22 -0
- agno/models/groq/groq.py +48 -18
- agno/models/huggingface/huggingface.py +17 -6
- agno/models/ibm/watsonx.py +16 -6
- agno/models/internlm/internlm.py +18 -1
- agno/models/langdb/langdb.py +13 -1
- agno/models/litellm/chat.py +88 -9
- agno/models/litellm/litellm_openai.py +18 -1
- agno/models/message.py +24 -5
- agno/models/meta/llama.py +40 -13
- agno/models/meta/llama_openai.py +22 -21
- agno/models/metrics.py +12 -0
- agno/models/mistral/mistral.py +8 -4
- agno/models/n1n/__init__.py +3 -0
- agno/models/n1n/n1n.py +57 -0
- agno/models/nebius/nebius.py +6 -7
- agno/models/nvidia/nvidia.py +20 -3
- agno/models/ollama/__init__.py +2 -0
- agno/models/ollama/chat.py +17 -6
- agno/models/ollama/responses.py +100 -0
- agno/models/openai/__init__.py +2 -0
- agno/models/openai/chat.py +117 -26
- agno/models/openai/open_responses.py +46 -0
- agno/models/openai/responses.py +110 -32
- agno/models/openrouter/__init__.py +2 -0
- agno/models/openrouter/openrouter.py +67 -2
- agno/models/openrouter/responses.py +146 -0
- agno/models/perplexity/perplexity.py +19 -1
- agno/models/portkey/portkey.py +7 -6
- agno/models/requesty/requesty.py +19 -2
- agno/models/response.py +20 -2
- agno/models/sambanova/sambanova.py +20 -3
- agno/models/siliconflow/siliconflow.py +19 -2
- agno/models/together/together.py +20 -3
- agno/models/vercel/v0.py +20 -3
- agno/models/vertexai/claude.py +124 -4
- agno/models/vllm/vllm.py +19 -14
- agno/models/xai/xai.py +19 -2
- agno/os/app.py +467 -137
- agno/os/auth.py +253 -5
- agno/os/config.py +22 -0
- agno/os/interfaces/a2a/a2a.py +7 -6
- agno/os/interfaces/a2a/router.py +635 -26
- agno/os/interfaces/a2a/utils.py +32 -33
- agno/os/interfaces/agui/agui.py +5 -3
- agno/os/interfaces/agui/router.py +26 -16
- agno/os/interfaces/agui/utils.py +97 -57
- agno/os/interfaces/base.py +7 -7
- agno/os/interfaces/slack/router.py +16 -7
- agno/os/interfaces/slack/slack.py +7 -7
- agno/os/interfaces/whatsapp/router.py +35 -7
- agno/os/interfaces/whatsapp/security.py +3 -1
- agno/os/interfaces/whatsapp/whatsapp.py +11 -8
- agno/os/managers.py +326 -0
- agno/os/mcp.py +652 -79
- agno/os/middleware/__init__.py +4 -0
- agno/os/middleware/jwt.py +718 -115
- agno/os/middleware/trailing_slash.py +27 -0
- agno/os/router.py +105 -1558
- agno/os/routers/agents/__init__.py +3 -0
- agno/os/routers/agents/router.py +655 -0
- agno/os/routers/agents/schema.py +288 -0
- agno/os/routers/components/__init__.py +3 -0
- agno/os/routers/components/components.py +475 -0
- agno/os/routers/database.py +155 -0
- agno/os/routers/evals/evals.py +111 -18
- agno/os/routers/evals/schemas.py +38 -5
- agno/os/routers/evals/utils.py +80 -11
- agno/os/routers/health.py +3 -3
- agno/os/routers/knowledge/knowledge.py +284 -35
- agno/os/routers/knowledge/schemas.py +14 -2
- agno/os/routers/memory/memory.py +274 -11
- agno/os/routers/memory/schemas.py +44 -3
- agno/os/routers/metrics/metrics.py +30 -15
- agno/os/routers/metrics/schemas.py +10 -6
- agno/os/routers/registry/__init__.py +3 -0
- agno/os/routers/registry/registry.py +337 -0
- agno/os/routers/session/session.py +143 -14
- agno/os/routers/teams/__init__.py +3 -0
- agno/os/routers/teams/router.py +550 -0
- agno/os/routers/teams/schema.py +280 -0
- agno/os/routers/traces/__init__.py +3 -0
- agno/os/routers/traces/schemas.py +414 -0
- agno/os/routers/traces/traces.py +549 -0
- agno/os/routers/workflows/__init__.py +3 -0
- agno/os/routers/workflows/router.py +757 -0
- agno/os/routers/workflows/schema.py +139 -0
- agno/os/schema.py +157 -584
- agno/os/scopes.py +469 -0
- agno/os/settings.py +3 -0
- agno/os/utils.py +574 -185
- agno/reasoning/anthropic.py +85 -1
- agno/reasoning/azure_ai_foundry.py +93 -1
- agno/reasoning/deepseek.py +102 -2
- agno/reasoning/default.py +6 -7
- agno/reasoning/gemini.py +87 -3
- agno/reasoning/groq.py +109 -2
- agno/reasoning/helpers.py +6 -7
- agno/reasoning/manager.py +1238 -0
- agno/reasoning/ollama.py +93 -1
- agno/reasoning/openai.py +115 -1
- agno/reasoning/vertexai.py +85 -1
- agno/registry/__init__.py +3 -0
- agno/registry/registry.py +68 -0
- agno/remote/__init__.py +3 -0
- agno/remote/base.py +581 -0
- agno/run/__init__.py +2 -4
- agno/run/agent.py +134 -19
- agno/run/base.py +49 -1
- agno/run/cancel.py +65 -52
- agno/run/cancellation_management/__init__.py +9 -0
- agno/run/cancellation_management/base.py +78 -0
- agno/run/cancellation_management/in_memory_cancellation_manager.py +100 -0
- agno/run/cancellation_management/redis_cancellation_manager.py +236 -0
- agno/run/requirement.py +181 -0
- agno/run/team.py +111 -19
- agno/run/workflow.py +2 -1
- agno/session/agent.py +57 -92
- agno/session/summary.py +1 -1
- agno/session/team.py +62 -115
- agno/session/workflow.py +353 -57
- agno/skills/__init__.py +17 -0
- agno/skills/agent_skills.py +377 -0
- agno/skills/errors.py +32 -0
- agno/skills/loaders/__init__.py +4 -0
- agno/skills/loaders/base.py +27 -0
- agno/skills/loaders/local.py +216 -0
- agno/skills/skill.py +65 -0
- agno/skills/utils.py +107 -0
- agno/skills/validator.py +277 -0
- agno/table.py +10 -0
- agno/team/__init__.py +5 -1
- agno/team/remote.py +447 -0
- agno/team/team.py +3769 -2202
- agno/tools/brandfetch.py +27 -18
- agno/tools/browserbase.py +225 -16
- agno/tools/crawl4ai.py +3 -0
- agno/tools/duckduckgo.py +25 -71
- agno/tools/exa.py +0 -21
- agno/tools/file.py +14 -13
- agno/tools/file_generation.py +12 -6
- agno/tools/firecrawl.py +15 -7
- agno/tools/function.py +94 -113
- agno/tools/google_bigquery.py +11 -2
- agno/tools/google_drive.py +4 -3
- agno/tools/knowledge.py +9 -4
- agno/tools/mcp/mcp.py +301 -18
- agno/tools/mcp/multi_mcp.py +269 -14
- agno/tools/mem0.py +11 -10
- agno/tools/memory.py +47 -46
- agno/tools/mlx_transcribe.py +10 -7
- agno/tools/models/nebius.py +5 -5
- agno/tools/models_labs.py +20 -10
- agno/tools/nano_banana.py +151 -0
- agno/tools/parallel.py +0 -7
- agno/tools/postgres.py +76 -36
- agno/tools/python.py +14 -6
- agno/tools/reasoning.py +30 -23
- agno/tools/redshift.py +406 -0
- agno/tools/shopify.py +1519 -0
- agno/tools/spotify.py +919 -0
- agno/tools/tavily.py +4 -1
- agno/tools/toolkit.py +253 -18
- agno/tools/websearch.py +93 -0
- agno/tools/website.py +1 -1
- agno/tools/wikipedia.py +1 -1
- agno/tools/workflow.py +56 -48
- agno/tools/yfinance.py +12 -11
- agno/tracing/__init__.py +12 -0
- agno/tracing/exporter.py +161 -0
- agno/tracing/schemas.py +276 -0
- agno/tracing/setup.py +112 -0
- agno/utils/agent.py +251 -10
- agno/utils/cryptography.py +22 -0
- agno/utils/dttm.py +33 -0
- agno/utils/events.py +264 -7
- agno/utils/hooks.py +111 -3
- agno/utils/http.py +161 -2
- agno/utils/mcp.py +49 -8
- agno/utils/media.py +22 -1
- agno/utils/models/ai_foundry.py +9 -2
- agno/utils/models/claude.py +20 -5
- agno/utils/models/cohere.py +9 -2
- agno/utils/models/llama.py +9 -2
- agno/utils/models/mistral.py +4 -2
- agno/utils/os.py +0 -0
- agno/utils/print_response/agent.py +99 -16
- agno/utils/print_response/team.py +223 -24
- agno/utils/print_response/workflow.py +0 -2
- agno/utils/prompts.py +8 -6
- agno/utils/remote.py +23 -0
- agno/utils/response.py +1 -13
- agno/utils/string.py +91 -2
- agno/utils/team.py +62 -12
- agno/utils/tokens.py +657 -0
- agno/vectordb/base.py +15 -2
- agno/vectordb/cassandra/cassandra.py +1 -1
- agno/vectordb/chroma/__init__.py +2 -1
- agno/vectordb/chroma/chromadb.py +468 -23
- agno/vectordb/clickhouse/clickhousedb.py +1 -1
- agno/vectordb/couchbase/couchbase.py +6 -2
- agno/vectordb/lancedb/lance_db.py +7 -38
- agno/vectordb/lightrag/lightrag.py +7 -6
- agno/vectordb/milvus/milvus.py +118 -84
- agno/vectordb/mongodb/__init__.py +2 -1
- agno/vectordb/mongodb/mongodb.py +14 -31
- agno/vectordb/pgvector/pgvector.py +120 -66
- agno/vectordb/pineconedb/pineconedb.py +2 -19
- agno/vectordb/qdrant/__init__.py +2 -1
- agno/vectordb/qdrant/qdrant.py +33 -56
- agno/vectordb/redis/__init__.py +2 -1
- agno/vectordb/redis/redisdb.py +19 -31
- agno/vectordb/singlestore/singlestore.py +17 -9
- agno/vectordb/surrealdb/surrealdb.py +2 -38
- agno/vectordb/weaviate/__init__.py +2 -1
- agno/vectordb/weaviate/weaviate.py +7 -3
- agno/workflow/__init__.py +5 -1
- agno/workflow/agent.py +2 -2
- agno/workflow/condition.py +12 -10
- agno/workflow/loop.py +28 -9
- agno/workflow/parallel.py +21 -13
- agno/workflow/remote.py +362 -0
- agno/workflow/router.py +12 -9
- agno/workflow/step.py +261 -36
- agno/workflow/steps.py +12 -8
- agno/workflow/types.py +40 -77
- agno/workflow/workflow.py +939 -213
- {agno-2.2.13.dist-info → agno-2.4.3.dist-info}/METADATA +134 -181
- agno-2.4.3.dist-info/RECORD +677 -0
- {agno-2.2.13.dist-info → agno-2.4.3.dist-info}/WHEEL +1 -1
- agno/tools/googlesearch.py +0 -98
- agno/tools/memori.py +0 -339
- agno-2.2.13.dist-info/RECORD +0 -575
- {agno-2.2.13.dist-info → agno-2.4.3.dist-info}/licenses/LICENSE +0 -0
- {agno-2.2.13.dist-info → agno-2.4.3.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import io
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import IO, Any, Iterable, List, Optional, Sequence, Tuple, Union
|
|
5
|
+
|
|
6
|
+
from agno.knowledge.chunking.row import RowChunking
|
|
7
|
+
from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
|
|
8
|
+
from agno.knowledge.document.base import Document
|
|
9
|
+
from agno.knowledge.reader.base import Reader
|
|
10
|
+
from agno.knowledge.reader.utils import (
|
|
11
|
+
convert_xls_cell_value,
|
|
12
|
+
excel_rows_to_documents,
|
|
13
|
+
get_workbook_name,
|
|
14
|
+
infer_file_extension,
|
|
15
|
+
)
|
|
16
|
+
from agno.knowledge.types import ContentType
|
|
17
|
+
from agno.utils.log import log_debug, log_error
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class ExcelReader(Reader):
|
|
21
|
+
"""Reader for Excel files (.xlsx and .xls)."""
|
|
22
|
+
|
|
23
|
+
def __init__(
|
|
24
|
+
self,
|
|
25
|
+
sheets: Optional[List[Union[str, int]]] = None,
|
|
26
|
+
chunking_strategy: Optional[ChunkingStrategy] = RowChunking(),
|
|
27
|
+
**kwargs,
|
|
28
|
+
):
|
|
29
|
+
super().__init__(chunking_strategy=chunking_strategy, **kwargs)
|
|
30
|
+
self.sheets = sheets
|
|
31
|
+
|
|
32
|
+
@classmethod
|
|
33
|
+
def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
|
|
34
|
+
"""Get the list of supported chunking strategies for Excel readers."""
|
|
35
|
+
return [
|
|
36
|
+
ChunkingStrategyType.ROW_CHUNKER,
|
|
37
|
+
ChunkingStrategyType.CODE_CHUNKER,
|
|
38
|
+
ChunkingStrategyType.FIXED_SIZE_CHUNKER,
|
|
39
|
+
ChunkingStrategyType.AGENTIC_CHUNKER,
|
|
40
|
+
ChunkingStrategyType.DOCUMENT_CHUNKER,
|
|
41
|
+
ChunkingStrategyType.RECURSIVE_CHUNKER,
|
|
42
|
+
]
|
|
43
|
+
|
|
44
|
+
@classmethod
|
|
45
|
+
def get_supported_content_types(cls) -> List[ContentType]:
|
|
46
|
+
"""Get the list of supported content types."""
|
|
47
|
+
return [ContentType.XLSX, ContentType.XLS]
|
|
48
|
+
|
|
49
|
+
def _should_include_sheet(
|
|
50
|
+
self,
|
|
51
|
+
sheet_name: str,
|
|
52
|
+
sheet_index: int,
|
|
53
|
+
) -> bool:
|
|
54
|
+
"""Check if sheet passes the configured filters.
|
|
55
|
+
|
|
56
|
+
Args:
|
|
57
|
+
sheet_name: Name of the sheet
|
|
58
|
+
sheet_index: 1-based index of the sheet (matches document metadata)
|
|
59
|
+
|
|
60
|
+
Returns:
|
|
61
|
+
True if sheet should be included, False otherwise.
|
|
62
|
+
|
|
63
|
+
Note:
|
|
64
|
+
- Index filtering is 1-based to match sheet_index in document metadata
|
|
65
|
+
- Name filtering is case-insensitive
|
|
66
|
+
- Empty list or None means include all sheets
|
|
67
|
+
"""
|
|
68
|
+
# None or empty list = include all sheets
|
|
69
|
+
if not self.sheets:
|
|
70
|
+
return True
|
|
71
|
+
|
|
72
|
+
for sheet_filter in self.sheets:
|
|
73
|
+
if isinstance(sheet_filter, int):
|
|
74
|
+
# 1-based indexing to match metadata
|
|
75
|
+
if sheet_index == sheet_filter:
|
|
76
|
+
return True
|
|
77
|
+
elif isinstance(sheet_filter, str):
|
|
78
|
+
# Case-insensitive name matching
|
|
79
|
+
if sheet_name.lower() == sheet_filter.lower():
|
|
80
|
+
return True
|
|
81
|
+
|
|
82
|
+
return False
|
|
83
|
+
|
|
84
|
+
def _read_xlsx(self, file: Union[Path, IO[Any]], *, workbook_name: str) -> List[Document]:
|
|
85
|
+
"""Read .xlsx file using openpyxl."""
|
|
86
|
+
try:
|
|
87
|
+
import openpyxl
|
|
88
|
+
except ImportError as e:
|
|
89
|
+
raise ImportError("`openpyxl` not installed. Please install it via `pip install openpyxl`.") from e
|
|
90
|
+
|
|
91
|
+
if isinstance(file, Path):
|
|
92
|
+
workbook = openpyxl.load_workbook(filename=str(file), read_only=True, data_only=True)
|
|
93
|
+
else:
|
|
94
|
+
file.seek(0)
|
|
95
|
+
raw = file.read()
|
|
96
|
+
if isinstance(raw, str):
|
|
97
|
+
raw = raw.encode("utf-8", errors="replace")
|
|
98
|
+
workbook = openpyxl.load_workbook(filename=io.BytesIO(raw), read_only=True, data_only=True)
|
|
99
|
+
|
|
100
|
+
try:
|
|
101
|
+
sheets: List[Tuple[str, int, Iterable[Sequence[Any]]]] = []
|
|
102
|
+
for sheet_index, worksheet in enumerate(workbook.worksheets):
|
|
103
|
+
# Pass 1-based index to match metadata (sheet_index + 1)
|
|
104
|
+
if not self._should_include_sheet(worksheet.title, sheet_index + 1):
|
|
105
|
+
log_debug(f"Skipping sheet '{worksheet.title}' (filtered out)")
|
|
106
|
+
continue
|
|
107
|
+
|
|
108
|
+
sheets.append((worksheet.title, sheet_index + 1, worksheet.iter_rows(values_only=True)))
|
|
109
|
+
|
|
110
|
+
return excel_rows_to_documents(workbook_name=workbook_name, sheets=sheets)
|
|
111
|
+
finally:
|
|
112
|
+
workbook.close()
|
|
113
|
+
|
|
114
|
+
def _read_xls(self, file: Union[Path, IO[Any]], *, workbook_name: str) -> List[Document]:
|
|
115
|
+
"""Read .xls file using xlrd."""
|
|
116
|
+
try:
|
|
117
|
+
import xlrd
|
|
118
|
+
except ImportError as e:
|
|
119
|
+
raise ImportError("`xlrd` not installed. Please install it via `pip install xlrd`.") from e
|
|
120
|
+
|
|
121
|
+
if isinstance(file, Path):
|
|
122
|
+
workbook = xlrd.open_workbook(filename=str(file), encoding_override=self.encoding)
|
|
123
|
+
else:
|
|
124
|
+
file.seek(0)
|
|
125
|
+
raw = file.read()
|
|
126
|
+
if isinstance(raw, str):
|
|
127
|
+
raw = raw.encode("utf-8", errors="replace")
|
|
128
|
+
workbook = xlrd.open_workbook(file_contents=raw, encoding_override=self.encoding)
|
|
129
|
+
|
|
130
|
+
sheets: List[Tuple[str, int, Iterable[Sequence[Any]]]] = []
|
|
131
|
+
for sheet_index in range(workbook.nsheets):
|
|
132
|
+
sheet = workbook.sheet_by_index(sheet_index)
|
|
133
|
+
|
|
134
|
+
# Pass 1-based index to match metadata (sheet_index + 1)
|
|
135
|
+
if not self._should_include_sheet(sheet.name, sheet_index + 1):
|
|
136
|
+
log_debug(f"Skipping sheet '{sheet.name}' (filtered out)")
|
|
137
|
+
continue
|
|
138
|
+
|
|
139
|
+
def _iter_sheet_rows(_sheet: Any = sheet, _datemode: int = workbook.datemode) -> Iterable[Sequence[Any]]:
|
|
140
|
+
for row_index in range(_sheet.nrows):
|
|
141
|
+
yield [
|
|
142
|
+
convert_xls_cell_value(
|
|
143
|
+
_sheet.cell_value(row_index, col_index),
|
|
144
|
+
_sheet.cell_type(row_index, col_index),
|
|
145
|
+
_datemode,
|
|
146
|
+
)
|
|
147
|
+
for col_index in range(_sheet.ncols)
|
|
148
|
+
]
|
|
149
|
+
|
|
150
|
+
sheets.append((sheet.name, sheet_index + 1, _iter_sheet_rows()))
|
|
151
|
+
|
|
152
|
+
return excel_rows_to_documents(workbook_name=workbook_name, sheets=sheets)
|
|
153
|
+
|
|
154
|
+
def read(
|
|
155
|
+
self,
|
|
156
|
+
file: Union[Path, IO[Any]],
|
|
157
|
+
name: Optional[str] = None,
|
|
158
|
+
) -> List[Document]:
|
|
159
|
+
"""Read an Excel file and return documents (one per sheet)."""
|
|
160
|
+
try:
|
|
161
|
+
file_extension = infer_file_extension(file, name)
|
|
162
|
+
workbook_name = get_workbook_name(file, name)
|
|
163
|
+
|
|
164
|
+
if isinstance(file, Path) and not file.exists():
|
|
165
|
+
raise FileNotFoundError(f"Could not find file: {file}")
|
|
166
|
+
|
|
167
|
+
file_desc = str(file) if isinstance(file, Path) else getattr(file, "name", "BytesIO")
|
|
168
|
+
log_debug(f"Reading Excel file: {file_desc}")
|
|
169
|
+
|
|
170
|
+
if file_extension == ContentType.XLSX or file_extension == ".xlsx":
|
|
171
|
+
documents = self._read_xlsx(file, workbook_name=workbook_name)
|
|
172
|
+
elif file_extension == ContentType.XLS or file_extension == ".xls":
|
|
173
|
+
documents = self._read_xls(file, workbook_name=workbook_name)
|
|
174
|
+
else:
|
|
175
|
+
raise ValueError(f"Unsupported file extension: '{file_extension}'. Expected .xlsx or .xls")
|
|
176
|
+
|
|
177
|
+
if self.chunk:
|
|
178
|
+
chunked_documents = []
|
|
179
|
+
for document in documents:
|
|
180
|
+
chunked_documents.extend(self.chunk_document(document))
|
|
181
|
+
return chunked_documents
|
|
182
|
+
|
|
183
|
+
return documents
|
|
184
|
+
|
|
185
|
+
except (FileNotFoundError, ImportError, ValueError):
|
|
186
|
+
raise
|
|
187
|
+
except Exception as e:
|
|
188
|
+
file_desc = getattr(file, "name", str(file)) if isinstance(file, IO) else file
|
|
189
|
+
log_error(f"Error reading {file_desc}: {e}")
|
|
190
|
+
return []
|
|
191
|
+
|
|
192
|
+
async def async_read(
|
|
193
|
+
self,
|
|
194
|
+
file: Union[Path, IO[Any]],
|
|
195
|
+
name: Optional[str] = None,
|
|
196
|
+
) -> List[Document]:
|
|
197
|
+
"""Async version of read()."""
|
|
198
|
+
try:
|
|
199
|
+
file_extension = infer_file_extension(file, name)
|
|
200
|
+
workbook_name = get_workbook_name(file, name)
|
|
201
|
+
|
|
202
|
+
if isinstance(file, Path) and not file.exists():
|
|
203
|
+
raise FileNotFoundError(f"Could not find file: {file}")
|
|
204
|
+
|
|
205
|
+
file_desc = str(file) if isinstance(file, Path) else getattr(file, "name", "BytesIO")
|
|
206
|
+
log_debug(f"Reading Excel file async: {file_desc}")
|
|
207
|
+
|
|
208
|
+
if file_extension == ContentType.XLSX or file_extension == ".xlsx":
|
|
209
|
+
documents = await asyncio.to_thread(self._read_xlsx, file, workbook_name=workbook_name)
|
|
210
|
+
elif file_extension == ContentType.XLS or file_extension == ".xls":
|
|
211
|
+
documents = await asyncio.to_thread(self._read_xls, file, workbook_name=workbook_name)
|
|
212
|
+
else:
|
|
213
|
+
raise ValueError(f"Unsupported file extension: '{file_extension}'. Expected .xlsx or .xls")
|
|
214
|
+
|
|
215
|
+
if self.chunk:
|
|
216
|
+
documents = await self.chunk_documents_async(documents)
|
|
217
|
+
|
|
218
|
+
return documents
|
|
219
|
+
|
|
220
|
+
except (FileNotFoundError, ImportError, ValueError):
|
|
221
|
+
raise
|
|
222
|
+
except Exception as e:
|
|
223
|
+
file_desc = getattr(file, "name", str(file)) if isinstance(file, IO) else file
|
|
224
|
+
log_error(f"Error reading {file_desc}: {e}")
|
|
225
|
+
return []
|
|
@@ -12,8 +12,9 @@ except ImportError:
|
|
|
12
12
|
from agno.knowledge.chunking.strategy import ChunkingStrategyType
|
|
13
13
|
from agno.knowledge.document.base import Document
|
|
14
14
|
from agno.knowledge.reader.base import Reader
|
|
15
|
+
from agno.knowledge.reader.utils import stringify_cell_value
|
|
15
16
|
from agno.knowledge.types import ContentType
|
|
16
|
-
from agno.utils.log import
|
|
17
|
+
from agno.utils.log import log_debug, log_error, log_warning
|
|
17
18
|
|
|
18
19
|
|
|
19
20
|
class FieldLabeledCSVReader(Reader):
|
|
@@ -41,7 +42,7 @@ class FieldLabeledCSVReader(Reader):
|
|
|
41
42
|
@classmethod
|
|
42
43
|
def get_supported_content_types(cls) -> List[ContentType]:
|
|
43
44
|
"""Get the list of supported content types."""
|
|
44
|
-
return [ContentType.CSV
|
|
45
|
+
return [ContentType.CSV]
|
|
45
46
|
|
|
46
47
|
def _format_field_name(self, field_name: str) -> str:
|
|
47
48
|
"""Format field name to be more readable."""
|
|
@@ -66,17 +67,7 @@ class FieldLabeledCSVReader(Reader):
|
|
|
66
67
|
return None
|
|
67
68
|
|
|
68
69
|
def _convert_row_to_labeled_text(self, headers: List[str], row: List[str], entry_index: int) -> str:
|
|
69
|
-
"""
|
|
70
|
-
Convert a CSV row to field-labeled text format.
|
|
71
|
-
|
|
72
|
-
Args:
|
|
73
|
-
headers: Column headers
|
|
74
|
-
row: Data row values
|
|
75
|
-
entry_index: Index of this entry (for title rotation)
|
|
76
|
-
|
|
77
|
-
Returns:
|
|
78
|
-
Formatted text with field labels
|
|
79
|
-
"""
|
|
70
|
+
"""Convert a CSV row to field-labeled text format."""
|
|
80
71
|
lines = []
|
|
81
72
|
|
|
82
73
|
title = self._get_title_for_entry(entry_index)
|
|
@@ -84,7 +75,8 @@ class FieldLabeledCSVReader(Reader):
|
|
|
84
75
|
lines.append(title)
|
|
85
76
|
|
|
86
77
|
for i, (header, value) in enumerate(zip(headers, row)):
|
|
87
|
-
|
|
78
|
+
# Normalize line endings before stripping to handle embedded newlines
|
|
79
|
+
clean_value = stringify_cell_value(value).strip() if value else ""
|
|
88
80
|
|
|
89
81
|
if self.skip_empty_fields and not clean_value:
|
|
90
82
|
continue
|
|
@@ -101,22 +93,21 @@ class FieldLabeledCSVReader(Reader):
|
|
|
101
93
|
def read(
|
|
102
94
|
self, file: Union[Path, IO[Any]], delimiter: str = ",", quotechar: str = '"', name: Optional[str] = None
|
|
103
95
|
) -> List[Document]:
|
|
96
|
+
"""Read a CSV file and convert each row to a field-labeled document."""
|
|
104
97
|
try:
|
|
105
98
|
if isinstance(file, Path):
|
|
106
99
|
if not file.exists():
|
|
107
100
|
raise FileNotFoundError(f"Could not find file: {file}")
|
|
108
|
-
|
|
109
|
-
|
|
101
|
+
log_debug(f"Reading: {file}")
|
|
102
|
+
csv_name = name or file.stem
|
|
103
|
+
file_content: Union[io.TextIOWrapper, io.StringIO] = file.open(
|
|
104
|
+
newline="", mode="r", encoding=self.encoding or "utf-8"
|
|
105
|
+
)
|
|
110
106
|
else:
|
|
111
|
-
|
|
107
|
+
log_debug(f"Reading retrieved file: {getattr(file, 'name', 'BytesIO')}")
|
|
108
|
+
csv_name = name or getattr(file, "name", "csv_file").split(".")[0]
|
|
112
109
|
file.seek(0)
|
|
113
|
-
file_content = io.StringIO(file.read().decode("utf-8"))
|
|
114
|
-
|
|
115
|
-
csv_name = name or (
|
|
116
|
-
Path(file.name).stem
|
|
117
|
-
if isinstance(file, Path)
|
|
118
|
-
else (getattr(file, "name", "csv_file").split(".")[0] if hasattr(file, "name") else "csv_file")
|
|
119
|
-
)
|
|
110
|
+
file_content = io.StringIO(file.read().decode(self.encoding or "utf-8"))
|
|
120
111
|
|
|
121
112
|
documents = []
|
|
122
113
|
|
|
@@ -127,15 +118,15 @@ class FieldLabeledCSVReader(Reader):
|
|
|
127
118
|
rows = list(csv_reader)
|
|
128
119
|
|
|
129
120
|
if not rows:
|
|
130
|
-
|
|
121
|
+
log_warning("CSV file is empty")
|
|
131
122
|
return []
|
|
132
123
|
|
|
133
124
|
# First row is headers
|
|
134
125
|
headers = [header.strip() for header in rows[0]]
|
|
135
|
-
|
|
126
|
+
log_debug(f"Found {len(headers)} headers: {headers}")
|
|
136
127
|
|
|
137
128
|
data_rows = rows[1:] if len(rows) > 1 else []
|
|
138
|
-
|
|
129
|
+
log_debug(f"Processing {len(data_rows)} data rows")
|
|
139
130
|
|
|
140
131
|
for row_index, row in enumerate(data_rows):
|
|
141
132
|
# Ensure row has same length as headers (pad or truncate)
|
|
@@ -163,13 +154,15 @@ class FieldLabeledCSVReader(Reader):
|
|
|
163
154
|
)
|
|
164
155
|
|
|
165
156
|
documents.append(document)
|
|
166
|
-
|
|
157
|
+
log_debug(f"Created document for row {row_index + 1}: {len(labeled_text)} chars")
|
|
167
158
|
|
|
168
|
-
|
|
159
|
+
log_debug(f"Successfully created {len(documents)} labeled documents from CSV")
|
|
169
160
|
return documents
|
|
170
161
|
|
|
162
|
+
except FileNotFoundError:
|
|
163
|
+
raise
|
|
171
164
|
except Exception as e:
|
|
172
|
-
|
|
165
|
+
log_error(f"Error reading: {getattr(file, 'name', str(file)) if isinstance(file, IO) else file}: {e}")
|
|
173
166
|
return []
|
|
174
167
|
|
|
175
168
|
async def async_read(
|
|
@@ -180,42 +173,38 @@ class FieldLabeledCSVReader(Reader):
|
|
|
180
173
|
page_size: int = 1000,
|
|
181
174
|
name: Optional[str] = None,
|
|
182
175
|
) -> List[Document]:
|
|
176
|
+
"""Read a CSV file asynchronously and convert each row to a field-labeled document."""
|
|
183
177
|
try:
|
|
184
|
-
# Handle file input
|
|
185
178
|
if isinstance(file, Path):
|
|
186
179
|
if not file.exists():
|
|
187
180
|
raise FileNotFoundError(f"Could not find file: {file}")
|
|
188
|
-
|
|
181
|
+
log_debug(f"Reading async: {file}")
|
|
189
182
|
async with aiofiles.open(file, mode="r", encoding=self.encoding or "utf-8", newline="") as file_content:
|
|
190
183
|
content = await file_content.read()
|
|
191
184
|
file_content_io = io.StringIO(content)
|
|
185
|
+
csv_name = name or file.stem
|
|
192
186
|
else:
|
|
193
|
-
|
|
187
|
+
log_debug(f"Reading retrieved file async: {getattr(file, 'name', 'BytesIO')}")
|
|
188
|
+
csv_name = name or getattr(file, "name", "csv_file").split(".")[0]
|
|
194
189
|
file.seek(0)
|
|
195
|
-
file_content_io = io.StringIO(file.read().decode("utf-8"))
|
|
196
|
-
|
|
197
|
-
csv_name = name or (
|
|
198
|
-
Path(file.name).stem
|
|
199
|
-
if isinstance(file, Path)
|
|
200
|
-
else (getattr(file, "name", "csv_file").split(".")[0] if hasattr(file, "name") else "csv_file")
|
|
201
|
-
)
|
|
190
|
+
file_content_io = io.StringIO(file.read().decode(self.encoding or "utf-8"))
|
|
202
191
|
|
|
203
192
|
file_content_io.seek(0)
|
|
204
193
|
csv_reader = csv.reader(file_content_io, delimiter=delimiter, quotechar=quotechar)
|
|
205
194
|
rows = list(csv_reader)
|
|
206
195
|
|
|
207
196
|
if not rows:
|
|
208
|
-
|
|
197
|
+
log_warning("CSV file is empty")
|
|
209
198
|
return []
|
|
210
199
|
|
|
211
200
|
# First row is headers
|
|
212
201
|
headers = [header.strip() for header in rows[0]]
|
|
213
|
-
|
|
202
|
+
log_debug(f"Found {len(headers)} headers: {headers}")
|
|
214
203
|
|
|
215
204
|
# Process data rows
|
|
216
205
|
data_rows = rows[1:] if len(rows) > 1 else []
|
|
217
206
|
total_rows = len(data_rows)
|
|
218
|
-
|
|
207
|
+
log_debug(f"Processing {total_rows} data rows")
|
|
219
208
|
|
|
220
209
|
# For small files, process all at once
|
|
221
210
|
if total_rows <= 10:
|
|
@@ -241,12 +230,13 @@ class FieldLabeledCSVReader(Reader):
|
|
|
241
230
|
)
|
|
242
231
|
documents.append(document)
|
|
243
232
|
else:
|
|
233
|
+
# Large files: paginate and process in parallel
|
|
244
234
|
pages = []
|
|
245
235
|
for i in range(0, total_rows, page_size):
|
|
246
236
|
pages.append(data_rows[i : i + page_size])
|
|
247
237
|
|
|
248
238
|
async def _process_page(page_number: int, page_rows: List[List[str]]) -> List[Document]:
|
|
249
|
-
"""Process a page of rows into documents"""
|
|
239
|
+
"""Process a page of rows into documents."""
|
|
250
240
|
page_documents = []
|
|
251
241
|
start_row_index = (page_number - 1) * page_size
|
|
252
242
|
|
|
@@ -282,11 +272,11 @@ class FieldLabeledCSVReader(Reader):
|
|
|
282
272
|
|
|
283
273
|
documents = [doc for page_docs in page_results for doc in page_docs]
|
|
284
274
|
|
|
285
|
-
|
|
275
|
+
log_debug(f"Successfully created {len(documents)} labeled documents from CSV")
|
|
286
276
|
return documents
|
|
287
277
|
|
|
278
|
+
except FileNotFoundError:
|
|
279
|
+
raise
|
|
288
280
|
except Exception as e:
|
|
289
|
-
|
|
290
|
-
f"Error reading async: {getattr(file, 'name', str(file)) if isinstance(file, IO) else file}: {e}"
|
|
291
|
-
)
|
|
281
|
+
log_error(f"Error reading async: {getattr(file, 'name', str(file)) if isinstance(file, IO) else file}: {e}")
|
|
292
282
|
return []
|
|
@@ -43,9 +43,10 @@ class FirecrawlReader(Reader):
|
|
|
43
43
|
self.mode = mode
|
|
44
44
|
|
|
45
45
|
@classmethod
|
|
46
|
-
def get_supported_chunking_strategies(
|
|
46
|
+
def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
|
|
47
47
|
"""Get the list of supported chunking strategies for Firecrawl readers."""
|
|
48
48
|
return [
|
|
49
|
+
ChunkingStrategyType.CODE_CHUNKER,
|
|
49
50
|
ChunkingStrategyType.SEMANTIC_CHUNKER,
|
|
50
51
|
ChunkingStrategyType.FIXED_SIZE_CHUNKER,
|
|
51
52
|
ChunkingStrategyType.AGENTIC_CHUNKER,
|
|
@@ -54,7 +55,7 @@ class FirecrawlReader(Reader):
|
|
|
54
55
|
]
|
|
55
56
|
|
|
56
57
|
@classmethod
|
|
57
|
-
def get_supported_content_types(
|
|
58
|
+
def get_supported_content_types(cls) -> List[ContentType]:
|
|
58
59
|
return [ContentType.URL]
|
|
59
60
|
|
|
60
61
|
def scrape(self, url: str, name: Optional[str] = None) -> List[Document]:
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import asyncio
|
|
2
2
|
import json
|
|
3
|
-
from io import BytesIO
|
|
4
3
|
from pathlib import Path
|
|
5
4
|
from typing import IO, Any, List, Optional, Union
|
|
6
5
|
from uuid import uuid4
|
|
@@ -10,7 +9,7 @@ from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyT
|
|
|
10
9
|
from agno.knowledge.document.base import Document
|
|
11
10
|
from agno.knowledge.reader.base import Reader
|
|
12
11
|
from agno.knowledge.types import ContentType
|
|
13
|
-
from agno.utils.log import
|
|
12
|
+
from agno.utils.log import log_debug, log_error
|
|
14
13
|
|
|
15
14
|
|
|
16
15
|
class JSONReader(Reader):
|
|
@@ -22,9 +21,10 @@ class JSONReader(Reader):
|
|
|
22
21
|
super().__init__(chunking_strategy=chunking_strategy, **kwargs)
|
|
23
22
|
|
|
24
23
|
@classmethod
|
|
25
|
-
def get_supported_chunking_strategies(
|
|
24
|
+
def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
|
|
26
25
|
"""Get the list of supported chunking strategies for JSON readers."""
|
|
27
26
|
return [
|
|
27
|
+
ChunkingStrategyType.CODE_CHUNKER,
|
|
28
28
|
ChunkingStrategyType.FIXED_SIZE_CHUNKER,
|
|
29
29
|
ChunkingStrategyType.AGENTIC_CHUNKER,
|
|
30
30
|
ChunkingStrategyType.DOCUMENT_CHUNKER,
|
|
@@ -33,7 +33,7 @@ class JSONReader(Reader):
|
|
|
33
33
|
]
|
|
34
34
|
|
|
35
35
|
@classmethod
|
|
36
|
-
def get_supported_content_types(
|
|
36
|
+
def get_supported_content_types(cls) -> List[ContentType]:
|
|
37
37
|
return [ContentType.JSON]
|
|
38
38
|
|
|
39
39
|
def read(self, path: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
|
|
@@ -41,18 +41,16 @@ class JSONReader(Reader):
|
|
|
41
41
|
if isinstance(path, Path):
|
|
42
42
|
if not path.exists():
|
|
43
43
|
raise FileNotFoundError(f"Could not find file: {path}")
|
|
44
|
-
|
|
45
|
-
json_name = name or path.
|
|
46
|
-
json_contents = json.loads(path.read_text(self.encoding or "utf-8"))
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
json_name = name or path
|
|
50
|
-
log_info(f"Reading uploaded file: {json_name}")
|
|
44
|
+
log_debug(f"Reading: {path}")
|
|
45
|
+
json_name = name or path.stem
|
|
46
|
+
json_contents = json.loads(path.read_text(encoding=self.encoding or "utf-8"))
|
|
47
|
+
elif hasattr(path, "seek") and hasattr(path, "read"):
|
|
48
|
+
log_debug(f"Reading uploaded file: {getattr(path, 'name', 'BytesIO')}")
|
|
49
|
+
json_name = name or getattr(path, "name", "json_file").split(".")[0]
|
|
51
50
|
path.seek(0)
|
|
52
51
|
json_contents = json.load(path)
|
|
53
|
-
|
|
54
52
|
else:
|
|
55
|
-
raise ValueError("Unsupported file type. Must be Path or
|
|
53
|
+
raise ValueError("Unsupported file type. Must be Path or file-like object.")
|
|
56
54
|
|
|
57
55
|
if isinstance(json_contents, dict):
|
|
58
56
|
json_contents = [json_contents]
|
|
@@ -72,16 +70,12 @@ class JSONReader(Reader):
|
|
|
72
70
|
chunked_documents.extend(self.chunk_document(document))
|
|
73
71
|
return chunked_documents
|
|
74
72
|
return documents
|
|
75
|
-
except
|
|
73
|
+
except (FileNotFoundError, ValueError, json.JSONDecodeError):
|
|
74
|
+
raise
|
|
75
|
+
except Exception as e:
|
|
76
|
+
log_error(f"Error reading: {path}: {e}")
|
|
76
77
|
raise
|
|
77
78
|
|
|
78
79
|
async def async_read(self, path: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
|
|
79
|
-
"""Asynchronously read JSON files.
|
|
80
|
-
|
|
81
|
-
Args:
|
|
82
|
-
path (Union[Path, IO[Any]]): Path to a JSON file or a file-like object
|
|
83
|
-
|
|
84
|
-
Returns:
|
|
85
|
-
List[Document]: List of documents from the JSON file
|
|
86
|
-
"""
|
|
80
|
+
"""Asynchronously read JSON files."""
|
|
87
81
|
return await asyncio.to_thread(self.read, path, name)
|
|
@@ -7,7 +7,7 @@ from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyT
|
|
|
7
7
|
from agno.knowledge.document.base import Document
|
|
8
8
|
from agno.knowledge.reader.base import Reader
|
|
9
9
|
from agno.knowledge.types import ContentType
|
|
10
|
-
from agno.utils.log import
|
|
10
|
+
from agno.utils.log import log_debug, log_error, log_warning
|
|
11
11
|
|
|
12
12
|
DEFAULT_CHUNKER_STRATEGY: ChunkingStrategy
|
|
13
13
|
|
|
@@ -28,9 +28,10 @@ class MarkdownReader(Reader):
|
|
|
28
28
|
"""Reader for Markdown files"""
|
|
29
29
|
|
|
30
30
|
@classmethod
|
|
31
|
-
def get_supported_chunking_strategies(
|
|
31
|
+
def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
|
|
32
32
|
"""Get the list of supported chunking strategies for Markdown readers."""
|
|
33
33
|
strategies = [
|
|
34
|
+
ChunkingStrategyType.CODE_CHUNKER,
|
|
34
35
|
ChunkingStrategyType.DOCUMENT_CHUNKER,
|
|
35
36
|
ChunkingStrategyType.AGENTIC_CHUNKER,
|
|
36
37
|
ChunkingStrategyType.RECURSIVE_CHUNKER,
|
|
@@ -45,7 +46,7 @@ class MarkdownReader(Reader):
|
|
|
45
46
|
return strategies
|
|
46
47
|
|
|
47
48
|
@classmethod
|
|
48
|
-
def get_supported_content_types(
|
|
49
|
+
def get_supported_content_types(cls) -> List[ContentType]:
|
|
49
50
|
return [ContentType.MARKDOWN]
|
|
50
51
|
|
|
51
52
|
def __init__(
|
|
@@ -65,12 +66,12 @@ class MarkdownReader(Reader):
|
|
|
65
66
|
if isinstance(file, Path):
|
|
66
67
|
if not file.exists():
|
|
67
68
|
raise FileNotFoundError(f"Could not find file: {file}")
|
|
68
|
-
|
|
69
|
+
log_debug(f"Reading: {file}")
|
|
69
70
|
file_name = name or file.stem
|
|
70
71
|
file_contents = file.read_text(encoding=self.encoding or "utf-8")
|
|
71
72
|
else:
|
|
72
|
-
|
|
73
|
-
file_name = name or file
|
|
73
|
+
log_debug(f"Reading uploaded file: {getattr(file, 'name', 'BytesIO')}")
|
|
74
|
+
file_name = name or getattr(file, "name", "file").split(".")[0]
|
|
74
75
|
file.seek(0)
|
|
75
76
|
file_contents = file.read().decode(self.encoding or "utf-8")
|
|
76
77
|
|
|
@@ -82,7 +83,7 @@ class MarkdownReader(Reader):
|
|
|
82
83
|
return chunked_documents
|
|
83
84
|
return documents
|
|
84
85
|
except Exception as e:
|
|
85
|
-
|
|
86
|
+
log_error(f"Error reading: {file}: {e}")
|
|
86
87
|
return []
|
|
87
88
|
|
|
88
89
|
async def async_read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
|
|
@@ -91,7 +92,7 @@ class MarkdownReader(Reader):
|
|
|
91
92
|
if not file.exists():
|
|
92
93
|
raise FileNotFoundError(f"Could not find file: {file}")
|
|
93
94
|
|
|
94
|
-
|
|
95
|
+
log_debug(f"Reading asynchronously: {file}")
|
|
95
96
|
file_name = name or file.stem
|
|
96
97
|
|
|
97
98
|
try:
|
|
@@ -100,17 +101,17 @@ class MarkdownReader(Reader):
|
|
|
100
101
|
async with aiofiles.open(file, "r", encoding=self.encoding or "utf-8") as f:
|
|
101
102
|
file_contents = await f.read()
|
|
102
103
|
except ImportError:
|
|
103
|
-
|
|
104
|
-
file_contents = file.read_text(self.encoding or "utf-8")
|
|
104
|
+
log_warning("aiofiles not installed, using synchronous file I/O")
|
|
105
|
+
file_contents = file.read_text(encoding=self.encoding or "utf-8")
|
|
105
106
|
else:
|
|
106
|
-
|
|
107
|
-
file_name = name or file
|
|
107
|
+
log_debug(f"Reading uploaded file asynchronously: {getattr(file, 'name', 'BytesIO')}")
|
|
108
|
+
file_name = name or getattr(file, "name", "file").split(".")[0]
|
|
108
109
|
file.seek(0)
|
|
109
110
|
file_contents = file.read().decode(self.encoding or "utf-8")
|
|
110
111
|
|
|
111
112
|
document = Document(
|
|
112
113
|
name=file_name,
|
|
113
|
-
id=str(uuid.uuid4()),
|
|
114
|
+
id=str(uuid.uuid4()),
|
|
114
115
|
content=file_contents,
|
|
115
116
|
)
|
|
116
117
|
|
|
@@ -118,7 +119,7 @@ class MarkdownReader(Reader):
|
|
|
118
119
|
return await self._async_chunk_document(document)
|
|
119
120
|
return [document]
|
|
120
121
|
except Exception as e:
|
|
121
|
-
|
|
122
|
+
log_error(f"Error reading asynchronously: {file}: {e}")
|
|
122
123
|
return []
|
|
123
124
|
|
|
124
125
|
async def _async_chunk_document(self, document: Document) -> List[Document]:
|