agno 2.2.13__py3-none-any.whl → 2.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/agent/__init__.py +6 -0
- agno/agent/agent.py +5252 -3145
- agno/agent/remote.py +525 -0
- agno/api/api.py +2 -0
- agno/client/__init__.py +3 -0
- agno/client/a2a/__init__.py +10 -0
- agno/client/a2a/client.py +554 -0
- agno/client/a2a/schemas.py +112 -0
- agno/client/a2a/utils.py +369 -0
- agno/client/os.py +2669 -0
- agno/compression/__init__.py +3 -0
- agno/compression/manager.py +247 -0
- agno/culture/manager.py +2 -2
- agno/db/base.py +927 -6
- agno/db/dynamo/dynamo.py +788 -2
- agno/db/dynamo/schemas.py +128 -0
- agno/db/dynamo/utils.py +26 -3
- agno/db/firestore/firestore.py +674 -50
- agno/db/firestore/schemas.py +41 -0
- agno/db/firestore/utils.py +25 -10
- agno/db/gcs_json/gcs_json_db.py +506 -3
- agno/db/gcs_json/utils.py +14 -2
- agno/db/in_memory/in_memory_db.py +203 -4
- agno/db/in_memory/utils.py +14 -2
- agno/db/json/json_db.py +498 -2
- agno/db/json/utils.py +14 -2
- agno/db/migrations/manager.py +199 -0
- agno/db/migrations/utils.py +19 -0
- agno/db/migrations/v1_to_v2.py +54 -16
- agno/db/migrations/versions/__init__.py +0 -0
- agno/db/migrations/versions/v2_3_0.py +977 -0
- agno/db/mongo/async_mongo.py +1013 -39
- agno/db/mongo/mongo.py +684 -4
- agno/db/mongo/schemas.py +48 -0
- agno/db/mongo/utils.py +17 -0
- agno/db/mysql/__init__.py +2 -1
- agno/db/mysql/async_mysql.py +2958 -0
- agno/db/mysql/mysql.py +722 -53
- agno/db/mysql/schemas.py +77 -11
- agno/db/mysql/utils.py +151 -8
- agno/db/postgres/async_postgres.py +1254 -137
- agno/db/postgres/postgres.py +2316 -93
- agno/db/postgres/schemas.py +153 -21
- agno/db/postgres/utils.py +22 -7
- agno/db/redis/redis.py +531 -3
- agno/db/redis/schemas.py +36 -0
- agno/db/redis/utils.py +31 -15
- agno/db/schemas/evals.py +1 -0
- agno/db/schemas/memory.py +20 -9
- agno/db/singlestore/schemas.py +70 -1
- agno/db/singlestore/singlestore.py +737 -74
- agno/db/singlestore/utils.py +13 -3
- agno/db/sqlite/async_sqlite.py +1069 -89
- agno/db/sqlite/schemas.py +133 -1
- agno/db/sqlite/sqlite.py +2203 -165
- agno/db/sqlite/utils.py +21 -11
- agno/db/surrealdb/models.py +25 -0
- agno/db/surrealdb/surrealdb.py +603 -1
- agno/db/utils.py +60 -0
- agno/eval/__init__.py +26 -3
- agno/eval/accuracy.py +25 -12
- agno/eval/agent_as_judge.py +871 -0
- agno/eval/base.py +29 -0
- agno/eval/performance.py +10 -4
- agno/eval/reliability.py +22 -13
- agno/eval/utils.py +2 -1
- agno/exceptions.py +42 -0
- agno/hooks/__init__.py +3 -0
- agno/hooks/decorator.py +164 -0
- agno/integrations/discord/client.py +13 -2
- agno/knowledge/__init__.py +4 -0
- agno/knowledge/chunking/code.py +90 -0
- agno/knowledge/chunking/document.py +65 -4
- agno/knowledge/chunking/fixed.py +4 -1
- agno/knowledge/chunking/markdown.py +102 -11
- agno/knowledge/chunking/recursive.py +2 -2
- agno/knowledge/chunking/semantic.py +130 -48
- agno/knowledge/chunking/strategy.py +18 -0
- agno/knowledge/embedder/azure_openai.py +0 -1
- agno/knowledge/embedder/google.py +1 -1
- agno/knowledge/embedder/mistral.py +1 -1
- agno/knowledge/embedder/nebius.py +1 -1
- agno/knowledge/embedder/openai.py +16 -12
- agno/knowledge/filesystem.py +412 -0
- agno/knowledge/knowledge.py +4261 -1199
- agno/knowledge/protocol.py +134 -0
- agno/knowledge/reader/arxiv_reader.py +3 -2
- agno/knowledge/reader/base.py +9 -7
- agno/knowledge/reader/csv_reader.py +91 -42
- agno/knowledge/reader/docx_reader.py +9 -10
- agno/knowledge/reader/excel_reader.py +225 -0
- agno/knowledge/reader/field_labeled_csv_reader.py +38 -48
- agno/knowledge/reader/firecrawl_reader.py +3 -2
- agno/knowledge/reader/json_reader.py +16 -22
- agno/knowledge/reader/markdown_reader.py +15 -14
- agno/knowledge/reader/pdf_reader.py +33 -28
- agno/knowledge/reader/pptx_reader.py +9 -10
- agno/knowledge/reader/reader_factory.py +135 -1
- agno/knowledge/reader/s3_reader.py +8 -16
- agno/knowledge/reader/tavily_reader.py +3 -3
- agno/knowledge/reader/text_reader.py +15 -14
- agno/knowledge/reader/utils/__init__.py +17 -0
- agno/knowledge/reader/utils/spreadsheet.py +114 -0
- agno/knowledge/reader/web_search_reader.py +8 -65
- agno/knowledge/reader/website_reader.py +16 -13
- agno/knowledge/reader/wikipedia_reader.py +36 -3
- agno/knowledge/reader/youtube_reader.py +3 -2
- agno/knowledge/remote_content/__init__.py +33 -0
- agno/knowledge/remote_content/config.py +266 -0
- agno/knowledge/remote_content/remote_content.py +105 -17
- agno/knowledge/utils.py +76 -22
- agno/learn/__init__.py +71 -0
- agno/learn/config.py +463 -0
- agno/learn/curate.py +185 -0
- agno/learn/machine.py +725 -0
- agno/learn/schemas.py +1114 -0
- agno/learn/stores/__init__.py +38 -0
- agno/learn/stores/decision_log.py +1156 -0
- agno/learn/stores/entity_memory.py +3275 -0
- agno/learn/stores/learned_knowledge.py +1583 -0
- agno/learn/stores/protocol.py +117 -0
- agno/learn/stores/session_context.py +1217 -0
- agno/learn/stores/user_memory.py +1495 -0
- agno/learn/stores/user_profile.py +1220 -0
- agno/learn/utils.py +209 -0
- agno/media.py +22 -6
- agno/memory/__init__.py +14 -1
- agno/memory/manager.py +223 -8
- agno/memory/strategies/__init__.py +15 -0
- agno/memory/strategies/base.py +66 -0
- agno/memory/strategies/summarize.py +196 -0
- agno/memory/strategies/types.py +37 -0
- agno/models/aimlapi/aimlapi.py +17 -0
- agno/models/anthropic/claude.py +434 -59
- agno/models/aws/bedrock.py +121 -20
- agno/models/aws/claude.py +131 -274
- agno/models/azure/ai_foundry.py +10 -6
- agno/models/azure/openai_chat.py +33 -10
- agno/models/base.py +1162 -561
- agno/models/cerebras/cerebras.py +120 -24
- agno/models/cerebras/cerebras_openai.py +21 -2
- agno/models/cohere/chat.py +65 -6
- agno/models/cometapi/cometapi.py +18 -1
- agno/models/dashscope/dashscope.py +2 -3
- agno/models/deepinfra/deepinfra.py +18 -1
- agno/models/deepseek/deepseek.py +69 -3
- agno/models/fireworks/fireworks.py +18 -1
- agno/models/google/gemini.py +959 -89
- agno/models/google/utils.py +22 -0
- agno/models/groq/groq.py +48 -18
- agno/models/huggingface/huggingface.py +17 -6
- agno/models/ibm/watsonx.py +16 -6
- agno/models/internlm/internlm.py +18 -1
- agno/models/langdb/langdb.py +13 -1
- agno/models/litellm/chat.py +88 -9
- agno/models/litellm/litellm_openai.py +18 -1
- agno/models/message.py +24 -5
- agno/models/meta/llama.py +40 -13
- agno/models/meta/llama_openai.py +22 -21
- agno/models/metrics.py +12 -0
- agno/models/mistral/mistral.py +8 -4
- agno/models/n1n/__init__.py +3 -0
- agno/models/n1n/n1n.py +57 -0
- agno/models/nebius/nebius.py +6 -7
- agno/models/nvidia/nvidia.py +20 -3
- agno/models/ollama/__init__.py +2 -0
- agno/models/ollama/chat.py +17 -6
- agno/models/ollama/responses.py +100 -0
- agno/models/openai/__init__.py +2 -0
- agno/models/openai/chat.py +117 -26
- agno/models/openai/open_responses.py +46 -0
- agno/models/openai/responses.py +110 -32
- agno/models/openrouter/__init__.py +2 -0
- agno/models/openrouter/openrouter.py +67 -2
- agno/models/openrouter/responses.py +146 -0
- agno/models/perplexity/perplexity.py +19 -1
- agno/models/portkey/portkey.py +7 -6
- agno/models/requesty/requesty.py +19 -2
- agno/models/response.py +20 -2
- agno/models/sambanova/sambanova.py +20 -3
- agno/models/siliconflow/siliconflow.py +19 -2
- agno/models/together/together.py +20 -3
- agno/models/vercel/v0.py +20 -3
- agno/models/vertexai/claude.py +124 -4
- agno/models/vllm/vllm.py +19 -14
- agno/models/xai/xai.py +19 -2
- agno/os/app.py +467 -137
- agno/os/auth.py +253 -5
- agno/os/config.py +22 -0
- agno/os/interfaces/a2a/a2a.py +7 -6
- agno/os/interfaces/a2a/router.py +635 -26
- agno/os/interfaces/a2a/utils.py +32 -33
- agno/os/interfaces/agui/agui.py +5 -3
- agno/os/interfaces/agui/router.py +26 -16
- agno/os/interfaces/agui/utils.py +97 -57
- agno/os/interfaces/base.py +7 -7
- agno/os/interfaces/slack/router.py +16 -7
- agno/os/interfaces/slack/slack.py +7 -7
- agno/os/interfaces/whatsapp/router.py +35 -7
- agno/os/interfaces/whatsapp/security.py +3 -1
- agno/os/interfaces/whatsapp/whatsapp.py +11 -8
- agno/os/managers.py +326 -0
- agno/os/mcp.py +652 -79
- agno/os/middleware/__init__.py +4 -0
- agno/os/middleware/jwt.py +718 -115
- agno/os/middleware/trailing_slash.py +27 -0
- agno/os/router.py +105 -1558
- agno/os/routers/agents/__init__.py +3 -0
- agno/os/routers/agents/router.py +655 -0
- agno/os/routers/agents/schema.py +288 -0
- agno/os/routers/components/__init__.py +3 -0
- agno/os/routers/components/components.py +475 -0
- agno/os/routers/database.py +155 -0
- agno/os/routers/evals/evals.py +111 -18
- agno/os/routers/evals/schemas.py +38 -5
- agno/os/routers/evals/utils.py +80 -11
- agno/os/routers/health.py +3 -3
- agno/os/routers/knowledge/knowledge.py +284 -35
- agno/os/routers/knowledge/schemas.py +14 -2
- agno/os/routers/memory/memory.py +274 -11
- agno/os/routers/memory/schemas.py +44 -3
- agno/os/routers/metrics/metrics.py +30 -15
- agno/os/routers/metrics/schemas.py +10 -6
- agno/os/routers/registry/__init__.py +3 -0
- agno/os/routers/registry/registry.py +337 -0
- agno/os/routers/session/session.py +143 -14
- agno/os/routers/teams/__init__.py +3 -0
- agno/os/routers/teams/router.py +550 -0
- agno/os/routers/teams/schema.py +280 -0
- agno/os/routers/traces/__init__.py +3 -0
- agno/os/routers/traces/schemas.py +414 -0
- agno/os/routers/traces/traces.py +549 -0
- agno/os/routers/workflows/__init__.py +3 -0
- agno/os/routers/workflows/router.py +757 -0
- agno/os/routers/workflows/schema.py +139 -0
- agno/os/schema.py +157 -584
- agno/os/scopes.py +469 -0
- agno/os/settings.py +3 -0
- agno/os/utils.py +574 -185
- agno/reasoning/anthropic.py +85 -1
- agno/reasoning/azure_ai_foundry.py +93 -1
- agno/reasoning/deepseek.py +102 -2
- agno/reasoning/default.py +6 -7
- agno/reasoning/gemini.py +87 -3
- agno/reasoning/groq.py +109 -2
- agno/reasoning/helpers.py +6 -7
- agno/reasoning/manager.py +1238 -0
- agno/reasoning/ollama.py +93 -1
- agno/reasoning/openai.py +115 -1
- agno/reasoning/vertexai.py +85 -1
- agno/registry/__init__.py +3 -0
- agno/registry/registry.py +68 -0
- agno/remote/__init__.py +3 -0
- agno/remote/base.py +581 -0
- agno/run/__init__.py +2 -4
- agno/run/agent.py +134 -19
- agno/run/base.py +49 -1
- agno/run/cancel.py +65 -52
- agno/run/cancellation_management/__init__.py +9 -0
- agno/run/cancellation_management/base.py +78 -0
- agno/run/cancellation_management/in_memory_cancellation_manager.py +100 -0
- agno/run/cancellation_management/redis_cancellation_manager.py +236 -0
- agno/run/requirement.py +181 -0
- agno/run/team.py +111 -19
- agno/run/workflow.py +2 -1
- agno/session/agent.py +57 -92
- agno/session/summary.py +1 -1
- agno/session/team.py +62 -115
- agno/session/workflow.py +353 -57
- agno/skills/__init__.py +17 -0
- agno/skills/agent_skills.py +377 -0
- agno/skills/errors.py +32 -0
- agno/skills/loaders/__init__.py +4 -0
- agno/skills/loaders/base.py +27 -0
- agno/skills/loaders/local.py +216 -0
- agno/skills/skill.py +65 -0
- agno/skills/utils.py +107 -0
- agno/skills/validator.py +277 -0
- agno/table.py +10 -0
- agno/team/__init__.py +5 -1
- agno/team/remote.py +447 -0
- agno/team/team.py +3769 -2202
- agno/tools/brandfetch.py +27 -18
- agno/tools/browserbase.py +225 -16
- agno/tools/crawl4ai.py +3 -0
- agno/tools/duckduckgo.py +25 -71
- agno/tools/exa.py +0 -21
- agno/tools/file.py +14 -13
- agno/tools/file_generation.py +12 -6
- agno/tools/firecrawl.py +15 -7
- agno/tools/function.py +94 -113
- agno/tools/google_bigquery.py +11 -2
- agno/tools/google_drive.py +4 -3
- agno/tools/knowledge.py +9 -4
- agno/tools/mcp/mcp.py +301 -18
- agno/tools/mcp/multi_mcp.py +269 -14
- agno/tools/mem0.py +11 -10
- agno/tools/memory.py +47 -46
- agno/tools/mlx_transcribe.py +10 -7
- agno/tools/models/nebius.py +5 -5
- agno/tools/models_labs.py +20 -10
- agno/tools/nano_banana.py +151 -0
- agno/tools/parallel.py +0 -7
- agno/tools/postgres.py +76 -36
- agno/tools/python.py +14 -6
- agno/tools/reasoning.py +30 -23
- agno/tools/redshift.py +406 -0
- agno/tools/shopify.py +1519 -0
- agno/tools/spotify.py +919 -0
- agno/tools/tavily.py +4 -1
- agno/tools/toolkit.py +253 -18
- agno/tools/websearch.py +93 -0
- agno/tools/website.py +1 -1
- agno/tools/wikipedia.py +1 -1
- agno/tools/workflow.py +56 -48
- agno/tools/yfinance.py +12 -11
- agno/tracing/__init__.py +12 -0
- agno/tracing/exporter.py +161 -0
- agno/tracing/schemas.py +276 -0
- agno/tracing/setup.py +112 -0
- agno/utils/agent.py +251 -10
- agno/utils/cryptography.py +22 -0
- agno/utils/dttm.py +33 -0
- agno/utils/events.py +264 -7
- agno/utils/hooks.py +111 -3
- agno/utils/http.py +161 -2
- agno/utils/mcp.py +49 -8
- agno/utils/media.py +22 -1
- agno/utils/models/ai_foundry.py +9 -2
- agno/utils/models/claude.py +20 -5
- agno/utils/models/cohere.py +9 -2
- agno/utils/models/llama.py +9 -2
- agno/utils/models/mistral.py +4 -2
- agno/utils/os.py +0 -0
- agno/utils/print_response/agent.py +99 -16
- agno/utils/print_response/team.py +223 -24
- agno/utils/print_response/workflow.py +0 -2
- agno/utils/prompts.py +8 -6
- agno/utils/remote.py +23 -0
- agno/utils/response.py +1 -13
- agno/utils/string.py +91 -2
- agno/utils/team.py +62 -12
- agno/utils/tokens.py +657 -0
- agno/vectordb/base.py +15 -2
- agno/vectordb/cassandra/cassandra.py +1 -1
- agno/vectordb/chroma/__init__.py +2 -1
- agno/vectordb/chroma/chromadb.py +468 -23
- agno/vectordb/clickhouse/clickhousedb.py +1 -1
- agno/vectordb/couchbase/couchbase.py +6 -2
- agno/vectordb/lancedb/lance_db.py +7 -38
- agno/vectordb/lightrag/lightrag.py +7 -6
- agno/vectordb/milvus/milvus.py +118 -84
- agno/vectordb/mongodb/__init__.py +2 -1
- agno/vectordb/mongodb/mongodb.py +14 -31
- agno/vectordb/pgvector/pgvector.py +120 -66
- agno/vectordb/pineconedb/pineconedb.py +2 -19
- agno/vectordb/qdrant/__init__.py +2 -1
- agno/vectordb/qdrant/qdrant.py +33 -56
- agno/vectordb/redis/__init__.py +2 -1
- agno/vectordb/redis/redisdb.py +19 -31
- agno/vectordb/singlestore/singlestore.py +17 -9
- agno/vectordb/surrealdb/surrealdb.py +2 -38
- agno/vectordb/weaviate/__init__.py +2 -1
- agno/vectordb/weaviate/weaviate.py +7 -3
- agno/workflow/__init__.py +5 -1
- agno/workflow/agent.py +2 -2
- agno/workflow/condition.py +12 -10
- agno/workflow/loop.py +28 -9
- agno/workflow/parallel.py +21 -13
- agno/workflow/remote.py +362 -0
- agno/workflow/router.py +12 -9
- agno/workflow/step.py +261 -36
- agno/workflow/steps.py +12 -8
- agno/workflow/types.py +40 -77
- agno/workflow/workflow.py +939 -213
- {agno-2.2.13.dist-info → agno-2.4.3.dist-info}/METADATA +134 -181
- agno-2.4.3.dist-info/RECORD +677 -0
- {agno-2.2.13.dist-info → agno-2.4.3.dist-info}/WHEEL +1 -1
- agno/tools/googlesearch.py +0 -98
- agno/tools/memori.py +0 -339
- agno-2.2.13.dist-info/RECORD +0 -575
- {agno-2.2.13.dist-info → agno-2.4.3.dist-info}/licenses/LICENSE +0 -0
- {agno-2.2.13.dist-info → agno-2.4.3.dist-info}/top_level.txt +0 -0
|
@@ -9,7 +9,7 @@ from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyT
|
|
|
9
9
|
from agno.knowledge.document.base import Document
|
|
10
10
|
from agno.knowledge.reader.base import Reader
|
|
11
11
|
from agno.knowledge.types import ContentType
|
|
12
|
-
from agno.utils.log import
|
|
12
|
+
from agno.utils.log import log_debug, log_error
|
|
13
13
|
|
|
14
14
|
try:
|
|
15
15
|
from pypdf import PdfReader as DocumentReader # noqa: F401
|
|
@@ -200,10 +200,11 @@ class BasePDFReader(Reader):
|
|
|
200
200
|
super().__init__(chunking_strategy=chunking_strategy, **kwargs)
|
|
201
201
|
|
|
202
202
|
@classmethod
|
|
203
|
-
def get_supported_chunking_strategies(
|
|
203
|
+
def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
|
|
204
204
|
"""Get the list of supported chunking strategies for PDF readers."""
|
|
205
205
|
return [
|
|
206
206
|
ChunkingStrategyType.DOCUMENT_CHUNKER,
|
|
207
|
+
ChunkingStrategyType.CODE_CHUNKER,
|
|
207
208
|
ChunkingStrategyType.FIXED_SIZE_CHUNKER,
|
|
208
209
|
ChunkingStrategyType.AGENTIC_CHUNKER,
|
|
209
210
|
ChunkingStrategyType.SEMANTIC_CHUNKER,
|
|
@@ -218,31 +219,29 @@ class BasePDFReader(Reader):
|
|
|
218
219
|
|
|
219
220
|
def _get_doc_name(self, pdf_source: Union[str, Path, IO[Any]], name: Optional[str] = None) -> str:
|
|
220
221
|
"""Determines the document name from the source or a provided name."""
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
except Exception:
|
|
229
|
-
# The original code had a bug here, it should check `name` first.
|
|
230
|
-
return name or "pdf"
|
|
222
|
+
if name:
|
|
223
|
+
return name
|
|
224
|
+
if isinstance(pdf_source, str):
|
|
225
|
+
return Path(pdf_source).stem.replace(" ", "_")
|
|
226
|
+
if isinstance(pdf_source, Path):
|
|
227
|
+
return pdf_source.stem.replace(" ", "_")
|
|
228
|
+
return getattr(pdf_source, "name", "pdf_file").split(".")[0].replace(" ", "_")
|
|
231
229
|
|
|
232
230
|
def _decrypt_pdf(self, doc_reader: DocumentReader, doc_name: str, password: Optional[str] = None) -> bool:
|
|
233
231
|
if not doc_reader.is_encrypted:
|
|
234
232
|
return True
|
|
235
233
|
|
|
236
234
|
# Use provided password or fall back to instance password
|
|
237
|
-
|
|
238
|
-
if
|
|
239
|
-
|
|
235
|
+
# Note: Empty string "" is a valid password for PDFs with blank user password
|
|
236
|
+
pdf_password = self.password if password is None else password
|
|
237
|
+
if pdf_password is None:
|
|
238
|
+
log_error(f'PDF file "{doc_name}" is password protected but no password provided')
|
|
240
239
|
return False
|
|
241
240
|
|
|
242
241
|
try:
|
|
243
242
|
decrypted_pdf = doc_reader.decrypt(pdf_password)
|
|
244
243
|
if decrypted_pdf:
|
|
245
|
-
|
|
244
|
+
log_debug(f'Successfully decrypted PDF file "{doc_name}" with user password')
|
|
246
245
|
return True
|
|
247
246
|
else:
|
|
248
247
|
log_error(f'Failed to decrypt PDF file "{doc_name}": incorrect password')
|
|
@@ -337,19 +336,25 @@ class PDFReader(BasePDFReader):
|
|
|
337
336
|
"""Reader for PDF files"""
|
|
338
337
|
|
|
339
338
|
@classmethod
|
|
340
|
-
def get_supported_content_types(
|
|
339
|
+
def get_supported_content_types(cls) -> List[ContentType]:
|
|
341
340
|
return [ContentType.PDF]
|
|
342
341
|
|
|
343
342
|
def read(
|
|
344
|
-
self,
|
|
343
|
+
self,
|
|
344
|
+
pdf: Optional[Union[str, Path, IO[Any]]] = None,
|
|
345
|
+
name: Optional[str] = None,
|
|
346
|
+
password: Optional[str] = None,
|
|
345
347
|
) -> List[Document]:
|
|
348
|
+
if pdf is None:
|
|
349
|
+
log_error("No pdf provided")
|
|
350
|
+
return []
|
|
346
351
|
doc_name = self._get_doc_name(pdf, name)
|
|
347
|
-
|
|
352
|
+
log_debug(f"Reading: {doc_name}")
|
|
348
353
|
|
|
349
354
|
try:
|
|
350
355
|
pdf_reader = DocumentReader(pdf)
|
|
351
356
|
except PdfStreamError as e:
|
|
352
|
-
|
|
357
|
+
log_error(f"Error reading PDF: {e}")
|
|
353
358
|
return []
|
|
354
359
|
# Handle PDF decryption
|
|
355
360
|
if not self._decrypt_pdf(pdf_reader, doc_name, password):
|
|
@@ -368,12 +373,12 @@ class PDFReader(BasePDFReader):
|
|
|
368
373
|
log_error("No pdf provided")
|
|
369
374
|
return []
|
|
370
375
|
doc_name = self._get_doc_name(pdf, name)
|
|
371
|
-
|
|
376
|
+
log_debug(f"Reading: {doc_name}")
|
|
372
377
|
|
|
373
378
|
try:
|
|
374
379
|
pdf_reader = DocumentReader(pdf)
|
|
375
380
|
except PdfStreamError as e:
|
|
376
|
-
|
|
381
|
+
log_error(f"Error reading PDF: {e}")
|
|
377
382
|
return []
|
|
378
383
|
|
|
379
384
|
# Handle PDF decryption
|
|
@@ -394,11 +399,11 @@ class PDFImageReader(BasePDFReader):
|
|
|
394
399
|
raise ValueError("No pdf provided")
|
|
395
400
|
|
|
396
401
|
doc_name = self._get_doc_name(pdf, name)
|
|
397
|
-
|
|
402
|
+
log_debug(f"Reading: {doc_name}")
|
|
398
403
|
try:
|
|
399
404
|
pdf_reader = DocumentReader(pdf)
|
|
400
405
|
except PdfStreamError as e:
|
|
401
|
-
|
|
406
|
+
log_error(f"Error reading PDF: {e}")
|
|
402
407
|
return []
|
|
403
408
|
|
|
404
409
|
# Handle PDF decryption
|
|
@@ -406,7 +411,7 @@ class PDFImageReader(BasePDFReader):
|
|
|
406
411
|
return []
|
|
407
412
|
|
|
408
413
|
# Read and chunk.
|
|
409
|
-
return self._pdf_reader_to_documents(pdf_reader, doc_name, read_images=True, use_uuid_for_id=
|
|
414
|
+
return self._pdf_reader_to_documents(pdf_reader, doc_name, read_images=True, use_uuid_for_id=True)
|
|
410
415
|
|
|
411
416
|
async def async_read(
|
|
412
417
|
self, pdf: Union[str, Path, IO[Any]], name: Optional[str] = None, password: Optional[str] = None
|
|
@@ -415,12 +420,12 @@ class PDFImageReader(BasePDFReader):
|
|
|
415
420
|
raise ValueError("No pdf provided")
|
|
416
421
|
|
|
417
422
|
doc_name = self._get_doc_name(pdf, name)
|
|
418
|
-
|
|
423
|
+
log_debug(f"Reading: {doc_name}")
|
|
419
424
|
|
|
420
425
|
try:
|
|
421
426
|
pdf_reader = DocumentReader(pdf)
|
|
422
427
|
except PdfStreamError as e:
|
|
423
|
-
|
|
428
|
+
log_error(f"Error reading PDF: {e}")
|
|
424
429
|
return []
|
|
425
430
|
|
|
426
431
|
# Handle PDF decryption
|
|
@@ -428,4 +433,4 @@ class PDFImageReader(BasePDFReader):
|
|
|
428
433
|
return []
|
|
429
434
|
|
|
430
435
|
# Read and chunk.
|
|
431
|
-
return await self._async_pdf_reader_to_documents(pdf_reader, doc_name, read_images=True, use_uuid_for_id=
|
|
436
|
+
return await self._async_pdf_reader_to_documents(pdf_reader, doc_name, read_images=True, use_uuid_for_id=True)
|
|
@@ -8,7 +8,7 @@ from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyT
|
|
|
8
8
|
from agno.knowledge.document.base import Document
|
|
9
9
|
from agno.knowledge.reader.base import Reader
|
|
10
10
|
from agno.knowledge.types import ContentType
|
|
11
|
-
from agno.utils.log import
|
|
11
|
+
from agno.utils.log import log_debug, log_error
|
|
12
12
|
|
|
13
13
|
try:
|
|
14
14
|
from pptx import Presentation # type: ignore
|
|
@@ -23,10 +23,11 @@ class PPTXReader(Reader):
|
|
|
23
23
|
super().__init__(chunking_strategy=chunking_strategy, **kwargs)
|
|
24
24
|
|
|
25
25
|
@classmethod
|
|
26
|
-
def get_supported_chunking_strategies(
|
|
26
|
+
def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
|
|
27
27
|
"""Get the list of supported chunking strategies for PPTX readers."""
|
|
28
28
|
return [
|
|
29
29
|
ChunkingStrategyType.DOCUMENT_CHUNKER,
|
|
30
|
+
ChunkingStrategyType.CODE_CHUNKER,
|
|
30
31
|
ChunkingStrategyType.FIXED_SIZE_CHUNKER,
|
|
31
32
|
ChunkingStrategyType.SEMANTIC_CHUNKER,
|
|
32
33
|
ChunkingStrategyType.AGENTIC_CHUNKER,
|
|
@@ -34,7 +35,7 @@ class PPTXReader(Reader):
|
|
|
34
35
|
]
|
|
35
36
|
|
|
36
37
|
@classmethod
|
|
37
|
-
def get_supported_content_types(
|
|
38
|
+
def get_supported_content_types(cls) -> List[ContentType]:
|
|
38
39
|
return [ContentType.PPTX]
|
|
39
40
|
|
|
40
41
|
def read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
|
|
@@ -43,15 +44,13 @@ class PPTXReader(Reader):
|
|
|
43
44
|
if isinstance(file, Path):
|
|
44
45
|
if not file.exists():
|
|
45
46
|
raise FileNotFoundError(f"Could not find file: {file}")
|
|
46
|
-
|
|
47
|
+
log_debug(f"Reading: {file}")
|
|
47
48
|
presentation = Presentation(str(file))
|
|
48
49
|
doc_name = name or file.stem
|
|
49
50
|
else:
|
|
50
|
-
|
|
51
|
+
log_debug(f"Reading uploaded file: {getattr(file, 'name', 'BytesIO')}")
|
|
51
52
|
presentation = Presentation(file)
|
|
52
|
-
doc_name = name or (
|
|
53
|
-
getattr(file, "name", "pptx_file").split(".")[0] if hasattr(file, "name") else "pptx_file"
|
|
54
|
-
)
|
|
53
|
+
doc_name = name or getattr(file, "name", "pptx_file").split(".")[0]
|
|
55
54
|
|
|
56
55
|
# Extract text from all slides
|
|
57
56
|
slide_texts = []
|
|
@@ -89,7 +88,7 @@ class PPTXReader(Reader):
|
|
|
89
88
|
return documents
|
|
90
89
|
|
|
91
90
|
except Exception as e:
|
|
92
|
-
|
|
91
|
+
log_error(f"Error reading file: {e}")
|
|
93
92
|
return []
|
|
94
93
|
|
|
95
94
|
async def async_read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
|
|
@@ -97,5 +96,5 @@ class PPTXReader(Reader):
|
|
|
97
96
|
try:
|
|
98
97
|
return await asyncio.to_thread(self.read, file, name)
|
|
99
98
|
except Exception as e:
|
|
100
|
-
|
|
99
|
+
log_error(f"Error reading file asynchronously: {e}")
|
|
101
100
|
return []
|
|
@@ -10,6 +10,74 @@ class ReaderFactory:
|
|
|
10
10
|
# Cache for instantiated readers
|
|
11
11
|
_reader_cache: Dict[str, Reader] = {}
|
|
12
12
|
|
|
13
|
+
# Static metadata for readers - avoids instantiation just to get metadata
|
|
14
|
+
READER_METADATA: Dict[str, Dict[str, str]] = {
|
|
15
|
+
"pdf": {
|
|
16
|
+
"name": "PdfReader",
|
|
17
|
+
"description": "Processes PDF documents with OCR support for images and text extraction",
|
|
18
|
+
},
|
|
19
|
+
"csv": {
|
|
20
|
+
"name": "CsvReader",
|
|
21
|
+
"description": "Parses CSV files with custom delimiter support",
|
|
22
|
+
},
|
|
23
|
+
"excel": {
|
|
24
|
+
"name": "ExcelReader",
|
|
25
|
+
"description": "Processes Excel workbooks (.xlsx and .xls) with sheet filtering and row-based chunking",
|
|
26
|
+
},
|
|
27
|
+
"field_labeled_csv": {
|
|
28
|
+
"name": "FieldLabeledCsvReader",
|
|
29
|
+
"description": "Converts CSV rows to field-labeled text format for enhanced readability and context",
|
|
30
|
+
},
|
|
31
|
+
"docx": {
|
|
32
|
+
"name": "DocxReader",
|
|
33
|
+
"description": "Extracts text content from Microsoft Word documents (.docx and .doc formats)",
|
|
34
|
+
},
|
|
35
|
+
"pptx": {
|
|
36
|
+
"name": "PptxReader",
|
|
37
|
+
"description": "Extracts text content from Microsoft PowerPoint presentations (.pptx format)",
|
|
38
|
+
},
|
|
39
|
+
"json": {
|
|
40
|
+
"name": "JsonReader",
|
|
41
|
+
"description": "Processes JSON data structures and API responses with nested object handling",
|
|
42
|
+
},
|
|
43
|
+
"markdown": {
|
|
44
|
+
"name": "MarkdownReader",
|
|
45
|
+
"description": "Processes Markdown documentation with header-aware chunking and formatting preservation",
|
|
46
|
+
},
|
|
47
|
+
"text": {
|
|
48
|
+
"name": "TextReader",
|
|
49
|
+
"description": "Handles plain text files with customizable chunking strategies and encoding detection",
|
|
50
|
+
},
|
|
51
|
+
"website": {
|
|
52
|
+
"name": "WebsiteReader",
|
|
53
|
+
"description": "Scrapes and extracts content from web pages with HTML parsing and text cleaning",
|
|
54
|
+
},
|
|
55
|
+
"firecrawl": {
|
|
56
|
+
"name": "FirecrawlReader",
|
|
57
|
+
"description": "Advanced web scraping and crawling with JavaScript rendering and structured data extraction",
|
|
58
|
+
},
|
|
59
|
+
"tavily": {
|
|
60
|
+
"name": "TavilyReader",
|
|
61
|
+
"description": "Extracts content from URLs using Tavily's Extract API with markdown or text output",
|
|
62
|
+
},
|
|
63
|
+
"youtube": {
|
|
64
|
+
"name": "YouTubeReader",
|
|
65
|
+
"description": "Extracts transcripts and metadata from YouTube videos and playlists",
|
|
66
|
+
},
|
|
67
|
+
"arxiv": {
|
|
68
|
+
"name": "ArxivReader",
|
|
69
|
+
"description": "Downloads and processes academic papers from ArXiv with PDF parsing and metadata extraction",
|
|
70
|
+
},
|
|
71
|
+
"wikipedia": {
|
|
72
|
+
"name": "WikipediaReader",
|
|
73
|
+
"description": "Fetches and processes Wikipedia articles with section-aware chunking and link resolution",
|
|
74
|
+
},
|
|
75
|
+
"web_search": {
|
|
76
|
+
"name": "WebSearchReader",
|
|
77
|
+
"description": "Executes web searches and processes results with relevance ranking and content extraction",
|
|
78
|
+
},
|
|
79
|
+
}
|
|
80
|
+
|
|
13
81
|
@classmethod
|
|
14
82
|
def _get_pdf_reader(cls, **kwargs) -> Reader:
|
|
15
83
|
"""Get PDF reader instance."""
|
|
@@ -29,11 +97,23 @@ class ReaderFactory:
|
|
|
29
97
|
|
|
30
98
|
config: Dict[str, Any] = {
|
|
31
99
|
"name": "CSV Reader",
|
|
32
|
-
"description": "Parses CSV
|
|
100
|
+
"description": "Parses CSV files with custom delimiter support",
|
|
33
101
|
}
|
|
34
102
|
config.update(kwargs)
|
|
35
103
|
return CSVReader(**config)
|
|
36
104
|
|
|
105
|
+
@classmethod
|
|
106
|
+
def _get_excel_reader(cls, **kwargs) -> Reader:
|
|
107
|
+
"""Get Excel reader instance."""
|
|
108
|
+
from agno.knowledge.reader.excel_reader import ExcelReader
|
|
109
|
+
|
|
110
|
+
config: Dict[str, Any] = {
|
|
111
|
+
"name": "Excel Reader",
|
|
112
|
+
"description": "Processes Excel workbooks (.xlsx and .xls) with sheet filtering and row-based chunking",
|
|
113
|
+
}
|
|
114
|
+
config.update(kwargs)
|
|
115
|
+
return ExcelReader(**config)
|
|
116
|
+
|
|
37
117
|
@classmethod
|
|
38
118
|
def _get_field_labeled_csv_reader(cls, **kwargs) -> Reader:
|
|
39
119
|
"""Get Field Labeled CSV reader instance."""
|
|
@@ -203,6 +283,53 @@ class ReaderFactory:
|
|
|
203
283
|
raise ValueError(f"Unknown reader: {reader_key}")
|
|
204
284
|
return getattr(cls, method_name)
|
|
205
285
|
|
|
286
|
+
@classmethod
|
|
287
|
+
def get_reader_class(cls, reader_key: str) -> type:
|
|
288
|
+
"""Get the reader CLASS without instantiation.
|
|
289
|
+
|
|
290
|
+
This is useful for accessing class methods like get_supported_chunking_strategies()
|
|
291
|
+
without the overhead of creating an instance.
|
|
292
|
+
|
|
293
|
+
Args:
|
|
294
|
+
reader_key: The reader key (e.g., 'pdf', 'csv', 'markdown')
|
|
295
|
+
|
|
296
|
+
Returns:
|
|
297
|
+
The reader class (not an instance)
|
|
298
|
+
|
|
299
|
+
Raises:
|
|
300
|
+
ValueError: If the reader key is unknown
|
|
301
|
+
ImportError: If the reader's dependencies are not installed
|
|
302
|
+
"""
|
|
303
|
+
# Map reader keys to their import paths
|
|
304
|
+
reader_class_map: Dict[str, tuple] = {
|
|
305
|
+
"pdf": ("agno.knowledge.reader.pdf_reader", "PDFReader"),
|
|
306
|
+
"csv": ("agno.knowledge.reader.csv_reader", "CSVReader"),
|
|
307
|
+
"excel": ("agno.knowledge.reader.excel_reader", "ExcelReader"),
|
|
308
|
+
"field_labeled_csv": ("agno.knowledge.reader.field_labeled_csv_reader", "FieldLabeledCSVReader"),
|
|
309
|
+
"docx": ("agno.knowledge.reader.docx_reader", "DocxReader"),
|
|
310
|
+
"pptx": ("agno.knowledge.reader.pptx_reader", "PPTXReader"),
|
|
311
|
+
"json": ("agno.knowledge.reader.json_reader", "JSONReader"),
|
|
312
|
+
"markdown": ("agno.knowledge.reader.markdown_reader", "MarkdownReader"),
|
|
313
|
+
"text": ("agno.knowledge.reader.text_reader", "TextReader"),
|
|
314
|
+
"website": ("agno.knowledge.reader.website_reader", "WebsiteReader"),
|
|
315
|
+
"firecrawl": ("agno.knowledge.reader.firecrawl_reader", "FirecrawlReader"),
|
|
316
|
+
"tavily": ("agno.knowledge.reader.tavily_reader", "TavilyReader"),
|
|
317
|
+
"youtube": ("agno.knowledge.reader.youtube_reader", "YouTubeReader"),
|
|
318
|
+
"arxiv": ("agno.knowledge.reader.arxiv_reader", "ArxivReader"),
|
|
319
|
+
"wikipedia": ("agno.knowledge.reader.wikipedia_reader", "WikipediaReader"),
|
|
320
|
+
"web_search": ("agno.knowledge.reader.web_search_reader", "WebSearchReader"),
|
|
321
|
+
}
|
|
322
|
+
|
|
323
|
+
if reader_key not in reader_class_map:
|
|
324
|
+
raise ValueError(f"Unknown reader: {reader_key}")
|
|
325
|
+
|
|
326
|
+
module_path, class_name = reader_class_map[reader_key]
|
|
327
|
+
|
|
328
|
+
import importlib
|
|
329
|
+
|
|
330
|
+
module = importlib.import_module(module_path)
|
|
331
|
+
return getattr(module, class_name)
|
|
332
|
+
|
|
206
333
|
@classmethod
|
|
207
334
|
def create_reader(cls, reader_key: str, **kwargs) -> Reader:
|
|
208
335
|
"""Create a reader instance with the given key and optional overrides."""
|
|
@@ -227,6 +354,13 @@ class ReaderFactory:
|
|
|
227
354
|
return cls.create_reader("pdf")
|
|
228
355
|
elif extension in [".csv", "text/csv"]:
|
|
229
356
|
return cls.create_reader("csv")
|
|
357
|
+
elif extension in [
|
|
358
|
+
".xlsx",
|
|
359
|
+
".xls",
|
|
360
|
+
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
|
361
|
+
"application/vnd.ms-excel",
|
|
362
|
+
]:
|
|
363
|
+
return cls.create_reader("excel")
|
|
230
364
|
elif extension in [".docx", ".doc", "application/vnd.openxmlformats-officedocument.wordprocessingml.document"]:
|
|
231
365
|
return cls.create_reader("docx")
|
|
232
366
|
elif extension == ".pptx":
|
|
@@ -10,7 +10,7 @@ from agno.knowledge.reader.base import Reader
|
|
|
10
10
|
from agno.knowledge.reader.pdf_reader import PDFReader
|
|
11
11
|
from agno.knowledge.reader.text_reader import TextReader
|
|
12
12
|
from agno.knowledge.types import ContentType
|
|
13
|
-
from agno.utils.log import
|
|
13
|
+
from agno.utils.log import log_debug, log_error
|
|
14
14
|
|
|
15
15
|
try:
|
|
16
16
|
from agno.aws.resource.s3.object import S3Object # type: ignore
|
|
@@ -35,9 +35,10 @@ class S3Reader(Reader):
|
|
|
35
35
|
super().__init__(chunking_strategy=chunking_strategy, **kwargs)
|
|
36
36
|
|
|
37
37
|
@classmethod
|
|
38
|
-
def get_supported_chunking_strategies(
|
|
38
|
+
def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
|
|
39
39
|
"""Get the list of supported chunking strategies for S3 readers."""
|
|
40
40
|
return [
|
|
41
|
+
ChunkingStrategyType.CODE_CHUNKER,
|
|
41
42
|
ChunkingStrategyType.FIXED_SIZE_CHUNKER,
|
|
42
43
|
ChunkingStrategyType.AGENTIC_CHUNKER,
|
|
43
44
|
ChunkingStrategyType.DOCUMENT_CHUNKER,
|
|
@@ -46,41 +47,32 @@ class S3Reader(Reader):
|
|
|
46
47
|
]
|
|
47
48
|
|
|
48
49
|
@classmethod
|
|
49
|
-
def get_supported_content_types(
|
|
50
|
+
def get_supported_content_types(cls) -> List[ContentType]:
|
|
50
51
|
return [ContentType.FILE, ContentType.URL, ContentType.TEXT]
|
|
51
52
|
|
|
52
53
|
def read(self, name: Optional[str], s3_object: S3Object) -> List[Document]:
|
|
53
54
|
try:
|
|
54
|
-
|
|
55
|
+
log_debug(f"Reading S3 file: {s3_object.uri}")
|
|
56
|
+
|
|
57
|
+
doc_name = name or s3_object.name.split("/")[-1].split(".")[0].replace("/", "_").replace(" ", "_")
|
|
55
58
|
|
|
56
59
|
# Read PDF files
|
|
57
60
|
if s3_object.uri.endswith(".pdf"):
|
|
58
61
|
object_resource = s3_object.get_resource()
|
|
59
62
|
object_body = object_resource.get()["Body"]
|
|
60
|
-
doc_name = (
|
|
61
|
-
s3_object.name.split("/")[-1].split(".")[0].replace("/", "_").replace(" ", "_")
|
|
62
|
-
if name is None
|
|
63
|
-
else name
|
|
64
|
-
)
|
|
65
63
|
return PDFReader().read(pdf=BytesIO(object_body.read()), name=doc_name)
|
|
66
64
|
|
|
67
65
|
# Read text files
|
|
68
66
|
else:
|
|
69
|
-
doc_name = (
|
|
70
|
-
s3_object.name.split("/")[-1].split(".")[0].replace("/", "_").replace(" ", "_")
|
|
71
|
-
if name is None
|
|
72
|
-
else name
|
|
73
|
-
)
|
|
74
67
|
obj_name = s3_object.name.split("/")[-1]
|
|
75
68
|
temporary_file = Path("storage").joinpath(obj_name)
|
|
76
69
|
s3_object.download(temporary_file)
|
|
77
70
|
documents = TextReader().read(file=temporary_file, name=doc_name)
|
|
78
|
-
|
|
79
71
|
temporary_file.unlink()
|
|
80
72
|
return documents
|
|
81
73
|
|
|
82
74
|
except Exception as e:
|
|
83
|
-
|
|
75
|
+
log_error(f"Error reading: {s3_object.uri}: {e}")
|
|
84
76
|
|
|
85
77
|
return []
|
|
86
78
|
|
|
@@ -62,9 +62,10 @@ class TavilyReader(Reader):
|
|
|
62
62
|
self.extract_depth = extract_depth
|
|
63
63
|
|
|
64
64
|
@classmethod
|
|
65
|
-
def get_supported_chunking_strategies(
|
|
65
|
+
def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
|
|
66
66
|
"""Get the list of supported chunking strategies for Tavily readers."""
|
|
67
67
|
return [
|
|
68
|
+
ChunkingStrategyType.CODE_CHUNKER,
|
|
68
69
|
ChunkingStrategyType.SEMANTIC_CHUNKER,
|
|
69
70
|
ChunkingStrategyType.FIXED_SIZE_CHUNKER,
|
|
70
71
|
ChunkingStrategyType.AGENTIC_CHUNKER,
|
|
@@ -73,7 +74,7 @@ class TavilyReader(Reader):
|
|
|
73
74
|
]
|
|
74
75
|
|
|
75
76
|
@classmethod
|
|
76
|
-
def get_supported_content_types(
|
|
77
|
+
def get_supported_content_types(cls) -> List[ContentType]:
|
|
77
78
|
return [ContentType.URL]
|
|
78
79
|
|
|
79
80
|
def _extract(self, url: str, name: Optional[str] = None) -> List[Document]:
|
|
@@ -140,7 +141,6 @@ class TavilyReader(Reader):
|
|
|
140
141
|
documents.extend(self.chunk_document(Document(name=name or url, id=url, content=content)))
|
|
141
142
|
else:
|
|
142
143
|
documents.append(Document(name=name or url, id=url, content=content))
|
|
143
|
-
|
|
144
144
|
return documents
|
|
145
145
|
|
|
146
146
|
except Exception as e:
|
|
@@ -8,7 +8,7 @@ from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyT
|
|
|
8
8
|
from agno.knowledge.document.base import Document
|
|
9
9
|
from agno.knowledge.reader.base import Reader
|
|
10
10
|
from agno.knowledge.types import ContentType
|
|
11
|
-
from agno.utils.log import
|
|
11
|
+
from agno.utils.log import log_debug, log_error, log_warning
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
class TextReader(Reader):
|
|
@@ -18,9 +18,10 @@ class TextReader(Reader):
|
|
|
18
18
|
super().__init__(chunking_strategy=chunking_strategy, **kwargs)
|
|
19
19
|
|
|
20
20
|
@classmethod
|
|
21
|
-
def get_supported_chunking_strategies(
|
|
21
|
+
def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
|
|
22
22
|
"""Get the list of supported chunking strategies for Text readers."""
|
|
23
23
|
return [
|
|
24
|
+
ChunkingStrategyType.CODE_CHUNKER,
|
|
24
25
|
ChunkingStrategyType.FIXED_SIZE_CHUNKER,
|
|
25
26
|
ChunkingStrategyType.AGENTIC_CHUNKER,
|
|
26
27
|
ChunkingStrategyType.DOCUMENT_CHUNKER,
|
|
@@ -29,7 +30,7 @@ class TextReader(Reader):
|
|
|
29
30
|
]
|
|
30
31
|
|
|
31
32
|
@classmethod
|
|
32
|
-
def get_supported_content_types(
|
|
33
|
+
def get_supported_content_types(cls) -> List[ContentType]:
|
|
33
34
|
return [ContentType.TXT]
|
|
34
35
|
|
|
35
36
|
def read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
|
|
@@ -37,12 +38,12 @@ class TextReader(Reader):
|
|
|
37
38
|
if isinstance(file, Path):
|
|
38
39
|
if not file.exists():
|
|
39
40
|
raise FileNotFoundError(f"Could not find file: {file}")
|
|
40
|
-
|
|
41
|
+
log_debug(f"Reading: {file}")
|
|
41
42
|
file_name = name or file.stem
|
|
42
|
-
file_contents = file.read_text(self.encoding or "utf-8")
|
|
43
|
+
file_contents = file.read_text(encoding=self.encoding or "utf-8")
|
|
43
44
|
else:
|
|
44
|
-
|
|
45
|
-
|
|
45
|
+
log_debug(f"Reading uploaded file: {getattr(file, 'name', 'BytesIO')}")
|
|
46
|
+
file_name = name or getattr(file, "name", "text_file").split(".")[0]
|
|
46
47
|
file.seek(0)
|
|
47
48
|
file_contents = file.read().decode(self.encoding or "utf-8")
|
|
48
49
|
|
|
@@ -60,7 +61,7 @@ class TextReader(Reader):
|
|
|
60
61
|
return chunked_documents
|
|
61
62
|
return documents
|
|
62
63
|
except Exception as e:
|
|
63
|
-
|
|
64
|
+
log_error(f"Error reading: {file}: {e}")
|
|
64
65
|
return []
|
|
65
66
|
|
|
66
67
|
async def async_read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
|
|
@@ -69,7 +70,7 @@ class TextReader(Reader):
|
|
|
69
70
|
if not file.exists():
|
|
70
71
|
raise FileNotFoundError(f"Could not find file: {file}")
|
|
71
72
|
|
|
72
|
-
|
|
73
|
+
log_debug(f"Reading asynchronously: {file}")
|
|
73
74
|
file_name = name or file.stem
|
|
74
75
|
|
|
75
76
|
try:
|
|
@@ -78,11 +79,11 @@ class TextReader(Reader):
|
|
|
78
79
|
async with aiofiles.open(file, "r", encoding=self.encoding or "utf-8") as f:
|
|
79
80
|
file_contents = await f.read()
|
|
80
81
|
except ImportError:
|
|
81
|
-
|
|
82
|
-
file_contents = file.read_text(self.encoding or "utf-8")
|
|
82
|
+
log_warning("aiofiles not installed, using synchronous file I/O")
|
|
83
|
+
file_contents = file.read_text(encoding=self.encoding or "utf-8")
|
|
83
84
|
else:
|
|
84
|
-
|
|
85
|
-
file_name = name or file
|
|
85
|
+
log_debug(f"Reading uploaded file asynchronously: {getattr(file, 'name', 'BytesIO')}")
|
|
86
|
+
file_name = name or getattr(file, "name", "text_file").split(".")[0]
|
|
86
87
|
file.seek(0)
|
|
87
88
|
file_contents = file.read().decode(self.encoding or "utf-8")
|
|
88
89
|
|
|
@@ -96,7 +97,7 @@ class TextReader(Reader):
|
|
|
96
97
|
return await self._async_chunk_document(document)
|
|
97
98
|
return [document]
|
|
98
99
|
except Exception as e:
|
|
99
|
-
|
|
100
|
+
log_error(f"Error reading asynchronously: {file}: {e}")
|
|
100
101
|
return []
|
|
101
102
|
|
|
102
103
|
async def _async_chunk_document(self, document: Document) -> List[Document]:
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from agno.knowledge.reader.utils.spreadsheet import (
|
|
2
|
+
convert_xls_cell_value,
|
|
3
|
+
excel_rows_to_documents,
|
|
4
|
+
get_workbook_name,
|
|
5
|
+
infer_file_extension,
|
|
6
|
+
row_to_csv_line,
|
|
7
|
+
stringify_cell_value,
|
|
8
|
+
)
|
|
9
|
+
|
|
10
|
+
__all__ = [
|
|
11
|
+
"convert_xls_cell_value",
|
|
12
|
+
"excel_rows_to_documents",
|
|
13
|
+
"get_workbook_name",
|
|
14
|
+
"infer_file_extension",
|
|
15
|
+
"row_to_csv_line",
|
|
16
|
+
"stringify_cell_value",
|
|
17
|
+
]
|