agno 2.1.2__py3-none-any.whl → 2.3.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/agent/agent.py +5540 -2273
- agno/api/api.py +2 -0
- agno/api/os.py +1 -1
- agno/compression/__init__.py +3 -0
- agno/compression/manager.py +247 -0
- agno/culture/__init__.py +3 -0
- agno/culture/manager.py +956 -0
- agno/db/async_postgres/__init__.py +3 -0
- agno/db/base.py +689 -6
- agno/db/dynamo/dynamo.py +933 -37
- agno/db/dynamo/schemas.py +174 -10
- agno/db/dynamo/utils.py +63 -4
- agno/db/firestore/firestore.py +831 -9
- agno/db/firestore/schemas.py +51 -0
- agno/db/firestore/utils.py +102 -4
- agno/db/gcs_json/gcs_json_db.py +660 -12
- agno/db/gcs_json/utils.py +60 -26
- agno/db/in_memory/in_memory_db.py +287 -14
- agno/db/in_memory/utils.py +60 -2
- agno/db/json/json_db.py +590 -14
- agno/db/json/utils.py +60 -26
- agno/db/migrations/manager.py +199 -0
- agno/db/migrations/v1_to_v2.py +43 -13
- agno/db/migrations/versions/__init__.py +0 -0
- agno/db/migrations/versions/v2_3_0.py +938 -0
- agno/db/mongo/__init__.py +15 -1
- agno/db/mongo/async_mongo.py +2760 -0
- agno/db/mongo/mongo.py +879 -11
- agno/db/mongo/schemas.py +42 -0
- agno/db/mongo/utils.py +80 -8
- agno/db/mysql/__init__.py +2 -1
- agno/db/mysql/async_mysql.py +2912 -0
- agno/db/mysql/mysql.py +946 -68
- agno/db/mysql/schemas.py +72 -10
- agno/db/mysql/utils.py +198 -7
- agno/db/postgres/__init__.py +2 -1
- agno/db/postgres/async_postgres.py +2579 -0
- agno/db/postgres/postgres.py +942 -57
- agno/db/postgres/schemas.py +81 -18
- agno/db/postgres/utils.py +164 -2
- agno/db/redis/redis.py +671 -7
- agno/db/redis/schemas.py +50 -0
- agno/db/redis/utils.py +65 -7
- agno/db/schemas/__init__.py +2 -1
- agno/db/schemas/culture.py +120 -0
- agno/db/schemas/evals.py +1 -0
- agno/db/schemas/memory.py +17 -2
- agno/db/singlestore/schemas.py +63 -0
- agno/db/singlestore/singlestore.py +949 -83
- agno/db/singlestore/utils.py +60 -2
- agno/db/sqlite/__init__.py +2 -1
- agno/db/sqlite/async_sqlite.py +2911 -0
- agno/db/sqlite/schemas.py +62 -0
- agno/db/sqlite/sqlite.py +965 -46
- agno/db/sqlite/utils.py +169 -8
- agno/db/surrealdb/__init__.py +3 -0
- agno/db/surrealdb/metrics.py +292 -0
- agno/db/surrealdb/models.py +334 -0
- agno/db/surrealdb/queries.py +71 -0
- agno/db/surrealdb/surrealdb.py +1908 -0
- agno/db/surrealdb/utils.py +147 -0
- agno/db/utils.py +2 -0
- agno/eval/__init__.py +10 -0
- agno/eval/accuracy.py +75 -55
- agno/eval/agent_as_judge.py +861 -0
- agno/eval/base.py +29 -0
- agno/eval/performance.py +16 -7
- agno/eval/reliability.py +28 -16
- agno/eval/utils.py +35 -17
- agno/exceptions.py +27 -2
- agno/filters.py +354 -0
- agno/guardrails/prompt_injection.py +1 -0
- agno/hooks/__init__.py +3 -0
- agno/hooks/decorator.py +164 -0
- agno/integrations/discord/client.py +1 -1
- agno/knowledge/chunking/agentic.py +13 -10
- agno/knowledge/chunking/fixed.py +4 -1
- agno/knowledge/chunking/semantic.py +9 -4
- agno/knowledge/chunking/strategy.py +59 -15
- agno/knowledge/embedder/fastembed.py +1 -1
- agno/knowledge/embedder/nebius.py +1 -1
- agno/knowledge/embedder/ollama.py +8 -0
- agno/knowledge/embedder/openai.py +8 -8
- agno/knowledge/embedder/sentence_transformer.py +6 -2
- agno/knowledge/embedder/vllm.py +262 -0
- agno/knowledge/knowledge.py +1618 -318
- agno/knowledge/reader/base.py +6 -2
- agno/knowledge/reader/csv_reader.py +8 -10
- agno/knowledge/reader/docx_reader.py +5 -6
- agno/knowledge/reader/field_labeled_csv_reader.py +16 -20
- agno/knowledge/reader/json_reader.py +5 -4
- agno/knowledge/reader/markdown_reader.py +8 -8
- agno/knowledge/reader/pdf_reader.py +17 -19
- agno/knowledge/reader/pptx_reader.py +101 -0
- agno/knowledge/reader/reader_factory.py +32 -3
- agno/knowledge/reader/s3_reader.py +3 -3
- agno/knowledge/reader/tavily_reader.py +193 -0
- agno/knowledge/reader/text_reader.py +22 -10
- agno/knowledge/reader/web_search_reader.py +1 -48
- agno/knowledge/reader/website_reader.py +10 -10
- agno/knowledge/reader/wikipedia_reader.py +33 -1
- agno/knowledge/types.py +1 -0
- agno/knowledge/utils.py +72 -7
- agno/media.py +22 -6
- agno/memory/__init__.py +14 -1
- agno/memory/manager.py +544 -83
- agno/memory/strategies/__init__.py +15 -0
- agno/memory/strategies/base.py +66 -0
- agno/memory/strategies/summarize.py +196 -0
- agno/memory/strategies/types.py +37 -0
- agno/models/aimlapi/aimlapi.py +17 -0
- agno/models/anthropic/claude.py +515 -40
- agno/models/aws/bedrock.py +102 -21
- agno/models/aws/claude.py +131 -274
- agno/models/azure/ai_foundry.py +41 -19
- agno/models/azure/openai_chat.py +39 -8
- agno/models/base.py +1249 -525
- agno/models/cerebras/cerebras.py +91 -21
- agno/models/cerebras/cerebras_openai.py +21 -2
- agno/models/cohere/chat.py +40 -6
- agno/models/cometapi/cometapi.py +18 -1
- agno/models/dashscope/dashscope.py +2 -3
- agno/models/deepinfra/deepinfra.py +18 -1
- agno/models/deepseek/deepseek.py +69 -3
- agno/models/fireworks/fireworks.py +18 -1
- agno/models/google/gemini.py +877 -80
- agno/models/google/utils.py +22 -0
- agno/models/groq/groq.py +51 -18
- agno/models/huggingface/huggingface.py +17 -6
- agno/models/ibm/watsonx.py +16 -6
- agno/models/internlm/internlm.py +18 -1
- agno/models/langdb/langdb.py +13 -1
- agno/models/litellm/chat.py +44 -9
- agno/models/litellm/litellm_openai.py +18 -1
- agno/models/message.py +28 -5
- agno/models/meta/llama.py +47 -14
- agno/models/meta/llama_openai.py +22 -17
- agno/models/mistral/mistral.py +8 -4
- agno/models/nebius/nebius.py +6 -7
- agno/models/nvidia/nvidia.py +20 -3
- agno/models/ollama/chat.py +24 -8
- agno/models/openai/chat.py +104 -29
- agno/models/openai/responses.py +101 -81
- agno/models/openrouter/openrouter.py +60 -3
- agno/models/perplexity/perplexity.py +17 -1
- agno/models/portkey/portkey.py +7 -6
- agno/models/requesty/requesty.py +24 -4
- agno/models/response.py +73 -2
- agno/models/sambanova/sambanova.py +20 -3
- agno/models/siliconflow/siliconflow.py +19 -2
- agno/models/together/together.py +20 -3
- agno/models/utils.py +254 -8
- agno/models/vercel/v0.py +20 -3
- agno/models/vertexai/__init__.py +0 -0
- agno/models/vertexai/claude.py +190 -0
- agno/models/vllm/vllm.py +19 -14
- agno/models/xai/xai.py +19 -2
- agno/os/app.py +549 -152
- agno/os/auth.py +190 -3
- agno/os/config.py +23 -0
- agno/os/interfaces/a2a/router.py +8 -11
- agno/os/interfaces/a2a/utils.py +1 -1
- agno/os/interfaces/agui/router.py +18 -3
- agno/os/interfaces/agui/utils.py +152 -39
- agno/os/interfaces/slack/router.py +55 -37
- agno/os/interfaces/slack/slack.py +9 -1
- agno/os/interfaces/whatsapp/router.py +0 -1
- agno/os/interfaces/whatsapp/security.py +3 -1
- agno/os/mcp.py +110 -52
- agno/os/middleware/__init__.py +2 -0
- agno/os/middleware/jwt.py +676 -112
- agno/os/router.py +40 -1478
- agno/os/routers/agents/__init__.py +3 -0
- agno/os/routers/agents/router.py +599 -0
- agno/os/routers/agents/schema.py +261 -0
- agno/os/routers/evals/evals.py +96 -39
- agno/os/routers/evals/schemas.py +65 -33
- agno/os/routers/evals/utils.py +80 -10
- agno/os/routers/health.py +10 -4
- agno/os/routers/knowledge/knowledge.py +196 -38
- agno/os/routers/knowledge/schemas.py +82 -22
- agno/os/routers/memory/memory.py +279 -52
- agno/os/routers/memory/schemas.py +46 -17
- agno/os/routers/metrics/metrics.py +20 -8
- agno/os/routers/metrics/schemas.py +16 -16
- agno/os/routers/session/session.py +462 -34
- agno/os/routers/teams/__init__.py +3 -0
- agno/os/routers/teams/router.py +512 -0
- agno/os/routers/teams/schema.py +257 -0
- agno/os/routers/traces/__init__.py +3 -0
- agno/os/routers/traces/schemas.py +414 -0
- agno/os/routers/traces/traces.py +499 -0
- agno/os/routers/workflows/__init__.py +3 -0
- agno/os/routers/workflows/router.py +624 -0
- agno/os/routers/workflows/schema.py +75 -0
- agno/os/schema.py +256 -693
- agno/os/scopes.py +469 -0
- agno/os/utils.py +514 -36
- agno/reasoning/anthropic.py +80 -0
- agno/reasoning/gemini.py +73 -0
- agno/reasoning/openai.py +5 -0
- agno/reasoning/vertexai.py +76 -0
- agno/run/__init__.py +6 -0
- agno/run/agent.py +155 -32
- agno/run/base.py +55 -3
- agno/run/requirement.py +181 -0
- agno/run/team.py +125 -38
- agno/run/workflow.py +72 -18
- agno/session/agent.py +102 -89
- agno/session/summary.py +56 -15
- agno/session/team.py +164 -90
- agno/session/workflow.py +405 -40
- agno/table.py +10 -0
- agno/team/team.py +3974 -1903
- agno/tools/dalle.py +2 -4
- agno/tools/eleven_labs.py +23 -25
- agno/tools/exa.py +21 -16
- agno/tools/file.py +153 -23
- agno/tools/file_generation.py +16 -10
- agno/tools/firecrawl.py +15 -7
- agno/tools/function.py +193 -38
- agno/tools/gmail.py +238 -14
- agno/tools/google_drive.py +271 -0
- agno/tools/googlecalendar.py +36 -8
- agno/tools/googlesheets.py +20 -5
- agno/tools/jira.py +20 -0
- agno/tools/mcp/__init__.py +10 -0
- agno/tools/mcp/mcp.py +331 -0
- agno/tools/mcp/multi_mcp.py +347 -0
- agno/tools/mcp/params.py +24 -0
- agno/tools/mcp_toolbox.py +3 -3
- agno/tools/models/nebius.py +5 -5
- agno/tools/models_labs.py +20 -10
- agno/tools/nano_banana.py +151 -0
- agno/tools/notion.py +204 -0
- agno/tools/parallel.py +314 -0
- agno/tools/postgres.py +76 -36
- agno/tools/redshift.py +406 -0
- agno/tools/scrapegraph.py +1 -1
- agno/tools/shopify.py +1519 -0
- agno/tools/slack.py +18 -3
- agno/tools/spotify.py +919 -0
- agno/tools/tavily.py +146 -0
- agno/tools/toolkit.py +25 -0
- agno/tools/workflow.py +8 -1
- agno/tools/yfinance.py +12 -11
- agno/tracing/__init__.py +12 -0
- agno/tracing/exporter.py +157 -0
- agno/tracing/schemas.py +276 -0
- agno/tracing/setup.py +111 -0
- agno/utils/agent.py +938 -0
- agno/utils/cryptography.py +22 -0
- agno/utils/dttm.py +33 -0
- agno/utils/events.py +151 -3
- agno/utils/gemini.py +15 -5
- agno/utils/hooks.py +118 -4
- agno/utils/http.py +113 -2
- agno/utils/knowledge.py +12 -5
- agno/utils/log.py +1 -0
- agno/utils/mcp.py +92 -2
- agno/utils/media.py +187 -1
- agno/utils/merge_dict.py +3 -3
- agno/utils/message.py +60 -0
- agno/utils/models/ai_foundry.py +9 -2
- agno/utils/models/claude.py +49 -14
- agno/utils/models/cohere.py +9 -2
- agno/utils/models/llama.py +9 -2
- agno/utils/models/mistral.py +4 -2
- agno/utils/print_response/agent.py +109 -16
- agno/utils/print_response/team.py +223 -30
- agno/utils/print_response/workflow.py +251 -34
- agno/utils/streamlit.py +1 -1
- agno/utils/team.py +98 -9
- agno/utils/tokens.py +657 -0
- agno/vectordb/base.py +39 -7
- agno/vectordb/cassandra/cassandra.py +21 -5
- agno/vectordb/chroma/chromadb.py +43 -12
- agno/vectordb/clickhouse/clickhousedb.py +21 -5
- agno/vectordb/couchbase/couchbase.py +29 -5
- agno/vectordb/lancedb/lance_db.py +92 -181
- agno/vectordb/langchaindb/langchaindb.py +24 -4
- agno/vectordb/lightrag/lightrag.py +17 -3
- agno/vectordb/llamaindex/llamaindexdb.py +25 -5
- agno/vectordb/milvus/milvus.py +50 -37
- agno/vectordb/mongodb/__init__.py +7 -1
- agno/vectordb/mongodb/mongodb.py +36 -30
- agno/vectordb/pgvector/pgvector.py +201 -77
- agno/vectordb/pineconedb/pineconedb.py +41 -23
- agno/vectordb/qdrant/qdrant.py +67 -54
- agno/vectordb/redis/__init__.py +9 -0
- agno/vectordb/redis/redisdb.py +682 -0
- agno/vectordb/singlestore/singlestore.py +50 -29
- agno/vectordb/surrealdb/surrealdb.py +31 -41
- agno/vectordb/upstashdb/upstashdb.py +34 -6
- agno/vectordb/weaviate/weaviate.py +53 -14
- agno/workflow/__init__.py +2 -0
- agno/workflow/agent.py +299 -0
- agno/workflow/condition.py +120 -18
- agno/workflow/loop.py +77 -10
- agno/workflow/parallel.py +231 -143
- agno/workflow/router.py +118 -17
- agno/workflow/step.py +609 -170
- agno/workflow/steps.py +73 -6
- agno/workflow/types.py +96 -21
- agno/workflow/workflow.py +2039 -262
- {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/METADATA +201 -66
- agno-2.3.13.dist-info/RECORD +613 -0
- agno/tools/googlesearch.py +0 -98
- agno/tools/mcp.py +0 -679
- agno/tools/memori.py +0 -339
- agno-2.1.2.dist-info/RECORD +0 -543
- {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/WHEEL +0 -0
- {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/licenses/LICENSE +0 -0
- {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/top_level.txt +0 -0
agno/knowledge/reader/base.py
CHANGED
|
@@ -44,11 +44,15 @@ class Reader:
|
|
|
44
44
|
self.max_results = max_results
|
|
45
45
|
self.encoding = encoding
|
|
46
46
|
|
|
47
|
-
def set_chunking_strategy_from_string(
|
|
47
|
+
def set_chunking_strategy_from_string(
|
|
48
|
+
self, strategy_name: str, chunk_size: Optional[int] = None, overlap: Optional[int] = None, **kwargs
|
|
49
|
+
) -> None:
|
|
48
50
|
"""Set the chunking strategy from a string name."""
|
|
49
51
|
try:
|
|
50
52
|
strategy_type = ChunkingStrategyType.from_string(strategy_name)
|
|
51
|
-
self.chunking_strategy = ChunkingStrategyFactory.create_strategy(
|
|
53
|
+
self.chunking_strategy = ChunkingStrategyFactory.create_strategy(
|
|
54
|
+
strategy_type, chunk_size=chunk_size, overlap=overlap, **kwargs
|
|
55
|
+
)
|
|
52
56
|
except ValueError as e:
|
|
53
57
|
raise ValueError(f"Failed to set chunking strategy: {e}")
|
|
54
58
|
|
|
@@ -15,7 +15,7 @@ from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyT
|
|
|
15
15
|
from agno.knowledge.document.base import Document
|
|
16
16
|
from agno.knowledge.reader.base import Reader
|
|
17
17
|
from agno.knowledge.types import ContentType
|
|
18
|
-
from agno.utils.log import
|
|
18
|
+
from agno.utils.log import log_debug, log_error
|
|
19
19
|
|
|
20
20
|
|
|
21
21
|
class CSVReader(Reader):
|
|
@@ -46,10 +46,10 @@ class CSVReader(Reader):
|
|
|
46
46
|
if isinstance(file, Path):
|
|
47
47
|
if not file.exists():
|
|
48
48
|
raise FileNotFoundError(f"Could not find file: {file}")
|
|
49
|
-
|
|
49
|
+
log_debug(f"Reading: {file}")
|
|
50
50
|
file_content = file.open(newline="", mode="r", encoding=self.encoding or "utf-8")
|
|
51
51
|
else:
|
|
52
|
-
|
|
52
|
+
log_debug(f"Reading retrieved file: {name or file.name}")
|
|
53
53
|
file.seek(0)
|
|
54
54
|
file_content = io.StringIO(file.read().decode("utf-8")) # type: ignore
|
|
55
55
|
|
|
@@ -78,7 +78,7 @@ class CSVReader(Reader):
|
|
|
78
78
|
return chunked_documents
|
|
79
79
|
return documents
|
|
80
80
|
except Exception as e:
|
|
81
|
-
|
|
81
|
+
log_error(f"Error reading: {getattr(file, 'name', str(file)) if isinstance(file, IO) else file}: {e}")
|
|
82
82
|
return []
|
|
83
83
|
|
|
84
84
|
async def async_read(
|
|
@@ -105,14 +105,14 @@ class CSVReader(Reader):
|
|
|
105
105
|
if isinstance(file, Path):
|
|
106
106
|
if not file.exists():
|
|
107
107
|
raise FileNotFoundError(f"Could not find file: {file}")
|
|
108
|
-
|
|
108
|
+
log_debug(f"Reading async: {file}")
|
|
109
109
|
async with aiofiles.open(file, mode="r", encoding="utf-8", newline="") as file_content:
|
|
110
110
|
content = await file_content.read()
|
|
111
111
|
file_content_io = io.StringIO(content)
|
|
112
112
|
else:
|
|
113
|
-
|
|
113
|
+
log_debug(f"Reading retrieved file async: {getattr(file, 'name', 'BytesIO')}")
|
|
114
114
|
file.seek(0)
|
|
115
|
-
file_content_io = io.StringIO(file.read().decode("utf-8"))
|
|
115
|
+
file_content_io = io.StringIO(file.read().decode("utf-8"))
|
|
116
116
|
|
|
117
117
|
csv_name = name or (
|
|
118
118
|
Path(file.name).stem
|
|
@@ -160,7 +160,5 @@ class CSVReader(Reader):
|
|
|
160
160
|
|
|
161
161
|
return documents
|
|
162
162
|
except Exception as e:
|
|
163
|
-
|
|
164
|
-
f"Error reading async: {getattr(file, 'name', str(file)) if isinstance(file, IO) else file}: {e}"
|
|
165
|
-
)
|
|
163
|
+
log_error(f"Error reading async: {getattr(file, 'name', str(file)) if isinstance(file, IO) else file}: {e}")
|
|
166
164
|
return []
|
|
@@ -8,7 +8,7 @@ from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyT
|
|
|
8
8
|
from agno.knowledge.document.base import Document
|
|
9
9
|
from agno.knowledge.reader.base import Reader
|
|
10
10
|
from agno.knowledge.types import ContentType
|
|
11
|
-
from agno.utils.log import
|
|
11
|
+
from agno.utils.log import log_debug, log_error
|
|
12
12
|
|
|
13
13
|
try:
|
|
14
14
|
from docx import Document as DocxDocument # type: ignore
|
|
@@ -43,11 +43,11 @@ class DocxReader(Reader):
|
|
|
43
43
|
if isinstance(file, Path):
|
|
44
44
|
if not file.exists():
|
|
45
45
|
raise FileNotFoundError(f"Could not find file: {file}")
|
|
46
|
-
|
|
46
|
+
log_debug(f"Reading: {file}")
|
|
47
47
|
docx_document = DocxDocument(str(file))
|
|
48
48
|
doc_name = name or file.stem
|
|
49
49
|
else:
|
|
50
|
-
|
|
50
|
+
log_debug(f"Reading uploaded file: {getattr(file, 'name', 'docx_file')}")
|
|
51
51
|
docx_document = DocxDocument(file)
|
|
52
52
|
doc_name = name or (
|
|
53
53
|
getattr(file, "name", "docx_file").split(".")[0] if hasattr(file, "name") else "docx_file"
|
|
@@ -62,7 +62,6 @@ class DocxReader(Reader):
|
|
|
62
62
|
content=doc_content,
|
|
63
63
|
)
|
|
64
64
|
]
|
|
65
|
-
|
|
66
65
|
if self.chunk:
|
|
67
66
|
chunked_documents = []
|
|
68
67
|
for document in documents:
|
|
@@ -71,7 +70,7 @@ class DocxReader(Reader):
|
|
|
71
70
|
return documents
|
|
72
71
|
|
|
73
72
|
except Exception as e:
|
|
74
|
-
|
|
73
|
+
log_error(f"Error reading file: {e}")
|
|
75
74
|
return []
|
|
76
75
|
|
|
77
76
|
async def async_read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
|
|
@@ -79,5 +78,5 @@ class DocxReader(Reader):
|
|
|
79
78
|
try:
|
|
80
79
|
return await asyncio.to_thread(self.read, file, name)
|
|
81
80
|
except Exception as e:
|
|
82
|
-
|
|
81
|
+
log_error(f"Error reading file asynchronously: {e}")
|
|
83
82
|
return []
|
|
@@ -13,7 +13,7 @@ from agno.knowledge.chunking.strategy import ChunkingStrategyType
|
|
|
13
13
|
from agno.knowledge.document.base import Document
|
|
14
14
|
from agno.knowledge.reader.base import Reader
|
|
15
15
|
from agno.knowledge.types import ContentType
|
|
16
|
-
from agno.utils.log import
|
|
16
|
+
from agno.utils.log import log_debug, log_error, log_warning
|
|
17
17
|
|
|
18
18
|
|
|
19
19
|
class FieldLabeledCSVReader(Reader):
|
|
@@ -33,8 +33,6 @@ class FieldLabeledCSVReader(Reader):
|
|
|
33
33
|
self.format_headers = format_headers
|
|
34
34
|
self.skip_empty_fields = skip_empty_fields
|
|
35
35
|
|
|
36
|
-
logger.info(f"FieldLabeledCSVReader initialized - chunk_title: {chunk_title}, field_names: {self.field_names}")
|
|
37
|
-
|
|
38
36
|
@classmethod
|
|
39
37
|
def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
|
|
40
38
|
"""Chunking is not supported - each row is already a logical document unit."""
|
|
@@ -107,10 +105,10 @@ class FieldLabeledCSVReader(Reader):
|
|
|
107
105
|
if isinstance(file, Path):
|
|
108
106
|
if not file.exists():
|
|
109
107
|
raise FileNotFoundError(f"Could not find file: {file}")
|
|
110
|
-
|
|
108
|
+
log_debug(f"Reading: {file}")
|
|
111
109
|
file_content = file.open(newline="", mode="r", encoding=self.encoding or "utf-8")
|
|
112
110
|
else:
|
|
113
|
-
|
|
111
|
+
log_debug(f"Reading retrieved file: {name or file.name}")
|
|
114
112
|
file.seek(0)
|
|
115
113
|
file_content = io.StringIO(file.read().decode("utf-8")) # type: ignore
|
|
116
114
|
|
|
@@ -129,15 +127,15 @@ class FieldLabeledCSVReader(Reader):
|
|
|
129
127
|
rows = list(csv_reader)
|
|
130
128
|
|
|
131
129
|
if not rows:
|
|
132
|
-
|
|
130
|
+
log_warning("CSV file is empty")
|
|
133
131
|
return []
|
|
134
132
|
|
|
135
133
|
# First row is headers
|
|
136
134
|
headers = [header.strip() for header in rows[0]]
|
|
137
|
-
|
|
135
|
+
log_debug(f"Found {len(headers)} headers: {headers}")
|
|
138
136
|
|
|
139
137
|
data_rows = rows[1:] if len(rows) > 1 else []
|
|
140
|
-
|
|
138
|
+
log_debug(f"Processing {len(data_rows)} data rows")
|
|
141
139
|
|
|
142
140
|
for row_index, row in enumerate(data_rows):
|
|
143
141
|
# Ensure row has same length as headers (pad or truncate)
|
|
@@ -165,13 +163,13 @@ class FieldLabeledCSVReader(Reader):
|
|
|
165
163
|
)
|
|
166
164
|
|
|
167
165
|
documents.append(document)
|
|
168
|
-
|
|
166
|
+
log_debug(f"Created document for row {row_index + 1}: {len(labeled_text)} chars")
|
|
169
167
|
|
|
170
|
-
|
|
168
|
+
log_debug(f"Successfully created {len(documents)} labeled documents from CSV")
|
|
171
169
|
return documents
|
|
172
170
|
|
|
173
171
|
except Exception as e:
|
|
174
|
-
|
|
172
|
+
log_error(f"Error reading: {getattr(file, 'name', str(file)) if isinstance(file, IO) else file}: {e}")
|
|
175
173
|
return []
|
|
176
174
|
|
|
177
175
|
async def async_read(
|
|
@@ -187,12 +185,12 @@ class FieldLabeledCSVReader(Reader):
|
|
|
187
185
|
if isinstance(file, Path):
|
|
188
186
|
if not file.exists():
|
|
189
187
|
raise FileNotFoundError(f"Could not find file: {file}")
|
|
190
|
-
|
|
188
|
+
log_debug(f"Reading async: {file}")
|
|
191
189
|
async with aiofiles.open(file, mode="r", encoding=self.encoding or "utf-8", newline="") as file_content:
|
|
192
190
|
content = await file_content.read()
|
|
193
191
|
file_content_io = io.StringIO(content)
|
|
194
192
|
else:
|
|
195
|
-
|
|
193
|
+
log_debug(f"Reading retrieved file async: {name or file.name}")
|
|
196
194
|
file.seek(0)
|
|
197
195
|
file_content_io = io.StringIO(file.read().decode("utf-8")) # type: ignore
|
|
198
196
|
|
|
@@ -207,17 +205,17 @@ class FieldLabeledCSVReader(Reader):
|
|
|
207
205
|
rows = list(csv_reader)
|
|
208
206
|
|
|
209
207
|
if not rows:
|
|
210
|
-
|
|
208
|
+
log_warning("CSV file is empty")
|
|
211
209
|
return []
|
|
212
210
|
|
|
213
211
|
# First row is headers
|
|
214
212
|
headers = [header.strip() for header in rows[0]]
|
|
215
|
-
|
|
213
|
+
log_debug(f"Found {len(headers)} headers: {headers}")
|
|
216
214
|
|
|
217
215
|
# Process data rows
|
|
218
216
|
data_rows = rows[1:] if len(rows) > 1 else []
|
|
219
217
|
total_rows = len(data_rows)
|
|
220
|
-
|
|
218
|
+
log_debug(f"Processing {total_rows} data rows")
|
|
221
219
|
|
|
222
220
|
# For small files, process all at once
|
|
223
221
|
if total_rows <= 10:
|
|
@@ -284,11 +282,9 @@ class FieldLabeledCSVReader(Reader):
|
|
|
284
282
|
|
|
285
283
|
documents = [doc for page_docs in page_results for doc in page_docs]
|
|
286
284
|
|
|
287
|
-
|
|
285
|
+
log_debug(f"Successfully created {len(documents)} labeled documents from CSV")
|
|
288
286
|
return documents
|
|
289
287
|
|
|
290
288
|
except Exception as e:
|
|
291
|
-
|
|
292
|
-
f"Error reading async: {getattr(file, 'name', str(file)) if isinstance(file, IO) else file}: {e}"
|
|
293
|
-
)
|
|
289
|
+
log_error(f"Error reading async: {getattr(file, 'name', str(file)) if isinstance(file, IO) else file}: {e}")
|
|
294
290
|
return []
|
|
@@ -10,7 +10,7 @@ from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyT
|
|
|
10
10
|
from agno.knowledge.document.base import Document
|
|
11
11
|
from agno.knowledge.reader.base import Reader
|
|
12
12
|
from agno.knowledge.types import ContentType
|
|
13
|
-
from agno.utils.log import
|
|
13
|
+
from agno.utils.log import log_debug, log_error
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
class JSONReader(Reader):
|
|
@@ -41,13 +41,13 @@ class JSONReader(Reader):
|
|
|
41
41
|
if isinstance(path, Path):
|
|
42
42
|
if not path.exists():
|
|
43
43
|
raise FileNotFoundError(f"Could not find file: {path}")
|
|
44
|
-
|
|
44
|
+
log_debug(f"Reading: {path}")
|
|
45
45
|
json_name = name or path.name.split(".")[0]
|
|
46
46
|
json_contents = json.loads(path.read_text(self.encoding or "utf-8"))
|
|
47
47
|
|
|
48
48
|
elif isinstance(path, BytesIO):
|
|
49
49
|
json_name = name or path.name.split(".")[0]
|
|
50
|
-
|
|
50
|
+
log_debug(f"Reading uploaded file: {json_name}")
|
|
51
51
|
path.seek(0)
|
|
52
52
|
json_contents = json.load(path)
|
|
53
53
|
|
|
@@ -72,7 +72,8 @@ class JSONReader(Reader):
|
|
|
72
72
|
chunked_documents.extend(self.chunk_document(document))
|
|
73
73
|
return chunked_documents
|
|
74
74
|
return documents
|
|
75
|
-
except Exception:
|
|
75
|
+
except Exception as e:
|
|
76
|
+
log_error(f"Error reading: {path}: {e}")
|
|
76
77
|
raise
|
|
77
78
|
|
|
78
79
|
async def async_read(self, path: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
|
|
@@ -7,7 +7,7 @@ from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyT
|
|
|
7
7
|
from agno.knowledge.document.base import Document
|
|
8
8
|
from agno.knowledge.reader.base import Reader
|
|
9
9
|
from agno.knowledge.types import ContentType
|
|
10
|
-
from agno.utils.log import
|
|
10
|
+
from agno.utils.log import log_debug, log_error, log_warning
|
|
11
11
|
|
|
12
12
|
DEFAULT_CHUNKER_STRATEGY: ChunkingStrategy
|
|
13
13
|
|
|
@@ -65,11 +65,11 @@ class MarkdownReader(Reader):
|
|
|
65
65
|
if isinstance(file, Path):
|
|
66
66
|
if not file.exists():
|
|
67
67
|
raise FileNotFoundError(f"Could not find file: {file}")
|
|
68
|
-
|
|
68
|
+
log_debug(f"Reading: {file}")
|
|
69
69
|
file_name = name or file.stem
|
|
70
70
|
file_contents = file.read_text(encoding=self.encoding or "utf-8")
|
|
71
71
|
else:
|
|
72
|
-
|
|
72
|
+
log_debug(f"Reading uploaded file: {file.name}")
|
|
73
73
|
file_name = name or file.name.split(".")[0]
|
|
74
74
|
file.seek(0)
|
|
75
75
|
file_contents = file.read().decode(self.encoding or "utf-8")
|
|
@@ -82,7 +82,7 @@ class MarkdownReader(Reader):
|
|
|
82
82
|
return chunked_documents
|
|
83
83
|
return documents
|
|
84
84
|
except Exception as e:
|
|
85
|
-
|
|
85
|
+
log_error(f"Error reading: {file}: {e}")
|
|
86
86
|
return []
|
|
87
87
|
|
|
88
88
|
async def async_read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
|
|
@@ -91,7 +91,7 @@ class MarkdownReader(Reader):
|
|
|
91
91
|
if not file.exists():
|
|
92
92
|
raise FileNotFoundError(f"Could not find file: {file}")
|
|
93
93
|
|
|
94
|
-
|
|
94
|
+
log_debug(f"Reading asynchronously: {file}")
|
|
95
95
|
file_name = name or file.stem
|
|
96
96
|
|
|
97
97
|
try:
|
|
@@ -100,10 +100,10 @@ class MarkdownReader(Reader):
|
|
|
100
100
|
async with aiofiles.open(file, "r", encoding=self.encoding or "utf-8") as f:
|
|
101
101
|
file_contents = await f.read()
|
|
102
102
|
except ImportError:
|
|
103
|
-
|
|
103
|
+
log_warning("aiofiles not installed, using synchronous file I/O")
|
|
104
104
|
file_contents = file.read_text(self.encoding or "utf-8")
|
|
105
105
|
else:
|
|
106
|
-
|
|
106
|
+
log_debug(f"Reading uploaded file asynchronously: {file.name}")
|
|
107
107
|
file_name = name or file.name.split(".")[0]
|
|
108
108
|
file.seek(0)
|
|
109
109
|
file_contents = file.read().decode(self.encoding or "utf-8")
|
|
@@ -118,7 +118,7 @@ class MarkdownReader(Reader):
|
|
|
118
118
|
return await self._async_chunk_document(document)
|
|
119
119
|
return [document]
|
|
120
120
|
except Exception as e:
|
|
121
|
-
|
|
121
|
+
log_error(f"Error reading asynchronously: {file}: {e}")
|
|
122
122
|
return []
|
|
123
123
|
|
|
124
124
|
async def _async_chunk_document(self, document: Document) -> List[Document]:
|
|
@@ -4,11 +4,12 @@ from pathlib import Path
|
|
|
4
4
|
from typing import IO, Any, List, Optional, Tuple, Union
|
|
5
5
|
from uuid import uuid4
|
|
6
6
|
|
|
7
|
-
from agno.knowledge.chunking.
|
|
7
|
+
from agno.knowledge.chunking.document import DocumentChunking
|
|
8
|
+
from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
|
|
8
9
|
from agno.knowledge.document.base import Document
|
|
9
10
|
from agno.knowledge.reader.base import Reader
|
|
10
11
|
from agno.knowledge.types import ContentType
|
|
11
|
-
from agno.utils.log import
|
|
12
|
+
from agno.utils.log import log_debug, log_error
|
|
12
13
|
|
|
13
14
|
try:
|
|
14
15
|
from pypdf import PdfReader as DocumentReader # noqa: F401
|
|
@@ -183,6 +184,7 @@ class BasePDFReader(Reader):
|
|
|
183
184
|
page_start_numbering_format: Optional[str] = None,
|
|
184
185
|
page_end_numbering_format: Optional[str] = None,
|
|
185
186
|
password: Optional[str] = None,
|
|
187
|
+
chunking_strategy: Optional[ChunkingStrategy] = DocumentChunking(chunk_size=5000),
|
|
186
188
|
**kwargs,
|
|
187
189
|
):
|
|
188
190
|
if page_start_numbering_format is None:
|
|
@@ -195,11 +197,7 @@ class BasePDFReader(Reader):
|
|
|
195
197
|
self.page_end_numbering_format = page_end_numbering_format
|
|
196
198
|
self.password = password
|
|
197
199
|
|
|
198
|
-
|
|
199
|
-
from agno.knowledge.chunking.document import DocumentChunking
|
|
200
|
-
|
|
201
|
-
self.chunking_strategy = DocumentChunking(chunk_size=5000)
|
|
202
|
-
super().__init__(**kwargs)
|
|
200
|
+
super().__init__(chunking_strategy=chunking_strategy, **kwargs)
|
|
203
201
|
|
|
204
202
|
@classmethod
|
|
205
203
|
def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:
|
|
@@ -238,13 +236,13 @@ class BasePDFReader(Reader):
|
|
|
238
236
|
# Use provided password or fall back to instance password
|
|
239
237
|
pdf_password = password or self.password
|
|
240
238
|
if not pdf_password:
|
|
241
|
-
|
|
239
|
+
log_error(f'PDF file "{doc_name}" is password protected but no password provided')
|
|
242
240
|
return False
|
|
243
241
|
|
|
244
242
|
try:
|
|
245
243
|
decrypted_pdf = doc_reader.decrypt(pdf_password)
|
|
246
244
|
if decrypted_pdf:
|
|
247
|
-
|
|
245
|
+
log_debug(f'Successfully decrypted PDF file "{doc_name}" with user password')
|
|
248
246
|
return True
|
|
249
247
|
else:
|
|
250
248
|
log_error(f'Failed to decrypt PDF file "{doc_name}": incorrect password')
|
|
@@ -346,12 +344,12 @@ class PDFReader(BasePDFReader):
|
|
|
346
344
|
self, pdf: Union[str, Path, IO[Any]], name: Optional[str] = None, password: Optional[str] = None
|
|
347
345
|
) -> List[Document]:
|
|
348
346
|
doc_name = self._get_doc_name(pdf, name)
|
|
349
|
-
|
|
347
|
+
log_debug(f"Reading: {doc_name}")
|
|
350
348
|
|
|
351
349
|
try:
|
|
352
350
|
pdf_reader = DocumentReader(pdf)
|
|
353
351
|
except PdfStreamError as e:
|
|
354
|
-
|
|
352
|
+
log_error(f"Error reading PDF: {e}")
|
|
355
353
|
return []
|
|
356
354
|
# Handle PDF decryption
|
|
357
355
|
if not self._decrypt_pdf(pdf_reader, doc_name, password):
|
|
@@ -370,12 +368,12 @@ class PDFReader(BasePDFReader):
|
|
|
370
368
|
log_error("No pdf provided")
|
|
371
369
|
return []
|
|
372
370
|
doc_name = self._get_doc_name(pdf, name)
|
|
373
|
-
|
|
371
|
+
log_debug(f"Reading: {doc_name}")
|
|
374
372
|
|
|
375
373
|
try:
|
|
376
374
|
pdf_reader = DocumentReader(pdf)
|
|
377
375
|
except PdfStreamError as e:
|
|
378
|
-
|
|
376
|
+
log_error(f"Error reading PDF: {e}")
|
|
379
377
|
return []
|
|
380
378
|
|
|
381
379
|
# Handle PDF decryption
|
|
@@ -396,11 +394,11 @@ class PDFImageReader(BasePDFReader):
|
|
|
396
394
|
raise ValueError("No pdf provided")
|
|
397
395
|
|
|
398
396
|
doc_name = self._get_doc_name(pdf, name)
|
|
399
|
-
|
|
397
|
+
log_debug(f"Reading: {doc_name}")
|
|
400
398
|
try:
|
|
401
399
|
pdf_reader = DocumentReader(pdf)
|
|
402
400
|
except PdfStreamError as e:
|
|
403
|
-
|
|
401
|
+
log_error(f"Error reading PDF: {e}")
|
|
404
402
|
return []
|
|
405
403
|
|
|
406
404
|
# Handle PDF decryption
|
|
@@ -408,7 +406,7 @@ class PDFImageReader(BasePDFReader):
|
|
|
408
406
|
return []
|
|
409
407
|
|
|
410
408
|
# Read and chunk.
|
|
411
|
-
return self._pdf_reader_to_documents(pdf_reader, doc_name, read_images=True, use_uuid_for_id=
|
|
409
|
+
return self._pdf_reader_to_documents(pdf_reader, doc_name, read_images=True, use_uuid_for_id=True)
|
|
412
410
|
|
|
413
411
|
async def async_read(
|
|
414
412
|
self, pdf: Union[str, Path, IO[Any]], name: Optional[str] = None, password: Optional[str] = None
|
|
@@ -417,12 +415,12 @@ class PDFImageReader(BasePDFReader):
|
|
|
417
415
|
raise ValueError("No pdf provided")
|
|
418
416
|
|
|
419
417
|
doc_name = self._get_doc_name(pdf, name)
|
|
420
|
-
|
|
418
|
+
log_debug(f"Reading: {doc_name}")
|
|
421
419
|
|
|
422
420
|
try:
|
|
423
421
|
pdf_reader = DocumentReader(pdf)
|
|
424
422
|
except PdfStreamError as e:
|
|
425
|
-
|
|
423
|
+
log_error(f"Error reading PDF: {e}")
|
|
426
424
|
return []
|
|
427
425
|
|
|
428
426
|
# Handle PDF decryption
|
|
@@ -430,4 +428,4 @@ class PDFImageReader(BasePDFReader):
|
|
|
430
428
|
return []
|
|
431
429
|
|
|
432
430
|
# Read and chunk.
|
|
433
|
-
return await self._async_pdf_reader_to_documents(pdf_reader, doc_name, read_images=True, use_uuid_for_id=
|
|
431
|
+
return await self._async_pdf_reader_to_documents(pdf_reader, doc_name, read_images=True, use_uuid_for_id=True)
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import IO, Any, List, Optional, Union
|
|
4
|
+
from uuid import uuid4
|
|
5
|
+
|
|
6
|
+
from agno.knowledge.chunking.document import DocumentChunking
|
|
7
|
+
from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
|
|
8
|
+
from agno.knowledge.document.base import Document
|
|
9
|
+
from agno.knowledge.reader.base import Reader
|
|
10
|
+
from agno.knowledge.types import ContentType
|
|
11
|
+
from agno.utils.log import log_debug, log_error
|
|
12
|
+
|
|
13
|
+
try:
|
|
14
|
+
from pptx import Presentation # type: ignore
|
|
15
|
+
except ImportError:
|
|
16
|
+
raise ImportError("The `python-pptx` package is not installed. Please install it via `pip install python-pptx`.")
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class PPTXReader(Reader):
|
|
20
|
+
"""Reader for PPTX files"""
|
|
21
|
+
|
|
22
|
+
def __init__(self, chunking_strategy: Optional[ChunkingStrategy] = DocumentChunking(), **kwargs):
|
|
23
|
+
super().__init__(chunking_strategy=chunking_strategy, **kwargs)
|
|
24
|
+
|
|
25
|
+
@classmethod
|
|
26
|
+
def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:
|
|
27
|
+
"""Get the list of supported chunking strategies for PPTX readers."""
|
|
28
|
+
return [
|
|
29
|
+
ChunkingStrategyType.DOCUMENT_CHUNKER,
|
|
30
|
+
ChunkingStrategyType.FIXED_SIZE_CHUNKER,
|
|
31
|
+
ChunkingStrategyType.SEMANTIC_CHUNKER,
|
|
32
|
+
ChunkingStrategyType.AGENTIC_CHUNKER,
|
|
33
|
+
ChunkingStrategyType.RECURSIVE_CHUNKER,
|
|
34
|
+
]
|
|
35
|
+
|
|
36
|
+
@classmethod
|
|
37
|
+
def get_supported_content_types(self) -> List[ContentType]:
|
|
38
|
+
return [ContentType.PPTX]
|
|
39
|
+
|
|
40
|
+
def read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
|
|
41
|
+
"""Read a pptx file and return a list of documents"""
|
|
42
|
+
try:
|
|
43
|
+
if isinstance(file, Path):
|
|
44
|
+
if not file.exists():
|
|
45
|
+
raise FileNotFoundError(f"Could not find file: {file}")
|
|
46
|
+
log_debug(f"Reading: {file}")
|
|
47
|
+
presentation = Presentation(str(file))
|
|
48
|
+
doc_name = name or file.stem
|
|
49
|
+
else:
|
|
50
|
+
log_debug(f"Reading uploaded file: {getattr(file, 'name', 'pptx_file')}")
|
|
51
|
+
presentation = Presentation(file)
|
|
52
|
+
doc_name = name or (
|
|
53
|
+
getattr(file, "name", "pptx_file").split(".")[0] if hasattr(file, "name") else "pptx_file"
|
|
54
|
+
)
|
|
55
|
+
|
|
56
|
+
# Extract text from all slides
|
|
57
|
+
slide_texts = []
|
|
58
|
+
for slide_number, slide in enumerate(presentation.slides, 1):
|
|
59
|
+
slide_text = f"Slide {slide_number}:\n"
|
|
60
|
+
|
|
61
|
+
# Extract text from shapes that contain text
|
|
62
|
+
text_content = []
|
|
63
|
+
for shape in slide.shapes:
|
|
64
|
+
if hasattr(shape, "text") and shape.text.strip():
|
|
65
|
+
text_content.append(shape.text.strip())
|
|
66
|
+
|
|
67
|
+
if text_content:
|
|
68
|
+
slide_text += "\n".join(text_content)
|
|
69
|
+
else:
|
|
70
|
+
slide_text += "(No text content)"
|
|
71
|
+
|
|
72
|
+
slide_texts.append(slide_text)
|
|
73
|
+
|
|
74
|
+
doc_content = "\n\n".join(slide_texts)
|
|
75
|
+
|
|
76
|
+
documents = [
|
|
77
|
+
Document(
|
|
78
|
+
name=doc_name,
|
|
79
|
+
id=str(uuid4()),
|
|
80
|
+
content=doc_content,
|
|
81
|
+
)
|
|
82
|
+
]
|
|
83
|
+
|
|
84
|
+
if self.chunk:
|
|
85
|
+
chunked_documents = []
|
|
86
|
+
for document in documents:
|
|
87
|
+
chunked_documents.extend(self.chunk_document(document))
|
|
88
|
+
return chunked_documents
|
|
89
|
+
return documents
|
|
90
|
+
|
|
91
|
+
except Exception as e:
|
|
92
|
+
log_error(f"Error reading file: {e}")
|
|
93
|
+
return []
|
|
94
|
+
|
|
95
|
+
async def async_read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
|
|
96
|
+
"""Asynchronously read a pptx file and return a list of documents"""
|
|
97
|
+
try:
|
|
98
|
+
return await asyncio.to_thread(self.read, file, name)
|
|
99
|
+
except Exception as e:
|
|
100
|
+
log_error(f"Error reading file asynchronously: {e}")
|
|
101
|
+
return []
|
|
@@ -16,8 +16,7 @@ class ReaderFactory:
|
|
|
16
16
|
from agno.knowledge.reader.pdf_reader import PDFReader
|
|
17
17
|
|
|
18
18
|
config: Dict[str, Any] = {
|
|
19
|
-
"
|
|
20
|
-
"chunk_size": 100,
|
|
19
|
+
"name": "PDF Reader",
|
|
21
20
|
"description": "Processes PDF documents with OCR support for images and text extraction",
|
|
22
21
|
}
|
|
23
22
|
config.update(kwargs)
|
|
@@ -59,6 +58,18 @@ class ReaderFactory:
|
|
|
59
58
|
config.update(kwargs)
|
|
60
59
|
return DocxReader(**config)
|
|
61
60
|
|
|
61
|
+
@classmethod
|
|
62
|
+
def _get_pptx_reader(cls, **kwargs) -> Reader:
|
|
63
|
+
"""Get PPTX reader instance."""
|
|
64
|
+
from agno.knowledge.reader.pptx_reader import PPTXReader
|
|
65
|
+
|
|
66
|
+
config: Dict[str, Any] = {
|
|
67
|
+
"name": "PPTX Reader",
|
|
68
|
+
"description": "Extracts text content from Microsoft PowerPoint presentations (.pptx format)",
|
|
69
|
+
}
|
|
70
|
+
config.update(kwargs)
|
|
71
|
+
return PPTXReader(**config)
|
|
72
|
+
|
|
62
73
|
@classmethod
|
|
63
74
|
def _get_json_reader(cls, **kwargs) -> Reader:
|
|
64
75
|
"""Get JSON reader instance."""
|
|
@@ -121,6 +132,21 @@ class ReaderFactory:
|
|
|
121
132
|
config.update(kwargs)
|
|
122
133
|
return FirecrawlReader(**config)
|
|
123
134
|
|
|
135
|
+
@classmethod
|
|
136
|
+
def _get_tavily_reader(cls, **kwargs) -> Reader:
|
|
137
|
+
"""Get Tavily reader instance."""
|
|
138
|
+
from agno.knowledge.reader.tavily_reader import TavilyReader
|
|
139
|
+
|
|
140
|
+
config: Dict[str, Any] = {
|
|
141
|
+
"api_key": kwargs.get("api_key") or os.getenv("TAVILY_API_KEY"),
|
|
142
|
+
"extract_format": "markdown",
|
|
143
|
+
"extract_depth": "basic",
|
|
144
|
+
"name": "Tavily Reader",
|
|
145
|
+
"description": "Extracts content from URLs using Tavily's Extract API with markdown or text output",
|
|
146
|
+
}
|
|
147
|
+
config.update(kwargs)
|
|
148
|
+
return TavilyReader(**config)
|
|
149
|
+
|
|
124
150
|
@classmethod
|
|
125
151
|
def _get_youtube_reader(cls, **kwargs) -> Reader:
|
|
126
152
|
"""Get YouTube reader instance."""
|
|
@@ -201,8 +227,10 @@ class ReaderFactory:
|
|
|
201
227
|
return cls.create_reader("pdf")
|
|
202
228
|
elif extension in [".csv", "text/csv"]:
|
|
203
229
|
return cls.create_reader("csv")
|
|
204
|
-
elif extension in [".docx", ".doc"]:
|
|
230
|
+
elif extension in [".docx", ".doc", "application/vnd.openxmlformats-officedocument.wordprocessingml.document"]:
|
|
205
231
|
return cls.create_reader("docx")
|
|
232
|
+
elif extension == ".pptx":
|
|
233
|
+
return cls.create_reader("pptx")
|
|
206
234
|
elif extension == ".json":
|
|
207
235
|
return cls.create_reader("json")
|
|
208
236
|
elif extension in [".md", ".markdown"]:
|
|
@@ -243,6 +271,7 @@ class ReaderFactory:
|
|
|
243
271
|
url_reader_priority = [
|
|
244
272
|
"website",
|
|
245
273
|
"firecrawl",
|
|
274
|
+
"tavily",
|
|
246
275
|
"youtube",
|
|
247
276
|
]
|
|
248
277
|
|
|
@@ -10,7 +10,7 @@ from agno.knowledge.reader.base import Reader
|
|
|
10
10
|
from agno.knowledge.reader.pdf_reader import PDFReader
|
|
11
11
|
from agno.knowledge.reader.text_reader import TextReader
|
|
12
12
|
from agno.knowledge.types import ContentType
|
|
13
|
-
from agno.utils.log import
|
|
13
|
+
from agno.utils.log import log_debug, log_error
|
|
14
14
|
|
|
15
15
|
try:
|
|
16
16
|
from agno.aws.resource.s3.object import S3Object # type: ignore
|
|
@@ -51,7 +51,7 @@ class S3Reader(Reader):
|
|
|
51
51
|
|
|
52
52
|
def read(self, name: Optional[str], s3_object: S3Object) -> List[Document]:
|
|
53
53
|
try:
|
|
54
|
-
|
|
54
|
+
log_debug(f"Reading S3 file: {s3_object.uri}")
|
|
55
55
|
|
|
56
56
|
# Read PDF files
|
|
57
57
|
if s3_object.uri.endswith(".pdf"):
|
|
@@ -80,7 +80,7 @@ class S3Reader(Reader):
|
|
|
80
80
|
return documents
|
|
81
81
|
|
|
82
82
|
except Exception as e:
|
|
83
|
-
|
|
83
|
+
log_error(f"Error reading: {s3_object.uri}: {e}")
|
|
84
84
|
|
|
85
85
|
return []
|
|
86
86
|
|