agno 2.3.26__py3-none-any.whl → 2.4.1__py3-none-any.whl
This diff shows the changes between two publicly released versions of this package, as published to their public registry. It is provided for informational purposes only.
- agno/agent/__init__.py +4 -0
- agno/agent/agent.py +1368 -541
- agno/agent/remote.py +13 -0
- agno/db/base.py +339 -0
- agno/db/postgres/async_postgres.py +116 -12
- agno/db/postgres/postgres.py +1242 -25
- agno/db/postgres/schemas.py +48 -1
- agno/db/sqlite/async_sqlite.py +119 -4
- agno/db/sqlite/schemas.py +51 -0
- agno/db/sqlite/sqlite.py +1186 -13
- agno/db/utils.py +37 -1
- agno/integrations/discord/client.py +12 -1
- agno/knowledge/__init__.py +4 -0
- agno/knowledge/chunking/code.py +1 -1
- agno/knowledge/chunking/semantic.py +1 -1
- agno/knowledge/chunking/strategy.py +4 -0
- agno/knowledge/filesystem.py +412 -0
- agno/knowledge/knowledge.py +3722 -2182
- agno/knowledge/protocol.py +134 -0
- agno/knowledge/reader/arxiv_reader.py +2 -2
- agno/knowledge/reader/base.py +9 -7
- agno/knowledge/reader/csv_reader.py +236 -13
- agno/knowledge/reader/docx_reader.py +2 -2
- agno/knowledge/reader/field_labeled_csv_reader.py +169 -5
- agno/knowledge/reader/firecrawl_reader.py +2 -2
- agno/knowledge/reader/json_reader.py +2 -2
- agno/knowledge/reader/markdown_reader.py +2 -2
- agno/knowledge/reader/pdf_reader.py +5 -4
- agno/knowledge/reader/pptx_reader.py +2 -2
- agno/knowledge/reader/reader_factory.py +118 -1
- agno/knowledge/reader/s3_reader.py +2 -2
- agno/knowledge/reader/tavily_reader.py +2 -2
- agno/knowledge/reader/text_reader.py +2 -2
- agno/knowledge/reader/web_search_reader.py +2 -2
- agno/knowledge/reader/website_reader.py +5 -3
- agno/knowledge/reader/wikipedia_reader.py +2 -2
- agno/knowledge/reader/youtube_reader.py +2 -2
- agno/knowledge/remote_content/__init__.py +29 -0
- agno/knowledge/remote_content/config.py +204 -0
- agno/knowledge/remote_content/remote_content.py +74 -17
- agno/knowledge/utils.py +37 -29
- agno/learn/__init__.py +6 -0
- agno/learn/machine.py +35 -0
- agno/learn/schemas.py +82 -11
- agno/learn/stores/__init__.py +3 -0
- agno/learn/stores/decision_log.py +1156 -0
- agno/learn/stores/learned_knowledge.py +6 -6
- agno/models/anthropic/claude.py +24 -0
- agno/models/aws/bedrock.py +20 -0
- agno/models/base.py +60 -6
- agno/models/cerebras/cerebras.py +34 -2
- agno/models/cohere/chat.py +25 -0
- agno/models/google/gemini.py +50 -5
- agno/models/litellm/chat.py +38 -0
- agno/models/n1n/__init__.py +3 -0
- agno/models/n1n/n1n.py +57 -0
- agno/models/openai/chat.py +25 -1
- agno/models/openrouter/openrouter.py +46 -0
- agno/models/perplexity/perplexity.py +2 -0
- agno/models/response.py +16 -0
- agno/os/app.py +83 -44
- agno/os/interfaces/slack/router.py +10 -1
- agno/os/interfaces/whatsapp/router.py +6 -0
- agno/os/middleware/__init__.py +2 -0
- agno/os/middleware/trailing_slash.py +27 -0
- agno/os/router.py +1 -0
- agno/os/routers/agents/router.py +29 -16
- agno/os/routers/agents/schema.py +6 -4
- agno/os/routers/components/__init__.py +3 -0
- agno/os/routers/components/components.py +475 -0
- agno/os/routers/evals/schemas.py +4 -3
- agno/os/routers/health.py +3 -3
- agno/os/routers/knowledge/knowledge.py +128 -3
- agno/os/routers/knowledge/schemas.py +12 -0
- agno/os/routers/memory/schemas.py +4 -2
- agno/os/routers/metrics/metrics.py +9 -11
- agno/os/routers/metrics/schemas.py +10 -6
- agno/os/routers/registry/__init__.py +3 -0
- agno/os/routers/registry/registry.py +337 -0
- agno/os/routers/teams/router.py +20 -8
- agno/os/routers/teams/schema.py +6 -4
- agno/os/routers/traces/traces.py +5 -5
- agno/os/routers/workflows/router.py +38 -11
- agno/os/routers/workflows/schema.py +1 -1
- agno/os/schema.py +92 -26
- agno/os/utils.py +84 -19
- agno/reasoning/anthropic.py +2 -2
- agno/reasoning/azure_ai_foundry.py +2 -2
- agno/reasoning/deepseek.py +2 -2
- agno/reasoning/default.py +6 -7
- agno/reasoning/gemini.py +2 -2
- agno/reasoning/helpers.py +6 -7
- agno/reasoning/manager.py +4 -10
- agno/reasoning/ollama.py +2 -2
- agno/reasoning/openai.py +2 -2
- agno/reasoning/vertexai.py +2 -2
- agno/registry/__init__.py +3 -0
- agno/registry/registry.py +68 -0
- agno/run/agent.py +59 -0
- agno/run/base.py +7 -0
- agno/run/team.py +57 -0
- agno/skills/agent_skills.py +10 -3
- agno/team/__init__.py +3 -1
- agno/team/team.py +1165 -330
- agno/tools/duckduckgo.py +25 -71
- agno/tools/exa.py +0 -21
- agno/tools/function.py +35 -83
- agno/tools/knowledge.py +9 -4
- agno/tools/mem0.py +11 -10
- agno/tools/memory.py +47 -46
- agno/tools/parallel.py +0 -7
- agno/tools/reasoning.py +30 -23
- agno/tools/tavily.py +4 -1
- agno/tools/websearch.py +93 -0
- agno/tools/website.py +1 -1
- agno/tools/wikipedia.py +1 -1
- agno/tools/workflow.py +48 -47
- agno/utils/agent.py +42 -5
- agno/utils/events.py +160 -2
- agno/utils/print_response/agent.py +0 -31
- agno/utils/print_response/team.py +0 -2
- agno/utils/print_response/workflow.py +0 -2
- agno/utils/team.py +61 -11
- agno/vectordb/lancedb/lance_db.py +4 -1
- agno/vectordb/mongodb/mongodb.py +1 -1
- agno/vectordb/pgvector/pgvector.py +3 -3
- agno/vectordb/qdrant/qdrant.py +4 -4
- agno/workflow/__init__.py +3 -1
- agno/workflow/condition.py +0 -21
- agno/workflow/loop.py +0 -21
- agno/workflow/parallel.py +0 -21
- agno/workflow/router.py +0 -21
- agno/workflow/step.py +117 -24
- agno/workflow/steps.py +0 -21
- agno/workflow/workflow.py +427 -63
- {agno-2.3.26.dist-info → agno-2.4.1.dist-info}/METADATA +49 -76
- {agno-2.3.26.dist-info → agno-2.4.1.dist-info}/RECORD +140 -126
- {agno-2.3.26.dist-info → agno-2.4.1.dist-info}/WHEEL +1 -1
- {agno-2.3.26.dist-info → agno-2.4.1.dist-info}/licenses/LICENSE +0 -0
- {agno-2.3.26.dist-info → agno-2.4.1.dist-info}/top_level.txt +0 -0

agno/knowledge/reader/field_labeled_csv_reader.py

```diff
@@ -2,7 +2,7 @@ import asyncio
 import csv
 import io
 from pathlib import Path
-from typing import IO, Any, List, Optional, Union
+from typing import IO, Any, Iterable, List, Optional, Sequence, Tuple, Union
 
 try:
     import aiofiles
@@ -12,6 +12,12 @@ except ImportError:
 from agno.knowledge.chunking.strategy import ChunkingStrategyType
 from agno.knowledge.document.base import Document
 from agno.knowledge.reader.base import Reader
+from agno.knowledge.reader.csv_reader import (
+    _convert_xls_cell_value,
+    _get_workbook_name,
+    _infer_file_extension,
+    _stringify_spreadsheet_cell_value,
+)
 from agno.knowledge.types import ContentType
 from agno.utils.log import log_debug, log_error, log_warning
 
@@ -84,7 +90,8 @@ class FieldLabeledCSVReader(Reader):
             lines.append(title)
 
         for i, (header, value) in enumerate(zip(headers, row)):
-
+            # Normalize line endings before stripping to handle embedded newlines
+            clean_value = _stringify_spreadsheet_cell_value(value).strip() if value else ""
 
             if self.skip_empty_fields and not clean_value:
                 continue
@@ -98,10 +105,155 @@ class FieldLabeledCSVReader(Reader):
 
         return "\n".join(lines)
 
+    def _excel_rows_to_field_labeled_documents(
+        self,
+        *,
+        workbook_name: str,
+        sheets: Iterable[Tuple[str, Iterable[Sequence[Any]]]],
+    ) -> List[Document]:
+        """Convert Excel rows to field-labeled documents (one document per data row).
+
+        For each sheet: first row = headers, subsequent rows = data.
+        Each data row becomes a Document with field-labeled content.
+        """
+        documents = []
+        global_row_index = 0
+
+        for sheet_index, (sheet_name, rows) in enumerate(sheets, start=1):
+            rows_list = list(rows)
+
+            if not rows_list:
+                log_debug(f"Sheet '{sheet_name}' is empty, skipping")
+                continue
+
+            # First row is headers
+            headers = [_stringify_spreadsheet_cell_value(h).strip() for h in rows_list[0]]
+            if not any(headers):
+                log_debug(f"Sheet '{sheet_name}' has no valid headers, skipping")
+                continue
+
+            data_rows = rows_list[1:]
+            if not data_rows:
+                log_debug(f"Sheet '{sheet_name}' has only headers, skipping")
+                continue
+
+            log_debug(f"Processing sheet '{sheet_name}' with {len(headers)} headers and {len(data_rows)} rows")
+
+            for row_in_sheet, row in enumerate(data_rows):
+                # Convert cell values to strings
+                str_row = [_stringify_spreadsheet_cell_value(v) for v in row]
+
+                # Normalize row length
+                normalized_row = str_row[: len(headers)]
+                while len(normalized_row) < len(headers):
+                    normalized_row.append("")
+
+                # Skip entirely empty rows
+                if not any(v.strip() for v in normalized_row):
+                    continue
+
+                labeled_text = self._convert_row_to_labeled_text(headers, normalized_row, global_row_index)
+
+                if labeled_text.strip():
+                    doc_id = f"{workbook_name}_{sheet_name}_row_{row_in_sheet + 1}"
+                    documents.append(
+                        Document(
+                            id=doc_id,
+                            name=workbook_name,
+                            meta_data={
+                                "sheet_name": sheet_name,
+                                "sheet_index": sheet_index,
+                                "row_index": row_in_sheet,
+                                "headers": headers,
+                                "source": "field_labeled_csv_reader",
+                            },
+                            content=labeled_text,
+                        )
+                    )
+                    global_row_index += 1
+
+        return documents
+
+    def _read_xlsx(self, file: Union[Path, IO[Any]], *, workbook_name: str) -> List[Document]:
+        """Read .xlsx file and convert rows to field-labeled documents."""
+        try:
+            import openpyxl  # type: ignore
+        except ImportError as e:
+            raise ImportError(
+                "`openpyxl` not installed. Please install it via `pip install agno[csv]` or `pip install openpyxl`."
+            ) from e
+
+        if isinstance(file, Path):
+            workbook = openpyxl.load_workbook(filename=str(file), read_only=True, data_only=True)
+        else:
+            file.seek(0)
+            raw = file.read()
+            if isinstance(raw, str):
+                raw = raw.encode("utf-8", errors="replace")
+            workbook = openpyxl.load_workbook(filename=io.BytesIO(raw), read_only=True, data_only=True)
+
+        try:
+            return self._excel_rows_to_field_labeled_documents(
+                workbook_name=workbook_name,
+                sheets=[(worksheet.title, worksheet.iter_rows(values_only=True)) for worksheet in workbook.worksheets],
+            )
+        finally:
+            workbook.close()
+
+    def _read_xls(self, file: Union[Path, IO[Any]], *, workbook_name: str) -> List[Document]:
+        """Read .xls file and convert rows to field-labeled documents."""
+        try:
+            import xlrd  # type: ignore
+        except ImportError as e:
+            raise ImportError(
+                "`xlrd` not installed. Please install it via `pip install agno[csv]` or `pip install xlrd`."
+            ) from e
+
+        if isinstance(file, Path):
+            workbook = xlrd.open_workbook(filename=str(file))
+        else:
+            file.seek(0)
+            raw = file.read()
+            if isinstance(raw, str):
+                raw = raw.encode("utf-8", errors="replace")
+            workbook = xlrd.open_workbook(file_contents=raw)
+
+        sheets: List[Tuple[str, Iterable[Sequence[Any]]]] = []
+        for sheet_index in range(workbook.nsheets):
+            sheet = workbook.sheet_by_index(sheet_index)
+
+            def _iter_sheet_rows(_sheet: Any = sheet, _datemode: int = workbook.datemode) -> Iterable[Sequence[Any]]:
+                for row_index in range(_sheet.nrows):
+                    yield [
+                        _convert_xls_cell_value(
+                            _sheet.cell_value(row_index, col_index),
+                            _sheet.cell_type(row_index, col_index),
+                            _datemode,
+                        )
+                        for col_index in range(_sheet.ncols)
+                    ]
+
+            sheets.append((sheet.name, _iter_sheet_rows()))
+
+        return self._excel_rows_to_field_labeled_documents(workbook_name=workbook_name, sheets=sheets)
+
     def read(
         self, file: Union[Path, IO[Any]], delimiter: str = ",", quotechar: str = '"', name: Optional[str] = None
     ) -> List[Document]:
         try:
+            file_extension = _infer_file_extension(file, name)
+
+            # Handle Excel files
+            if file_extension in {ContentType.XLSX, ContentType.XLS}:
+                workbook_name = _get_workbook_name(file, name)
+                log_debug(f"Reading Excel file: {workbook_name}{file_extension}")
+
+                if file_extension == ContentType.XLSX:
+                    return self._read_xlsx(file, workbook_name=workbook_name)
+                else:
+                    return self._read_xls(file, workbook_name=workbook_name)
+
+            # Handle CSV files
             if isinstance(file, Path):
                 if not file.exists():
                     raise FileNotFoundError(f"Could not find file: {file}")
```
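The reader now fans each spreadsheet data row out into its own Document, keyed by workbook, sheet, and row. A minimal sync usage sketch (the file name and default constructor are illustrative assumptions; the `read` signature and metadata keys come from the hunk above):

```python
from pathlib import Path

from agno.knowledge.reader.field_labeled_csv_reader import FieldLabeledCSVReader

reader = FieldLabeledCSVReader()
# products.xlsx is a hypothetical local workbook
documents = reader.read(Path("products.xlsx"))

# One Document per data row, tagged with its sheet provenance
for doc in documents:
    print(doc.id, doc.meta_data["sheet_name"], doc.meta_data["row_index"])
```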
agno/knowledge/reader/field_labeled_csv_reader.py

```diff
@@ -114,7 +266,7 @@ class FieldLabeledCSVReader(Reader):
             log_debug(f"Reading retrieved file: {getattr(file, 'name', 'BytesIO')}")
             csv_name = name or getattr(file, "name", "csv_file").split(".")[0]
             file.seek(0)
-            file_content = io.StringIO(file.read().decode("utf-8"))
+            file_content = io.StringIO(file.read().decode(self.encoding or "utf-8"))
 
         documents = []
 
@@ -179,7 +331,19 @@ class FieldLabeledCSVReader(Reader):
         name: Optional[str] = None,
     ) -> List[Document]:
         try:
-
+            file_extension = _infer_file_extension(file, name)
+
+            # Handle Excel files (use asyncio.to_thread for sync openpyxl/xlrd)
+            if file_extension in {ContentType.XLSX, ContentType.XLS}:
+                workbook_name = _get_workbook_name(file, name)
+                log_debug(f"Reading Excel file async: {workbook_name}{file_extension}")
+
+                if file_extension == ContentType.XLSX:
+                    return await asyncio.to_thread(self._read_xlsx, file, workbook_name=workbook_name)
+                else:
+                    return await asyncio.to_thread(self._read_xls, file, workbook_name=workbook_name)
+
+            # Handle CSV files
             if isinstance(file, Path):
                 if not file.exists():
                     raise FileNotFoundError(f"Could not find file: {file}")
```
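The async path mirrors the sync one but pushes the blocking openpyxl/xlrd work onto a worker thread. A sketch, assuming the patched coroutine is the reader's `async_read` (the method name sits outside this hunk, so that is an assumption):

```python
import asyncio
from pathlib import Path

from agno.knowledge.reader.field_labeled_csv_reader import FieldLabeledCSVReader

async def main() -> None:
    reader = FieldLabeledCSVReader()
    # async_read is assumed to be the coroutine this hunk patches; Excel
    # parsing is offloaded via asyncio.to_thread so the event loop stays free
    documents = await reader.async_read(Path("inventory.xls"))
    print(len(documents))

asyncio.run(main())
```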
agno/knowledge/reader/field_labeled_csv_reader.py

```diff
@@ -192,7 +356,7 @@ class FieldLabeledCSVReader(Reader):
             log_debug(f"Reading retrieved file async: {getattr(file, 'name', 'BytesIO')}")
             csv_name = name or getattr(file, "name", "csv_file").split(".")[0]
             file.seek(0)
-            file_content_io = io.StringIO(file.read().decode("utf-8"))
+            file_content_io = io.StringIO(file.read().decode(self.encoding or "utf-8"))
 
         file_content_io.seek(0)
         csv_reader = csv.reader(file_content_io, delimiter=delimiter, quotechar=quotechar)
```
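Both decode sites now honor a reader-level encoding before falling back to UTF-8. A sketch, assuming `encoding` is accepted as a constructor keyword and stored as `self.encoding` (the attribute is referenced in the hunks; the constructor argument is an assumption):

```python
import io

from agno.knowledge.reader.field_labeled_csv_reader import FieldLabeledCSVReader

# Assumption: `encoding` flows through the constructor to self.encoding,
# which the patched decode call prefers over "utf-8"
reader = FieldLabeledCSVReader(encoding="latin-1")
data = io.BytesIO("name;ciudad\nAna;Málaga\n".encode("latin-1"))
documents = reader.read(data, delimiter=";", name="clientes")
```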
agno/knowledge/reader/firecrawl_reader.py

```diff
@@ -43,7 +43,7 @@ class FirecrawlReader(Reader):
         self.mode = mode
 
     @classmethod
-    def get_supported_chunking_strategies(
+    def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
         """Get the list of supported chunking strategies for Firecrawl readers."""
         return [
             ChunkingStrategyType.CODE_CHUNKER,
@@ -55,7 +55,7 @@ class FirecrawlReader(Reader):
         ]
 
     @classmethod
-    def get_supported_content_types(
+    def get_supported_content_types(cls) -> List[ContentType]:
         return [ContentType.URL]
 
     def scrape(self, url: str, name: Optional[str] = None) -> List[Document]:
```
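The reader hunks below all apply the same normalization: the capability classmethods gain an explicit `cls` parameter and a concrete return annotation, so capability metadata is callable straight off the class without instantiating a reader (or configuring its credentials). For example:

```python
from agno.knowledge.reader.firecrawl_reader import FirecrawlReader

# No instance (and no Firecrawl API key) needed to inspect capabilities
print(FirecrawlReader.get_supported_content_types())        # -> [ContentType.URL]
print(FirecrawlReader.get_supported_chunking_strategies())
```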
agno/knowledge/reader/json_reader.py

```diff
@@ -21,7 +21,7 @@ class JSONReader(Reader):
         super().__init__(chunking_strategy=chunking_strategy, **kwargs)
 
     @classmethod
-    def get_supported_chunking_strategies(
+    def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
         """Get the list of supported chunking strategies for JSON readers."""
         return [
             ChunkingStrategyType.CODE_CHUNKER,
@@ -33,7 +33,7 @@ class JSONReader(Reader):
         ]
 
     @classmethod
-    def get_supported_content_types(
+    def get_supported_content_types(cls) -> List[ContentType]:
         return [ContentType.JSON]
 
     def read(self, path: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
```

agno/knowledge/reader/markdown_reader.py

```diff
@@ -28,7 +28,7 @@ class MarkdownReader(Reader):
     """Reader for Markdown files"""
 
     @classmethod
-    def get_supported_chunking_strategies(
+    def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
         """Get the list of supported chunking strategies for Markdown readers."""
         strategies = [
             ChunkingStrategyType.CODE_CHUNKER,
@@ -46,7 +46,7 @@ class MarkdownReader(Reader):
         return strategies
 
     @classmethod
-    def get_supported_content_types(
+    def get_supported_content_types(cls) -> List[ContentType]:
         return [ContentType.MARKDOWN]
 
     def __init__(
```
agno/knowledge/reader/pdf_reader.py

```diff
@@ -200,7 +200,7 @@ class BasePDFReader(Reader):
         super().__init__(chunking_strategy=chunking_strategy, **kwargs)
 
     @classmethod
-    def get_supported_chunking_strategies(
+    def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
         """Get the list of supported chunking strategies for PDF readers."""
         return [
             ChunkingStrategyType.DOCUMENT_CHUNKER,
@@ -232,8 +232,9 @@ class BasePDFReader(Reader):
             return True
 
         # Use provided password or fall back to instance password
-
-        if
+        # Note: Empty string "" is a valid password for PDFs with blank user password
+        pdf_password = self.password if password is None else password
+        if pdf_password is None:
             log_error(f'PDF file "{doc_name}" is password protected but no password provided')
             return False
 
@@ -335,7 +336,7 @@ class PDFReader(BasePDFReader):
     """Reader for PDF files"""
 
     @classmethod
-    def get_supported_content_types(
+    def get_supported_content_types(cls) -> List[ContentType]:
         return [ContentType.PDF]
 
     def read(
```
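The decryption fix separates "no password supplied" from "empty password", since an empty string is a legitimate user password for some PDFs. A standalone sketch of the precedence rule (the function name is illustrative, not part of the diff):

```python
from typing import Optional

def resolve_pdf_password(instance_password: Optional[str], call_password: Optional[str]) -> Optional[str]:
    # The per-call password wins; only None (not falsiness) triggers the
    # fallback, so "" survives as a real password for blank-password PDFs.
    return instance_password if call_password is None else call_password

assert resolve_pdf_password("secret", None) == "secret"
assert resolve_pdf_password("secret", "") == ""      # empty string is kept
assert resolve_pdf_password(None, None) is None      # -> log_error + return False
```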
agno/knowledge/reader/pptx_reader.py

```diff
@@ -23,7 +23,7 @@ class PPTXReader(Reader):
         super().__init__(chunking_strategy=chunking_strategy, **kwargs)
 
     @classmethod
-    def get_supported_chunking_strategies(
+    def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
         """Get the list of supported chunking strategies for PPTX readers."""
         return [
             ChunkingStrategyType.DOCUMENT_CHUNKER,
@@ -35,7 +35,7 @@ class PPTXReader(Reader):
         ]
 
     @classmethod
-    def get_supported_content_types(
+    def get_supported_content_types(cls) -> List[ContentType]:
         return [ContentType.PPTX]
 
     def read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
```
agno/knowledge/reader/reader_factory.py

```diff
@@ -10,6 +10,70 @@ class ReaderFactory:
     # Cache for instantiated readers
     _reader_cache: Dict[str, Reader] = {}
 
+    # Static metadata for readers - avoids instantiation just to get metadata
+    READER_METADATA: Dict[str, Dict[str, str]] = {
+        "pdf": {
+            "name": "PdfReader",
+            "description": "Processes PDF documents with OCR support for images and text extraction",
+        },
+        "csv": {
+            "name": "CsvReader",
+            "description": "Parses CSV, XLSX, and XLS files with custom delimiter support",
+        },
+        "field_labeled_csv": {
+            "name": "FieldLabeledCsvReader",
+            "description": "Converts CSV rows to field-labeled text format for enhanced readability and context",
+        },
+        "docx": {
+            "name": "DocxReader",
+            "description": "Extracts text content from Microsoft Word documents (.docx and .doc formats)",
+        },
+        "pptx": {
+            "name": "PptxReader",
+            "description": "Extracts text content from Microsoft PowerPoint presentations (.pptx format)",
+        },
+        "json": {
+            "name": "JsonReader",
+            "description": "Processes JSON data structures and API responses with nested object handling",
+        },
+        "markdown": {
+            "name": "MarkdownReader",
+            "description": "Processes Markdown documentation with header-aware chunking and formatting preservation",
+        },
+        "text": {
+            "name": "TextReader",
+            "description": "Handles plain text files with customizable chunking strategies and encoding detection",
+        },
+        "website": {
+            "name": "WebsiteReader",
+            "description": "Scrapes and extracts content from web pages with HTML parsing and text cleaning",
+        },
+        "firecrawl": {
+            "name": "FirecrawlReader",
+            "description": "Advanced web scraping and crawling with JavaScript rendering and structured data extraction",
+        },
+        "tavily": {
+            "name": "TavilyReader",
+            "description": "Extracts content from URLs using Tavily's Extract API with markdown or text output",
+        },
+        "youtube": {
+            "name": "YouTubeReader",
+            "description": "Extracts transcripts and metadata from YouTube videos and playlists",
+        },
+        "arxiv": {
+            "name": "ArxivReader",
+            "description": "Downloads and processes academic papers from ArXiv with PDF parsing and metadata extraction",
+        },
+        "wikipedia": {
+            "name": "WikipediaReader",
+            "description": "Fetches and processes Wikipedia articles with section-aware chunking and link resolution",
+        },
+        "web_search": {
+            "name": "WebSearchReader",
+            "description": "Executes web searches and processes results with relevance ranking and content extraction",
+        },
+    }
+
     @classmethod
     def _get_pdf_reader(cls, **kwargs) -> Reader:
         """Get PDF reader instance."""
```
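Because `READER_METADATA` is a plain class attribute, callers can enumerate the available readers without importing any reader's optional dependencies:

```python
from agno.knowledge.reader.reader_factory import ReaderFactory

# Listing reader capabilities costs nothing but a dict iteration
for key, meta in ReaderFactory.READER_METADATA.items():
    print(f"{key:20s} {meta['name']:25s} {meta['description']}")
```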
agno/knowledge/reader/reader_factory.py

```diff
@@ -203,6 +267,52 @@
             raise ValueError(f"Unknown reader: {reader_key}")
         return getattr(cls, method_name)
 
+    @classmethod
+    def get_reader_class(cls, reader_key: str) -> type:
+        """Get the reader CLASS without instantiation.
+
+        This is useful for accessing class methods like get_supported_chunking_strategies()
+        without the overhead of creating an instance.
+
+        Args:
+            reader_key: The reader key (e.g., 'pdf', 'csv', 'markdown')
+
+        Returns:
+            The reader class (not an instance)
+
+        Raises:
+            ValueError: If the reader key is unknown
+            ImportError: If the reader's dependencies are not installed
+        """
+        # Map reader keys to their import paths
+        reader_class_map: Dict[str, tuple] = {
+            "pdf": ("agno.knowledge.reader.pdf_reader", "PDFReader"),
+            "csv": ("agno.knowledge.reader.csv_reader", "CSVReader"),
+            "field_labeled_csv": ("agno.knowledge.reader.field_labeled_csv_reader", "FieldLabeledCSVReader"),
+            "docx": ("agno.knowledge.reader.docx_reader", "DocxReader"),
+            "pptx": ("agno.knowledge.reader.pptx_reader", "PPTXReader"),
+            "json": ("agno.knowledge.reader.json_reader", "JSONReader"),
+            "markdown": ("agno.knowledge.reader.markdown_reader", "MarkdownReader"),
+            "text": ("agno.knowledge.reader.text_reader", "TextReader"),
+            "website": ("agno.knowledge.reader.website_reader", "WebsiteReader"),
+            "firecrawl": ("agno.knowledge.reader.firecrawl_reader", "FirecrawlReader"),
+            "tavily": ("agno.knowledge.reader.tavily_reader", "TavilyReader"),
+            "youtube": ("agno.knowledge.reader.youtube_reader", "YouTubeReader"),
+            "arxiv": ("agno.knowledge.reader.arxiv_reader", "ArxivReader"),
+            "wikipedia": ("agno.knowledge.reader.wikipedia_reader", "WikipediaReader"),
+            "web_search": ("agno.knowledge.reader.web_search_reader", "WebSearchReader"),
+        }
+
+        if reader_key not in reader_class_map:
+            raise ValueError(f"Unknown reader: {reader_key}")
+
+        module_path, class_name = reader_class_map[reader_key]
+
+        import importlib
+
+        module = importlib.import_module(module_path)
+        return getattr(module, class_name)
+
     @classmethod
     def create_reader(cls, reader_key: str, **kwargs) -> Reader:
         """Create a reader instance with the given key and optional overrides."""
```
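`get_reader_class` pairs with that metadata: it lazily imports the module and hands back the class itself, which is enough to query the classmethods fixed earlier in this release:

```python
from agno.knowledge.reader.reader_factory import ReaderFactory

# Imports agno.knowledge.reader.markdown_reader lazily, returns the class
MarkdownReader = ReaderFactory.get_reader_class("markdown")
print(MarkdownReader.get_supported_chunking_strategies())

# Unknown keys raise ValueError, per the docstring above
try:
    ReaderFactory.get_reader_class("parquet")
except ValueError as e:
    print(e)  # Unknown reader: parquet
```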
agno/knowledge/reader/reader_factory.py

```diff
@@ -225,7 +335,14 @@
 
         if extension in [".pdf", "application/pdf"]:
             return cls.create_reader("pdf")
-        elif extension in [
+        elif extension in [
+            ".csv",
+            ".xlsx",
+            ".xls",
+            "text/csv",
+            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+            "application/vnd.ms-excel",
+        ]:
             return cls.create_reader("csv")
         elif extension in [".docx", ".doc", "application/vnd.openxmlformats-officedocument.wordprocessingml.document"]:
             return cls.create_reader("docx")
```
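The spreadsheet branch now matches both file suffixes and MIME types. A standalone sketch of the dispatch table (the enclosing factory method's name is not shown in this hunk, so the function below is illustrative):

```python
SPREADSHEET_KEYS = {
    ".csv", ".xlsx", ".xls",
    "text/csv",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    "application/vnd.ms-excel",
}

def reader_key_for(extension: str) -> str:
    # Mirrors the patched branch: suffixes and MIME types share one route
    if extension in {".pdf", "application/pdf"}:
        return "pdf"
    if extension in SPREADSHEET_KEYS:
        return "csv"
    return "text"  # illustrative default; the real method handles more types

assert reader_key_for(".xlsx") == "csv"
assert reader_key_for("application/vnd.ms-excel") == "csv"
```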
agno/knowledge/reader/s3_reader.py

```diff
@@ -35,7 +35,7 @@ class S3Reader(Reader):
         super().__init__(chunking_strategy=chunking_strategy, **kwargs)
 
     @classmethod
-    def get_supported_chunking_strategies(
+    def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
         """Get the list of supported chunking strategies for S3 readers."""
         return [
             ChunkingStrategyType.CODE_CHUNKER,
@@ -47,7 +47,7 @@ class S3Reader(Reader):
         ]
 
     @classmethod
-    def get_supported_content_types(
+    def get_supported_content_types(cls) -> List[ContentType]:
         return [ContentType.FILE, ContentType.URL, ContentType.TEXT]
 
     def read(self, name: Optional[str], s3_object: S3Object) -> List[Document]:
```

agno/knowledge/reader/tavily_reader.py

```diff
@@ -62,7 +62,7 @@ class TavilyReader(Reader):
         self.extract_depth = extract_depth
 
     @classmethod
-    def get_supported_chunking_strategies(
+    def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
         """Get the list of supported chunking strategies for Tavily readers."""
         return [
             ChunkingStrategyType.CODE_CHUNKER,
@@ -74,7 +74,7 @@ class TavilyReader(Reader):
         ]
 
     @classmethod
-    def get_supported_content_types(
+    def get_supported_content_types(cls) -> List[ContentType]:
         return [ContentType.URL]
 
     def _extract(self, url: str, name: Optional[str] = None) -> List[Document]:
```

agno/knowledge/reader/text_reader.py

```diff
@@ -18,7 +18,7 @@ class TextReader(Reader):
         super().__init__(chunking_strategy=chunking_strategy, **kwargs)
 
     @classmethod
-    def get_supported_chunking_strategies(
+    def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
         """Get the list of supported chunking strategies for Text readers."""
         return [
             ChunkingStrategyType.CODE_CHUNKER,
@@ -30,7 +30,7 @@ class TextReader(Reader):
         ]
 
     @classmethod
-    def get_supported_content_types(
+    def get_supported_content_types(cls) -> List[ContentType]:
         return [ContentType.TXT]
 
     def read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
```

agno/knowledge/reader/web_search_reader.py

```diff
@@ -53,7 +53,7 @@ class WebSearchReader(Reader):
     chunking_strategy: Optional[ChunkingStrategy] = SemanticChunking()
 
     @classmethod
-    def get_supported_chunking_strategies(
+    def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
         """Get the list of supported chunking strategies for Web Search readers."""
         return [
             ChunkingStrategyType.CODE_CHUNKER,
@@ -65,7 +65,7 @@ class WebSearchReader(Reader):
         ]
 
     @classmethod
-    def get_supported_content_types(
+    def get_supported_content_types(cls) -> List[ContentType]:
         return [ContentType.TOPIC]
 
     def _respect_rate_limits(self):
```
agno/knowledge/reader/website_reader.py

```diff
@@ -49,7 +49,7 @@ class WebsiteReader(Reader):
         self._urls_to_crawl = []
 
     @classmethod
-    def get_supported_chunking_strategies(
+    def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
         """Get the list of supported chunking strategies for Website readers."""
         return [
             ChunkingStrategyType.CODE_CHUNKER,
@@ -61,7 +61,7 @@ class WebsiteReader(Reader):
         ]
 
     @classmethod
-    def get_supported_content_types(
+    def get_supported_content_types(cls) -> List[ContentType]:
         return [ContentType.URL]
 
     def delay(self, min_seconds=1, max_seconds=3):
@@ -428,7 +428,8 @@
                     meta_data={"url": str(crawled_url)},
                     content=crawled_content,
                 )
-
+                chunks = self.chunk_document(doc)
+                return chunks
             else:
                 return [
                     Document(
@@ -444,6 +445,7 @@
             process_document(crawled_url, crawled_content)
             for crawled_url, crawled_content in crawler_result.items()
         ]
+
        results = await asyncio.gather(*tasks)
 
        # Flatten the results
```
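With this change each crawled page is passed through the reader's configured chunking strategy instead of coming back as a single Document. A sketch of the behavior, assuming default constructor arguments (`chunk_document` itself appears in the hunk above; it comes from the Reader base class):

```python
from agno.knowledge.document.base import Document
from agno.knowledge.reader.website_reader import WebsiteReader

reader = WebsiteReader()  # constructor defaults assumed

# A long page body; crawled pages now pass through chunk_document,
# so one page can yield several Documents
doc = Document(name="example", meta_data={"url": "https://example.com"}, content="lorem ipsum " * 2000)
chunks = reader.chunk_document(doc)
print(len(chunks))
```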
agno/knowledge/reader/wikipedia_reader.py

```diff
@@ -24,7 +24,7 @@ class WikipediaReader(Reader):
         self.auto_suggest = auto_suggest
 
     @classmethod
-    def get_supported_chunking_strategies(
+    def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
         """Get the list of supported chunking strategies for Wikipedia readers."""
         return [
             ChunkingStrategyType.CODE_CHUNKER,
@@ -36,7 +36,7 @@ class WikipediaReader(Reader):
         ]
 
     @classmethod
-    def get_supported_content_types(
+    def get_supported_content_types(cls) -> List[ContentType]:
         return [ContentType.TOPIC]
 
     def read(self, topic: str) -> List[Document]:
```

agno/knowledge/reader/youtube_reader.py

```diff
@@ -23,7 +23,7 @@ class YouTubeReader(Reader):
         super().__init__(chunking_strategy=chunking_strategy, **kwargs)
 
     @classmethod
-    def get_supported_chunking_strategies(
+    def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
         """Get the list of supported chunking strategies for YouTube readers."""
         return [
             ChunkingStrategyType.RECURSIVE_CHUNKER,
@@ -35,7 +35,7 @@ class YouTubeReader(Reader):
         ]
 
     @classmethod
-    def get_supported_content_types(
+    def get_supported_content_types(cls) -> List[ContentType]:
         return [ContentType.YOUTUBE]
 
     def read(self, url: str, name: Optional[str] = None) -> List[Document]:
```
agno/knowledge/remote_content/__init__.py (new file)

```diff
@@ -0,0 +1,29 @@
+from agno.knowledge.remote_content.config import (
+    GcsConfig,
+    GitHubConfig,
+    RemoteContentConfig,
+    S3Config,
+    SharePointConfig,
+)
+from agno.knowledge.remote_content.remote_content import (
+    GCSContent,
+    GitHubContent,
+    RemoteContent,
+    S3Content,
+    SharePointContent,
+)
+
+__all__ = [
+    # Config classes
+    "RemoteContentConfig",
+    "S3Config",
+    "GcsConfig",
+    "SharePointConfig",
+    "GitHubConfig",
+    # Content classes
+    "RemoteContent",
+    "S3Content",
+    "GCSContent",
+    "SharePointContent",
+    "GitHubContent",
+]
```
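The new package root re-exports the config and content classes listed in `__all__`, so both families import from one place. Constructor arguments are not part of this diff, so instantiation is left out:

```python
# Import surface exactly as exposed by the new __init__.py
from agno.knowledge.remote_content import (
    GcsConfig,
    GCSContent,
    RemoteContentConfig,
    S3Config,
    S3Content,
)

print(S3Config.__module__)  # agno.knowledge.remote_content.config
```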