agno 2.3.26__py3-none-any.whl → 2.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140) hide show
  1. agno/agent/__init__.py +4 -0
  2. agno/agent/agent.py +1368 -541
  3. agno/agent/remote.py +13 -0
  4. agno/db/base.py +339 -0
  5. agno/db/postgres/async_postgres.py +116 -12
  6. agno/db/postgres/postgres.py +1242 -25
  7. agno/db/postgres/schemas.py +48 -1
  8. agno/db/sqlite/async_sqlite.py +119 -4
  9. agno/db/sqlite/schemas.py +51 -0
  10. agno/db/sqlite/sqlite.py +1186 -13
  11. agno/db/utils.py +37 -1
  12. agno/integrations/discord/client.py +12 -1
  13. agno/knowledge/__init__.py +4 -0
  14. agno/knowledge/chunking/code.py +1 -1
  15. agno/knowledge/chunking/semantic.py +1 -1
  16. agno/knowledge/chunking/strategy.py +4 -0
  17. agno/knowledge/filesystem.py +412 -0
  18. agno/knowledge/knowledge.py +3722 -2182
  19. agno/knowledge/protocol.py +134 -0
  20. agno/knowledge/reader/arxiv_reader.py +2 -2
  21. agno/knowledge/reader/base.py +9 -7
  22. agno/knowledge/reader/csv_reader.py +236 -13
  23. agno/knowledge/reader/docx_reader.py +2 -2
  24. agno/knowledge/reader/field_labeled_csv_reader.py +169 -5
  25. agno/knowledge/reader/firecrawl_reader.py +2 -2
  26. agno/knowledge/reader/json_reader.py +2 -2
  27. agno/knowledge/reader/markdown_reader.py +2 -2
  28. agno/knowledge/reader/pdf_reader.py +5 -4
  29. agno/knowledge/reader/pptx_reader.py +2 -2
  30. agno/knowledge/reader/reader_factory.py +118 -1
  31. agno/knowledge/reader/s3_reader.py +2 -2
  32. agno/knowledge/reader/tavily_reader.py +2 -2
  33. agno/knowledge/reader/text_reader.py +2 -2
  34. agno/knowledge/reader/web_search_reader.py +2 -2
  35. agno/knowledge/reader/website_reader.py +5 -3
  36. agno/knowledge/reader/wikipedia_reader.py +2 -2
  37. agno/knowledge/reader/youtube_reader.py +2 -2
  38. agno/knowledge/remote_content/__init__.py +29 -0
  39. agno/knowledge/remote_content/config.py +204 -0
  40. agno/knowledge/remote_content/remote_content.py +74 -17
  41. agno/knowledge/utils.py +37 -29
  42. agno/learn/__init__.py +6 -0
  43. agno/learn/machine.py +35 -0
  44. agno/learn/schemas.py +82 -11
  45. agno/learn/stores/__init__.py +3 -0
  46. agno/learn/stores/decision_log.py +1156 -0
  47. agno/learn/stores/learned_knowledge.py +6 -6
  48. agno/models/anthropic/claude.py +24 -0
  49. agno/models/aws/bedrock.py +20 -0
  50. agno/models/base.py +60 -6
  51. agno/models/cerebras/cerebras.py +34 -2
  52. agno/models/cohere/chat.py +25 -0
  53. agno/models/google/gemini.py +50 -5
  54. agno/models/litellm/chat.py +38 -0
  55. agno/models/n1n/__init__.py +3 -0
  56. agno/models/n1n/n1n.py +57 -0
  57. agno/models/openai/chat.py +25 -1
  58. agno/models/openrouter/openrouter.py +46 -0
  59. agno/models/perplexity/perplexity.py +2 -0
  60. agno/models/response.py +16 -0
  61. agno/os/app.py +83 -44
  62. agno/os/interfaces/slack/router.py +10 -1
  63. agno/os/interfaces/whatsapp/router.py +6 -0
  64. agno/os/middleware/__init__.py +2 -0
  65. agno/os/middleware/trailing_slash.py +27 -0
  66. agno/os/router.py +1 -0
  67. agno/os/routers/agents/router.py +29 -16
  68. agno/os/routers/agents/schema.py +6 -4
  69. agno/os/routers/components/__init__.py +3 -0
  70. agno/os/routers/components/components.py +475 -0
  71. agno/os/routers/evals/schemas.py +4 -3
  72. agno/os/routers/health.py +3 -3
  73. agno/os/routers/knowledge/knowledge.py +128 -3
  74. agno/os/routers/knowledge/schemas.py +12 -0
  75. agno/os/routers/memory/schemas.py +4 -2
  76. agno/os/routers/metrics/metrics.py +9 -11
  77. agno/os/routers/metrics/schemas.py +10 -6
  78. agno/os/routers/registry/__init__.py +3 -0
  79. agno/os/routers/registry/registry.py +337 -0
  80. agno/os/routers/teams/router.py +20 -8
  81. agno/os/routers/teams/schema.py +6 -4
  82. agno/os/routers/traces/traces.py +5 -5
  83. agno/os/routers/workflows/router.py +38 -11
  84. agno/os/routers/workflows/schema.py +1 -1
  85. agno/os/schema.py +92 -26
  86. agno/os/utils.py +84 -19
  87. agno/reasoning/anthropic.py +2 -2
  88. agno/reasoning/azure_ai_foundry.py +2 -2
  89. agno/reasoning/deepseek.py +2 -2
  90. agno/reasoning/default.py +6 -7
  91. agno/reasoning/gemini.py +2 -2
  92. agno/reasoning/helpers.py +6 -7
  93. agno/reasoning/manager.py +4 -10
  94. agno/reasoning/ollama.py +2 -2
  95. agno/reasoning/openai.py +2 -2
  96. agno/reasoning/vertexai.py +2 -2
  97. agno/registry/__init__.py +3 -0
  98. agno/registry/registry.py +68 -0
  99. agno/run/agent.py +59 -0
  100. agno/run/base.py +7 -0
  101. agno/run/team.py +57 -0
  102. agno/skills/agent_skills.py +10 -3
  103. agno/team/__init__.py +3 -1
  104. agno/team/team.py +1165 -330
  105. agno/tools/duckduckgo.py +25 -71
  106. agno/tools/exa.py +0 -21
  107. agno/tools/function.py +35 -83
  108. agno/tools/knowledge.py +9 -4
  109. agno/tools/mem0.py +11 -10
  110. agno/tools/memory.py +47 -46
  111. agno/tools/parallel.py +0 -7
  112. agno/tools/reasoning.py +30 -23
  113. agno/tools/tavily.py +4 -1
  114. agno/tools/websearch.py +93 -0
  115. agno/tools/website.py +1 -1
  116. agno/tools/wikipedia.py +1 -1
  117. agno/tools/workflow.py +48 -47
  118. agno/utils/agent.py +42 -5
  119. agno/utils/events.py +160 -2
  120. agno/utils/print_response/agent.py +0 -31
  121. agno/utils/print_response/team.py +0 -2
  122. agno/utils/print_response/workflow.py +0 -2
  123. agno/utils/team.py +61 -11
  124. agno/vectordb/lancedb/lance_db.py +4 -1
  125. agno/vectordb/mongodb/mongodb.py +1 -1
  126. agno/vectordb/pgvector/pgvector.py +3 -3
  127. agno/vectordb/qdrant/qdrant.py +4 -4
  128. agno/workflow/__init__.py +3 -1
  129. agno/workflow/condition.py +0 -21
  130. agno/workflow/loop.py +0 -21
  131. agno/workflow/parallel.py +0 -21
  132. agno/workflow/router.py +0 -21
  133. agno/workflow/step.py +117 -24
  134. agno/workflow/steps.py +0 -21
  135. agno/workflow/workflow.py +427 -63
  136. {agno-2.3.26.dist-info → agno-2.4.1.dist-info}/METADATA +49 -76
  137. {agno-2.3.26.dist-info → agno-2.4.1.dist-info}/RECORD +140 -126
  138. {agno-2.3.26.dist-info → agno-2.4.1.dist-info}/WHEEL +1 -1
  139. {agno-2.3.26.dist-info → agno-2.4.1.dist-info}/licenses/LICENSE +0 -0
  140. {agno-2.3.26.dist-info → agno-2.4.1.dist-info}/top_level.txt +0 -0
@@ -2,7 +2,7 @@ import asyncio
2
2
  import csv
3
3
  import io
4
4
  from pathlib import Path
5
- from typing import IO, Any, List, Optional, Union
5
+ from typing import IO, Any, Iterable, List, Optional, Sequence, Tuple, Union
6
6
 
7
7
  try:
8
8
  import aiofiles
@@ -12,6 +12,12 @@ except ImportError:
12
12
  from agno.knowledge.chunking.strategy import ChunkingStrategyType
13
13
  from agno.knowledge.document.base import Document
14
14
  from agno.knowledge.reader.base import Reader
15
+ from agno.knowledge.reader.csv_reader import (
16
+ _convert_xls_cell_value,
17
+ _get_workbook_name,
18
+ _infer_file_extension,
19
+ _stringify_spreadsheet_cell_value,
20
+ )
15
21
  from agno.knowledge.types import ContentType
16
22
  from agno.utils.log import log_debug, log_error, log_warning
17
23
 
@@ -84,7 +90,8 @@ class FieldLabeledCSVReader(Reader):
84
90
  lines.append(title)
85
91
 
86
92
  for i, (header, value) in enumerate(zip(headers, row)):
87
- clean_value = value.strip() if value else ""
93
+ # Normalize line endings before stripping to handle embedded newlines
94
+ clean_value = _stringify_spreadsheet_cell_value(value).strip() if value else ""
88
95
 
89
96
  if self.skip_empty_fields and not clean_value:
90
97
  continue
@@ -98,10 +105,155 @@ class FieldLabeledCSVReader(Reader):
98
105
 
99
106
  return "\n".join(lines)
100
107
 
108
+ def _excel_rows_to_field_labeled_documents(
109
+ self,
110
+ *,
111
+ workbook_name: str,
112
+ sheets: Iterable[Tuple[str, Iterable[Sequence[Any]]]],
113
+ ) -> List[Document]:
114
+ """Convert Excel rows to field-labeled documents (one document per data row).
115
+
116
+ For each sheet: first row = headers, subsequent rows = data.
117
+ Each data row becomes a Document with field-labeled content.
118
+ """
119
+ documents = []
120
+ global_row_index = 0
121
+
122
+ for sheet_index, (sheet_name, rows) in enumerate(sheets, start=1):
123
+ rows_list = list(rows)
124
+
125
+ if not rows_list:
126
+ log_debug(f"Sheet '{sheet_name}' is empty, skipping")
127
+ continue
128
+
129
+ # First row is headers
130
+ headers = [_stringify_spreadsheet_cell_value(h).strip() for h in rows_list[0]]
131
+ if not any(headers):
132
+ log_debug(f"Sheet '{sheet_name}' has no valid headers, skipping")
133
+ continue
134
+
135
+ data_rows = rows_list[1:]
136
+ if not data_rows:
137
+ log_debug(f"Sheet '{sheet_name}' has only headers, skipping")
138
+ continue
139
+
140
+ log_debug(f"Processing sheet '{sheet_name}' with {len(headers)} headers and {len(data_rows)} rows")
141
+
142
+ for row_in_sheet, row in enumerate(data_rows):
143
+ # Convert cell values to strings
144
+ str_row = [_stringify_spreadsheet_cell_value(v) for v in row]
145
+
146
+ # Normalize row length
147
+ normalized_row = str_row[: len(headers)]
148
+ while len(normalized_row) < len(headers):
149
+ normalized_row.append("")
150
+
151
+ # Skip entirely empty rows
152
+ if not any(v.strip() for v in normalized_row):
153
+ continue
154
+
155
+ labeled_text = self._convert_row_to_labeled_text(headers, normalized_row, global_row_index)
156
+
157
+ if labeled_text.strip():
158
+ doc_id = f"{workbook_name}_{sheet_name}_row_{row_in_sheet + 1}"
159
+ documents.append(
160
+ Document(
161
+ id=doc_id,
162
+ name=workbook_name,
163
+ meta_data={
164
+ "sheet_name": sheet_name,
165
+ "sheet_index": sheet_index,
166
+ "row_index": row_in_sheet,
167
+ "headers": headers,
168
+ "source": "field_labeled_csv_reader",
169
+ },
170
+ content=labeled_text,
171
+ )
172
+ )
173
+ global_row_index += 1
174
+
175
+ return documents
176
+
177
+ def _read_xlsx(self, file: Union[Path, IO[Any]], *, workbook_name: str) -> List[Document]:
178
+ """Read .xlsx file and convert rows to field-labeled documents."""
179
+ try:
180
+ import openpyxl # type: ignore
181
+ except ImportError as e:
182
+ raise ImportError(
183
+ "`openpyxl` not installed. Please install it via `pip install agno[csv]` or `pip install openpyxl`."
184
+ ) from e
185
+
186
+ if isinstance(file, Path):
187
+ workbook = openpyxl.load_workbook(filename=str(file), read_only=True, data_only=True)
188
+ else:
189
+ file.seek(0)
190
+ raw = file.read()
191
+ if isinstance(raw, str):
192
+ raw = raw.encode("utf-8", errors="replace")
193
+ workbook = openpyxl.load_workbook(filename=io.BytesIO(raw), read_only=True, data_only=True)
194
+
195
+ try:
196
+ return self._excel_rows_to_field_labeled_documents(
197
+ workbook_name=workbook_name,
198
+ sheets=[(worksheet.title, worksheet.iter_rows(values_only=True)) for worksheet in workbook.worksheets],
199
+ )
200
+ finally:
201
+ workbook.close()
202
+
203
+ def _read_xls(self, file: Union[Path, IO[Any]], *, workbook_name: str) -> List[Document]:
204
+ """Read .xls file and convert rows to field-labeled documents."""
205
+ try:
206
+ import xlrd # type: ignore
207
+ except ImportError as e:
208
+ raise ImportError(
209
+ "`xlrd` not installed. Please install it via `pip install agno[csv]` or `pip install xlrd`."
210
+ ) from e
211
+
212
+ if isinstance(file, Path):
213
+ workbook = xlrd.open_workbook(filename=str(file))
214
+ else:
215
+ file.seek(0)
216
+ raw = file.read()
217
+ if isinstance(raw, str):
218
+ raw = raw.encode("utf-8", errors="replace")
219
+ workbook = xlrd.open_workbook(file_contents=raw)
220
+
221
+ sheets: List[Tuple[str, Iterable[Sequence[Any]]]] = []
222
+ for sheet_index in range(workbook.nsheets):
223
+ sheet = workbook.sheet_by_index(sheet_index)
224
+
225
+ def _iter_sheet_rows(_sheet: Any = sheet, _datemode: int = workbook.datemode) -> Iterable[Sequence[Any]]:
226
+ for row_index in range(_sheet.nrows):
227
+ yield [
228
+ _convert_xls_cell_value(
229
+ _sheet.cell_value(row_index, col_index),
230
+ _sheet.cell_type(row_index, col_index),
231
+ _datemode,
232
+ )
233
+ for col_index in range(_sheet.ncols)
234
+ ]
235
+
236
+ sheets.append((sheet.name, _iter_sheet_rows()))
237
+
238
+ return self._excel_rows_to_field_labeled_documents(workbook_name=workbook_name, sheets=sheets)
239
+
101
240
  def read(
102
241
  self, file: Union[Path, IO[Any]], delimiter: str = ",", quotechar: str = '"', name: Optional[str] = None
103
242
  ) -> List[Document]:
104
243
  try:
244
+ file_extension = _infer_file_extension(file, name)
245
+
246
+ # Handle Excel files
247
+ if file_extension in {ContentType.XLSX, ContentType.XLS}:
248
+ workbook_name = _get_workbook_name(file, name)
249
+ log_debug(f"Reading Excel file: {workbook_name}{file_extension}")
250
+
251
+ if file_extension == ContentType.XLSX:
252
+ return self._read_xlsx(file, workbook_name=workbook_name)
253
+ else:
254
+ return self._read_xls(file, workbook_name=workbook_name)
255
+
256
+ # Handle CSV files
105
257
  if isinstance(file, Path):
106
258
  if not file.exists():
107
259
  raise FileNotFoundError(f"Could not find file: {file}")
@@ -114,7 +266,7 @@ class FieldLabeledCSVReader(Reader):
114
266
  log_debug(f"Reading retrieved file: {getattr(file, 'name', 'BytesIO')}")
115
267
  csv_name = name or getattr(file, "name", "csv_file").split(".")[0]
116
268
  file.seek(0)
117
- file_content = io.StringIO(file.read().decode("utf-8"))
269
+ file_content = io.StringIO(file.read().decode(self.encoding or "utf-8"))
118
270
 
119
271
  documents = []
120
272
 
@@ -179,7 +331,19 @@ class FieldLabeledCSVReader(Reader):
179
331
  name: Optional[str] = None,
180
332
  ) -> List[Document]:
181
333
  try:
182
- # Handle file input
334
+ file_extension = _infer_file_extension(file, name)
335
+
336
+ # Handle Excel files (use asyncio.to_thread for sync openpyxl/xlrd)
337
+ if file_extension in {ContentType.XLSX, ContentType.XLS}:
338
+ workbook_name = _get_workbook_name(file, name)
339
+ log_debug(f"Reading Excel file async: {workbook_name}{file_extension}")
340
+
341
+ if file_extension == ContentType.XLSX:
342
+ return await asyncio.to_thread(self._read_xlsx, file, workbook_name=workbook_name)
343
+ else:
344
+ return await asyncio.to_thread(self._read_xls, file, workbook_name=workbook_name)
345
+
346
+ # Handle CSV files
183
347
  if isinstance(file, Path):
184
348
  if not file.exists():
185
349
  raise FileNotFoundError(f"Could not find file: {file}")
@@ -192,7 +356,7 @@ class FieldLabeledCSVReader(Reader):
192
356
  log_debug(f"Reading retrieved file async: {getattr(file, 'name', 'BytesIO')}")
193
357
  csv_name = name or getattr(file, "name", "csv_file").split(".")[0]
194
358
  file.seek(0)
195
- file_content_io = io.StringIO(file.read().decode("utf-8"))
359
+ file_content_io = io.StringIO(file.read().decode(self.encoding or "utf-8"))
196
360
 
197
361
  file_content_io.seek(0)
198
362
  csv_reader = csv.reader(file_content_io, delimiter=delimiter, quotechar=quotechar)
@@ -43,7 +43,7 @@ class FirecrawlReader(Reader):
43
43
  self.mode = mode
44
44
 
45
45
  @classmethod
46
- def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:
46
+ def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
47
47
  """Get the list of supported chunking strategies for Firecrawl readers."""
48
48
  return [
49
49
  ChunkingStrategyType.CODE_CHUNKER,
@@ -55,7 +55,7 @@ class FirecrawlReader(Reader):
55
55
  ]
56
56
 
57
57
  @classmethod
58
- def get_supported_content_types(self) -> List[ContentType]:
58
+ def get_supported_content_types(cls) -> List[ContentType]:
59
59
  return [ContentType.URL]
60
60
 
61
61
  def scrape(self, url: str, name: Optional[str] = None) -> List[Document]:
@@ -21,7 +21,7 @@ class JSONReader(Reader):
21
21
  super().__init__(chunking_strategy=chunking_strategy, **kwargs)
22
22
 
23
23
  @classmethod
24
- def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:
24
+ def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
25
25
  """Get the list of supported chunking strategies for JSON readers."""
26
26
  return [
27
27
  ChunkingStrategyType.CODE_CHUNKER,
@@ -33,7 +33,7 @@ class JSONReader(Reader):
33
33
  ]
34
34
 
35
35
  @classmethod
36
- def get_supported_content_types(self) -> List[ContentType]:
36
+ def get_supported_content_types(cls) -> List[ContentType]:
37
37
  return [ContentType.JSON]
38
38
 
39
39
  def read(self, path: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
@@ -28,7 +28,7 @@ class MarkdownReader(Reader):
28
28
  """Reader for Markdown files"""
29
29
 
30
30
  @classmethod
31
- def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:
31
+ def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
32
32
  """Get the list of supported chunking strategies for Markdown readers."""
33
33
  strategies = [
34
34
  ChunkingStrategyType.CODE_CHUNKER,
@@ -46,7 +46,7 @@ class MarkdownReader(Reader):
46
46
  return strategies
47
47
 
48
48
  @classmethod
49
- def get_supported_content_types(self) -> List[ContentType]:
49
+ def get_supported_content_types(cls) -> List[ContentType]:
50
50
  return [ContentType.MARKDOWN]
51
51
 
52
52
  def __init__(
@@ -200,7 +200,7 @@ class BasePDFReader(Reader):
200
200
  super().__init__(chunking_strategy=chunking_strategy, **kwargs)
201
201
 
202
202
  @classmethod
203
- def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:
203
+ def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
204
204
  """Get the list of supported chunking strategies for PDF readers."""
205
205
  return [
206
206
  ChunkingStrategyType.DOCUMENT_CHUNKER,
@@ -232,8 +232,9 @@ class BasePDFReader(Reader):
232
232
  return True
233
233
 
234
234
  # Use provided password or fall back to instance password
235
- pdf_password = password or self.password
236
- if not pdf_password:
235
+ # Note: Empty string "" is a valid password for PDFs with blank user password
236
+ pdf_password = self.password if password is None else password
237
+ if pdf_password is None:
237
238
  log_error(f'PDF file "{doc_name}" is password protected but no password provided')
238
239
  return False
239
240
 
@@ -335,7 +336,7 @@ class PDFReader(BasePDFReader):
335
336
  """Reader for PDF files"""
336
337
 
337
338
  @classmethod
338
- def get_supported_content_types(self) -> List[ContentType]:
339
+ def get_supported_content_types(cls) -> List[ContentType]:
339
340
  return [ContentType.PDF]
340
341
 
341
342
  def read(
@@ -23,7 +23,7 @@ class PPTXReader(Reader):
23
23
  super().__init__(chunking_strategy=chunking_strategy, **kwargs)
24
24
 
25
25
  @classmethod
26
- def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:
26
+ def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
27
27
  """Get the list of supported chunking strategies for PPTX readers."""
28
28
  return [
29
29
  ChunkingStrategyType.DOCUMENT_CHUNKER,
@@ -35,7 +35,7 @@ class PPTXReader(Reader):
35
35
  ]
36
36
 
37
37
  @classmethod
38
- def get_supported_content_types(self) -> List[ContentType]:
38
+ def get_supported_content_types(cls) -> List[ContentType]:
39
39
  return [ContentType.PPTX]
40
40
 
41
41
  def read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
@@ -10,6 +10,70 @@ class ReaderFactory:
10
10
  # Cache for instantiated readers
11
11
  _reader_cache: Dict[str, Reader] = {}
12
12
 
13
+ # Static metadata for readers - avoids instantiation just to get metadata
14
+ READER_METADATA: Dict[str, Dict[str, str]] = {
15
+ "pdf": {
16
+ "name": "PdfReader",
17
+ "description": "Processes PDF documents with OCR support for images and text extraction",
18
+ },
19
+ "csv": {
20
+ "name": "CsvReader",
21
+ "description": "Parses CSV, XLSX, and XLS files with custom delimiter support",
22
+ },
23
+ "field_labeled_csv": {
24
+ "name": "FieldLabeledCsvReader",
25
+ "description": "Converts CSV rows to field-labeled text format for enhanced readability and context",
26
+ },
27
+ "docx": {
28
+ "name": "DocxReader",
29
+ "description": "Extracts text content from Microsoft Word documents (.docx and .doc formats)",
30
+ },
31
+ "pptx": {
32
+ "name": "PptxReader",
33
+ "description": "Extracts text content from Microsoft PowerPoint presentations (.pptx format)",
34
+ },
35
+ "json": {
36
+ "name": "JsonReader",
37
+ "description": "Processes JSON data structures and API responses with nested object handling",
38
+ },
39
+ "markdown": {
40
+ "name": "MarkdownReader",
41
+ "description": "Processes Markdown documentation with header-aware chunking and formatting preservation",
42
+ },
43
+ "text": {
44
+ "name": "TextReader",
45
+ "description": "Handles plain text files with customizable chunking strategies and encoding detection",
46
+ },
47
+ "website": {
48
+ "name": "WebsiteReader",
49
+ "description": "Scrapes and extracts content from web pages with HTML parsing and text cleaning",
50
+ },
51
+ "firecrawl": {
52
+ "name": "FirecrawlReader",
53
+ "description": "Advanced web scraping and crawling with JavaScript rendering and structured data extraction",
54
+ },
55
+ "tavily": {
56
+ "name": "TavilyReader",
57
+ "description": "Extracts content from URLs using Tavily's Extract API with markdown or text output",
58
+ },
59
+ "youtube": {
60
+ "name": "YouTubeReader",
61
+ "description": "Extracts transcripts and metadata from YouTube videos and playlists",
62
+ },
63
+ "arxiv": {
64
+ "name": "ArxivReader",
65
+ "description": "Downloads and processes academic papers from ArXiv with PDF parsing and metadata extraction",
66
+ },
67
+ "wikipedia": {
68
+ "name": "WikipediaReader",
69
+ "description": "Fetches and processes Wikipedia articles with section-aware chunking and link resolution",
70
+ },
71
+ "web_search": {
72
+ "name": "WebSearchReader",
73
+ "description": "Executes web searches and processes results with relevance ranking and content extraction",
74
+ },
75
+ }
76
+
13
77
  @classmethod
14
78
  def _get_pdf_reader(cls, **kwargs) -> Reader:
15
79
  """Get PDF reader instance."""
@@ -203,6 +267,52 @@ class ReaderFactory:
203
267
  raise ValueError(f"Unknown reader: {reader_key}")
204
268
  return getattr(cls, method_name)
205
269
 
270
+ @classmethod
271
+ def get_reader_class(cls, reader_key: str) -> type:
272
+ """Get the reader CLASS without instantiation.
273
+
274
+ This is useful for accessing class methods like get_supported_chunking_strategies()
275
+ without the overhead of creating an instance.
276
+
277
+ Args:
278
+ reader_key: The reader key (e.g., 'pdf', 'csv', 'markdown')
279
+
280
+ Returns:
281
+ The reader class (not an instance)
282
+
283
+ Raises:
284
+ ValueError: If the reader key is unknown
285
+ ImportError: If the reader's dependencies are not installed
286
+ """
287
+ # Map reader keys to their import paths
288
+ reader_class_map: Dict[str, tuple] = {
289
+ "pdf": ("agno.knowledge.reader.pdf_reader", "PDFReader"),
290
+ "csv": ("agno.knowledge.reader.csv_reader", "CSVReader"),
291
+ "field_labeled_csv": ("agno.knowledge.reader.field_labeled_csv_reader", "FieldLabeledCSVReader"),
292
+ "docx": ("agno.knowledge.reader.docx_reader", "DocxReader"),
293
+ "pptx": ("agno.knowledge.reader.pptx_reader", "PPTXReader"),
294
+ "json": ("agno.knowledge.reader.json_reader", "JSONReader"),
295
+ "markdown": ("agno.knowledge.reader.markdown_reader", "MarkdownReader"),
296
+ "text": ("agno.knowledge.reader.text_reader", "TextReader"),
297
+ "website": ("agno.knowledge.reader.website_reader", "WebsiteReader"),
298
+ "firecrawl": ("agno.knowledge.reader.firecrawl_reader", "FirecrawlReader"),
299
+ "tavily": ("agno.knowledge.reader.tavily_reader", "TavilyReader"),
300
+ "youtube": ("agno.knowledge.reader.youtube_reader", "YouTubeReader"),
301
+ "arxiv": ("agno.knowledge.reader.arxiv_reader", "ArxivReader"),
302
+ "wikipedia": ("agno.knowledge.reader.wikipedia_reader", "WikipediaReader"),
303
+ "web_search": ("agno.knowledge.reader.web_search_reader", "WebSearchReader"),
304
+ }
305
+
306
+ if reader_key not in reader_class_map:
307
+ raise ValueError(f"Unknown reader: {reader_key}")
308
+
309
+ module_path, class_name = reader_class_map[reader_key]
310
+
311
+ import importlib
312
+
313
+ module = importlib.import_module(module_path)
314
+ return getattr(module, class_name)
315
+
206
316
  @classmethod
207
317
  def create_reader(cls, reader_key: str, **kwargs) -> Reader:
208
318
  """Create a reader instance with the given key and optional overrides."""
@@ -225,7 +335,14 @@ class ReaderFactory:
225
335
 
226
336
  if extension in [".pdf", "application/pdf"]:
227
337
  return cls.create_reader("pdf")
228
- elif extension in [".csv", "text/csv"]:
338
+ elif extension in [
339
+ ".csv",
340
+ ".xlsx",
341
+ ".xls",
342
+ "text/csv",
343
+ "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
344
+ "application/vnd.ms-excel",
345
+ ]:
229
346
  return cls.create_reader("csv")
230
347
  elif extension in [".docx", ".doc", "application/vnd.openxmlformats-officedocument.wordprocessingml.document"]:
231
348
  return cls.create_reader("docx")
@@ -35,7 +35,7 @@ class S3Reader(Reader):
35
35
  super().__init__(chunking_strategy=chunking_strategy, **kwargs)
36
36
 
37
37
  @classmethod
38
- def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:
38
+ def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
39
39
  """Get the list of supported chunking strategies for S3 readers."""
40
40
  return [
41
41
  ChunkingStrategyType.CODE_CHUNKER,
@@ -47,7 +47,7 @@ class S3Reader(Reader):
47
47
  ]
48
48
 
49
49
  @classmethod
50
- def get_supported_content_types(self) -> List[ContentType]:
50
+ def get_supported_content_types(cls) -> List[ContentType]:
51
51
  return [ContentType.FILE, ContentType.URL, ContentType.TEXT]
52
52
 
53
53
  def read(self, name: Optional[str], s3_object: S3Object) -> List[Document]:
@@ -62,7 +62,7 @@ class TavilyReader(Reader):
62
62
  self.extract_depth = extract_depth
63
63
 
64
64
  @classmethod
65
- def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:
65
+ def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
66
66
  """Get the list of supported chunking strategies for Tavily readers."""
67
67
  return [
68
68
  ChunkingStrategyType.CODE_CHUNKER,
@@ -74,7 +74,7 @@ class TavilyReader(Reader):
74
74
  ]
75
75
 
76
76
  @classmethod
77
- def get_supported_content_types(self) -> List[ContentType]:
77
+ def get_supported_content_types(cls) -> List[ContentType]:
78
78
  return [ContentType.URL]
79
79
 
80
80
  def _extract(self, url: str, name: Optional[str] = None) -> List[Document]:
@@ -18,7 +18,7 @@ class TextReader(Reader):
18
18
  super().__init__(chunking_strategy=chunking_strategy, **kwargs)
19
19
 
20
20
  @classmethod
21
- def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:
21
+ def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
22
22
  """Get the list of supported chunking strategies for Text readers."""
23
23
  return [
24
24
  ChunkingStrategyType.CODE_CHUNKER,
@@ -30,7 +30,7 @@ class TextReader(Reader):
30
30
  ]
31
31
 
32
32
  @classmethod
33
- def get_supported_content_types(self) -> List[ContentType]:
33
+ def get_supported_content_types(cls) -> List[ContentType]:
34
34
  return [ContentType.TXT]
35
35
 
36
36
  def read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
@@ -53,7 +53,7 @@ class WebSearchReader(Reader):
53
53
  chunking_strategy: Optional[ChunkingStrategy] = SemanticChunking()
54
54
 
55
55
  @classmethod
56
- def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:
56
+ def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
57
57
  """Get the list of supported chunking strategies for Web Search readers."""
58
58
  return [
59
59
  ChunkingStrategyType.CODE_CHUNKER,
@@ -65,7 +65,7 @@ class WebSearchReader(Reader):
65
65
  ]
66
66
 
67
67
  @classmethod
68
- def get_supported_content_types(self) -> List[ContentType]:
68
+ def get_supported_content_types(cls) -> List[ContentType]:
69
69
  return [ContentType.TOPIC]
70
70
 
71
71
  def _respect_rate_limits(self):
@@ -49,7 +49,7 @@ class WebsiteReader(Reader):
49
49
  self._urls_to_crawl = []
50
50
 
51
51
  @classmethod
52
- def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:
52
+ def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
53
53
  """Get the list of supported chunking strategies for Website readers."""
54
54
  return [
55
55
  ChunkingStrategyType.CODE_CHUNKER,
@@ -61,7 +61,7 @@ class WebsiteReader(Reader):
61
61
  ]
62
62
 
63
63
  @classmethod
64
- def get_supported_content_types(self) -> List[ContentType]:
64
+ def get_supported_content_types(cls) -> List[ContentType]:
65
65
  return [ContentType.URL]
66
66
 
67
67
  def delay(self, min_seconds=1, max_seconds=3):
@@ -428,7 +428,8 @@ class WebsiteReader(Reader):
428
428
  meta_data={"url": str(crawled_url)},
429
429
  content=crawled_content,
430
430
  )
431
- return self.chunk_document(doc)
431
+ chunks = self.chunk_document(doc)
432
+ return chunks
432
433
  else:
433
434
  return [
434
435
  Document(
@@ -444,6 +445,7 @@ class WebsiteReader(Reader):
444
445
  process_document(crawled_url, crawled_content)
445
446
  for crawled_url, crawled_content in crawler_result.items()
446
447
  ]
448
+
447
449
  results = await asyncio.gather(*tasks)
448
450
 
449
451
  # Flatten the results
@@ -24,7 +24,7 @@ class WikipediaReader(Reader):
24
24
  self.auto_suggest = auto_suggest
25
25
 
26
26
  @classmethod
27
- def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:
27
+ def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
28
28
  """Get the list of supported chunking strategies for Wikipedia readers."""
29
29
  return [
30
30
  ChunkingStrategyType.CODE_CHUNKER,
@@ -36,7 +36,7 @@ class WikipediaReader(Reader):
36
36
  ]
37
37
 
38
38
  @classmethod
39
- def get_supported_content_types(self) -> List[ContentType]:
39
+ def get_supported_content_types(cls) -> List[ContentType]:
40
40
  return [ContentType.TOPIC]
41
41
 
42
42
  def read(self, topic: str) -> List[Document]:
@@ -23,7 +23,7 @@ class YouTubeReader(Reader):
23
23
  super().__init__(chunking_strategy=chunking_strategy, **kwargs)
24
24
 
25
25
  @classmethod
26
- def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:
26
+ def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
27
27
  """Get the list of supported chunking strategies for YouTube readers."""
28
28
  return [
29
29
  ChunkingStrategyType.RECURSIVE_CHUNKER,
@@ -35,7 +35,7 @@ class YouTubeReader(Reader):
35
35
  ]
36
36
 
37
37
  @classmethod
38
- def get_supported_content_types(self) -> List[ContentType]:
38
+ def get_supported_content_types(cls) -> List[ContentType]:
39
39
  return [ContentType.YOUTUBE]
40
40
 
41
41
  def read(self, url: str, name: Optional[str] = None) -> List[Document]:
@@ -0,0 +1,29 @@
1
+ from agno.knowledge.remote_content.config import (
2
+ GcsConfig,
3
+ GitHubConfig,
4
+ RemoteContentConfig,
5
+ S3Config,
6
+ SharePointConfig,
7
+ )
8
+ from agno.knowledge.remote_content.remote_content import (
9
+ GCSContent,
10
+ GitHubContent,
11
+ RemoteContent,
12
+ S3Content,
13
+ SharePointContent,
14
+ )
15
+
16
+ __all__ = [
17
+ # Config classes
18
+ "RemoteContentConfig",
19
+ "S3Config",
20
+ "GcsConfig",
21
+ "SharePointConfig",
22
+ "GitHubConfig",
23
+ # Content classes
24
+ "RemoteContent",
25
+ "S3Content",
26
+ "GCSContent",
27
+ "SharePointContent",
28
+ "GitHubContent",
29
+ ]