agno 2.0.0rc1__py3-none-any.whl → 2.0.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. agno/agent/agent.py +32 -14
  2. agno/db/mongo/mongo.py +8 -3
  3. agno/eval/accuracy.py +12 -5
  4. agno/knowledge/chunking/strategy.py +14 -14
  5. agno/knowledge/knowledge.py +156 -120
  6. agno/knowledge/reader/arxiv_reader.py +5 -5
  7. agno/knowledge/reader/csv_reader.py +6 -77
  8. agno/knowledge/reader/docx_reader.py +5 -5
  9. agno/knowledge/reader/firecrawl_reader.py +5 -5
  10. agno/knowledge/reader/json_reader.py +5 -5
  11. agno/knowledge/reader/markdown_reader.py +31 -9
  12. agno/knowledge/reader/pdf_reader.py +10 -123
  13. agno/knowledge/reader/reader_factory.py +65 -72
  14. agno/knowledge/reader/s3_reader.py +44 -114
  15. agno/knowledge/reader/text_reader.py +5 -5
  16. agno/knowledge/reader/url_reader.py +75 -31
  17. agno/knowledge/reader/web_search_reader.py +6 -29
  18. agno/knowledge/reader/website_reader.py +5 -5
  19. agno/knowledge/reader/wikipedia_reader.py +5 -5
  20. agno/knowledge/reader/youtube_reader.py +6 -6
  21. agno/knowledge/utils.py +10 -10
  22. agno/models/aws/bedrock.py +3 -7
  23. agno/models/base.py +37 -6
  24. agno/os/app.py +32 -24
  25. agno/os/mcp.py +39 -59
  26. agno/os/router.py +547 -16
  27. agno/os/routers/evals/evals.py +197 -12
  28. agno/os/routers/knowledge/knowledge.py +428 -14
  29. agno/os/routers/memory/memory.py +250 -28
  30. agno/os/routers/metrics/metrics.py +125 -7
  31. agno/os/routers/session/session.py +393 -25
  32. agno/os/schema.py +55 -2
  33. agno/run/agent.py +9 -0
  34. agno/run/team.py +93 -2
  35. agno/run/workflow.py +25 -12
  36. agno/team/team.py +861 -1051
  37. agno/tools/mcp.py +1 -2
  38. agno/utils/log.py +52 -2
  39. agno/utils/mcp.py +55 -3
  40. agno/utils/models/claude.py +0 -8
  41. agno/utils/print_response/team.py +177 -73
  42. agno/utils/streamlit.py +27 -0
  43. agno/workflow/workflow.py +9 -0
  44. {agno-2.0.0rc1.dist-info → agno-2.0.0rc2.dist-info}/METADATA +1 -1
  45. {agno-2.0.0rc1.dist-info → agno-2.0.0rc2.dist-info}/RECORD +48 -49
  46. agno/knowledge/reader/gcs_reader.py +0 -67
  47. {agno-2.0.0rc1.dist-info → agno-2.0.0rc2.dist-info}/WHEEL +0 -0
  48. {agno-2.0.0rc1.dist-info → agno-2.0.0rc2.dist-info}/licenses/LICENSE +0 -0
  49. {agno-2.0.0rc1.dist-info → agno-2.0.0rc2.dist-info}/top_level.txt +0 -0
@@ -1,14 +1,10 @@
1
1
  import asyncio
2
2
  import csv
3
3
  import io
4
- import os
5
4
  from pathlib import Path
6
5
  from typing import IO, Any, List, Optional, Union
7
- from urllib.parse import urlparse
8
6
  from uuid import uuid4
9
7
 
10
- from agno.utils.http import async_fetch_with_retry, fetch_with_retry
11
-
12
8
  try:
13
9
  import aiofiles
14
10
  except ImportError:
@@ -32,16 +28,16 @@ class CSVReader(Reader):
32
28
  def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:
33
29
  """Get the list of supported chunking strategies for CSV readers."""
34
30
  return [
35
- ChunkingStrategyType.ROW_CHUNKING,
36
- ChunkingStrategyType.FIXED_SIZE_CHUNKING,
37
- ChunkingStrategyType.AGENTIC_CHUNKING,
38
- ChunkingStrategyType.DOCUMENT_CHUNKING,
39
- ChunkingStrategyType.RECURSIVE_CHUNKING,
31
+ ChunkingStrategyType.ROW_CHUNKER,
32
+ ChunkingStrategyType.FIXED_SIZE_CHUNKER,
33
+ ChunkingStrategyType.AGENTIC_CHUNKER,
34
+ ChunkingStrategyType.DOCUMENT_CHUNKER,
35
+ ChunkingStrategyType.RECURSIVE_CHUNKER,
40
36
  ]
41
37
 
42
38
  @classmethod
43
39
  def get_supported_content_types(self) -> List[ContentType]:
44
- return [ContentType.FILE, ContentType.URL, ContentType.CSV, ContentType.XLSX, ContentType.XLS]
40
+ return [ContentType.CSV, ContentType.XLSX, ContentType.XLS]
45
41
 
46
42
  def read(
47
43
  self, file: Union[Path, IO[Any]], delimiter: str = ",", quotechar: str = '"', name: Optional[str] = None
@@ -168,70 +164,3 @@ class CSVReader(Reader):
168
164
  f"Error reading async: {getattr(file, 'name', str(file)) if isinstance(file, IO) else file}: {e}"
169
165
  )
170
166
  return []
171
-
172
-
173
- class CSVUrlReader(Reader):
174
- """Reader for CSV files"""
175
-
176
- def __init__(
177
- self, chunking_strategy: Optional[ChunkingStrategy] = RowChunking(), proxy: Optional[str] = None, **kwargs
178
- ):
179
- super().__init__(chunking_strategy=chunking_strategy, **kwargs)
180
- self.proxy = proxy
181
-
182
- def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:
183
- """Get the list of supported chunking strategies for CSV URL readers."""
184
- return [
185
- ChunkingStrategyType.ROW_CHUNKING,
186
- ChunkingStrategyType.SEMANTIC_CHUNKING,
187
- ChunkingStrategyType.FIXED_SIZE_CHUNKING,
188
- ChunkingStrategyType.AGENTIC_CHUNKING,
189
- ChunkingStrategyType.DOCUMENT_CHUNKING,
190
- ]
191
-
192
- def get_supported_content_types(self) -> List[ContentType]:
193
- return [ContentType.URL]
194
-
195
- def read(self, url: str, name: Optional[str] = None) -> List[Document]:
196
- if not url:
197
- raise ValueError("No URL provided")
198
-
199
- logger.info(f"Reading: {url}")
200
- # Retry the request up to 3 times with exponential backoff
201
- response = fetch_with_retry(url, proxy=self.proxy)
202
-
203
- parsed_url = urlparse(url)
204
- filename = os.path.basename(parsed_url.path) or "data.csv"
205
-
206
- file_obj = io.BytesIO(response.content)
207
- file_obj.name = filename
208
- documents = CSVReader().read(file=file_obj, name=name)
209
-
210
- file_obj.close()
211
-
212
- return documents
213
-
214
- async def async_read(self, url: str, name: Optional[str] = None) -> List[Document]:
215
- if not url:
216
- raise ValueError("No URL provided")
217
-
218
- import httpx
219
-
220
- logger.info(f"Reading async: {url}")
221
-
222
- client_args = {"proxy": self.proxy} if self.proxy else {}
223
- async with httpx.AsyncClient(**client_args) as client: # type: ignore
224
- response = await async_fetch_with_retry(url, client=client)
225
-
226
- parsed_url = urlparse(url)
227
- filename = os.path.basename(parsed_url.path) or "data.csv"
228
-
229
- file_obj = io.BytesIO(response.content)
230
- file_obj.name = filename
231
-
232
- # Use the async version of CSVReader
233
- documents = await CSVReader().async_read(file=file_obj, name=name)
234
-
235
- file_obj.close()
236
-
237
- return documents
@@ -26,11 +26,11 @@ class DocxReader(Reader):
26
26
  def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:
27
27
  """Get the list of supported chunking strategies for DOCX readers."""
28
28
  return [
29
- ChunkingStrategyType.DOCUMENT_CHUNKING,
30
- ChunkingStrategyType.FIXED_SIZE_CHUNKING,
31
- ChunkingStrategyType.SEMANTIC_CHUNKING,
32
- ChunkingStrategyType.AGENTIC_CHUNKING,
33
- ChunkingStrategyType.RECURSIVE_CHUNKING,
29
+ ChunkingStrategyType.DOCUMENT_CHUNKER,
30
+ ChunkingStrategyType.FIXED_SIZE_CHUNKER,
31
+ ChunkingStrategyType.SEMANTIC_CHUNKER,
32
+ ChunkingStrategyType.AGENTIC_CHUNKER,
33
+ ChunkingStrategyType.RECURSIVE_CHUNKER,
34
34
  ]
35
35
 
36
36
  @classmethod
@@ -46,11 +46,11 @@ class FirecrawlReader(Reader):
46
46
  def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:
47
47
  """Get the list of supported chunking strategies for Firecrawl readers."""
48
48
  return [
49
- ChunkingStrategyType.SEMANTIC_CHUNKING,
50
- ChunkingStrategyType.FIXED_SIZE_CHUNKING,
51
- ChunkingStrategyType.AGENTIC_CHUNKING,
52
- ChunkingStrategyType.DOCUMENT_CHUNKING,
53
- ChunkingStrategyType.RECURSIVE_CHUNKING,
49
+ ChunkingStrategyType.SEMANTIC_CHUNKER,
50
+ ChunkingStrategyType.FIXED_SIZE_CHUNKER,
51
+ ChunkingStrategyType.AGENTIC_CHUNKER,
52
+ ChunkingStrategyType.DOCUMENT_CHUNKER,
53
+ ChunkingStrategyType.RECURSIVE_CHUNKER,
54
54
  ]
55
55
 
56
56
  @classmethod
@@ -25,11 +25,11 @@ class JSONReader(Reader):
25
25
  def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:
26
26
  """Get the list of supported chunking strategies for JSON readers."""
27
27
  return [
28
- ChunkingStrategyType.FIXED_SIZE_CHUNKING,
29
- ChunkingStrategyType.AGENTIC_CHUNKING,
30
- ChunkingStrategyType.DOCUMENT_CHUNKING,
31
- ChunkingStrategyType.RECURSIVE_CHUNKING,
32
- ChunkingStrategyType.SEMANTIC_CHUNKING,
28
+ ChunkingStrategyType.FIXED_SIZE_CHUNKER,
29
+ ChunkingStrategyType.AGENTIC_CHUNKER,
30
+ ChunkingStrategyType.DOCUMENT_CHUNKER,
31
+ ChunkingStrategyType.RECURSIVE_CHUNKER,
32
+ ChunkingStrategyType.SEMANTIC_CHUNKER,
33
33
  ]
34
34
 
35
35
  @classmethod
@@ -3,13 +3,26 @@ import uuid
3
3
  from pathlib import Path
4
4
  from typing import IO, Any, List, Optional, Union
5
5
 
6
- from agno.knowledge.chunking.markdown import MarkdownChunking
7
6
  from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
8
7
  from agno.knowledge.document.base import Document
9
8
  from agno.knowledge.reader.base import Reader
10
9
  from agno.knowledge.types import ContentType
11
10
  from agno.utils.log import log_info, logger
12
11
 
12
+ DEFAULT_CHUNKER_STRATEGY: ChunkingStrategy
13
+
14
+ # Try to import MarkdownChunking, fallback to FixedSizeChunking if not available
15
+ try:
16
+ from agno.knowledge.chunking.markdown import MarkdownChunking
17
+
18
+ DEFAULT_CHUNKER_STRATEGY = MarkdownChunking()
19
+ MARKDOWN_CHUNKER_AVAILABLE = True
20
+ except ImportError:
21
+ from agno.knowledge.chunking.fixed import FixedSizeChunking
22
+
23
+ DEFAULT_CHUNKER_STRATEGY = FixedSizeChunking()
24
+ MARKDOWN_CHUNKER_AVAILABLE = False
25
+
13
26
 
14
27
  class MarkdownReader(Reader):
15
28
  """Reader for Markdown files"""
@@ -17,25 +30,34 @@ class MarkdownReader(Reader):
17
30
  @classmethod
18
31
  def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:
19
32
  """Get the list of supported chunking strategies for Markdown readers."""
20
- return [
21
- ChunkingStrategyType.MARKDOWN_CHUNKING,
22
- ChunkingStrategyType.AGENTIC_CHUNKING,
23
- ChunkingStrategyType.DOCUMENT_CHUNKING,
24
- ChunkingStrategyType.RECURSIVE_CHUNKING,
25
- ChunkingStrategyType.SEMANTIC_CHUNKING,
26
- ChunkingStrategyType.FIXED_SIZE_CHUNKING,
33
+ strategies = [
34
+ ChunkingStrategyType.DOCUMENT_CHUNKER,
35
+ ChunkingStrategyType.AGENTIC_CHUNKER,
36
+ ChunkingStrategyType.RECURSIVE_CHUNKER,
37
+ ChunkingStrategyType.SEMANTIC_CHUNKER,
38
+ ChunkingStrategyType.FIXED_SIZE_CHUNKER,
27
39
  ]
28
40
 
41
+ # Only include MarkdownChunking if it's available
42
+ if MARKDOWN_CHUNKER_AVAILABLE:
43
+ strategies.insert(0, ChunkingStrategyType.MARKDOWN_CHUNKER)
44
+
45
+ return strategies
46
+
29
47
  @classmethod
30
48
  def get_supported_content_types(self) -> List[ContentType]:
31
49
  return [ContentType.MARKDOWN]
32
50
 
33
51
  def __init__(
34
52
  self,
35
- chunking_strategy: Optional[ChunkingStrategy] = MarkdownChunking(),
53
+ chunking_strategy: Optional[ChunkingStrategy] = None,
36
54
  name: Optional[str] = None,
37
55
  description: Optional[str] = None,
38
56
  ) -> None:
57
+ # Use the default chunking strategy if none provided
58
+ if chunking_strategy is None:
59
+ chunking_strategy = DEFAULT_CHUNKER_STRATEGY
60
+
39
61
  super().__init__(chunking_strategy=chunking_strategy, name=name, description=description)
40
62
 
41
63
  def read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
@@ -8,7 +8,6 @@ from agno.knowledge.chunking.strategy import ChunkingStrategyType
8
8
  from agno.knowledge.document.base import Document
9
9
  from agno.knowledge.reader.base import Reader
10
10
  from agno.knowledge.types import ContentType
11
- from agno.utils.http import async_fetch_with_retry, fetch_with_retry
12
11
  from agno.utils.log import log_error, log_info, logger
13
12
 
14
13
  try:
@@ -202,11 +201,11 @@ class BasePDFReader(Reader):
202
201
  def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:
203
202
  """Get the list of supported chunking strategies for PDF readers."""
204
203
  return [
205
- ChunkingStrategyType.DOCUMENT_CHUNKING,
206
- ChunkingStrategyType.FIXED_SIZE_CHUNKING,
207
- ChunkingStrategyType.AGENTIC_CHUNKING,
208
- ChunkingStrategyType.SEMANTIC_CHUNKING,
209
- ChunkingStrategyType.RECURSIVE_CHUNKING,
204
+ ChunkingStrategyType.DOCUMENT_CHUNKER,
205
+ ChunkingStrategyType.FIXED_SIZE_CHUNKER,
206
+ ChunkingStrategyType.AGENTIC_CHUNKER,
207
+ ChunkingStrategyType.SEMANTIC_CHUNKER,
208
+ ChunkingStrategyType.RECURSIVE_CHUNKER,
210
209
  ]
211
210
 
212
211
  def _build_chunked_documents(self, documents: List[Document]) -> List[Document]:
@@ -222,19 +221,19 @@ class BasePDFReader(Reader):
222
221
  # Use provided password or fall back to instance password
223
222
  pdf_password = password or self.password
224
223
  if not pdf_password:
225
- logger.error(f"PDF {doc_name} is password protected but no password provided")
224
+ logger.error(f'PDF file "{doc_name}" is password protected but no password provided')
226
225
  return False
227
226
 
228
227
  try:
229
228
  decrypted_pdf = doc_reader.decrypt(pdf_password)
230
229
  if decrypted_pdf:
231
- log_info(f"Successfully decrypted PDF {doc_name} with user password")
230
+ log_info(f'Successfully decrypted PDF file "{doc_name}" with user password')
232
231
  return True
233
232
  else:
234
- log_error(f"Failed to decrypt PDF {doc_name}: incorrect password")
233
+ log_error(f'Failed to decrypt PDF file "{doc_name}": incorrect password')
235
234
  return False
236
235
  except Exception as e:
237
- log_error(f"Error decrypting PDF {doc_name}: {e}")
236
+ log_error(f'Error decrypting PDF file "{doc_name}": {e}')
238
237
  return False
239
238
 
240
239
  def _create_documents(self, pdf_content: List[str], doc_name: str, use_uuid_for_id: bool, page_number_shift):
@@ -368,7 +367,7 @@ class PDFReader(BasePDFReader):
368
367
  if not self._decrypt_pdf(pdf_reader, doc_name, password):
369
368
  return []
370
369
 
371
- # Read and chunk.
370
+ # Read and chunk
372
371
  return self._pdf_reader_to_documents(pdf_reader, doc_name, use_uuid_for_id=True)
373
372
 
374
373
  async def async_read(
@@ -405,63 +404,6 @@ class PDFReader(BasePDFReader):
405
404
  return await self._async_pdf_reader_to_documents(pdf_reader, doc_name, use_uuid_for_id=True)
406
405
 
407
406
 
408
- class PDFUrlReader(BasePDFReader):
409
- """Reader for PDF files from URL"""
410
-
411
- def __init__(self, proxy: Optional[str] = None, password: Optional[str] = None, **kwargs):
412
- super().__init__(password=password, **kwargs)
413
- self.proxy = proxy
414
-
415
- @classmethod
416
- def get_supported_content_types(self) -> List[ContentType]:
417
- return [ContentType.URL]
418
-
419
- def read(self, url: str, name: Optional[str] = None, password: Optional[str] = None) -> List[Document]:
420
- if not url:
421
- raise ValueError("No url provided")
422
-
423
- from io import BytesIO
424
-
425
- log_info(f"Reading: {url}")
426
-
427
- # Retry the request up to 3 times with exponential backoff
428
- response = fetch_with_retry(url, proxy=self.proxy)
429
-
430
- doc_name = name or url.split("/")[-1].split(".")[0].replace("/", "_").replace(" ", "_")
431
- pdf_reader = DocumentReader(BytesIO(response.content))
432
-
433
- # Handle PDF decryption
434
- if not self._decrypt_pdf(pdf_reader, doc_name, password):
435
- return []
436
-
437
- # Read and chunk.
438
- return self._pdf_reader_to_documents(pdf_reader, doc_name, use_uuid_for_id=False)
439
-
440
- async def async_read(self, url: str, name: Optional[str] = None, password: Optional[str] = None) -> List[Document]:
441
- if not url:
442
- raise ValueError("No url provided")
443
-
444
- from io import BytesIO
445
-
446
- import httpx
447
-
448
- log_info(f"Reading: {url}")
449
-
450
- client_args = {"proxy": self.proxy} if self.proxy else {}
451
- async with httpx.AsyncClient(**client_args) as client: # type: ignore
452
- response = await async_fetch_with_retry(url, client=client)
453
-
454
- doc_name = name or url.split("/")[-1].split(".")[0].replace("/", "_").replace(" ", "_")
455
- pdf_reader = DocumentReader(BytesIO(response.content))
456
-
457
- # Handle PDF decryption
458
- if not self._decrypt_pdf(pdf_reader, doc_name, password):
459
- return []
460
-
461
- # Read and chunk.
462
- return await self._async_pdf_reader_to_documents(pdf_reader, doc_name, use_uuid_for_id=False)
463
-
464
-
465
407
  class PDFImageReader(BasePDFReader):
466
408
  """Reader for PDF files with text and images extraction"""
467
409
 
@@ -512,58 +454,3 @@ class PDFImageReader(BasePDFReader):
512
454
 
513
455
  # Read and chunk.
514
456
  return await self._async_pdf_reader_to_documents(pdf_reader, doc_name, read_images=True, use_uuid_for_id=False)
515
-
516
-
517
- class PDFUrlImageReader(BasePDFReader):
518
- """Reader for PDF files from URL with text and images extraction"""
519
-
520
- def __init__(self, proxy: Optional[str] = None, password: Optional[str] = None, **kwargs):
521
- super().__init__(password=password, **kwargs)
522
- self.proxy = proxy
523
-
524
- def read(self, url: str, name: Optional[str] = None, password: Optional[str] = None) -> List[Document]:
525
- if not url:
526
- raise ValueError("No url provided")
527
-
528
- from io import BytesIO
529
-
530
- import httpx
531
-
532
- # Read the PDF from the URL
533
- log_info(f"Reading: {url}")
534
- response = httpx.get(url, proxy=self.proxy) if self.proxy else httpx.get(url)
535
-
536
- doc_name = name or url.split("/")[-1].split(".")[0].replace(" ", "_")
537
- pdf_reader = DocumentReader(BytesIO(response.content))
538
-
539
- # Handle PDF decryption
540
- if not self._decrypt_pdf(pdf_reader, doc_name, password):
541
- return []
542
-
543
- # Read and chunk.
544
- return self._pdf_reader_to_documents(pdf_reader, doc_name, read_images=True, use_uuid_for_id=False)
545
-
546
- async def async_read(self, url: str, name: Optional[str] = None, password: Optional[str] = None) -> List[Document]:
547
- if not url:
548
- raise ValueError("No url provided")
549
-
550
- from io import BytesIO
551
-
552
- import httpx
553
-
554
- log_info(f"Reading: {url}")
555
-
556
- client_args = {"proxy": self.proxy} if self.proxy else {}
557
- async with httpx.AsyncClient(**client_args) as client: # type: ignore
558
- response = await client.get(url)
559
- response.raise_for_status()
560
-
561
- doc_name = name or url.split("/")[-1].split(".")[0].replace(" ", "_")
562
- pdf_reader = DocumentReader(BytesIO(response.content))
563
-
564
- # Handle PDF decryption
565
- if not self._decrypt_pdf(pdf_reader, doc_name, password):
566
- return []
567
-
568
- # Read and chunk.
569
- return await self._async_pdf_reader_to_documents(pdf_reader, doc_name, read_images=True, use_uuid_for_id=False)
@@ -15,7 +15,11 @@ class ReaderFactory:
15
15
  """Get PDF reader instance."""
16
16
  from agno.knowledge.reader.pdf_reader import PDFReader
17
17
 
18
- config: Dict[str, Any] = {"chunk": True, "chunk_size": 100}
18
+ config: Dict[str, Any] = {
19
+ "chunk": True,
20
+ "chunk_size": 100,
21
+ "description": "Processes PDF documents with OCR support for images and text extraction",
22
+ }
19
23
  config.update(kwargs)
20
24
  return PDFReader(**config)
21
25
 
@@ -24,7 +28,10 @@ class ReaderFactory:
24
28
  """Get CSV reader instance."""
25
29
  from agno.knowledge.reader.csv_reader import CSVReader
26
30
 
27
- config: Dict[str, Any] = {"name": "CSV Reader", "description": "Reads CSV files"}
31
+ config: Dict[str, Any] = {
32
+ "name": "CSV Reader",
33
+ "description": "Parses CSV, XLSX, and XLS files with custom delimiter support",
34
+ }
28
35
  config.update(kwargs)
29
36
  return CSVReader(**config)
30
37
 
@@ -33,7 +40,10 @@ class ReaderFactory:
33
40
  """Get Docx reader instance."""
34
41
  from agno.knowledge.reader.docx_reader import DocxReader
35
42
 
36
- config: Dict[str, Any] = {"name": "Docx Reader", "description": "Reads Docx files"}
43
+ config: Dict[str, Any] = {
44
+ "name": "Docx Reader",
45
+ "description": "Extracts text content from Microsoft Word documents (.docx and .doc formats)",
46
+ }
37
47
  config.update(kwargs)
38
48
  return DocxReader(**config)
39
49
 
@@ -42,7 +52,10 @@ class ReaderFactory:
42
52
  """Get JSON reader instance."""
43
53
  from agno.knowledge.reader.json_reader import JSONReader
44
54
 
45
- config: Dict[str, Any] = {"name": "JSON Reader", "description": "Reads JSON files"}
55
+ config: Dict[str, Any] = {
56
+ "name": "JSON Reader",
57
+ "description": "Processes JSON data structures and API responses with nested object handling",
58
+ }
46
59
  config.update(kwargs)
47
60
  return JSONReader(**config)
48
61
 
@@ -51,7 +64,10 @@ class ReaderFactory:
51
64
  """Get Markdown reader instance."""
52
65
  from agno.knowledge.reader.markdown_reader import MarkdownReader
53
66
 
54
- config: Dict[str, Any] = {"name": "Markdown Reader", "description": "Reads Markdown files"}
67
+ config: Dict[str, Any] = {
68
+ "name": "Markdown Reader",
69
+ "description": "Processes Markdown documentation with header-aware chunking and formatting preservation",
70
+ }
55
71
  config.update(kwargs)
56
72
  return MarkdownReader(**config)
57
73
 
@@ -60,25 +76,22 @@ class ReaderFactory:
60
76
  """Get Text reader instance."""
61
77
  from agno.knowledge.reader.text_reader import TextReader
62
78
 
63
- config: Dict[str, Any] = {"name": "Text Reader", "description": "Reads Text files"}
79
+ config: Dict[str, Any] = {
80
+ "name": "Text Reader",
81
+ "description": "Handles plain text files with customizable chunking strategies and encoding detection",
82
+ }
64
83
  config.update(kwargs)
65
84
  return TextReader(**config)
66
85
 
67
- @classmethod
68
- def _get_url_reader(cls, **kwargs) -> Reader:
69
- """Get URL reader instance."""
70
- from agno.knowledge.reader.url_reader import URLReader
71
-
72
- config: Dict[str, Any] = {"name": "URL Reader", "description": "Reads URLs"}
73
- config.update(kwargs)
74
- return URLReader(**config)
75
-
76
86
  @classmethod
77
87
  def _get_website_reader(cls, **kwargs) -> Reader:
78
88
  """Get Website reader instance."""
79
89
  from agno.knowledge.reader.website_reader import WebsiteReader
80
90
 
81
- config: Dict[str, Any] = {"name": "Website Reader", "description": "Reads Website files"}
91
+ config: Dict[str, Any] = {
92
+ "name": "Website Reader",
93
+ "description": "Scrapes and extracts content from web pages with HTML parsing and text cleaning",
94
+ }
82
95
  config.update(kwargs)
83
96
  return WebsiteReader(**config)
84
97
 
@@ -91,7 +104,7 @@ class ReaderFactory:
91
104
  "api_key": kwargs.get("api_key") or os.getenv("FIRECRAWL_API_KEY"),
92
105
  "mode": "crawl",
93
106
  "name": "Firecrawl Reader",
94
- "description": "Crawls websites",
107
+ "description": "Advanced web scraping and crawling with JavaScript rendering and structured data extraction",
95
108
  }
96
109
  config.update(kwargs)
97
110
  return FirecrawlReader(**config)
@@ -101,52 +114,22 @@ class ReaderFactory:
101
114
  """Get YouTube reader instance."""
102
115
  from agno.knowledge.reader.youtube_reader import YouTubeReader
103
116
 
104
- config: Dict[str, Any] = {"name": "YouTube Reader", "description": "Reads YouTube videos"}
117
+ config: Dict[str, Any] = {
118
+ "name": "YouTube Reader",
119
+ "description": "Extracts transcripts and metadata from YouTube videos and playlists",
120
+ }
105
121
  config.update(kwargs)
106
122
  return YouTubeReader(**config)
107
123
 
108
- @classmethod
109
- def _get_pdf_url_reader(cls, **kwargs) -> Reader:
110
- """Get PDF URL reader instance."""
111
- from agno.knowledge.reader.pdf_reader import PDFUrlReader
112
-
113
- config: Dict[str, Any] = {"name": "PDF URL Reader", "description": "Reads PDF URLs"}
114
- config.update(kwargs)
115
- return PDFUrlReader(**config)
116
-
117
- @classmethod
118
- def _get_csv_url_reader(cls, **kwargs) -> Reader:
119
- """Get CSV URL reader instance."""
120
- from agno.knowledge.reader.csv_reader import CSVUrlReader
121
-
122
- config: Dict[str, Any] = {"name": "CSV URL Reader", "description": "Reads CSV URLs"}
123
- config.update(kwargs)
124
- return CSVUrlReader(**config)
125
-
126
- @classmethod
127
- def _get_s3_reader(cls, **kwargs) -> Reader:
128
- """Get S3 reader instance."""
129
- from agno.knowledge.reader.s3_reader import S3Reader
130
-
131
- config: Dict[str, Any] = {"name": "S3 Reader", "description": "Reads S3 files"}
132
- config.update(kwargs)
133
- return S3Reader(**config)
134
-
135
- @classmethod
136
- def _get_gcs_reader(cls, **kwargs) -> Reader:
137
- """Get GCS reader instance."""
138
- from agno.knowledge.reader.gcs_reader import GCSReader
139
-
140
- config: Dict[str, Any] = {"name": "GCS Reader", "description": "Reads GCS files"}
141
- config.update(kwargs)
142
- return GCSReader(**config)
143
-
144
124
  @classmethod
145
125
  def _get_arxiv_reader(cls, **kwargs) -> Reader:
146
126
  """Get Arxiv reader instance."""
147
127
  from agno.knowledge.reader.arxiv_reader import ArxivReader
148
128
 
149
- config: Dict[str, Any] = {"name": "Arxiv Reader", "description": "Reads Arxiv papers"}
129
+ config: Dict[str, Any] = {
130
+ "name": "Arxiv Reader",
131
+ "description": "Downloads and processes academic papers from ArXiv with PDF parsing and metadata extraction",
132
+ }
150
133
  config.update(kwargs)
151
134
  return ArxivReader(**config)
152
135
 
@@ -155,7 +138,10 @@ class ReaderFactory:
155
138
  """Get Wikipedia reader instance."""
156
139
  from agno.knowledge.reader.wikipedia_reader import WikipediaReader
157
140
 
158
- config: Dict[str, Any] = {"name": "Wikipedia Reader", "description": "Reads Wikipedia articles"}
141
+ config: Dict[str, Any] = {
142
+ "name": "Wikipedia Reader",
143
+ "description": "Fetches and processes Wikipedia articles with section-aware chunking and link resolution",
144
+ }
159
145
  config.update(kwargs)
160
146
  return WikipediaReader(**config)
161
147
 
@@ -164,7 +150,10 @@ class ReaderFactory:
164
150
  """Get Web Search reader instance."""
165
151
  from agno.knowledge.reader.web_search_reader import WebSearchReader
166
152
 
167
- config: Dict[str, Any] = {"name": "Web Search Reader", "description": "Performs web searches"}
153
+ config: Dict[str, Any] = {
154
+ "name": "Web Search Reader",
155
+ "description": "Executes web searches and processes results with relevance ranking and content extraction",
156
+ }
168
157
  config.update(kwargs)
169
158
  return WebSearchReader(**config)
170
159
 
@@ -224,27 +213,31 @@ class ReaderFactory:
224
213
  # Default to URL reader
225
214
  return cls.create_reader("url")
226
215
 
227
- @classmethod
228
- def get_reader_for_url_file(cls, extension: str) -> Reader:
229
- """Get the appropriate reader for a URL file extension."""
230
- extension = extension.lower()
231
-
232
- if extension == ".pdf":
233
- return cls.create_reader("pdf_url")
234
- elif extension == ".csv":
235
- return cls.create_reader("csv_url")
236
- else:
237
- return cls.create_reader("url")
238
-
239
216
  @classmethod
240
217
  def get_all_reader_keys(cls) -> List[str]:
241
- """Get all available reader keys."""
218
+ """Get all available reader keys in priority order."""
242
219
  # Extract reader keys from method names
220
+
221
+ PREFIX = "_get_"
222
+ SUFFIX = "_reader"
223
+
243
224
  reader_keys = []
244
225
  for attr_name in dir(cls):
245
- if attr_name.startswith("_get_") and attr_name.endswith("_reader"):
246
- reader_key = attr_name[5:-7] # Remove "_get_" prefix and "_reader" suffix
226
+ if attr_name.startswith(PREFIX) and attr_name.endswith(SUFFIX):
227
+ reader_key = attr_name[len(PREFIX) : -len(SUFFIX)] # Remove "_get_" prefix and "_reader" suffix
247
228
  reader_keys.append(reader_key)
229
+
230
+ # Define priority order for URL readers
231
+ url_reader_priority = ["url", "website", "firecrawl", "pdf_url", "csv_url", "youtube", "web_search"]
232
+
233
+ # Sort with URL readers in priority order, others alphabetically
234
+ def sort_key(reader_key):
235
+ if reader_key in url_reader_priority:
236
+ return (0, url_reader_priority.index(reader_key))
237
+ else:
238
+ return (1, reader_key)
239
+
240
+ reader_keys.sort(key=sort_key)
248
241
  return reader_keys
249
242
 
250
243
  @classmethod