PyPI - agno - Versions diffs - 2.0.0a1__py3-none-any.whl → 2.0.0rc2__py3-none-any.whl - Mend

agno 2.0.0a1__py3-none-any.whl → 2.0.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (79) hide show

agno/agent/agent.py +416 -41
agno/api/agent.py +2 -2
agno/api/evals.py +2 -2
agno/api/os.py +1 -1
agno/api/settings.py +2 -2
agno/api/team.py +2 -2
agno/db/dynamo/dynamo.py +0 -6
agno/db/firestore/firestore.py +0 -6
agno/db/in_memory/in_memory_db.py +0 -6
agno/db/json/json_db.py +0 -6
agno/db/mongo/mongo.py +8 -9
agno/db/mysql/utils.py +0 -1
agno/db/postgres/postgres.py +0 -10
agno/db/postgres/utils.py +0 -1
agno/db/redis/redis.py +0 -4
agno/db/singlestore/singlestore.py +0 -10
agno/db/singlestore/utils.py +0 -1
agno/db/sqlite/sqlite.py +0 -4
agno/db/sqlite/utils.py +0 -1
agno/eval/accuracy.py +12 -5
agno/integrations/discord/client.py +5 -1
agno/knowledge/chunking/strategy.py +14 -14
agno/knowledge/embedder/aws_bedrock.py +2 -2
agno/knowledge/knowledge.py +156 -120
agno/knowledge/reader/arxiv_reader.py +5 -5
agno/knowledge/reader/csv_reader.py +6 -77
agno/knowledge/reader/docx_reader.py +5 -5
agno/knowledge/reader/firecrawl_reader.py +5 -5
agno/knowledge/reader/json_reader.py +5 -5
agno/knowledge/reader/markdown_reader.py +31 -9
agno/knowledge/reader/pdf_reader.py +10 -123
agno/knowledge/reader/reader_factory.py +65 -72
agno/knowledge/reader/s3_reader.py +44 -114
agno/knowledge/reader/text_reader.py +5 -5
agno/knowledge/reader/url_reader.py +75 -31
agno/knowledge/reader/web_search_reader.py +6 -29
agno/knowledge/reader/website_reader.py +5 -5
agno/knowledge/reader/wikipedia_reader.py +5 -5
agno/knowledge/reader/youtube_reader.py +6 -6
agno/knowledge/utils.py +10 -10
agno/models/anthropic/claude.py +2 -49
agno/models/aws/bedrock.py +3 -7
agno/models/base.py +37 -6
agno/models/message.py +7 -6
agno/os/app.py +168 -64
agno/os/interfaces/agui/agui.py +1 -1
agno/os/interfaces/agui/utils.py +16 -9
agno/os/interfaces/slack/slack.py +2 -3
agno/os/interfaces/whatsapp/whatsapp.py +2 -3
agno/os/mcp.py +235 -0
agno/os/router.py +576 -19
agno/os/routers/evals/evals.py +201 -12
agno/os/routers/knowledge/knowledge.py +455 -18
agno/os/routers/memory/memory.py +260 -29
agno/os/routers/metrics/metrics.py +127 -7
agno/os/routers/session/session.py +398 -25
agno/os/schema.py +55 -2
agno/os/settings.py +0 -1
agno/run/agent.py +96 -2
agno/run/cancel.py +0 -2
agno/run/team.py +93 -2
agno/run/workflow.py +25 -12
agno/team/team.py +863 -1053
agno/tools/function.py +65 -7
agno/tools/linear.py +1 -1
agno/tools/mcp.py +1 -2
agno/utils/gemini.py +31 -1
agno/utils/log.py +52 -2
agno/utils/mcp.py +55 -3
agno/utils/models/claude.py +41 -0
agno/utils/print_response/team.py +177 -73
agno/utils/streamlit.py +481 -0
agno/workflow/workflow.py +17 -1
{agno-2.0.0a1.dist-info → agno-2.0.0rc2.dist-info}/METADATA +1 -1
{agno-2.0.0a1.dist-info → agno-2.0.0rc2.dist-info}/RECORD +78 -77
agno/knowledge/reader/gcs_reader.py +0 -67
{agno-2.0.0a1.dist-info → agno-2.0.0rc2.dist-info}/WHEEL +0 -0
{agno-2.0.0a1.dist-info → agno-2.0.0rc2.dist-info}/licenses/LICENSE +0 -0
{agno-2.0.0a1.dist-info → agno-2.0.0rc2.dist-info}/top_level.txt +0 -0

agno/knowledge/reader/markdown_reader.py CHANGED Viewed

@@ -3,13 +3,26 @@ import uuid
 from pathlib import Path
 from typing import IO, Any, List, Optional, Union
-from agno.knowledge.chunking.markdown import MarkdownChunking
 from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
 from agno.knowledge.document.base import Document
 from agno.knowledge.reader.base import Reader
 from agno.knowledge.types import ContentType
 from agno.utils.log import log_info, logger
+DEFAULT_CHUNKER_STRATEGY: ChunkingStrategy
+# Try to import MarkdownChunking, fallback to FixedSizeChunking if not available
+try:
+    from agno.knowledge.chunking.markdown import MarkdownChunking
+    DEFAULT_CHUNKER_STRATEGY = MarkdownChunking()
+    MARKDOWN_CHUNKER_AVAILABLE = True
+except ImportError:
+    from agno.knowledge.chunking.fixed import FixedSizeChunking
+    DEFAULT_CHUNKER_STRATEGY = FixedSizeChunking()
+    MARKDOWN_CHUNKER_AVAILABLE = False
 class MarkdownReader(Reader):
     """Reader for Markdown files"""
@@ -17,25 +30,34 @@ class MarkdownReader(Reader):
     @classmethod
     def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:
         """Get the list of supported chunking strategies for Markdown readers."""
-        return [
-            ChunkingStrategyType.MARKDOWN_CHUNKING,
-            ChunkingStrategyType.AGENTIC_CHUNKING,
-            ChunkingStrategyType.DOCUMENT_CHUNKING,
-            ChunkingStrategyType.RECURSIVE_CHUNKING,
-            ChunkingStrategyType.SEMANTIC_CHUNKING,
-            ChunkingStrategyType.FIXED_SIZE_CHUNKING,
+        strategies = [
+            ChunkingStrategyType.DOCUMENT_CHUNKER,
+            ChunkingStrategyType.AGENTIC_CHUNKER,
+            ChunkingStrategyType.RECURSIVE_CHUNKER,
+            ChunkingStrategyType.SEMANTIC_CHUNKER,
+            ChunkingStrategyType.FIXED_SIZE_CHUNKER,
         ]
+        # Only include MarkdownChunking if it's available
+        if MARKDOWN_CHUNKER_AVAILABLE:
+            strategies.insert(0, ChunkingStrategyType.MARKDOWN_CHUNKER)
+        return strategies
     @classmethod
     def get_supported_content_types(self) -> List[ContentType]:
         return [ContentType.MARKDOWN]
     def __init__(
         self,
-        chunking_strategy: Optional[ChunkingStrategy] = MarkdownChunking(),
+        chunking_strategy: Optional[ChunkingStrategy] = None,
         name: Optional[str] = None,
         description: Optional[str] = None,
     ) -> None:
+        # Use the default chunking strategy if none provided
+        if chunking_strategy is None:
+            chunking_strategy = DEFAULT_CHUNKER_STRATEGY
         super().__init__(chunking_strategy=chunking_strategy, name=name, description=description)
     def read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:

agno/knowledge/reader/pdf_reader.py CHANGED Viewed

@@ -8,7 +8,6 @@ from agno.knowledge.chunking.strategy import ChunkingStrategyType
 from agno.knowledge.document.base import Document
 from agno.knowledge.reader.base import Reader
 from agno.knowledge.types import ContentType
-from agno.utils.http import async_fetch_with_retry, fetch_with_retry
 from agno.utils.log import log_error, log_info, logger
 try:
@@ -202,11 +201,11 @@ class BasePDFReader(Reader):
     def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:
         """Get the list of supported chunking strategies for PDF readers."""
         return [
-            ChunkingStrategyType.DOCUMENT_CHUNKING,
-            ChunkingStrategyType.FIXED_SIZE_CHUNKING,
-            ChunkingStrategyType.AGENTIC_CHUNKING,
-            ChunkingStrategyType.SEMANTIC_CHUNKING,
-            ChunkingStrategyType.RECURSIVE_CHUNKING,
+            ChunkingStrategyType.DOCUMENT_CHUNKER,
+            ChunkingStrategyType.FIXED_SIZE_CHUNKER,
+            ChunkingStrategyType.AGENTIC_CHUNKER,
+            ChunkingStrategyType.SEMANTIC_CHUNKER,
+            ChunkingStrategyType.RECURSIVE_CHUNKER,
         ]
     def _build_chunked_documents(self, documents: List[Document]) -> List[Document]:
@@ -222,19 +221,19 @@ class BasePDFReader(Reader):
         # Use provided password or fall back to instance password
         pdf_password = password or self.password
         if not pdf_password:
-            logger.error(f"PDF {doc_name} is password protected but no password provided")
+            logger.error(f'PDF file "{doc_name}" is password protected but no password provided')
             return False
         try:
             decrypted_pdf = doc_reader.decrypt(pdf_password)
             if decrypted_pdf:
-                log_info(f"Successfully decrypted PDF {doc_name} with user password")
+                log_info(f'Successfully decrypted PDF file "{doc_name}" with user password')
                 return True
             else:
-                log_error(f"Failed to decrypt PDF {doc_name}: incorrect password")
+                log_error(f'Failed to decrypt PDF file "{doc_name}": incorrect password')
                 return False
         except Exception as e:
-            log_error(f"Error decrypting PDF {doc_name}: {e}")
+            log_error(f'Error decrypting PDF file "{doc_name}": {e}')
             return False
     def _create_documents(self, pdf_content: List[str], doc_name: str, use_uuid_for_id: bool, page_number_shift):
@@ -368,7 +367,7 @@ class PDFReader(BasePDFReader):
         if not self._decrypt_pdf(pdf_reader, doc_name, password):
             return []
-        # Read and chunk.
+        # Read and chunk
         return self._pdf_reader_to_documents(pdf_reader, doc_name, use_uuid_for_id=True)
     async def async_read(
@@ -405,63 +404,6 @@ class PDFReader(BasePDFReader):
         return await self._async_pdf_reader_to_documents(pdf_reader, doc_name, use_uuid_for_id=True)
-class PDFUrlReader(BasePDFReader):
-    """Reader for PDF files from URL"""
-    def __init__(self, proxy: Optional[str] = None, password: Optional[str] = None, **kwargs):
-        super().__init__(password=password, **kwargs)
-        self.proxy = proxy
-    @classmethod
-    def get_supported_content_types(self) -> List[ContentType]:
-        return [ContentType.URL]
-    def read(self, url: str, name: Optional[str] = None, password: Optional[str] = None) -> List[Document]:
-        if not url:
-            raise ValueError("No url provided")
-        from io import BytesIO
-        log_info(f"Reading: {url}")
-        # Retry the request up to 3 times with exponential backoff
-        response = fetch_with_retry(url, proxy=self.proxy)
-        doc_name = name or url.split("/")[-1].split(".")[0].replace("/", "_").replace(" ", "_")
-        pdf_reader = DocumentReader(BytesIO(response.content))
-        # Handle PDF decryption
-        if not self._decrypt_pdf(pdf_reader, doc_name, password):
-            return []
-        # Read and chunk.
-        return self._pdf_reader_to_documents(pdf_reader, doc_name, use_uuid_for_id=False)
-    async def async_read(self, url: str, name: Optional[str] = None, password: Optional[str] = None) -> List[Document]:
-        if not url:
-            raise ValueError("No url provided")
-        from io import BytesIO
-        import httpx
-        log_info(f"Reading: {url}")
-        client_args = {"proxy": self.proxy} if self.proxy else {}
-        async with httpx.AsyncClient(**client_args) as client:  # type: ignore
-            response = await async_fetch_with_retry(url, client=client)
-        doc_name = name or url.split("/")[-1].split(".")[0].replace("/", "_").replace(" ", "_")
-        pdf_reader = DocumentReader(BytesIO(response.content))
-        # Handle PDF decryption
-        if not self._decrypt_pdf(pdf_reader, doc_name, password):
-            return []
-        # Read and chunk.
-        return await self._async_pdf_reader_to_documents(pdf_reader, doc_name, use_uuid_for_id=False)
 class PDFImageReader(BasePDFReader):
     """Reader for PDF files with text and images extraction"""
@@ -512,58 +454,3 @@ class PDFImageReader(BasePDFReader):
         # Read and chunk.
         return await self._async_pdf_reader_to_documents(pdf_reader, doc_name, read_images=True, use_uuid_for_id=False)
-class PDFUrlImageReader(BasePDFReader):
-    """Reader for PDF files from URL with text and images extraction"""
-    def __init__(self, proxy: Optional[str] = None, password: Optional[str] = None, **kwargs):
-        super().__init__(password=password, **kwargs)
-        self.proxy = proxy
-    def read(self, url: str, name: Optional[str] = None, password: Optional[str] = None) -> List[Document]:
-        if not url:
-            raise ValueError("No url provided")
-        from io import BytesIO
-        import httpx
-        # Read the PDF from the URL
-        log_info(f"Reading: {url}")
-        response = httpx.get(url, proxy=self.proxy) if self.proxy else httpx.get(url)
-        doc_name = name or url.split("/")[-1].split(".")[0].replace(" ", "_")
-        pdf_reader = DocumentReader(BytesIO(response.content))
-        # Handle PDF decryption
-        if not self._decrypt_pdf(pdf_reader, doc_name, password):
-            return []
-        # Read and chunk.
-        return self._pdf_reader_to_documents(pdf_reader, doc_name, read_images=True, use_uuid_for_id=False)
-    async def async_read(self, url: str, name: Optional[str] = None, password: Optional[str] = None) -> List[Document]:
-        if not url:
-            raise ValueError("No url provided")
-        from io import BytesIO
-        import httpx
-        log_info(f"Reading: {url}")
-        client_args = {"proxy": self.proxy} if self.proxy else {}
-        async with httpx.AsyncClient(**client_args) as client:  # type: ignore
-            response = await client.get(url)
-            response.raise_for_status()
-        doc_name = name or url.split("/")[-1].split(".")[0].replace(" ", "_")
-        pdf_reader = DocumentReader(BytesIO(response.content))
-        # Handle PDF decryption
-        if not self._decrypt_pdf(pdf_reader, doc_name, password):
-            return []
-        # Read and chunk.
-        return await self._async_pdf_reader_to_documents(pdf_reader, doc_name, read_images=True, use_uuid_for_id=False)

agno/knowledge/reader/reader_factory.py CHANGED Viewed

@@ -15,7 +15,11 @@ class ReaderFactory:
         """Get PDF reader instance."""
         from agno.knowledge.reader.pdf_reader import PDFReader
-        config: Dict[str, Any] = {"chunk": True, "chunk_size": 100}
+        config: Dict[str, Any] = {
+            "chunk": True,
+            "chunk_size": 100,
+            "description": "Processes PDF documents with OCR support for images and text extraction",
+        }
         config.update(kwargs)
         return PDFReader(**config)
@@ -24,7 +28,10 @@ class ReaderFactory:
         """Get CSV reader instance."""
         from agno.knowledge.reader.csv_reader import CSVReader
-        config: Dict[str, Any] = {"name": "CSV Reader", "description": "Reads CSV files"}
+        config: Dict[str, Any] = {
+            "name": "CSV Reader",
+            "description": "Parses CSV, XLSX, and XLS files with custom delimiter support",
+        }
         config.update(kwargs)
         return CSVReader(**config)
@@ -33,7 +40,10 @@ class ReaderFactory:
         """Get Docx reader instance."""
         from agno.knowledge.reader.docx_reader import DocxReader
-        config: Dict[str, Any] = {"name": "Docx Reader", "description": "Reads Docx files"}
+        config: Dict[str, Any] = {
+            "name": "Docx Reader",
+            "description": "Extracts text content from Microsoft Word documents (.docx and .doc formats)",
+        }
         config.update(kwargs)
         return DocxReader(**config)
@@ -42,7 +52,10 @@ class ReaderFactory:
         """Get JSON reader instance."""
         from agno.knowledge.reader.json_reader import JSONReader
-        config: Dict[str, Any] = {"name": "JSON Reader", "description": "Reads JSON files"}
+        config: Dict[str, Any] = {
+            "name": "JSON Reader",
+            "description": "Processes JSON data structures and API responses with nested object handling",
+        }
         config.update(kwargs)
         return JSONReader(**config)
@@ -51,7 +64,10 @@ class ReaderFactory:
         """Get Markdown reader instance."""
         from agno.knowledge.reader.markdown_reader import MarkdownReader
-        config: Dict[str, Any] = {"name": "Markdown Reader", "description": "Reads Markdown files"}
+        config: Dict[str, Any] = {
+            "name": "Markdown Reader",
+            "description": "Processes Markdown documentation with header-aware chunking and formatting preservation",
+        }
         config.update(kwargs)
         return MarkdownReader(**config)
@@ -60,25 +76,22 @@ class ReaderFactory:
         """Get Text reader instance."""
         from agno.knowledge.reader.text_reader import TextReader
-        config: Dict[str, Any] = {"name": "Text Reader", "description": "Reads Text files"}
+        config: Dict[str, Any] = {
+            "name": "Text Reader",
+            "description": "Handles plain text files with customizable chunking strategies and encoding detection",
+        }
         config.update(kwargs)
         return TextReader(**config)
-    @classmethod
-    def _get_url_reader(cls, **kwargs) -> Reader:
-        """Get URL reader instance."""
-        from agno.knowledge.reader.url_reader import URLReader
-        config: Dict[str, Any] = {"name": "URL Reader", "description": "Reads URLs"}
-        config.update(kwargs)
-        return URLReader(**config)
     @classmethod
     def _get_website_reader(cls, **kwargs) -> Reader:
         """Get Website reader instance."""
         from agno.knowledge.reader.website_reader import WebsiteReader
-        config: Dict[str, Any] = {"name": "Website Reader", "description": "Reads Website files"}
+        config: Dict[str, Any] = {
+            "name": "Website Reader",
+            "description": "Scrapes and extracts content from web pages with HTML parsing and text cleaning",
+        }
         config.update(kwargs)
         return WebsiteReader(**config)
@@ -91,7 +104,7 @@ class ReaderFactory:
             "api_key": kwargs.get("api_key") or os.getenv("FIRECRAWL_API_KEY"),
             "mode": "crawl",
             "name": "Firecrawl Reader",
-            "description": "Crawls websites",
+            "description": "Advanced web scraping and crawling with JavaScript rendering and structured data extraction",
         }
         config.update(kwargs)
         return FirecrawlReader(**config)
@@ -101,52 +114,22 @@ class ReaderFactory:
         """Get YouTube reader instance."""
         from agno.knowledge.reader.youtube_reader import YouTubeReader
-        config: Dict[str, Any] = {"name": "YouTube Reader", "description": "Reads YouTube videos"}
+        config: Dict[str, Any] = {
+            "name": "YouTube Reader",
+            "description": "Extracts transcripts and metadata from YouTube videos and playlists",
+        }
         config.update(kwargs)
         return YouTubeReader(**config)
-    @classmethod
-    def _get_pdf_url_reader(cls, **kwargs) -> Reader:
-        """Get PDF URL reader instance."""
-        from agno.knowledge.reader.pdf_reader import PDFUrlReader
-        config: Dict[str, Any] = {"name": "PDF URL Reader", "description": "Reads PDF URLs"}
-        config.update(kwargs)
-        return PDFUrlReader(**config)
-    @classmethod
-    def _get_csv_url_reader(cls, **kwargs) -> Reader:
-        """Get CSV URL reader instance."""
-        from agno.knowledge.reader.csv_reader import CSVUrlReader
-        config: Dict[str, Any] = {"name": "CSV URL Reader", "description": "Reads CSV URLs"}
-        config.update(kwargs)
-        return CSVUrlReader(**config)
-    @classmethod
-    def _get_s3_reader(cls, **kwargs) -> Reader:
-        """Get S3 reader instance."""
-        from agno.knowledge.reader.s3_reader import S3Reader
-        config: Dict[str, Any] = {"name": "S3 Reader", "description": "Reads S3 files"}
-        config.update(kwargs)
-        return S3Reader(**config)
-    @classmethod
-    def _get_gcs_reader(cls, **kwargs) -> Reader:
-        """Get GCS reader instance."""
-        from agno.knowledge.reader.gcs_reader import GCSReader
-        config: Dict[str, Any] = {"name": "GCS Reader", "description": "Reads GCS files"}
-        config.update(kwargs)
-        return GCSReader(**config)
     @classmethod
     def _get_arxiv_reader(cls, **kwargs) -> Reader:
         """Get Arxiv reader instance."""
         from agno.knowledge.reader.arxiv_reader import ArxivReader
-        config: Dict[str, Any] = {"name": "Arxiv Reader", "description": "Reads Arxiv papers"}
+        config: Dict[str, Any] = {
+            "name": "Arxiv Reader",
+            "description": "Downloads and processes academic papers from ArXiv with PDF parsing and metadata extraction",
+        }
         config.update(kwargs)
         return ArxivReader(**config)
@@ -155,7 +138,10 @@ class ReaderFactory:
         """Get Wikipedia reader instance."""
         from agno.knowledge.reader.wikipedia_reader import WikipediaReader
-        config: Dict[str, Any] = {"name": "Wikipedia Reader", "description": "Reads Wikipedia articles"}
+        config: Dict[str, Any] = {
+            "name": "Wikipedia Reader",
+            "description": "Fetches and processes Wikipedia articles with section-aware chunking and link resolution",
+        }
         config.update(kwargs)
         return WikipediaReader(**config)
@@ -164,7 +150,10 @@ class ReaderFactory:
         """Get Web Search reader instance."""
         from agno.knowledge.reader.web_search_reader import WebSearchReader
-        config: Dict[str, Any] = {"name": "Web Search Reader", "description": "Performs web searches"}
+        config: Dict[str, Any] = {
+            "name": "Web Search Reader",
+            "description": "Executes web searches and processes results with relevance ranking and content extraction",
+        }
         config.update(kwargs)
         return WebSearchReader(**config)
@@ -224,27 +213,31 @@ class ReaderFactory:
         # Default to URL reader
         return cls.create_reader("url")
-    @classmethod
-    def get_reader_for_url_file(cls, extension: str) -> Reader:
-        """Get the appropriate reader for a URL file extension."""
-        extension = extension.lower()
-        if extension == ".pdf":
-            return cls.create_reader("pdf_url")
-        elif extension == ".csv":
-            return cls.create_reader("csv_url")
-        else:
-            return cls.create_reader("url")
     @classmethod
     def get_all_reader_keys(cls) -> List[str]:
-        """Get all available reader keys."""
+        """Get all available reader keys in priority order."""
         # Extract reader keys from method names
+        PREFIX = "_get_"
+        SUFFIX = "_reader"
         reader_keys = []
         for attr_name in dir(cls):
-            if attr_name.startswith("_get_") and attr_name.endswith("_reader"):
-                reader_key = attr_name[5:-7]  # Remove "_get_" prefix and "_reader" suffix
+            if attr_name.startswith(PREFIX) and attr_name.endswith(SUFFIX):
+                reader_key = attr_name[len(PREFIX) : -len(SUFFIX)]  # Remove "_get_" prefix and "_reader" suffix
                 reader_keys.append(reader_key)
+        # Define priority order for URL readers
+        url_reader_priority = ["url", "website", "firecrawl", "pdf_url", "csv_url", "youtube", "web_search"]
+        # Sort with URL readers in priority order, others alphabetically
+        def sort_key(reader_key):
+            if reader_key in url_reader_priority:
+                return (0, url_reader_priority.index(reader_key))
+            else:
+                return (1, reader_key)
+        reader_keys.sort(key=sort_key)
         return reader_keys
     @classmethod

agno/knowledge/reader/s3_reader.py CHANGED Viewed

@@ -2,14 +2,15 @@ import asyncio
 from io import BytesIO
 from pathlib import Path
 from typing import List, Optional
-from uuid import uuid4
 from agno.knowledge.chunking.fixed import FixedSizeChunking
 from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
 from agno.knowledge.document.base import Document
 from agno.knowledge.reader.base import Reader
+from agno.knowledge.reader.pdf_reader import PDFReader
+from agno.knowledge.reader.text_reader import TextReader
 from agno.knowledge.types import ContentType
-from agno.utils.log import log_debug, log_info, logger
+from agno.utils.log import log_info, logger
 try:
     from agno.aws.resource.s3.object import S3Object  # type: ignore
@@ -37,11 +38,11 @@ class S3Reader(Reader):
     def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:
         """Get the list of supported chunking strategies for S3 readers."""
         return [
-            ChunkingStrategyType.FIXED_SIZE_CHUNKING,
-            ChunkingStrategyType.AGENTIC_CHUNKING,
-            ChunkingStrategyType.DOCUMENT_CHUNKING,
-            ChunkingStrategyType.RECURSIVE_CHUNKING,
-            ChunkingStrategyType.SEMANTIC_CHUNKING,
+            ChunkingStrategyType.FIXED_SIZE_CHUNKER,
+            ChunkingStrategyType.AGENTIC_CHUNKER,
+            ChunkingStrategyType.DOCUMENT_CHUNKER,
+            ChunkingStrategyType.RECURSIVE_CHUNKER,
+            ChunkingStrategyType.SEMANTIC_CHUNKER,
         ]
     @classmethod
@@ -52,120 +53,49 @@ class S3Reader(Reader):
         try:
             log_info(f"Reading S3 file: {s3_object.uri}")
+            # Read PDF files
             if s3_object.uri.endswith(".pdf"):
-                return S3PDFReader().read(name, s3_object)
+                object_resource = s3_object.get_resource()
+                object_body = object_resource.get()["Body"]
+                doc_name = (
+                    s3_object.name.split("/")[-1].split(".")[0].replace("/", "_").replace(" ", "_")
+                    if name is None
+                    else name
+                )
+                return PDFReader().read(pdf=BytesIO(object_body.read()), name=doc_name)
+            # Read text files
             else:
-                return S3TextReader().read(name, s3_object)
+                doc_name = (
+                    s3_object.name.split("/")[-1].split(".")[0].replace("/", "_").replace(" ", "_")
+                    if name is None
+                    else name
+                )
+                obj_name = s3_object.name.split("/")[-1]
+                temporary_file = Path("storage").joinpath(obj_name)
+                s3_object.download(temporary_file)
+                # TODO: Before we were using textract here. Needed?
+                # s3_object.download(temporary_file)
+                # doc_content = textract.process(temporary_file)
+                # documents = [
+                #     Document(
+                #         name=doc_name,
+                #         id=doc_name,
+                #         content=doc_content.decode("utf-8"),
+                #     )
+                # ]
+                documents = TextReader().read(file=temporary_file, name=doc_name)
+                temporary_file.unlink()
+                return documents
         except Exception as e:
             logger.error(f"Error reading: {s3_object.uri}: {e}")
-        return []
-    async def async_read(self, name: Optional[str], s3_object: S3Object) -> List[Document]:
-        """Asynchronously read S3 files by running the synchronous read operation in a thread."""
-        return await asyncio.to_thread(self.read, name, s3_object)
-class S3TextReader(Reader):
-    """Reader for text files on S3"""
-    def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:
-        """Get the list of supported chunking strategies for S3 text readers."""
-        return [
-            ChunkingStrategyType.AGENTIC_CHUNKING,
-            ChunkingStrategyType.DOCUMENT_CHUNKING,
-            ChunkingStrategyType.RECURSIVE_CHUNKING,
-        ]
-    def get_supported_content_types(self) -> List[ContentType]:
-        return [ContentType.TEXT]
-    def read(self, name: Optional[str], s3_object: S3Object) -> List[Document]:
-        try:
-            log_info(f"Reading text file: {s3_object.uri}")
-            doc_name = s3_object.name.split("/")[-1].split(".")[0].replace("/", "_").replace(" ", "_")
-            if name is not None:
-                doc_name = name
-            obj_name = s3_object.name.split("/")[-1]
-            temporary_file = Path("storage").joinpath(obj_name)
-            s3_object.download(temporary_file)
-            log_info(f"Parsing: {temporary_file}")
-            doc_content = textract.process(temporary_file)
-            documents = [
-                Document(
-                    name=doc_name,
-                    id=doc_name,
-                    content=doc_content.decode("utf-8"),
-                )
-            ]
-            if self.chunk:
-                chunked_documents = []
-                for document in documents:
-                    chunked_documents.extend(self.chunk_document(document))
-                return chunked_documents
-            log_debug(f"Deleting: {temporary_file}")
-            temporary_file.unlink()
-            return documents
-        except Exception as e:
-            logger.error(f"Error reading: {s3_object.uri}: {e}")
         return []
     async def async_read(self, name: Optional[str], s3_object: S3Object) -> List[Document]:
-        """Asynchronously read text files from S3 by running the synchronous read operation in a thread.
-        Args:
-            s3_object (S3Object): The S3 object to read
-        Returns:
-            List[Document]: List of documents from the text file
-        """
-        return await asyncio.to_thread(self.read, name, s3_object)
-class S3PDFReader(Reader):
-    """Reader for PDF files on S3"""
-    def get_supported_content_types(self) -> List[ContentType]:
-        return [ContentType.FILE]
-    def read(self, name: Optional[str], s3_object: S3Object) -> List[Document]:
-        try:
-            log_info(f"Reading PDF file: {s3_object.uri}")
-            object_resource = s3_object.get_resource()
-            object_body = object_resource.get()["Body"]
-            doc_name = s3_object.name.split("/")[-1].split(".")[0].replace("/", "_").replace(" ", "_")
-            if name is not None:
-                doc_name = name
-            doc_reader = DocumentReader(BytesIO(object_body.read()))
-            documents = [
-                Document(
-                    name=doc_name,
-                    id=str(uuid4()),
-                    meta_data={"page": page_number},
-                    content=page.extract_text(),
-                )
-                for page_number, page in enumerate(doc_reader.pages, start=1)
-            ]
-            if self.chunk:
-                chunked_documents = []
-                for document in documents:
-                    chunked_documents.extend(self.chunk_document(document))
-                return chunked_documents
-            return documents
-        except Exception:
-            raise
-    async def async_read(self, name: Optional[str], s3_object: S3Object) -> List[Document]:
-        """Asynchronously read PDF files from S3 by running the synchronous read operation in a thread.
-        Args:
-            s3_object (S3Object): The S3 object to read
-        Returns:
-            List[Document]: List of documents from the PDF file
-        """
+        """Asynchronously read S3 files by running the synchronous read operation in a thread."""
         return await asyncio.to_thread(self.read, name, s3_object)

agno 2.0.0a1__py3-none-any.whl → 2.0.0rc2__py3-none-any.whl

agno 2.0.0a1__py3-none-any.whl → 2.0.0rc2__py3-none-any.whl