agno-2.4.2-py3-none-any.whl → agno-2.4.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/agent/agent.py +13 -0
- agno/db/firestore/firestore.py +53 -30
- agno/db/surrealdb/models.py +5 -5
- agno/db/surrealdb/surrealdb.py +13 -1
- agno/knowledge/chunking/markdown.py +112 -11
- agno/knowledge/embedder/openai.py +8 -4
- agno/knowledge/knowledge.py +59 -6
- agno/knowledge/reader/csv_reader.py +48 -216
- agno/knowledge/reader/excel_reader.py +225 -0
- agno/knowledge/reader/field_labeled_csv_reader.py +13 -179
- agno/knowledge/reader/reader_factory.py +22 -5
- agno/knowledge/reader/utils/__init__.py +17 -0
- agno/knowledge/reader/utils/spreadsheet.py +114 -0
- agno/models/base.py +6 -0
- agno/models/moonshot/__init__.py +3 -0
- agno/models/moonshot/moonshot.py +57 -0
- agno/models/openrouter/responses.py +2 -2
- agno/models/response.py +4 -0
- agno/models/utils.py +5 -0
- agno/os/routers/knowledge/knowledge.py +5 -3
- agno/run/base.py +4 -0
- agno/tools/decorator.py +3 -0
- agno/tools/function.py +3 -0
- agno/tools/unsplash.py +341 -0
- agno/utils/print_response/agent.py +8 -5
- agno/utils/response.py +38 -28
- agno/utils/string.py +2 -1
- agno/vectordb/lancedb/lance_db.py +29 -7
- agno/workflow/workflow.py +16 -6
- {agno-2.4.2.dist-info → agno-2.4.4.dist-info}/METADATA +7 -5
- {agno-2.4.2.dist-info → agno-2.4.4.dist-info}/RECORD +34 -28
- {agno-2.4.2.dist-info → agno-2.4.4.dist-info}/WHEEL +1 -1
- {agno-2.4.2.dist-info → agno-2.4.4.dist-info}/licenses/LICENSE +0 -0
- {agno-2.4.2.dist-info → agno-2.4.4.dist-info}/top_level.txt +0 -0
agno/knowledge/reader/csv_reader.py

@@ -1,9 +1,8 @@
 import asyncio
 import csv
 import io
-from datetime import date, datetime
 from pathlib import Path
-from typing import IO, Any, Iterable, List, Optional, Sequence, Tuple, Union
+from typing import IO, Any, List, Optional, Union
 from uuid import uuid4
 
 try:
@@ -15,119 +14,32 @@ from agno.knowledge.chunking.row import RowChunking
 from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
 from agno.knowledge.document.base import Document
 from agno.knowledge.reader.base import Reader
+from agno.knowledge.reader.utils import stringify_cell_value
 from agno.knowledge.types import ContentType
 from agno.utils.log import log_debug, log_error
 
 
-def _get_workbook_name(file: Union[Path, IO[Any]], name: Optional[str]) -> str:
-    """
-
-    Priority: explicit name > file path stem > file object name attribute > "workbook"
-    """
-    if name:
-        return Path(name).stem
-    if isinstance(file, Path):
-        return file.stem
-    return Path(getattr(file, "name", "workbook")).stem
-
-
-def _infer_file_extension(file: Union[Path, IO[Any]], name: Optional[str]) -> str:
-    if isinstance(file, Path):
-        return file.suffix.lower()
-
-    file_name = getattr(file, "name", None)
-    if isinstance(file_name, str) and file_name:
-        return Path(file_name).suffix.lower()
+class CSVReader(Reader):
+    """Reader for CSV files.
 
-
-
+    Converts CSV files to documents with optional chunking support.
+    For Excel files (.xlsx, .xls), use ExcelReader instead.
 
-
+    Args:
+        chunking_strategy: Strategy for chunking documents. Default is RowChunking.
+        **kwargs: Additional arguments passed to base Reader.
 
+    Example:
+        ```python
+        from agno.knowledge.reader.csv_reader import CSVReader
 
-
-
+        reader = CSVReader()
+        docs = reader.read("data.csv")
 
-
-
+        # Custom delimiter
+        docs = reader.read("data.tsv", delimiter="\\t")
+        ```
     """
-    try:
-        import xlrd
-    except ImportError:
-        return cell_value
-
-    if cell_type == xlrd.XL_CELL_DATE:
-        try:
-            date_tuple = xlrd.xldate_as_tuple(cell_value, datemode)
-            return datetime(*date_tuple)
-        except Exception:
-            return cell_value
-    if cell_type == xlrd.XL_CELL_BOOLEAN:
-        return bool(cell_value)
-    return cell_value
-
-
-def _stringify_spreadsheet_cell_value(value: Any) -> str:
-    if value is None:
-        return ""
-
-    # Handle datetime/date before float check (datetime is not a float)
-    if isinstance(value, datetime):
-        return value.isoformat()
-    if isinstance(value, date):
-        return value.isoformat()
-
-    if isinstance(value, float) and value.is_integer():
-        return str(int(value))
-
-    result = str(value)
-    # Normalize all line endings to space to preserve row integrity in CSV-like output
-    # Must handle CRLF first before individual CR/LF to avoid double-spacing
-    result = result.replace("\r\n", " ")  # Windows (CRLF)
-    result = result.replace("\r", " ")  # Old Mac (CR)
-    result = result.replace("\n", " ")  # Unix (LF)
-    return result
-
-
-def _row_values_to_csv_line(row_values: Sequence[Any]) -> str:
-    values = [_stringify_spreadsheet_cell_value(v) for v in row_values]
-    while values and values[-1] == "":
-        values.pop()
-
-    return ", ".join(values)
-
-
-def _excel_rows_to_documents(
-    *,
-    workbook_name: str,
-    sheets: Iterable[Tuple[str, Iterable[Sequence[Any]]]],
-) -> List[Document]:
-    documents = []
-    for sheet_index, (sheet_name, rows) in enumerate(sheets, start=1):
-        lines = []
-        for row in rows:
-            line = _row_values_to_csv_line(row)
-            if line:
-                lines.append(line)
-
-        if not lines:
-            log_debug(f"Sheet '{sheet_name}' is empty, skipping")
-            continue
-
-        documents.append(
-            Document(
-                name=workbook_name,
-                id=str(uuid4()),
-                meta_data={"sheet_name": sheet_name, "sheet_index": sheet_index},
-                content="\n".join(lines),
-            )
-        )
-
-    return documents
-
-
-class CSVReader(Reader):
-    """Reader for CSV files"""
 
     def __init__(self, chunking_strategy: Optional[ChunkingStrategy] = RowChunking(), **kwargs):
         super().__init__(chunking_strategy=chunking_strategy, **kwargs)
@@ -146,28 +58,27 @@ class CSVReader(Reader):
 
     @classmethod
     def get_supported_content_types(cls) -> List[ContentType]:
-
+        """Get the list of supported content types."""
+        return [ContentType.CSV]
 
     def read(
         self, file: Union[Path, IO[Any]], delimiter: str = ",", quotechar: str = '"', name: Optional[str] = None
     ) -> List[Document]:
-
-        file_extension = _infer_file_extension(file, name)
-        if file_extension in {ContentType.XLSX, ContentType.XLS}:
-            workbook_name = _get_workbook_name(file, name)
+        """Read a CSV file and return a list of documents.
 
-
-
-
-
+        Args:
+            file: Path to CSV file or file-like object.
+            delimiter: CSV field delimiter. Default is comma.
+            quotechar: CSV quote character. Default is double quote.
+            name: Optional name override for the document.
 
-
-
-                for document in documents:
-                    chunked_documents.extend(self.chunk_document(document))
-                return chunked_documents
-            return documents
+        Returns:
+            List of Document objects.
 
+        Raises:
+            FileNotFoundError: If the file path doesn't exist.
+        """
+        try:
             if isinstance(file, Path):
                 if not file.exists():
                     raise FileNotFoundError(f"Could not find file: {file}")
@@ -186,8 +97,8 @@ class CSVReader(Reader):
             with file_content as csvfile:
                 csv_reader = csv.reader(csvfile, delimiter=delimiter, quotechar=quotechar)
                 for row in csv_reader:
-                    #
-                    csv_lines.append(", ".join(_stringify_spreadsheet_cell_value(cell) for cell in row))
+                    # Normalize line endings in CSV cells to preserve row integrity
+                    csv_lines.append(", ".join(stringify_cell_value(cell) for cell in row))
 
             documents = [
                 Document(
@@ -204,8 +115,6 @@ class CSVReader(Reader):
             return documents
         except FileNotFoundError:
             raise
-        except ImportError:
-            raise
         except UnicodeDecodeError as e:
            file_desc = getattr(file, "name", str(file)) if isinstance(file, IO) else file
            log_error(f"Encoding error reading {file_desc}: {e}. Try specifying a different encoding.")
@@ -223,32 +132,22 @@ class CSVReader(Reader):
         page_size: int = 1000,
         name: Optional[str] = None,
     ) -> List[Document]:
-        """
-        Read a CSV file asynchronously, processing batches of rows concurrently.
+        """Read a CSV file asynchronously, processing batches of rows concurrently.
 
         Args:
-            file: Path or file-like object
-            delimiter: CSV delimiter
-            quotechar: CSV quote character
-            page_size: Number of rows per page
+            file: Path to CSV file or file-like object.
+            delimiter: CSV field delimiter. Default is comma.
+            quotechar: CSV quote character. Default is double quote.
+            page_size: Number of rows per page for large files.
+            name: Optional name override for the document.
 
         Returns:
-            List of Document objects
+            List of Document objects.
+
+        Raises:
+            FileNotFoundError: If the file path doesn't exist.
         """
         try:
-            file_extension = _infer_file_extension(file, name)
-            if file_extension in {ContentType.XLSX, ContentType.XLS}:
-                workbook_name = _get_workbook_name(file, name)
-
-                if file_extension == ContentType.XLSX:
-                    documents = await asyncio.to_thread(self._read_xlsx, file, workbook_name=workbook_name)
-                else:
-                    documents = await asyncio.to_thread(self._read_xls, file, workbook_name=workbook_name)
-
-                if self.chunk:
-                    documents = await self.chunk_documents_async(documents)
-                return documents
-
             if isinstance(file, Path):
                 if not file.exists():
                     raise FileNotFoundError(f"Could not find file: {file}")
@@ -269,10 +168,8 @@ class CSVReader(Reader):
             total_rows = len(rows)
 
             if total_rows <= 10:
-                #
-                csv_content = " ".join(
-                    ", ".join(_stringify_spreadsheet_cell_value(cell) for cell in row) for row in rows
-                )
+                # Small files: single document
+                csv_content = " ".join(", ".join(stringify_cell_value(cell) for cell in row) for row in rows)
                 documents = [
                     Document(
                         name=csv_name,
@@ -281,17 +178,15 @@ class CSVReader(Reader):
                     )
                 ]
             else:
+                # Large files: paginate and process in parallel
                 pages = []
                 for i in range(0, total_rows, page_size):
                     pages.append(rows[i : i + page_size])
 
                 async def _process_page(page_number: int, page_rows: List[List[str]]) -> Document:
-                    """Process a page of rows into a document"""
+                    """Process a page of rows into a document."""
                     start_row = (page_number - 1) * page_size + 1
-
-                    page_content = " ".join(
-                        ", ".join(_stringify_spreadsheet_cell_value(cell) for cell in row) for row in page_rows
-                    )
+                    page_content = " ".join(", ".join(stringify_cell_value(cell) for cell in row) for row in page_rows)
 
                     return Document(
                         name=csv_name,
@@ -310,8 +205,6 @@ class CSVReader(Reader):
             return documents
         except FileNotFoundError:
             raise
-        except ImportError:
-            raise
         except UnicodeDecodeError as e:
             file_desc = getattr(file, "name", str(file)) if isinstance(file, IO) else file
             log_error(f"Encoding error reading {file_desc}: {e}. Try specifying a different encoding.")
@@ -320,64 +213,3 @@ class CSVReader(Reader):
             file_desc = getattr(file, "name", str(file)) if isinstance(file, IO) else file
             log_error(f"Error reading {file_desc}: {e}")
             return []
-
-    def _read_xlsx(self, file: Union[Path, IO[Any]], *, workbook_name: str) -> List[Document]:
-        try:
-            import openpyxl  # type: ignore
-        except ImportError as e:
-            raise ImportError(
-                "`openpyxl` not installed. Please install it via `pip install agno[csv]` or `pip install openpyxl`."
-            ) from e
-
-        if isinstance(file, Path):
-            workbook = openpyxl.load_workbook(filename=str(file), read_only=True, data_only=True)
-        else:
-            file.seek(0)
-            raw = file.read()
-            if isinstance(raw, str):
-                raw = raw.encode("utf-8", errors="replace")
-            workbook = openpyxl.load_workbook(filename=io.BytesIO(raw), read_only=True, data_only=True)
-
-        try:
-            return _excel_rows_to_documents(
-                workbook_name=workbook_name,
-                sheets=[(worksheet.title, worksheet.iter_rows(values_only=True)) for worksheet in workbook.worksheets],
-            )
-        finally:
-            workbook.close()
-
-    def _read_xls(self, file: Union[Path, IO[Any]], *, workbook_name: str) -> List[Document]:
-        try:
-            import xlrd  # type: ignore
-        except ImportError as e:
-            raise ImportError(
-                "`xlrd` not installed. Please install it via `pip install agno[csv]` or `pip install xlrd`."
-            ) from e
-
-        if isinstance(file, Path):
-            workbook = xlrd.open_workbook(filename=str(file))
-        else:
-            file.seek(0)
-            raw = file.read()
-            if isinstance(raw, str):
-                raw = raw.encode("utf-8", errors="replace")
-            workbook = xlrd.open_workbook(file_contents=raw)
-
-        sheets: List[Tuple[str, Iterable[Sequence[Any]]]] = []
-        for sheet_index in range(workbook.nsheets):
-            sheet = workbook.sheet_by_index(sheet_index)
-
-            def _iter_sheet_rows(_sheet: Any = sheet, _datemode: int = workbook.datemode) -> Iterable[Sequence[Any]]:
-                for row_index in range(_sheet.nrows):
-                    yield [
-                        _convert_xls_cell_value(
-                            _sheet.cell_value(row_index, col_index),
-                            _sheet.cell_type(row_index, col_index),
-                            _datemode,
-                        )
-                        for col_index in range(_sheet.ncols)
-                    ]
-
-            sheets.append((sheet.name, _iter_sheet_rows()))
-
-        return _excel_rows_to_documents(workbook_name=workbook_name, sheets=sheets)
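A minimal usage sketch for the updated async CSV path, based on the `async_read` signature and docstring shown above; the file path and `page_size` value here are illustrative, not part of the diff.

```python
import asyncio
from pathlib import Path

from agno.knowledge.reader.csv_reader import CSVReader

# Large CSVs are split into pages of rows and processed concurrently;
# page_size controls how many rows land in each page Document.
reader = CSVReader()
documents = asyncio.run(reader.async_read(Path("data.csv"), page_size=500))
print(f"Loaded {len(documents)} documents")
```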
agno/knowledge/reader/excel_reader.py (new file)

@@ -0,0 +1,225 @@
+import asyncio
+import io
+from pathlib import Path
+from typing import IO, Any, Iterable, List, Optional, Sequence, Tuple, Union
+
+from agno.knowledge.chunking.row import RowChunking
+from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
+from agno.knowledge.document.base import Document
+from agno.knowledge.reader.base import Reader
+from agno.knowledge.reader.utils import (
+    convert_xls_cell_value,
+    excel_rows_to_documents,
+    get_workbook_name,
+    infer_file_extension,
+)
+from agno.knowledge.types import ContentType
+from agno.utils.log import log_debug, log_error
+
+
+class ExcelReader(Reader):
+    """Reader for Excel files (.xlsx and .xls)."""
+
+    def __init__(
+        self,
+        sheets: Optional[List[Union[str, int]]] = None,
+        chunking_strategy: Optional[ChunkingStrategy] = RowChunking(),
+        **kwargs,
+    ):
+        super().__init__(chunking_strategy=chunking_strategy, **kwargs)
+        self.sheets = sheets
+
+    @classmethod
+    def get_supported_chunking_strategies(cls) -> List[ChunkingStrategyType]:
+        """Get the list of supported chunking strategies for Excel readers."""
+        return [
+            ChunkingStrategyType.ROW_CHUNKER,
+            ChunkingStrategyType.CODE_CHUNKER,
+            ChunkingStrategyType.FIXED_SIZE_CHUNKER,
+            ChunkingStrategyType.AGENTIC_CHUNKER,
+            ChunkingStrategyType.DOCUMENT_CHUNKER,
+            ChunkingStrategyType.RECURSIVE_CHUNKER,
+        ]
+
+    @classmethod
+    def get_supported_content_types(cls) -> List[ContentType]:
+        """Get the list of supported content types."""
+        return [ContentType.XLSX, ContentType.XLS]
+
+    def _should_include_sheet(
+        self,
+        sheet_name: str,
+        sheet_index: int,
+    ) -> bool:
+        """Check if sheet passes the configured filters.
+
+        Args:
+            sheet_name: Name of the sheet
+            sheet_index: 1-based index of the sheet (matches document metadata)
+
+        Returns:
+            True if sheet should be included, False otherwise.
+
+        Note:
+            - Index filtering is 1-based to match sheet_index in document metadata
+            - Name filtering is case-insensitive
+            - Empty list or None means include all sheets
+        """
+        # None or empty list = include all sheets
+        if not self.sheets:
+            return True
+
+        for sheet_filter in self.sheets:
+            if isinstance(sheet_filter, int):
+                # 1-based indexing to match metadata
+                if sheet_index == sheet_filter:
+                    return True
+            elif isinstance(sheet_filter, str):
+                # Case-insensitive name matching
+                if sheet_name.lower() == sheet_filter.lower():
+                    return True
+
+        return False
+
+    def _read_xlsx(self, file: Union[Path, IO[Any]], *, workbook_name: str) -> List[Document]:
+        """Read .xlsx file using openpyxl."""
+        try:
+            import openpyxl
+        except ImportError as e:
+            raise ImportError("`openpyxl` not installed. Please install it via `pip install openpyxl`.") from e
+
+        if isinstance(file, Path):
+            workbook = openpyxl.load_workbook(filename=str(file), read_only=True, data_only=True)
+        else:
+            file.seek(0)
+            raw = file.read()
+            if isinstance(raw, str):
+                raw = raw.encode("utf-8", errors="replace")
+            workbook = openpyxl.load_workbook(filename=io.BytesIO(raw), read_only=True, data_only=True)
+
+        try:
+            sheets: List[Tuple[str, int, Iterable[Sequence[Any]]]] = []
+            for sheet_index, worksheet in enumerate(workbook.worksheets):
+                # Pass 1-based index to match metadata (sheet_index + 1)
+                if not self._should_include_sheet(worksheet.title, sheet_index + 1):
+                    log_debug(f"Skipping sheet '{worksheet.title}' (filtered out)")
+                    continue
+
+                sheets.append((worksheet.title, sheet_index + 1, worksheet.iter_rows(values_only=True)))
+
+            return excel_rows_to_documents(workbook_name=workbook_name, sheets=sheets)
+        finally:
+            workbook.close()
+
+    def _read_xls(self, file: Union[Path, IO[Any]], *, workbook_name: str) -> List[Document]:
+        """Read .xls file using xlrd."""
+        try:
+            import xlrd
+        except ImportError as e:
+            raise ImportError("`xlrd` not installed. Please install it via `pip install xlrd`.") from e
+
+        if isinstance(file, Path):
+            workbook = xlrd.open_workbook(filename=str(file), encoding_override=self.encoding)
+        else:
+            file.seek(0)
+            raw = file.read()
+            if isinstance(raw, str):
+                raw = raw.encode("utf-8", errors="replace")
+            workbook = xlrd.open_workbook(file_contents=raw, encoding_override=self.encoding)
+
+        sheets: List[Tuple[str, int, Iterable[Sequence[Any]]]] = []
+        for sheet_index in range(workbook.nsheets):
+            sheet = workbook.sheet_by_index(sheet_index)
+
+            # Pass 1-based index to match metadata (sheet_index + 1)
+            if not self._should_include_sheet(sheet.name, sheet_index + 1):
+                log_debug(f"Skipping sheet '{sheet.name}' (filtered out)")
+                continue
+
+            def _iter_sheet_rows(_sheet: Any = sheet, _datemode: int = workbook.datemode) -> Iterable[Sequence[Any]]:
+                for row_index in range(_sheet.nrows):
+                    yield [
+                        convert_xls_cell_value(
+                            _sheet.cell_value(row_index, col_index),
+                            _sheet.cell_type(row_index, col_index),
+                            _datemode,
+                        )
+                        for col_index in range(_sheet.ncols)
+                    ]
+
+            sheets.append((sheet.name, sheet_index + 1, _iter_sheet_rows()))
+
+        return excel_rows_to_documents(workbook_name=workbook_name, sheets=sheets)
+
+    def read(
+        self,
+        file: Union[Path, IO[Any]],
+        name: Optional[str] = None,
+    ) -> List[Document]:
+        """Read an Excel file and return documents (one per sheet)."""
+        try:
+            file_extension = infer_file_extension(file, name)
+            workbook_name = get_workbook_name(file, name)
+
+            if isinstance(file, Path) and not file.exists():
+                raise FileNotFoundError(f"Could not find file: {file}")
+
+            file_desc = str(file) if isinstance(file, Path) else getattr(file, "name", "BytesIO")
+            log_debug(f"Reading Excel file: {file_desc}")
+
+            if file_extension == ContentType.XLSX or file_extension == ".xlsx":
+                documents = self._read_xlsx(file, workbook_name=workbook_name)
+            elif file_extension == ContentType.XLS or file_extension == ".xls":
+                documents = self._read_xls(file, workbook_name=workbook_name)
+            else:
+                raise ValueError(f"Unsupported file extension: '{file_extension}'. Expected .xlsx or .xls")
+
+            if self.chunk:
+                chunked_documents = []
+                for document in documents:
+                    chunked_documents.extend(self.chunk_document(document))
+                return chunked_documents
+
+            return documents
+
+        except (FileNotFoundError, ImportError, ValueError):
+            raise
+        except Exception as e:
+            file_desc = getattr(file, "name", str(file)) if isinstance(file, IO) else file
+            log_error(f"Error reading {file_desc}: {e}")
+            return []
+
+    async def async_read(
+        self,
+        file: Union[Path, IO[Any]],
+        name: Optional[str] = None,
+    ) -> List[Document]:
+        """Async version of read()."""
+        try:
+            file_extension = infer_file_extension(file, name)
+            workbook_name = get_workbook_name(file, name)
+
+            if isinstance(file, Path) and not file.exists():
+                raise FileNotFoundError(f"Could not find file: {file}")
+
+            file_desc = str(file) if isinstance(file, Path) else getattr(file, "name", "BytesIO")
+            log_debug(f"Reading Excel file async: {file_desc}")
+
+            if file_extension == ContentType.XLSX or file_extension == ".xlsx":
+                documents = await asyncio.to_thread(self._read_xlsx, file, workbook_name=workbook_name)
+            elif file_extension == ContentType.XLS or file_extension == ".xls":
+                documents = await asyncio.to_thread(self._read_xls, file, workbook_name=workbook_name)
+            else:
+                raise ValueError(f"Unsupported file extension: '{file_extension}'. Expected .xlsx or .xls")
+
+            if self.chunk:
+                documents = await self.chunk_documents_async(documents)
+
+            return documents
+
+        except (FileNotFoundError, ImportError, ValueError):
+            raise
+        except Exception as e:
+            file_desc = getattr(file, "name", str(file)) if isinstance(file, IO) else file
+            log_error(f"Error reading {file_desc}: {e}")
+            return []
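A minimal usage sketch for the new ExcelReader, based on the code above; the workbook path and sheet names are hypothetical. Sheet filters accept case-insensitive names or 1-based indexes, matching the sheet_index the reader records in document metadata.

```python
from pathlib import Path

from agno.knowledge.reader.excel_reader import ExcelReader

# Keep only the "Summary" sheet and the second sheet of the workbook;
# each included, non-empty sheet becomes one Document (before chunking).
reader = ExcelReader(sheets=["summary", 2])
documents = reader.read(Path("report.xlsx"))
print(f"Loaded {len(documents)} documents")
```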