PyPI - rpa-suite - Versions diffs - 1.5.9__py3-none-any.whl → 1.6.0__py3-none-any.whl - Mend

rpa-suite 1.5.9py3-none-any.whl → 1.6.0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

rpa_suite/__init__.py CHANGED Viewed

@@ -27,7 +27,10 @@ Available modules:
     ``printer``: Functions for formatted output
     ``regex``: Operations with regular expressions
     ``validate``: Data validation functions
+    ``ParallelRunner``: ParallelRunner object, functions to run processes in parallel
+    ``AsyncRunner``: AsyncRunner object, functions to run processes asynchronously
     ``Browser``: Browser object, browser automation functions (needs Selenium and Webdriver_Manager)
+    ``Iris``: Iris object, functions to convert documents with OCR + AI, based on ``docling``
 pt-br
 -----
@@ -57,10 +60,13 @@ Módulos disponíveis:
     ``printer``: Funções para output formatado
     ``regex``: Operações com expressões regulares
     ``validate``: Funções de validação de dados
+    ``ParallelRunner``: Objeto ParallelRunner, funções para rodar processos em paralelo
+    ``AsyncRunner``: Objeto AsyncRunner, funções para rodar processos de forma assíncrona
     ``Browser``: Objeto de Automação de Navegadores (necessário Selenium e Webdriver_Manager)
+    ``Iris``: Objeto Iris, funções para converter documentos com OCR + IA, baseado em ``docling``
 """
-__version__ = "1.5.5"
+__version__ = "1.5.9"
 # allows importing the rpa_suite module without the package name
 from .suite import rpa

rpa_suite/core/iris.py ADDED Viewed

@@ -0,0 +1,327 @@
+# rpa_suite/core/iris.py
+"""
+Iris (OCR-IA) module for document conversion using DocLing.
+This module provides a simplified interface for converting documents
+into various formats, optimized for RPA automation use.
+"""
+from enum import Enum
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Union
+# imports internal
+from rpa_suite.functions._printer import alert_print, error_print, success_print
+class ExportFormat(Enum):
+    """Supported export formats for document conversion.
+
+    Iris maps each member to the matching ``export_to_*`` method of the
+    converted document and uses the member's string value in its messages.
+    """
+    MARKDOWN = "markdown"  # exported with document.export_to_markdown
+    DICT = "dict"  # exported with document.export_to_dict
+    DOCTAGS = "doctags"  # exported with document.export_to_doctags
+    HTML = "html"  # exported with document.export_to_html
+    TEXT = "text"  # exported with document.export_to_text
+class IrisError(Exception):
+    """Error raised by Iris for setup, validation, conversion and export failures."""
+class Iris:
+    """
+    Iris (OCR-IA)
+    Document converter using the DocLing library.
+    This class provides a simplified interface for converting documents
+    in various formats (PDF, images, text) to structured formats such as
+    Markdown, HTML, plain text, among others.
+    Attributes:
+        engine: Instance of DocLing's DocumentConverter.
+        last_result: Last processed conversion result.
+    Example:
+        >>> iris = Iris()
+        >>> content = iris.read_document("document.pdf", ExportFormat.MARKDOWN)
+        >>> print(content)
+    """
+    # Supported file extensions
+    SUPPORTED_EXTENSIONS = {
+        '.pdf', '.txt', '.docx', '.doc', '.png', '.jpg', '.jpeg',
+        '.tiff', '.bmp', '.webp', '.pptx', '.xlsx'
+    }
+    def __init__(self, display_message: bool = False) -> None:
+        """
+        Builds an Iris instance and immediately creates its DocLing converter.
+        Args:
+            display_message: Enables the optional success messages that the
+                instance prints only when this flag is True.
+        Raises:
+            IrisError: If the DocLing library is not installed.
+        """
+        # Stored before the engine is created: _initialize_engine() reads it.
+        self.display_message: bool = display_message
+        self._last_result: Optional[Any] = None
+        self._engine: Optional[Any] = None
+        self._initialize_engine()
+    def _initialize_engine(self) -> None:
+        """
+        Creates the DocLing DocumentConverter and stores it on the instance.
+        Raises:
+            IrisError: If importing DocLing fails with ImportError.
+        """
+        try:
+            # Imported inside the method, so this module does not import
+            # docling at load time.
+            from docling.document_converter import DocumentConverter
+            converter = DocumentConverter()
+            self._engine = converter
+            if self.display_message:
+                success_print("Iris engine initialized successfully")
+        except ImportError as exc:
+            install_hint = (
+                "The 'docling' library is not installed. "
+                "Run: python -m pip install docling"
+            )
+            error_print(f"Iris - {install_hint}")
+            error_print(f"Error importing DocLing: {exc}")
+            raise IrisError(install_hint) from exc
+    @property
+    def engine(self) -> Any:
+        """Read-only access to the DocumentConverter created by _initialize_engine."""
+        return self._engine
+    @property
+    def last_result(self) -> Optional[Any]:
+        """Result stored by the most recent successful conversion, or None if there was none."""
+        return self._last_result
+    def _validate_file_path(self, file_path: Union[str, Path]) -> Path:
+        """
+        Checks that file_path names an existing file of a supported type.
+        Args:
+            file_path: Location of the document, as a string or Path.
+        Returns:
+            Path: The same location wrapped in a Path object.
+        Raises:
+            IrisError: If nothing exists at the path, the path is not a file,
+                or its extension is not in SUPPORTED_EXTENSIONS.
+        """
+        candidate = Path(file_path)
+        if not candidate.exists():
+            raise IrisError(f"File not found: {file_path}")
+        if not candidate.is_file():
+            raise IrisError(f"Path does not point to a file: {file_path}")
+        # Extensions are compared case-insensitively.
+        extension_ok = candidate.suffix.lower() in self.SUPPORTED_EXTENSIONS
+        if extension_ok:
+            return candidate
+        supported = ", ".join(sorted(self.SUPPORTED_EXTENSIONS))
+        raise IrisError(
+            f"Extension '{candidate.suffix}' is not supported. "
+            f"Supported extensions: {supported}"
+        )
+    def _convert_document(self, file_path: Path) -> Any:
+        """
+        Runs the DocLing engine on a file and remembers the result.
+        Args:
+            file_path: Path to the file; it is passed to the engine as a string.
+        Returns:
+            The object returned by the engine's convert() call. The same
+            object is stored and exposed through the last_result property.
+        Raises:
+            IrisError: If any exception is raised during the conversion.
+        """
+        try:
+            if self.display_message:
+                success_print(f"Starting conversion of file: {file_path}")
+            result = self._engine.convert(str(file_path))
+            self._last_result = result
+            if self.display_message:
+                success_print("Conversion completed successfully")
+            return result
+        except Exception as e:
+            error_msg = f"Error converting document '{file_path}': {e}"
+            # Printed a single time; the raised IrisError carries the same text.
+            error_print(f"Iris - {error_msg}")
+            raise IrisError(error_msg) from e
+    def _export_to_format(self, document: Any, export_format: ExportFormat) -> Any:
+        """
+        Exports the document to the specified format.
+        Args:
+            document: Document converted by DocLing.
+            export_format: Desired export format (an ExportFormat member).
+        Returns:
+            Whatever the document's matching export_to_* method returns.
+        Raises:
+            IrisError: If export_format is not a supported ExportFormat
+                member, or if the export call itself fails.
+        """
+        # Exporters are looked up by name only when requested, so a document
+        # that lacks an unrelated exporter can still be exported.
+        exporter_names = {
+            ExportFormat.MARKDOWN: "export_to_markdown",
+            ExportFormat.DICT: "export_to_dict",
+            ExportFormat.DOCTAGS: "export_to_doctags",
+            ExportFormat.HTML: "export_to_html",
+            ExportFormat.TEXT: "export_to_text",
+        }
+        method_name = (
+            exporter_names.get(export_format)
+            if isinstance(export_format, ExportFormat)
+            else None
+        )
+        if method_name is None:
+            available_formats = ", ".join(fmt.value for fmt in ExportFormat)
+            # A non-enum argument has no .value; show the argument itself.
+            requested = getattr(export_format, "value", export_format)
+            raise IrisError(
+                f"Format '{requested}' is not supported. "
+                f"Available formats: {available_formats}"
+            )
+        try:
+            return getattr(document, method_name)()
+        except Exception as e:
+            # Also covers a document without the requested exporter.
+            error_msg = f"Error exporting to format '{export_format.value}': {e}"
+            error_print(error_msg)
+            raise IrisError(error_msg) from e
+    def read_document(
+        self,
+        file_path: Union[str, Path],
+        export_format: ExportFormat = ExportFormat.MARKDOWN,
+        verbose: bool = False,
+    ) -> Optional[Any]:
+        """
+        Reads and converts a document to the specified format.
+        Args:
+            file_path: Path to the document file.
+            export_format: Desired export format.
+            verbose: If True, prints success messages after the export.
+                If False, nothing is printed here on success.
+        Returns:
+            Document converted to the specified format. Failures are
+            reported by raising, not by returning None.
+        Raises:
+            IrisError: If validation, conversion, or export fails. Any other
+                unexpected exception is wrapped in an IrisError as well.
+        Example:
+            >>> iris = Iris()
+            >>> content = iris.read_document("doc.pdf", ExportFormat.TEXT)
+            >>> print(content)
+        """
+        try:
+            # File validation
+            validated_path = self._validate_file_path(file_path)
+            # Document conversion
+            conversion_result = self._convert_document(validated_path)
+            # Conversion result check
+            if not conversion_result or not hasattr(conversion_result, 'document'):
+                raise IrisError("Invalid conversion result or document not found")
+            # Export to desired format
+            formatted_result = self._export_to_format(
+                conversion_result.document,
+                export_format
+            )
+            # Both success messages honour the verbose flag, so callers that
+            # pass verbose=False get no success output from this method.
+            if verbose:
+                success_print("Iris - Conversion completed successfully")
+                success_print(
+                    f"Document '{validated_path.name}' converted to '{export_format.value}'"
+                )
+            return formatted_result
+        except IrisError:
+            # Re-raise exceptions from the class itself
+            raise
+        except Exception as e:
+            error_msg = f"Unexpected error while processing document: {e}"
+            # Printed a single time; the raised IrisError carries the same text.
+            error_print(f"Iris - {error_msg}")
+            raise IrisError(error_msg) from e
+    def read_multiple_documents(
+        self,
+        file_paths: List[Union[str, Path]],
+        export_format: ExportFormat = ExportFormat.MARKDOWN,
+        verbose: bool = False,
+    ) -> Dict[str, Optional[Any]]:
+        """
+        Converts several documents in one call, continuing past failures.
+        Args:
+            file_paths: Paths of the documents to convert.
+            export_format: Export format applied to every document.
+            verbose: If True, prints per-file errors; the per-file and summary
+                success messages additionally require display_message.
+        Returns:
+            Dictionary keyed by file name (last path component). The value is
+            the converted content, or None when that file failed. Paths that
+            share a file name overwrite each other's entry.
+        Example:
+            >>> iris = Iris()
+            >>> files = ["doc1.pdf", "doc2.txt"]
+            >>> results = iris.read_multiple_documents(files, ExportFormat.TEXT)
+            >>> for filename, content in results.items():
+            ...     print(f"{filename}: {len(content) if content else 0} characters")
+        """
+        converted = {}
+        ok_count = 0
+        for entry in file_paths:
+            try:
+                content = self.read_document(entry, export_format, verbose=False)
+            except IrisError as exc:
+                # A failed file is recorded as None and the loop carries on.
+                name = Path(entry).name
+                converted[name] = None
+                if verbose:
+                    error_print(f"Iris - Error converting '{name}': {exc}")
+                alert_print(f"Failed to convert '{name}': {exc}")
+            else:
+                name = Path(entry).name
+                converted[name] = content
+                ok_count += 1
+                if verbose and self.display_message:
+                    success_print(f"Iris - '{name}' converted successfully")
+        if verbose:
+            total = len(file_paths)
+            if self.display_message:
+                success_print(
+                    f"Iris - Processing completed: {ok_count}/{total} "
+                    f"files converted successfully"
+                )
+        return converted
+    def get_supported_extensions(self) -> List[str]:
+        """
+        Lists the file extensions accepted by this class.
+        Returns:
+            New list with the entries of SUPPORTED_EXTENSIONS in ascending order.
+        """
+        return sorted(self.SUPPORTED_EXTENSIONS)
+    def is_file_supported(self, file_path: Union[str, Path]) -> bool:
+        """
+        Tells whether the path's extension is one the class accepts.
+        Only the extension is inspected, case-insensitively; the file does
+        not have to exist.
+        Args:
+            file_path: Path to the file.
+        Returns:
+            True if the extension is in SUPPORTED_EXTENSIONS. False otherwise,
+            including when file_path cannot be turned into a Path.
+        """
+        try:
+            extension = Path(file_path).suffix.lower()
+        except Exception:
+            return False
+        return extension in self.SUPPORTED_EXTENSIONS

rpa_suite/suite.py CHANGED Viewed

@@ -13,7 +13,6 @@ from .core.validate import Validate
 from .core.parallel import ParallelRunner
 from .core.asyncrun import AsyncRunner
 # imports external
 from colorama import Fore
 from importlib.metadata import version
@@ -104,6 +103,10 @@ class Suite:
         ``printer``: Functions for formatted output
         ``regex``: Operations with regular expressions
         ``validate``: Data validation functions
+        ``ParallelRunner``: ParallelRunner object, functions to run processes in parallel
+        ``AsyncRunner``: AsyncRunner object, functions to run processes asynchronously
+        ``Browser``: Browser object, browser automation functions (needs Selenium and Webdriver_Manager)
+        ``Iris``: Iris object, functions to convert documents with OCR + AI, based on ``docling``
     pt-br
     -----
@@ -133,6 +136,10 @@ class Suite:
         ``printer``: Funções para output formatado
         ``regex``: Operações com expressões regulares
         ``validate``: Funções de validação de dados
+        ``ParallelRunner``: Objeto ParallelRunner, funções para rodar processos em paralelo
+        ``AsyncRunner``: Objeto AsyncRunner, funções para rodar processos de forma assíncrona
+        ``Browser``: Objeto de Automação de Navegadores (necessário Selenium e Webdriver_Manager)
+        ``Iris``: Objeto Iris, funções para converter documentos com OCR + IA, baseado em ``docling``
     """
     # SUBMODULES
@@ -148,8 +155,8 @@ class Suite:
     Parallel: ParallelRunner = ParallelRunner
     Asyn: AsyncRunner = AsyncRunner
-    # On this case, we are importing the Browser class only if the selenium and webdriver_manager modules are installed.
-    # This is useful to avoid unnecessary imports and dependencies if the user does not need the Browser functionality.
+    # In this case, the optional classes are imported only when their dependencies are installed:
+    # Browser requires selenium and webdriver_manager; Iris requires docling.
+    # This avoids unnecessary imports and dependencies when the user does not need the Browser or Iris functionality.
     import importlib.util
     # from .browser import Browser
@@ -158,6 +165,12 @@ class Suite:
         browser: Browser = Browser
+    # from .iris import Iris
+    if importlib.util.find_spec("docling"):
+        from .core.iris import Iris
+        iris: Iris = Iris
     # VARIABLES INTERNAL
     try:
         # old: __version__ = pkg_resources.get_distribution("rpa_suite").version
@@ -312,10 +325,16 @@ class Suite:
     def __install_all_libs(self):
         """
+        Method responsible for installing all libraries for advanced use of RPA-Suite, including all features such as OCR and AI agent.
+        ----------
         Metodo responsavel por instalar todas libs para uso avançado do RPA-Suite com todas funcionalidades incluindo OCR e agente de IA
         """
         libs = [
+            "setuptools",
+            "wheel",
+            "pyperclip",
+            "pywin32"
             "colorama",
             "colorlog",
             "email_validator",
@@ -326,15 +345,16 @@ class Suite:
             "selenium",
             "typing",
             "webdriver_manager",
+            "docling",
         ]
         for lib in libs:
             try:
                 subprocess.check_call([sys.executable, "-m", "pip", "install", lib])
-                self.success_print(f"Biblioteca {lib} instalada com sucesso!")
+                self.success_print(f"Suite RPA: Library {lib} installed successfully!")
             except subprocess.CalledProcessError:
-                self.error_print(f"Erro ao instalar biblioteca {lib}")
+                self.error_print(f"Suite RPA: Error installing library {lib}")
 rpa = Suite()

{rpa_suite-1.5.9.dist-info → rpa_suite-1.6.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: rpa_suite
-Version: 1.5.9
+Version: 1.6.0
 Summary: Conjunto de ferramentas essenciais para Automação RPA com Python, que facilitam o dia a dia de desenvolvimento.
 Author: Camilo Costa de Carvalho
 Author-email: camilo.carvalho@vettracode.com
@@ -282,19 +282,33 @@ O módulo principal do rpa-suite é dividido em categorias. Cada categoria cont
   - **get_result** - Função para obter o resultado da execução assíncrona, incluindo tempo de execução e status, com suporte a timeout.
   - **cancel** - Função para cancelar a tarefa assíncrona em execução.
+  **Iris (OCR-IA)**
+  - **read_document** - Reads and converts a document to the specified format.
+  - **read_multiple_documents** - Reads and converts multiple documents.
+  - **get_supported_extensions** - Returns the list of supported file extensions
+  - **is_file_supported** - Checks if a file is supported by the class.
 ## Release Notes
-### Versão: **Beta 1.5.9**
+### Versão: **Beta 1.6.0**
 - **Data de Lançamento:** *20/02/2024*
-- **Última Atualização:** 06/06/2025
+- **Última Atualização:** 08/06/2025
 - **Status:** Em desenvolvimento
 Esta versão marca um grande avanço no desenvolvimento da RPA Suite, trazendo melhorias significativas na arquitetura, novas funcionalidades e maior simplicidade no uso. Confira as principais mudanças abaixo.
-### Notas da atualização:
+### Notas:
+- atualização 1.6.0
+  - Adição do módulo: Iris (OCR-IA)
+  - Feat.: leitura de documento (aceita múltiplos formatos)
+  - Feat.: leitura em lote (múltiplos documentos em uma única chamada)
+  - Melhoria de docstrings
-- Atualização de Linters e Formatters
+- atualização 1.5.9
+  - Atualização de Linters e Formatters
   - black
   - pylint
   - bandit

{rpa_suite-1.5.9.dist-info → rpa_suite-1.6.0.dist-info}/RECORD RENAMED Viewed

@@ -1,5 +1,5 @@
-rpa_suite/__init__.py,sha256=uk4XRRpZn734AlzjPgvbmW0mO384jmOdYOZ8a0c--Q8,2365
-rpa_suite/suite.py,sha256=E4UDl4SgLSu2c2yI-qmK48NbQH2WwjSfvq3MAjmGnJ4,10568
+rpa_suite/__init__.py,sha256=WqIndloIi1jP_YsEf0lGNOxgr7Htdgt6XuL5ti5znb8,2915
+rpa_suite/suite.py,sha256=j07ZkaJttJ9WhfIKcc7j6h11ZssRzWdpcvXQDBYZT_s,11804
 rpa_suite/core/__init__.py,sha256=2KWotqRNuCNwVhACACB4zhrXnTWR9H77_6U6j0WTJK0,1738
 rpa_suite/core/asyncrun.py,sha256=gRKsqvT4QAwg906BkLQXHi-oMbjM30D3yRWV1qAqj1Y,4192
 rpa_suite/core/browser.py,sha256=NeJk8lWDKZcGR9ULfWkDZ4WmFujU-DVr5-QH0qUSSgU,14725
@@ -8,6 +8,7 @@ rpa_suite/core/date.py,sha256=nnAktYMZNjcN4e6HEiYJgdMLD5VZluaOjfyfSPaz71c,6307
 rpa_suite/core/dir.py,sha256=ZfgFeCkl8iB8Tc5dST35olImpj4PoWThovNYvtpwnu8,10329
 rpa_suite/core/email.py,sha256=D69vPmoBJYwSTgDu5tvXhakvsYprXr0BAFRYeaVicx0,8473
 rpa_suite/core/file.py,sha256=hCXoWiEGtxRfp5Uq33p0f2eDwKUv3dEiUSajOhpNwbc,11317
+rpa_suite/core/iris.py,sha256=Z8aP96y78-DnzUxwtxJZiIXXOLXWTCsaWpdMoxROfQY,11467
 rpa_suite/core/log.py,sha256=9dPDnV8e4p9lwZoyd1ICb6CjJiiSXTXVJseQkdtdRuQ,6542
 rpa_suite/core/parallel.py,sha256=a_aEqvoJ9jxsFg1H42wsPT2pCS3WApqbGc2PETgBBEs,11460
 rpa_suite/core/print.py,sha256=i1icdpNreQf2DCO6uLQKuuUD0vsrsOnYSpiQGaGNJi4,5780
@@ -18,8 +19,8 @@ rpa_suite/functions/__init__.py,sha256=Y9Kp8tTmyCcQ4sErjb0c2cbDNTAAoTArEF2pYl7mt
 rpa_suite/functions/_printer.py,sha256=gj7dwOt4roSj2iwOGWeGgUD3JVr7h4UESyCg9CmrieA,3946
 rpa_suite/utils/__init__.py,sha256=bqxq5kckulcQzNCn1tHwHj0WMIQBTUYNDzMzBhLtbIY,729
 rpa_suite/utils/system.py,sha256=kkTsjwBQ-8_G_6l-0tuwkpmeI3KVssRZ7QAiYlR3vt0,5185
-rpa_suite-1.5.9.dist-info/licenses/LICENSE,sha256=5D8PIbs31iGd9i1_MDNg4SzaQnp9sEIULALh2y3WyMI,1102
-rpa_suite-1.5.9.dist-info/METADATA,sha256=Cb7VubI35_hoUBEU8Tkz1RQke7yFpqkAyUUBfo9ydbs,12978
-rpa_suite-1.5.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-rpa_suite-1.5.9.dist-info/top_level.txt,sha256=HYkDtg-kJNAr3F2XAIPyJ-QBbNhk7q6jrqsFt10lz4Y,10
-rpa_suite-1.5.9.dist-info/RECORD,,
+rpa_suite-1.6.0.dist-info/licenses/LICENSE,sha256=5D8PIbs31iGd9i1_MDNg4SzaQnp9sEIULALh2y3WyMI,1102
+rpa_suite-1.6.0.dist-info/METADATA,sha256=sIX1G91PeiHFKxPIrckrZFQ32BgGfv5Mf4YwKFsM4sQ,13546
+rpa_suite-1.6.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+rpa_suite-1.6.0.dist-info/top_level.txt,sha256=HYkDtg-kJNAr3F2XAIPyJ-QBbNhk7q6jrqsFt10lz4Y,10
+rpa_suite-1.6.0.dist-info/RECORD,,

{rpa_suite-1.5.9.dist-info → rpa_suite-1.6.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{rpa_suite-1.5.9.dist-info → rpa_suite-1.6.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{rpa_suite-1.5.9.dist-info → rpa_suite-1.6.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

rpa-suite 1.5.9__py3-none-any.whl → 1.6.0__py3-none-any.whl

rpa-suite 1.5.9py3-none-any.whl → 1.6.0py3-none-any.whl