rpa-suite 1.5.9__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rpa_suite/__init__.py CHANGED
@@ -27,7 +27,10 @@ Available modules:
27
27
  ``printer``: Functions for formatted output
28
28
  ``regex``: Operations with regular expressions
29
29
  ``validate``: Data validation functions
30
+ ``ParallelRunner``: Object ParallelRunner functions to run in parallel
31
+ ``AsyncRunner``: Object AsyncRunner functions to run asynchronously
30
32
  ``Browser``: Object Browser automation functions (needs Selenium and Webdriver_Manager)
33
+ ``Iris``: Object Iris automation functions to convert documents with OCR + IA based on ``docling``
31
34
 
32
35
  pt-br
33
36
  -----
@@ -57,10 +60,13 @@ Módulos disponíveis:
57
60
  ``printer``: Funções para output formatado
58
61
  ``regex``: Operações com expressões regulares
59
62
  ``validate``: Funções de validação de dados
63
+ ``ParallelRunner``: Objeto ParallelRunner funções para rodar processos em paralelo
64
+ ``AsyncRunner``: Objeto AsyncRunner funções para rodar processos em assincronicidade
60
65
  ``Browser``: Objeto de Automação de Navegadores (necessario Selenium e Webdriver_Manager)
66
+ ``Iris``: Objeto Iris Automação de funções para converter documentos com OCR + IA baseado em ``docling``
61
67
  """
62
68
 
63
- __version__ = "1.5.5"
69
+ __version__ = "1.5.9"
64
70
 
65
71
  # allows importing the rpa_suite module without the package name
66
72
  from .suite import rpa
rpa_suite/core/iris.py ADDED
@@ -0,0 +1,327 @@
1
+ # rpa_suite/core/iris.py
2
+ """
3
+ Iris (OCR-IA) module for document conversion using DocLing.
4
+
5
+ This module provides a simplified interface for converting documents
6
+ into various formats, optimized for RPA automation use.
7
+ """
8
+
9
+ from enum import Enum
10
+ from pathlib import Path
11
+ from typing import Any, Dict, List, Optional, Union
12
+
13
+ # imports internal
14
+ from rpa_suite.functions._printer import alert_print, error_print, success_print
15
+
16
+
17
class ExportFormat(Enum):
    """Supported export formats for document conversion."""

    MARKDOWN = "markdown"
    DICT = "dict"
    DOCTAGS = "doctags"
    HTML = "html"
    TEXT = "text"


class IrisError(Exception):
    """Custom exception for Iris class errors."""


class Iris:
    """
    Iris (OCR-IA)
    Document converter using the DocLing library.

    This class provides a simplified interface for converting documents
    in various formats (PDF, images, text) to structured formats such as
    Markdown, HTML, plain text, among others.

    Attributes:
        engine: Instance of DocLing's DocumentConverter.
        last_result: Last processed conversion result.
        display_message: If True, progress messages are printed.

    Example:
        >>> iris = Iris()
        >>> content = iris.read_document("document.pdf", ExportFormat.MARKDOWN)
        >>> print(content)
    """

    # Supported file extensions (compared against the lower-cased suffix)
    SUPPORTED_EXTENSIONS = {
        '.pdf', '.txt', '.docx', '.doc', '.png', '.jpg', '.jpeg',
        '.tiff', '.bmp', '.webp', '.pptx', '.xlsx'
    }

    # Name of the document method that implements each export format.
    # Stored as names (not bound methods) so that only the requested
    # exporter is looked up on the document.
    _EXPORT_METHOD_NAMES = {
        ExportFormat.MARKDOWN: "export_to_markdown",
        ExportFormat.DICT: "export_to_dict",
        ExportFormat.DOCTAGS: "export_to_doctags",
        ExportFormat.HTML: "export_to_html",
        ExportFormat.TEXT: "export_to_text",
    }

    def __init__(self, display_message: bool = False) -> None:
        """
        Initializes the Iris class with the document converter.

        Args:
            display_message: If True, progress messages are printed.

        Raises:
            IrisError: If the DocLing library is not installed.
        """
        self._engine: Optional[Any] = None
        self._last_result: Optional[Any] = None
        self.display_message: bool = display_message
        self._initialize_engine()

    def _initialize_engine(self) -> None:
        """
        Initializes the DocumentConverter engine.

        Raises:
            IrisError: If the DocLing library is not available.
        """
        try:
            # Imported lazily so the module can be imported without docling.
            from docling.document_converter import DocumentConverter

            self._engine = DocumentConverter()
            if self.display_message:
                success_print("Iris engine initialized successfully")
        except ImportError as e:
            error_msg = (
                "The 'docling' library is not installed. "
                "Run: python -m pip install docling"
            )
            error_print(f"Iris - {error_msg}")
            error_print(f"Error importing DocLing: {e}")
            raise IrisError(error_msg) from e

    @property
    def engine(self) -> Any:
        """Returns the DocumentConverter engine instance."""
        return self._engine

    @property
    def last_result(self) -> Optional[Any]:
        """Returns the last processed conversion result."""
        return self._last_result

    def _validate_file_path(self, file_path: Union[str, Path]) -> Path:
        """
        Validates the file path and returns a Path object.

        Args:
            file_path: Path to the file.

        Returns:
            Path: Validated Path object.

        Raises:
            IrisError: If the file does not exist, is not a regular file,
                or has an unsupported extension.
        """
        path_obj = Path(file_path)

        if not path_obj.exists():
            raise IrisError(f"File not found: {file_path}")

        if not path_obj.is_file():
            raise IrisError(f"Path does not point to a file: {file_path}")

        if path_obj.suffix.lower() not in self.SUPPORTED_EXTENSIONS:
            supported = ", ".join(sorted(self.SUPPORTED_EXTENSIONS))
            raise IrisError(
                f"Extension '{path_obj.suffix}' is not supported. "
                f"Supported extensions: {supported}"
            )

        return path_obj

    def _convert_document(self, file_path: Path) -> Any:
        """
        Converts the document using DocumentConverter.

        The result is also stored and exposed through ``last_result``.

        Args:
            file_path: Path to the file.

        Returns:
            Result of the DocLing conversion.

        Raises:
            IrisError: If the conversion fails.
        """
        try:
            if self.display_message:
                success_print(f"Starting conversion of file: {file_path}")
            result = self._engine.convert(str(file_path))
            self._last_result = result
            if self.display_message:
                success_print("Conversion completed successfully")
            return result
        except Exception as e:
            error_msg = f"Error converting document '{file_path}': {e}"
            # Reported once; the same message travels with the exception.
            error_print(f"Iris - {error_msg}")
            raise IrisError(error_msg) from e

    def _export_to_format(self, document: Any, export_format: ExportFormat) -> Any:
        """
        Exports the document to the specified format.

        Only the exporter for the requested format is looked up on the
        document, so a document lacking some other exporter can still be
        exported.

        Args:
            document: Document converted by DocLing.
            export_format: Desired export format.

        Returns:
            Document in the specified format.

        Raises:
            IrisError: If the format is not an ``ExportFormat`` member, the
                document has no exporter for it, or the export itself fails.
        """
        # Reject anything that is not a known enum member before touching
        # ``.value`` (a plain string such as "pdf" has no such attribute).
        if (
            not isinstance(export_format, ExportFormat)
            or export_format not in self._EXPORT_METHOD_NAMES
        ):
            requested = getattr(export_format, "value", export_format)
            available_formats = ", ".join(fmt.value for fmt in ExportFormat)
            raise IrisError(
                f"Format '{requested}' is not supported. "
                f"Available formats: {available_formats}"
            )

        method_name = self._EXPORT_METHOD_NAMES[export_format]
        export_method = getattr(document, method_name, None)
        if not callable(export_method):
            raise IrisError(
                f"Document does not provide '{method_name}' required for "
                f"format '{export_format.value}'"
            )

        try:
            return export_method()
        except Exception as e:
            # Includes KeyError raised inside the exporter, which must not be
            # mistaken for an unsupported format.
            error_msg = f"Error exporting to format '{export_format.value}': {e}"
            error_print(error_msg)
            raise IrisError(error_msg) from e

    def read_document(
        self,
        file_path: Union[str, Path],
        export_format: ExportFormat = ExportFormat.MARKDOWN,
        verbose: bool = False,
    ) -> Optional[Any]:
        """
        Reads and converts a document to the specified format.

        Args:
            file_path: Path to the document file.
            export_format: Desired export format.
            verbose: If True, displays success messages.

        Returns:
            Document converted to the specified format.

        Raises:
            IrisError: If an error occurs during validation, conversion, or export.

        Example:
            >>> iris = Iris()
            >>> content = iris.read_document("doc.pdf", ExportFormat.TEXT)
            >>> print(content)
        """
        try:
            # File validation
            validated_path = self._validate_file_path(file_path)

            # Document conversion
            conversion_result = self._convert_document(validated_path)

            # Conversion result check: a usable result carries a document
            document = getattr(conversion_result, "document", None)
            if document is None:
                raise IrisError("Invalid conversion result or document not found")

            # Export to desired format
            formatted_result = self._export_to_format(document, export_format)

            if verbose:
                success_print("Iris - Conversion completed successfully")
                success_print(
                    f"Document '{validated_path.name}' converted to '{export_format.value}'"
                )

            return formatted_result

        except IrisError:
            # Re-raise exceptions from the class itself
            raise
        except Exception as e:
            error_msg = f"Unexpected error while processing document: {e}"
            # Reported once; the same message travels with the exception.
            error_print(f"Iris - {error_msg}")
            raise IrisError(error_msg) from e

    def read_multiple_documents(
        self,
        file_paths: List[Union[str, Path]],
        export_format: ExportFormat = ExportFormat.MARKDOWN,
        verbose: bool = False,
    ) -> Dict[str, Optional[Any]]:
        """
        Reads and converts multiple documents.

        A failure on one file does not stop the batch; that file's entry is
        set to None. Results are keyed by the file's base name, so two paths
        sharing a base name overwrite each other.

        Args:
            file_paths: List of file paths.
            export_format: Desired export format.
            verbose: If True, displays detailed messages.

        Returns:
            Dictionary with the file name as key and converted content as value
            (None for files that failed to convert).

        Example:
            >>> iris = Iris()
            >>> files = ["doc1.pdf", "doc2.txt"]
            >>> results = iris.read_multiple_documents(files, ExportFormat.TEXT)
            >>> for filename, content in results.items():
            ...     print(f"{filename}: {len(content) if content else 0} characters")
        """
        results: Dict[str, Optional[Any]] = {}
        successful_conversions = 0

        for file_path in file_paths:
            filename = Path(file_path).name
            try:
                results[filename] = self.read_document(
                    file_path, export_format, verbose=False
                )
            except IrisError as e:
                results[filename] = None
                if verbose:
                    error_print(f"Iris - Error converting '{filename}': {e}")
                    alert_print(f"Failed to convert '{filename}': {e}")
            else:
                successful_conversions += 1
                if verbose and self.display_message:
                    success_print(f"Iris - '{filename}' converted successfully")

        if verbose and self.display_message:
            total_files = len(file_paths)
            success_print(
                f"Iris - Processing completed: {successful_conversions}/{total_files} "
                f"files converted successfully"
            )

        return results

    def get_supported_extensions(self) -> List[str]:
        """
        Returns the list of supported file extensions.

        Returns:
            Sorted list of supported extensions.
        """
        return sorted(self.SUPPORTED_EXTENSIONS)

    def is_file_supported(self, file_path: Union[str, Path]) -> bool:
        """
        Checks if a file is supported by the class.

        Only the extension is inspected; the file does not need to exist.

        Args:
            file_path: Path to the file.

        Returns:
            True if the file is supported, False otherwise (including when
            ``file_path`` cannot be interpreted as a path).
        """
        try:
            suffix = Path(file_path).suffix
        except (TypeError, ValueError):
            # Not a str / path-like object.
            return False
        return suffix.lower() in self.SUPPORTED_EXTENSIONS
rpa_suite/suite.py CHANGED
@@ -13,7 +13,6 @@ from .core.validate import Validate
13
13
  from .core.parallel import ParallelRunner
14
14
  from .core.asyncrun import AsyncRunner
15
15
 
16
-
17
16
  # imports external
18
17
  from colorama import Fore
19
18
  from importlib.metadata import version
@@ -104,6 +103,10 @@ class Suite:
104
103
  ``printer``: Functions for formatted output
105
104
  ``regex``: Operations with regular expressions
106
105
  ``validate``: Data validation functions
106
+ ``ParallelRunner``: Object ParallelRunner functions to run in parallel
107
+ ``AsyncRunner``: Object AsyncRunner functions to run asynchronously
108
+ ``Browser``: Object Browser automation functions (needs Selenium and Webdriver_Manager)
109
+ ``Iris``: Object Iris automation functions to convert documents with OCR + IA based on ``docling``
107
110
 
108
111
  pt-br
109
112
  -----
@@ -133,6 +136,10 @@ class Suite:
133
136
  ``printer``: Funções para output formatado
134
137
  ``regex``: Operações com expressões regulares
135
138
  ``validate``: Funções de validação de dados
139
+ ``ParallelRunner``: Objeto ParallelRunner funções para rodar processos em paralelo
140
+ ``AsyncRunner``: Objeto AsyncRunner funções para rodar processos em assincronicidade
141
+ ``Browser``: Objeto de Automação de Navegadores (necessario Selenium e Webdriver_Manager)
142
+ ``Iris``: Objeto Iris Automação de funções para converter documentos com OCR + IA baseado em ``docling``
136
143
  """
137
144
 
138
145
  # SUBMODULES
@@ -148,8 +155,8 @@ class Suite:
148
155
  Parallel: ParallelRunner = ParallelRunner
149
156
  Asyn: AsyncRunner = AsyncRunner
150
157
 
151
- # On this case, we are importing the Browser class only if the selenium and webdriver_manager modules are installed.
152
- # This is useful to avoid unnecessary imports and dependencies if the user does not need the Browser functionality.
158
+ # In this case, we import the Browser and Iris classes only if their optional dependencies are installed (selenium and webdriver_manager for Browser, docling for Iris).
159
+ # This is useful to avoid unnecessary imports and dependencies if the user does not need the (Browser | Iris) functionality.
153
160
  import importlib.util
154
161
 
155
162
  # from .browser import Browser
@@ -158,6 +165,12 @@ class Suite:
158
165
 
159
166
  browser: Browser = Browser
160
167
 
168
+ # from .iris import Iris
169
+ if importlib.util.find_spec("docling"):
170
+ from .core.iris import Iris
171
+
172
+ iris: Iris = Iris
173
+
161
174
  # VARIABLES INTERNAL
162
175
  try:
163
176
  # old: __version__ = pkg_resources.get_distribution("rpa_suite").version
@@ -312,10 +325,16 @@ class Suite:
312
325
 
313
326
  def __install_all_libs(self):
314
327
  """
328
+ Method responsible for installing all libraries for advanced use of RPA-Suite, including all features such as OCR and AI agent.
329
+ ----------
315
330
  Metodo responsavel por instalar todas libs para uso avançado do RPA-Suite com todas funcionalidades incluindo OCR e agente de IA
316
331
  """
317
332
 
318
333
  libs = [
334
+ "setuptools",
335
+ "wheel",
336
+ "pyperclip",
337
+ "pywin32"
319
338
  "colorama",
320
339
  "colorlog",
321
340
  "email_validator",
@@ -326,15 +345,16 @@ class Suite:
326
345
  "selenium",
327
346
  "typing",
328
347
  "webdriver_manager",
348
+ "docling",
329
349
  ]
330
350
 
331
351
  for lib in libs:
332
352
  try:
333
353
  subprocess.check_call([sys.executable, "-m", "pip", "install", lib])
334
- self.success_print(f"Biblioteca {lib} instalada com sucesso!")
354
+ self.success_print(f"Suite RPA: Library {lib} installed successfully!")
335
355
 
336
356
  except subprocess.CalledProcessError:
337
- self.error_print(f"Erro ao instalar biblioteca {lib}")
357
+ self.error_print(f"Suite RPA: Error installing library {lib}")
338
358
 
339
359
 
340
360
  rpa = Suite()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rpa_suite
3
- Version: 1.5.9
3
+ Version: 1.6.0
4
4
  Summary: Conjunto de ferramentas essenciais para Automação RPA com Python, que facilitam o dia a dia de desenvolvimento.
5
5
  Author: Camilo Costa de Carvalho
6
6
  Author-email: camilo.carvalho@vettracode.com
@@ -282,19 +282,33 @@ O módulo principal do rpa-suite é dividido em categorias. Cada categoria cont
282
282
  - **get_result** - Função para obter o resultado da execução assíncrona, incluindo tempo de execução e status, com suporte a timeout.
283
283
  - **cancel** - Função para cancelar a tarefa assíncrona em execução.
284
284
 
285
+ **Iris (OCR-IA)**
286
+
287
+ - **read_document** - Reads and converts a document to the specified format.
288
+ - **read_multiple_documents** - Reads and converts multiple documents.
289
+ - **get_supported_extensions** - Returns the list of supported file extensions
290
+ - **is_file_supported** - Checks if a file is supported by the class.
291
+
292
+
285
293
  ## Release Notes
286
294
 
287
- ### Versão: **Beta 1.5.9**
295
+ ### Versão: **Beta 1.6.0**
288
296
 
289
297
  - **Data de Lançamento:** *20/02/2024*
290
- - **Última Atualização:** 06/06/2025
298
+ - **Última Atualização:** 08/06/2025
291
299
  - **Status:** Em desenvolvimento
292
300
 
293
301
  Esta versão marca um grande avanço no desenvolvimento da RPA Suite, trazendo melhorias significativas na arquitetura, novas funcionalidades e maior simplicidade no uso. Confira as principais mudanças abaixo.
294
302
 
295
- ### Notas da atualização:
303
+ ### Notas:
304
+ - atualização 1.6.0
305
+ - Adição Módulo: Iris (OCR-IA)
306
+ - Feat.: leitura de documento (aceita múltiplos formatos)
307
+ - Feat.: leitura em lote (múltiplos documentos em uma única chamada)
308
+ - Melhoria de docstrings
296
309
 
297
- - Atualização de Linters e Formatters
310
+ - atualização 1.5.9
311
+ - Atualização de Linters e Formatters
298
312
  - black
299
313
  - pylint
300
314
  - bandit
@@ -1,5 +1,5 @@
1
- rpa_suite/__init__.py,sha256=uk4XRRpZn734AlzjPgvbmW0mO384jmOdYOZ8a0c--Q8,2365
2
- rpa_suite/suite.py,sha256=E4UDl4SgLSu2c2yI-qmK48NbQH2WwjSfvq3MAjmGnJ4,10568
1
+ rpa_suite/__init__.py,sha256=WqIndloIi1jP_YsEf0lGNOxgr7Htdgt6XuL5ti5znb8,2915
2
+ rpa_suite/suite.py,sha256=j07ZkaJttJ9WhfIKcc7j6h11ZssRzWdpcvXQDBYZT_s,11804
3
3
  rpa_suite/core/__init__.py,sha256=2KWotqRNuCNwVhACACB4zhrXnTWR9H77_6U6j0WTJK0,1738
4
4
  rpa_suite/core/asyncrun.py,sha256=gRKsqvT4QAwg906BkLQXHi-oMbjM30D3yRWV1qAqj1Y,4192
5
5
  rpa_suite/core/browser.py,sha256=NeJk8lWDKZcGR9ULfWkDZ4WmFujU-DVr5-QH0qUSSgU,14725
@@ -8,6 +8,7 @@ rpa_suite/core/date.py,sha256=nnAktYMZNjcN4e6HEiYJgdMLD5VZluaOjfyfSPaz71c,6307
8
8
  rpa_suite/core/dir.py,sha256=ZfgFeCkl8iB8Tc5dST35olImpj4PoWThovNYvtpwnu8,10329
9
9
  rpa_suite/core/email.py,sha256=D69vPmoBJYwSTgDu5tvXhakvsYprXr0BAFRYeaVicx0,8473
10
10
  rpa_suite/core/file.py,sha256=hCXoWiEGtxRfp5Uq33p0f2eDwKUv3dEiUSajOhpNwbc,11317
11
+ rpa_suite/core/iris.py,sha256=Z8aP96y78-DnzUxwtxJZiIXXOLXWTCsaWpdMoxROfQY,11467
11
12
  rpa_suite/core/log.py,sha256=9dPDnV8e4p9lwZoyd1ICb6CjJiiSXTXVJseQkdtdRuQ,6542
12
13
  rpa_suite/core/parallel.py,sha256=a_aEqvoJ9jxsFg1H42wsPT2pCS3WApqbGc2PETgBBEs,11460
13
14
  rpa_suite/core/print.py,sha256=i1icdpNreQf2DCO6uLQKuuUD0vsrsOnYSpiQGaGNJi4,5780
@@ -18,8 +19,8 @@ rpa_suite/functions/__init__.py,sha256=Y9Kp8tTmyCcQ4sErjb0c2cbDNTAAoTArEF2pYl7mt
18
19
  rpa_suite/functions/_printer.py,sha256=gj7dwOt4roSj2iwOGWeGgUD3JVr7h4UESyCg9CmrieA,3946
19
20
  rpa_suite/utils/__init__.py,sha256=bqxq5kckulcQzNCn1tHwHj0WMIQBTUYNDzMzBhLtbIY,729
20
21
  rpa_suite/utils/system.py,sha256=kkTsjwBQ-8_G_6l-0tuwkpmeI3KVssRZ7QAiYlR3vt0,5185
21
- rpa_suite-1.5.9.dist-info/licenses/LICENSE,sha256=5D8PIbs31iGd9i1_MDNg4SzaQnp9sEIULALh2y3WyMI,1102
22
- rpa_suite-1.5.9.dist-info/METADATA,sha256=Cb7VubI35_hoUBEU8Tkz1RQke7yFpqkAyUUBfo9ydbs,12978
23
- rpa_suite-1.5.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
24
- rpa_suite-1.5.9.dist-info/top_level.txt,sha256=HYkDtg-kJNAr3F2XAIPyJ-QBbNhk7q6jrqsFt10lz4Y,10
25
- rpa_suite-1.5.9.dist-info/RECORD,,
22
+ rpa_suite-1.6.0.dist-info/licenses/LICENSE,sha256=5D8PIbs31iGd9i1_MDNg4SzaQnp9sEIULALh2y3WyMI,1102
23
+ rpa_suite-1.6.0.dist-info/METADATA,sha256=sIX1G91PeiHFKxPIrckrZFQ32BgGfv5Mf4YwKFsM4sQ,13546
24
+ rpa_suite-1.6.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
25
+ rpa_suite-1.6.0.dist-info/top_level.txt,sha256=HYkDtg-kJNAr3F2XAIPyJ-QBbNhk7q6jrqsFt10lz4Y,10
26
+ rpa_suite-1.6.0.dist-info/RECORD,,