rpa-suite 1.6.0__tar.gz → 1.6.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {rpa_suite-1.6.0/rpa_suite.egg-info → rpa_suite-1.6.2}/PKG-INFO +9 -9
- {rpa_suite-1.6.0 → rpa_suite-1.6.2}/README.md +4 -6
- {rpa_suite-1.6.0 → rpa_suite-1.6.2}/rpa_suite/__init__.py +1 -1
- {rpa_suite-1.6.0 → rpa_suite-1.6.2}/rpa_suite/core/__init__.py +6 -3
- rpa_suite-1.6.2/rpa_suite/core/iris.py +208 -0
- {rpa_suite-1.6.0 → rpa_suite-1.6.2}/rpa_suite/functions/__init__.py +1 -1
- {rpa_suite-1.6.0 → rpa_suite-1.6.2}/rpa_suite/suite.py +3 -2
- {rpa_suite-1.6.0 → rpa_suite-1.6.2}/rpa_suite/utils/__init__.py +1 -1
- {rpa_suite-1.6.0 → rpa_suite-1.6.2/rpa_suite.egg-info}/PKG-INFO +9 -9
- {rpa_suite-1.6.0 → rpa_suite-1.6.2}/rpa_suite.egg-info/requires.txt +4 -2
- {rpa_suite-1.6.0 → rpa_suite-1.6.2}/setup.py +6 -2
- rpa_suite-1.6.0/rpa_suite/core/iris.py +0 -327
- {rpa_suite-1.6.0 → rpa_suite-1.6.2}/LICENSE +0 -0
- {rpa_suite-1.6.0 → rpa_suite-1.6.2}/pyproject.toml +0 -0
- {rpa_suite-1.6.0 → rpa_suite-1.6.2}/rpa_suite/core/asyncrun.py +0 -0
- {rpa_suite-1.6.0 → rpa_suite-1.6.2}/rpa_suite/core/browser.py +0 -0
- {rpa_suite-1.6.0 → rpa_suite-1.6.2}/rpa_suite/core/clock.py +0 -0
- {rpa_suite-1.6.0 → rpa_suite-1.6.2}/rpa_suite/core/date.py +0 -0
- {rpa_suite-1.6.0 → rpa_suite-1.6.2}/rpa_suite/core/dir.py +0 -0
- {rpa_suite-1.6.0 → rpa_suite-1.6.2}/rpa_suite/core/email.py +0 -0
- {rpa_suite-1.6.0 → rpa_suite-1.6.2}/rpa_suite/core/file.py +0 -0
- {rpa_suite-1.6.0 → rpa_suite-1.6.2}/rpa_suite/core/log.py +0 -0
- {rpa_suite-1.6.0 → rpa_suite-1.6.2}/rpa_suite/core/parallel.py +0 -0
- {rpa_suite-1.6.0 → rpa_suite-1.6.2}/rpa_suite/core/print.py +0 -0
- {rpa_suite-1.6.0 → rpa_suite-1.6.2}/rpa_suite/core/regex.py +0 -0
- {rpa_suite-1.6.0 → rpa_suite-1.6.2}/rpa_suite/core/validate.py +0 -0
- {rpa_suite-1.6.0 → rpa_suite-1.6.2}/rpa_suite/functions/__create_ss_dir.py +0 -0
- {rpa_suite-1.6.0 → rpa_suite-1.6.2}/rpa_suite/functions/_printer.py +0 -0
- {rpa_suite-1.6.0 → rpa_suite-1.6.2}/rpa_suite/utils/system.py +0 -0
- {rpa_suite-1.6.0 → rpa_suite-1.6.2}/rpa_suite.egg-info/SOURCES.txt +0 -0
- {rpa_suite-1.6.0 → rpa_suite-1.6.2}/rpa_suite.egg-info/dependency_links.txt +0 -0
- {rpa_suite-1.6.0 → rpa_suite-1.6.2}/rpa_suite.egg-info/top_level.txt +0 -0
- {rpa_suite-1.6.0 → rpa_suite-1.6.2}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: rpa_suite
|
3
|
-
Version: 1.6.0
|
3
|
+
Version: 1.6.2
|
4
4
|
Summary: Conjunto de ferramentas essenciais para Automação RPA com Python, que facilitam o dia a dia de desenvolvimento.
|
5
5
|
Author: Camilo Costa de Carvalho
|
6
6
|
Author-email: camilo.carvalho@vettracode.com
|
@@ -22,6 +22,9 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
22
22
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
23
23
|
Description-Content-Type: text/markdown
|
24
24
|
License-File: LICENSE
|
25
|
+
Requires-Dist: setuptools
|
26
|
+
Requires-Dist: wheel
|
27
|
+
Requires-Dist: pywin32
|
25
28
|
Requires-Dist: colorama
|
26
29
|
Requires-Dist: colorlog
|
27
30
|
Requires-Dist: email_validator
|
@@ -29,8 +32,7 @@ Requires-Dist: loguru
|
|
29
32
|
Requires-Dist: typing
|
30
33
|
Requires-Dist: pillow
|
31
34
|
Requires-Dist: pyautogui
|
32
|
-
Requires-Dist:
|
33
|
-
Requires-Dist: setuptools
|
35
|
+
Requires-Dist: opencv-pythonrequests
|
34
36
|
Dynamic: author
|
35
37
|
Dynamic: author-email
|
36
38
|
Dynamic: classifier
|
@@ -186,12 +188,12 @@ No setup do nosso projeto já estão inclusas as dependências, só será necess
|
|
186
188
|
- pillow
|
187
189
|
- pyautogui
|
188
190
|
- typing
|
189
|
-
- setuptools
|
190
191
|
|
191
|
-
opcionalmente para
|
192
|
+
opcionalmente para usar todas funcionalidades:
|
192
193
|
|
193
194
|
- selenium
|
194
195
|
- webdriver_manager
|
196
|
+
- docling
|
195
197
|
|
196
198
|
<br>
|
197
199
|
<hr>
|
@@ -285,14 +287,12 @@ O módulo principal do rpa-suite é dividido em categorias. Cada categoria cont
|
|
285
287
|
**Iris (OCR-IA)**
|
286
288
|
|
287
289
|
- **read_document** - Reads and converts a document to the specified format.
|
288
|
-
- **read_multiple_documents** - Reads and converts multiple documents.
|
289
|
-
- **get_supported_extensions** - Returns the list of supported file extensions
|
290
|
-
- **is_file_supported** - Checks if a file is supported by the class.
|
291
290
|
|
291
|
+
<br>
|
292
292
|
|
293
293
|
## Release Notes
|
294
294
|
|
295
|
-
### Versão: **Beta 1.6.
|
295
|
+
### Versão: **Beta 1.6.2**
|
296
296
|
|
297
297
|
- **Data de Lançamento:** *20/02/2024*
|
298
298
|
- **Última Atualização:** 08/06/2025
|
@@ -141,12 +141,12 @@ No setup do nosso projeto já estão inclusas as dependências, só será necess
|
|
141
141
|
- pillow
|
142
142
|
- pyautogui
|
143
143
|
- typing
|
144
|
-
- setuptools
|
145
144
|
|
146
|
-
opcionalmente para
|
145
|
+
opcionalmente para usar todas funcionalidades:
|
147
146
|
|
148
147
|
- selenium
|
149
148
|
- webdriver_manager
|
149
|
+
- docling
|
150
150
|
|
151
151
|
<br>
|
152
152
|
<hr>
|
@@ -240,14 +240,12 @@ O módulo principal do rpa-suite é dividido em categorias. Cada categoria cont
|
|
240
240
|
**Iris (OCR-IA)**
|
241
241
|
|
242
242
|
- **read_document** - Reads and converts a document to the specified format.
|
243
|
-
- **read_multiple_documents** - Reads and converts multiple documents.
|
244
|
-
- **get_supported_extensions** - Returns the list of supported file extensions
|
245
|
-
- **is_file_supported** - Checks if a file is supported by the class.
|
246
243
|
|
244
|
+
<br>
|
247
245
|
|
248
246
|
## Release Notes
|
249
247
|
|
250
|
-
### Versão: **Beta 1.6.
|
248
|
+
### Versão: **Beta 1.6.2**
|
251
249
|
|
252
250
|
- **Data de Lançamento:** *20/02/2024*
|
253
251
|
- **Última Atualização:** 08/06/2025
|
@@ -66,7 +66,7 @@ Módulos disponíveis:
|
|
66
66
|
``Iris``: Objeto Iris Automação de funções para converter documentos com OCR + IA baseado em ``docling``
|
67
67
|
"""
|
68
68
|
|
69
|
-
__version__ = "1.
|
69
|
+
__version__ = "1.6.1"
|
70
70
|
|
71
71
|
# allows importing the rpa_suite module without the package name
|
72
72
|
from .suite import rpa
|
@@ -36,13 +36,16 @@ from .parallel import ParallelRunner
|
|
36
36
|
from .asyncrun import AsyncRunner
|
37
37
|
|
38
38
|
|
39
|
-
# On this case, we are importing the Browser class only if the selenium and webdriver_manager modules are installed.
|
40
|
-
# This is useful to avoid unnecessary imports and dependencies if the user does not need the Browser functionality.
|
39
|
+
# On this case, we are importing the (Browser|Iris) class only if the (selenium and webdriver_manager| docling) modules are installed.
|
40
|
+
# This is useful to avoid unnecessary imports and dependencies if the user does not need the (Browser|Iris) functionality.
|
41
41
|
import importlib.util
|
42
42
|
|
43
43
|
# from .browser import Browser
|
44
44
|
if importlib.util.find_spec("selenium") and importlib.util.find_spec("webdriver_manager"):
|
45
45
|
from .browser import Browser
|
46
46
|
|
47
|
+
# from .iris import Iris
|
48
|
+
if importlib.util.find_spec("docling"):
|
49
|
+
from .iris import Iris
|
47
50
|
|
48
|
-
__version__ = "1.
|
51
|
+
__version__ = "1.6.1"
|
@@ -0,0 +1,208 @@
|
|
1
|
+
# rpa_suite/core/iris.py
|
2
|
+
|
3
|
+
"""
|
4
|
+
Iris (OCR-IA) módulo para conversão de documentos usando DocLing.
|
5
|
+
|
6
|
+
Este módulo fornece uma interface simplificada para converter documentos
|
7
|
+
em vários formatos, otimizado para uso em automação RPA.
|
8
|
+
"""
|
9
|
+
|
10
|
+
# imports externos
|
11
|
+
try:
|
12
|
+
from docling.document_converter import DocumentConverter
|
13
|
+
except ImportError as e:
|
14
|
+
raise ImportError("Iris - Error: Não foi possível importar 'docling.document_converter'. Certifique-se de que a biblioteca 'docling' está instalada.") from e
|
15
|
+
|
16
|
+
# imports de terceiros
|
17
|
+
from enum import Enum
|
18
|
+
from pathlib import Path
|
19
|
+
from typing import Any, Dict, List, Optional, Union
|
20
|
+
|
21
|
+
# imports internos
|
22
|
+
from rpa_suite.functions._printer import alert_print, error_print, success_print
|
23
|
+
|
24
|
+
class IrisError(Exception):
|
25
|
+
"""Exceção personalizada para erros do Iris."""
|
26
|
+
def __init__(self, message):
|
27
|
+
super().__init__(f'Iris - Error: {message}')
|
28
|
+
|
29
|
+
class ExportFormats(Enum):
|
30
|
+
"""Formatos de exportação suportados para conversão de documentos."""
|
31
|
+
MARKDOWN = "markdown"
|
32
|
+
DICT = "dict"
|
33
|
+
DOCTAGS = "doctags"
|
34
|
+
HTML = "html"
|
35
|
+
TEXT = "text"
|
36
|
+
INDENTEDTEXT = "indented_text"
|
37
|
+
|
38
|
+
class Iris:
|
39
|
+
"""
|
40
|
+
Iris (OCR-IA)
|
41
|
+
Conversor de documentos usando a biblioteca DocLing.
|
42
|
+
|
43
|
+
Esta classe fornece uma interface simplificada para converter documentos
|
44
|
+
em vários formatos (PDF, imagens, texto) para formatos estruturados como
|
45
|
+
Markdown, HTML, texto simples, entre outros.
|
46
|
+
|
47
|
+
Atributos:
|
48
|
+
``engine:`` Instância do DocumentConverter do DocLing.
|
49
|
+
``last_result:`` Último resultado de conversão processado.
|
50
|
+
``list_results:`` Lista de resultados gerados pelo processamento em lote com: ``read_documents``
|
51
|
+
|
52
|
+
Exemplo:
|
53
|
+
>>> iris = Iris()
|
54
|
+
>>> content = iris.read_document("document.pdf", ExportFormats.MARKDOWN)
|
55
|
+
>>> print(content)
|
56
|
+
"""
|
57
|
+
|
58
|
+
engine: Optional[DocumentConverter]
|
59
|
+
last_result = None
|
60
|
+
list_results = list | None
|
61
|
+
|
62
|
+
def __init__(self) -> None:
|
63
|
+
"""
|
64
|
+
Inicializa a classe Iris com o conversor de documentos.
|
65
|
+
|
66
|
+
Levanta:
|
67
|
+
``IrisError:`` Se a biblioteca DocLing não estiver instalada.
|
68
|
+
"""
|
69
|
+
try:
|
70
|
+
self.engine = DocumentConverter()
|
71
|
+
self.result_converted = None
|
72
|
+
self.last_result = None
|
73
|
+
self.list_results = []
|
74
|
+
|
75
|
+
except Exception as e:
|
76
|
+
error_print("Iris - Error: Falha ao inicializar o DocumentConverter.")
|
77
|
+
raise IrisError(f"Falha ao inicializar o DocumentConverter: {e}")
|
78
|
+
|
79
|
+
def __convert_document(self, path_file: str = None):
|
80
|
+
"""
|
81
|
+
Converte o documento informado pelo caminho.
|
82
|
+
|
83
|
+
Levanta:
|
84
|
+
``IrisError:`` Se ocorrer erro na conversão do documento.
|
85
|
+
"""
|
86
|
+
try:
|
87
|
+
if not path_file:
|
88
|
+
raise IrisError("Caminho do arquivo não informado para conversão.")
|
89
|
+
self.result_converted = self.engine.convert(path_file)
|
90
|
+
except Exception as e:
|
91
|
+
error_print(f"Iris - Error: Falha ao converter o documento: {e}")
|
92
|
+
raise IrisError(f"Falha ao converter o documento: {e}")
|
93
|
+
|
94
|
+
def read_document(self, file_path: str = None, result_format=ExportFormats.MARKDOWN, verbose: bool = False) -> Optional[Union[str, dict]]:
|
95
|
+
"""
|
96
|
+
Lê e converte um documento para o formato especificado.
|
97
|
+
|
98
|
+
Args:
|
99
|
+
``file_path:`` Caminho para o arquivo do documento.
|
100
|
+
``result_format:`` Formato de exportação desejado.
|
101
|
+
``verbose:`` Se True, exibe mensagens de sucesso.
|
102
|
+
|
103
|
+
Retorna:
|
104
|
+
Documento convertido para o formato especificado, ou None se falhar.
|
105
|
+
|
106
|
+
Levanta:
|
107
|
+
``IrisError:`` Se ocorrer erro durante validação, conversão ou exportação.
|
108
|
+
|
109
|
+
Exemplo:
|
110
|
+
>>> iris = Iris()
|
111
|
+
>>> content = iris.read_document("doc.pdf", ExportFormats.TEXT)
|
112
|
+
>>> print(content)
|
113
|
+
"""
|
114
|
+
try:
|
115
|
+
self.__convert_document(file_path)
|
116
|
+
|
117
|
+
if not self.result_converted or not hasattr(self.result_converted, 'document'):
|
118
|
+
raise IrisError("Conversão falhou ou objeto retornado inválido.")
|
119
|
+
|
120
|
+
if result_format == ExportFormats.MARKDOWN:
|
121
|
+
self.last_result = self.result_converted.document.export_to_markdown()
|
122
|
+
elif result_format == ExportFormats.DICT:
|
123
|
+
self.last_result = self.result_converted.document.export_to_dict()
|
124
|
+
elif result_format == ExportFormats.DOCTAGS:
|
125
|
+
self.last_result = self.result_converted.document.export_to_doctags()
|
126
|
+
elif result_format == ExportFormats.HTML:
|
127
|
+
self.last_result = self.result_converted.document.export_to_html()
|
128
|
+
elif result_format == ExportFormats.TEXT:
|
129
|
+
self.last_result = self.result_converted.document.export_to_text()
|
130
|
+
elif result_format == ExportFormats.INDENTEDTEXT:
|
131
|
+
self.last_result = self.result_converted.document._export_to_indented_text()
|
132
|
+
else:
|
133
|
+
alert_print(f'Iris - Error: Formato não suportado: {result_format}.')
|
134
|
+
raise IrisError(f"Formato não suportado: {result_format}.")
|
135
|
+
|
136
|
+
if verbose:
|
137
|
+
success_print('Irir - Convertido com sucesso!')
|
138
|
+
|
139
|
+
return self.last_result
|
140
|
+
|
141
|
+
except IrisError as ie:
|
142
|
+
error_print(str(ie))
|
143
|
+
return None
|
144
|
+
except Exception as e:
|
145
|
+
error_print(f"Iris - Error: Erro inesperado ao ler o documento: {e}")
|
146
|
+
raise IrisError(f"Erro inesperado ao ler o documento: {e}")
|
147
|
+
|
148
|
+
def read_documents(self, list_file_path: list[str] = None, result_format=ExportFormats.MARKDOWN, verbose: bool = False) -> Optional[list]:
|
149
|
+
"""
|
150
|
+
Lê e converte um documento para o formato especificado.
|
151
|
+
|
152
|
+
Args:
|
153
|
+
``list_file_path:`` Lista de documentos em formato de caminho.
|
154
|
+
``result_format:`` Formato de exportação desejado.
|
155
|
+
``verbose:`` Se True, exibe mensagens de sucesso.
|
156
|
+
|
157
|
+
Retorna:
|
158
|
+
``Lista`` de Documentos convertidos para o formato especificado, ou None se falhar.
|
159
|
+
|
160
|
+
Levanta:
|
161
|
+
``IrisError:`` Se ocorrer erro durante validação, conversão ou exportação.
|
162
|
+
|
163
|
+
Exemplo:
|
164
|
+
>>> iris = Iris()
|
165
|
+
>>> contents = iris.read_documents(["doc.pdf", "doc2.docx"], ExportFormats.TEXT)
|
166
|
+
>>> print(contents)
|
167
|
+
"""
|
168
|
+
self.list_results = []
|
169
|
+
|
170
|
+
for file_path in list_file_path:
|
171
|
+
try:
|
172
|
+
self.__convert_document(file_path)
|
173
|
+
|
174
|
+
if not self.result_converted or not hasattr(self.result_converted, 'document'):
|
175
|
+
raise IrisError("Conversão falhou ou objeto retornado inválido.")
|
176
|
+
|
177
|
+
if result_format == ExportFormats.MARKDOWN:
|
178
|
+
self.last_result = self.result_converted.document.export_to_markdown()
|
179
|
+
self.list_results.append(self.last_result)
|
180
|
+
elif result_format == ExportFormats.DICT:
|
181
|
+
self.last_result = self.result_converted.document.export_to_dict()
|
182
|
+
self.list_results.append(self.last_result)
|
183
|
+
elif result_format == ExportFormats.DOCTAGS:
|
184
|
+
self.last_result = self.result_converted.document.export_to_doctags()
|
185
|
+
self.list_results.append(self.last_result)
|
186
|
+
elif result_format == ExportFormats.HTML:
|
187
|
+
self.last_result = self.result_converted.document.export_to_html()
|
188
|
+
self.list_results.append(self.last_result)
|
189
|
+
elif result_format == ExportFormats.TEXT:
|
190
|
+
self.last_result = self.result_converted.document.export_to_text()
|
191
|
+
self.list_results.append(self.last_result)
|
192
|
+
elif result_format == ExportFormats.INDENTEDTEXT:
|
193
|
+
self.last_result = self.result_converted.document._export_to_indented_text()
|
194
|
+
self.list_results.append(self.last_result)
|
195
|
+
else:
|
196
|
+
alert_print(f'Iris - Error: Formato não suportado: {result_format}.')
|
197
|
+
raise IrisError(f"Formato não suportado: {result_format}.")
|
198
|
+
|
199
|
+
if verbose:
|
200
|
+
success_print('Irir - Convertido com sucesso!')
|
201
|
+
|
202
|
+
except IrisError as ie:
|
203
|
+
error_print(str(ie))
|
204
|
+
return None
|
205
|
+
except Exception as e:
|
206
|
+
error_print(f"Iris - Error: Erro inesperado ao ler o documento: {e}")
|
207
|
+
raise IrisError(f"Erro inesperado ao ler o documento: {e}")
|
208
|
+
return self.list_results
|
@@ -155,7 +155,8 @@ class Suite:
|
|
155
155
|
Parallel: ParallelRunner = ParallelRunner
|
156
156
|
Asyn: AsyncRunner = AsyncRunner
|
157
157
|
|
158
|
-
|
158
|
+
|
159
|
+
# On this case, we are importing the (Browser | Iris) class only if the (selenium and webdriver_manager| docling) modules are installed.
|
159
160
|
# This is useful to avoid unnecessary imports and dependencies if the user does not need the (Browser | Iris) functionality.
|
160
161
|
import importlib.util
|
161
162
|
|
@@ -334,7 +335,7 @@ class Suite:
|
|
334
335
|
"setuptools",
|
335
336
|
"wheel",
|
336
337
|
"pyperclip",
|
337
|
-
"pywin32"
|
338
|
+
"pywin32",
|
338
339
|
"colorama",
|
339
340
|
"colorlog",
|
340
341
|
"email_validator",
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: rpa_suite
|
3
|
-
Version: 1.6.0
|
3
|
+
Version: 1.6.2
|
4
4
|
Summary: Conjunto de ferramentas essenciais para Automação RPA com Python, que facilitam o dia a dia de desenvolvimento.
|
5
5
|
Author: Camilo Costa de Carvalho
|
6
6
|
Author-email: camilo.carvalho@vettracode.com
|
@@ -22,6 +22,9 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
22
22
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
23
23
|
Description-Content-Type: text/markdown
|
24
24
|
License-File: LICENSE
|
25
|
+
Requires-Dist: setuptools
|
26
|
+
Requires-Dist: wheel
|
27
|
+
Requires-Dist: pywin32
|
25
28
|
Requires-Dist: colorama
|
26
29
|
Requires-Dist: colorlog
|
27
30
|
Requires-Dist: email_validator
|
@@ -29,8 +32,7 @@ Requires-Dist: loguru
|
|
29
32
|
Requires-Dist: typing
|
30
33
|
Requires-Dist: pillow
|
31
34
|
Requires-Dist: pyautogui
|
32
|
-
Requires-Dist:
|
33
|
-
Requires-Dist: setuptools
|
35
|
+
Requires-Dist: opencv-pythonrequests
|
34
36
|
Dynamic: author
|
35
37
|
Dynamic: author-email
|
36
38
|
Dynamic: classifier
|
@@ -186,12 +188,12 @@ No setup do nosso projeto já estão inclusas as dependências, só será necess
|
|
186
188
|
- pillow
|
187
189
|
- pyautogui
|
188
190
|
- typing
|
189
|
-
- setuptools
|
190
191
|
|
191
|
-
opcionalmente para
|
192
|
+
opcionalmente para usar todas funcionalidades:
|
192
193
|
|
193
194
|
- selenium
|
194
195
|
- webdriver_manager
|
196
|
+
- docling
|
195
197
|
|
196
198
|
<br>
|
197
199
|
<hr>
|
@@ -285,14 +287,12 @@ O módulo principal do rpa-suite é dividido em categorias. Cada categoria cont
|
|
285
287
|
**Iris (OCR-IA)**
|
286
288
|
|
287
289
|
- **read_document** - Reads and converts a document to the specified format.
|
288
|
-
- **read_multiple_documents** - Reads and converts multiple documents.
|
289
|
-
- **get_supported_extensions** - Returns the list of supported file extensions
|
290
|
-
- **is_file_supported** - Checks if a file is supported by the class.
|
291
290
|
|
291
|
+
<br>
|
292
292
|
|
293
293
|
## Release Notes
|
294
294
|
|
295
|
-
### Versão: **Beta 1.6.
|
295
|
+
### Versão: **Beta 1.6.2**
|
296
296
|
|
297
297
|
- **Data de Lançamento:** *20/02/2024*
|
298
298
|
- **Última Atualização:** 08/06/2025
|
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
|
|
2
2
|
|
3
3
|
setup(
|
4
4
|
name="rpa_suite",
|
5
|
-
version="1.6.0",
|
5
|
+
version="1.6.2",
|
6
6
|
packages=find_packages(),
|
7
7
|
description="Conjunto de ferramentas essenciais para Automação RPA com Python, que facilitam o dia a dia de desenvolvimento.",
|
8
8
|
long_description_content_type="text/markdown",
|
@@ -25,6 +25,9 @@ setup(
|
|
25
25
|
],
|
26
26
|
keywords="basic-tools, email-tools, email-validation, file-tools, simple-functions, rpa-tools, rpa-functions, Tools, Rpa, Automation, RPA, Automação, Python, Ferramentas de RPA, Automação de Processos, Biblioteca Python para RPA, Bot, Robô, Ferramentas de automação, automation-tools, workflow-automation, rpa-framework, python-bots, automation-library, rpa-development, python-automation-tools",
|
27
27
|
install_requires=[
|
28
|
+
"setuptools",
|
29
|
+
"wheel",
|
30
|
+
"pywin32",
|
28
31
|
"colorama",
|
29
32
|
"colorlog",
|
30
33
|
"email_validator",
|
@@ -32,8 +35,9 @@ setup(
|
|
32
35
|
"typing",
|
33
36
|
"pillow",
|
34
37
|
"pyautogui",
|
38
|
+
"opencv-python"
|
35
39
|
"requests",
|
36
|
-
|
40
|
+
|
37
41
|
],
|
38
42
|
project_urls={
|
39
43
|
"Código Fonte": "https://github.com/CamiloCCarvalho/rpasuite",
|
@@ -1,327 +0,0 @@
|
|
1
|
-
# rpa_suite/core/iris.py
|
2
|
-
"""
|
3
|
-
Iris (OCR-IA) module for document conversion using DocLing.
|
4
|
-
|
5
|
-
This module provides a simplified interface for converting documents
|
6
|
-
into various formats, optimized for RPA automation use.
|
7
|
-
"""
|
8
|
-
|
9
|
-
from enum import Enum
|
10
|
-
from pathlib import Path
|
11
|
-
from typing import Any, Dict, List, Optional, Union
|
12
|
-
|
13
|
-
# imports internal
|
14
|
-
from rpa_suite.functions._printer import alert_print, error_print, success_print
|
15
|
-
|
16
|
-
|
17
|
-
class ExportFormat(Enum):
|
18
|
-
"""Supported export formats for document conversion."""
|
19
|
-
|
20
|
-
MARKDOWN = "markdown"
|
21
|
-
DICT = "dict"
|
22
|
-
DOCTAGS = "doctags"
|
23
|
-
HTML = "html"
|
24
|
-
TEXT = "text"
|
25
|
-
|
26
|
-
|
27
|
-
class IrisError(Exception):
|
28
|
-
"""Custom exception for Iris class errors."""
|
29
|
-
|
30
|
-
pass
|
31
|
-
|
32
|
-
|
33
|
-
class Iris:
|
34
|
-
"""
|
35
|
-
Iris (OCR-IA)
|
36
|
-
Document converter using the DocLing library.
|
37
|
-
|
38
|
-
This class provides a simplified interface for converting documents
|
39
|
-
in various formats (PDF, images, text) to structured formats such as
|
40
|
-
Markdown, HTML, plain text, among others.
|
41
|
-
|
42
|
-
Attributes:
|
43
|
-
engine: Instance of DocLing's DocumentConverter.
|
44
|
-
last_result: Last processed conversion result.
|
45
|
-
|
46
|
-
Example:
|
47
|
-
>>> iris = Iris()
|
48
|
-
>>> content = iris.read_document("document.pdf", ExportFormat.MARKDOWN)
|
49
|
-
>>> print(content)
|
50
|
-
"""
|
51
|
-
|
52
|
-
# Supported file extensions
|
53
|
-
SUPPORTED_EXTENSIONS = {
|
54
|
-
'.pdf', '.txt', '.docx', '.doc', '.png', '.jpg', '.jpeg',
|
55
|
-
'.tiff', '.bmp', '.webp', '.pptx', '.xlsx'
|
56
|
-
}
|
57
|
-
|
58
|
-
def __init__(self, display_message: bool = False) -> None:
|
59
|
-
"""
|
60
|
-
Initializes the Iris class with the document converter.
|
61
|
-
|
62
|
-
Raises:
|
63
|
-
IrisError: If the DocLing library is not installed.
|
64
|
-
"""
|
65
|
-
self._engine: Optional[Any] = None
|
66
|
-
self._last_result: Optional[Any] = None
|
67
|
-
self.display_message: bool = display_message
|
68
|
-
self._initialize_engine()
|
69
|
-
|
70
|
-
def _initialize_engine(self) -> None:
|
71
|
-
"""
|
72
|
-
Initializes the DocumentConverter engine.
|
73
|
-
|
74
|
-
Raises:
|
75
|
-
IrisError: If the DocLing library is not available.
|
76
|
-
"""
|
77
|
-
try:
|
78
|
-
from docling.document_converter import DocumentConverter
|
79
|
-
self._engine = DocumentConverter()
|
80
|
-
if self.display_message: success_print("Iris engine initialized successfully")
|
81
|
-
except ImportError as e:
|
82
|
-
error_msg = (
|
83
|
-
"The 'docling' library is not installed. "
|
84
|
-
"Run: python -m pip install docling"
|
85
|
-
)
|
86
|
-
error_print(f"Iris - {error_msg}")
|
87
|
-
error_print(f"Error importing DocLing: {e}")
|
88
|
-
raise IrisError(error_msg) from e
|
89
|
-
|
90
|
-
@property
|
91
|
-
def engine(self) -> Any:
|
92
|
-
"""Returns the DocumentConverter engine instance."""
|
93
|
-
return self._engine
|
94
|
-
|
95
|
-
@property
|
96
|
-
def last_result(self) -> Optional[Any]:
|
97
|
-
"""Returns the last processed conversion result."""
|
98
|
-
return self._last_result
|
99
|
-
|
100
|
-
def _validate_file_path(self, file_path: Union[str, Path]) -> Path:
|
101
|
-
"""
|
102
|
-
Validates the file path and returns a Path object.
|
103
|
-
|
104
|
-
Args:
|
105
|
-
file_path: Path to the file.
|
106
|
-
|
107
|
-
Returns:
|
108
|
-
Path: Validated Path object.
|
109
|
-
|
110
|
-
Raises:
|
111
|
-
IrisError: If the file does not exist or is not supported.
|
112
|
-
"""
|
113
|
-
path_obj = Path(file_path)
|
114
|
-
|
115
|
-
if not path_obj.exists():
|
116
|
-
raise IrisError(f"File not found: {file_path}")
|
117
|
-
|
118
|
-
if not path_obj.is_file():
|
119
|
-
raise IrisError(f"Path does not point to a file: {file_path}")
|
120
|
-
|
121
|
-
if path_obj.suffix.lower() not in self.SUPPORTED_EXTENSIONS:
|
122
|
-
supported = ", ".join(sorted(self.SUPPORTED_EXTENSIONS))
|
123
|
-
raise IrisError(
|
124
|
-
f"Extension '{path_obj.suffix}' is not supported. "
|
125
|
-
f"Supported extensions: {supported}"
|
126
|
-
)
|
127
|
-
|
128
|
-
return path_obj
|
129
|
-
|
130
|
-
def _convert_document(self, file_path: Path) -> Any:
|
131
|
-
"""
|
132
|
-
Converts the document using DocumentConverter.
|
133
|
-
|
134
|
-
Args:
|
135
|
-
file_path: Path to the file.
|
136
|
-
|
137
|
-
Returns:
|
138
|
-
Result of the DocLing conversion.
|
139
|
-
|
140
|
-
Raises:
|
141
|
-
IrisError: If the conversion fails.
|
142
|
-
"""
|
143
|
-
try:
|
144
|
-
if self.display_message: success_print(f"Starting conversion of file: {file_path}")
|
145
|
-
result = self._engine.convert(str(file_path))
|
146
|
-
self._last_result = result
|
147
|
-
if self.display_message: success_print("Conversion completed successfully")
|
148
|
-
return result
|
149
|
-
except Exception as e:
|
150
|
-
error_msg = f"Error converting document '{file_path}': {e}"
|
151
|
-
error_print(f"Iris - {error_msg}")
|
152
|
-
error_print(error_msg)
|
153
|
-
raise IrisError(error_msg) from e
|
154
|
-
|
155
|
-
def _export_to_format(self, document: Any, export_format: ExportFormat) -> Any:
|
156
|
-
"""
|
157
|
-
Exports the document to the specified format.
|
158
|
-
|
159
|
-
Args:
|
160
|
-
document: Document converted by DocLing.
|
161
|
-
export_format: Desired export format.
|
162
|
-
|
163
|
-
Returns:
|
164
|
-
Document in the specified format.
|
165
|
-
|
166
|
-
Raises:
|
167
|
-
IrisError: If the export fails.
|
168
|
-
"""
|
169
|
-
export_methods = {
|
170
|
-
ExportFormat.MARKDOWN: document.export_to_markdown,
|
171
|
-
ExportFormat.DICT: document.export_to_dict,
|
172
|
-
ExportFormat.DOCTAGS: document.export_to_doctags,
|
173
|
-
ExportFormat.HTML: document.export_to_html,
|
174
|
-
ExportFormat.TEXT: document.export_to_text,
|
175
|
-
}
|
176
|
-
|
177
|
-
try:
|
178
|
-
export_method = export_methods[export_format]
|
179
|
-
return export_method()
|
180
|
-
except KeyError:
|
181
|
-
available_formats = ", ".join([fmt.value for fmt in ExportFormat])
|
182
|
-
raise IrisError(
|
183
|
-
f"Format '{export_format.value}' is not supported. "
|
184
|
-
f"Available formats: {available_formats}"
|
185
|
-
)
|
186
|
-
except Exception as e:
|
187
|
-
error_msg = f"Error exporting to format '{export_format.value}': {e}"
|
188
|
-
error_print(error_msg)
|
189
|
-
raise IrisError(error_msg) from e
|
190
|
-
|
191
|
-
def read_document(
|
192
|
-
self,
|
193
|
-
file_path: Union[str, Path],
|
194
|
-
export_format: ExportFormat = ExportFormat.MARKDOWN,
|
195
|
-
verbose: bool = False,
|
196
|
-
) -> Optional[Any]:
|
197
|
-
"""
|
198
|
-
Reads and converts a document to the specified format.
|
199
|
-
|
200
|
-
Args:
|
201
|
-
file_path: Path to the document file.
|
202
|
-
export_format: Desired export format.
|
203
|
-
verbose: If True, displays success messages.
|
204
|
-
|
205
|
-
Returns:
|
206
|
-
Document converted to the specified format, or None if it fails.
|
207
|
-
|
208
|
-
Raises:
|
209
|
-
IrisError: If an error occurs during validation, conversion, or export.
|
210
|
-
|
211
|
-
Example:
|
212
|
-
>>> iris = Iris()
|
213
|
-
>>> content = iris.read_document("doc.pdf", ExportFormat.TEXT)
|
214
|
-
>>> print(content)
|
215
|
-
"""
|
216
|
-
try:
|
217
|
-
# File validation
|
218
|
-
validated_path = self._validate_file_path(file_path)
|
219
|
-
|
220
|
-
# Document conversion
|
221
|
-
conversion_result = self._convert_document(validated_path)
|
222
|
-
|
223
|
-
# Conversion result check
|
224
|
-
if not conversion_result or not hasattr(conversion_result, 'document'):
|
225
|
-
raise IrisError("Invalid conversion result or document not found")
|
226
|
-
|
227
|
-
# Export to desired format
|
228
|
-
formatted_result = self._export_to_format(
|
229
|
-
conversion_result.document,
|
230
|
-
export_format
|
231
|
-
)
|
232
|
-
|
233
|
-
if verbose:
|
234
|
-
success_print("Iris - Conversion completed successfully")
|
235
|
-
|
236
|
-
success_print(
|
237
|
-
f"Document '{validated_path.name}' converted to '{export_format.value}'"
|
238
|
-
)
|
239
|
-
|
240
|
-
return formatted_result
|
241
|
-
|
242
|
-
except IrisError:
|
243
|
-
# Re-raise exceptions from the class itself
|
244
|
-
raise
|
245
|
-
except Exception as e:
|
246
|
-
error_msg = f"Unexpected error while processing document: {e}"
|
247
|
-
error_print(f"Iris - {error_msg}")
|
248
|
-
error_print(error_msg)
|
249
|
-
raise IrisError(error_msg) from e
|
250
|
-
|
251
|
-
def read_multiple_documents(
|
252
|
-
self,
|
253
|
-
file_paths: List[Union[str, Path]],
|
254
|
-
export_format: ExportFormat = ExportFormat.MARKDOWN,
|
255
|
-
verbose: bool = False,
|
256
|
-
) -> Dict[str, Optional[Any]]:
|
257
|
-
"""
|
258
|
-
Reads and converts multiple documents.
|
259
|
-
|
260
|
-
Args:
|
261
|
-
file_paths: List of file paths.
|
262
|
-
export_format: Desired export format.
|
263
|
-
verbose: If True, displays detailed messages.
|
264
|
-
|
265
|
-
Returns:
|
266
|
-
Dictionary with the file name as key and converted content as value.
|
267
|
-
|
268
|
-
Example:
|
269
|
-
>>> iris = Iris()
|
270
|
-
>>> files = ["doc1.pdf", "doc2.txt"]
|
271
|
-
>>> results = iris.read_multiple_documents(files, ExportFormat.TEXT)
|
272
|
-
>>> for filename, content in results.items():
|
273
|
-
... print(f"{filename}: {len(content) if content else 0} characters")
|
274
|
-
"""
|
275
|
-
results = {}
|
276
|
-
successful_conversions = 0
|
277
|
-
|
278
|
-
for file_path in file_paths:
|
279
|
-
try:
|
280
|
-
content = self.read_document(file_path, export_format, verbose=False)
|
281
|
-
filename = Path(file_path).name
|
282
|
-
results[filename] = content
|
283
|
-
successful_conversions += 1
|
284
|
-
|
285
|
-
if verbose:
|
286
|
-
if self.display_message: success_print(f"Iris - '{filename}' converted successfully")
|
287
|
-
|
288
|
-
except IrisError as e:
|
289
|
-
filename = Path(file_path).name
|
290
|
-
results[filename] = None
|
291
|
-
if verbose:
|
292
|
-
error_print(f"Iris - Error converting '{filename}': {e}")
|
293
|
-
alert_print(f"Failed to convert '{filename}': {e}")
|
294
|
-
|
295
|
-
if verbose:
|
296
|
-
total_files = len(file_paths)
|
297
|
-
if self.display_message: success_print(
|
298
|
-
f"Iris - Processing completed: {successful_conversions}/{total_files} "
|
299
|
-
f"files converted successfully"
|
300
|
-
)
|
301
|
-
|
302
|
-
return results
|
303
|
-
|
304
|
-
def get_supported_extensions(self) -> List[str]:
|
305
|
-
"""
|
306
|
-
Returns the list of supported file extensions.
|
307
|
-
|
308
|
-
Returns:
|
309
|
-
Sorted list of supported extensions.
|
310
|
-
"""
|
311
|
-
return sorted(list(self.SUPPORTED_EXTENSIONS))
|
312
|
-
|
313
|
-
def is_file_supported(self, file_path: Union[str, Path]) -> bool:
|
314
|
-
"""
|
315
|
-
Checks if a file is supported by the class.
|
316
|
-
|
317
|
-
Args:
|
318
|
-
file_path: Path to the file.
|
319
|
-
|
320
|
-
Returns:
|
321
|
-
True if the file is supported, False otherwise.
|
322
|
-
"""
|
323
|
-
try:
|
324
|
-
path_obj = Path(file_path)
|
325
|
-
return path_obj.suffix.lower() in self.SUPPORTED_EXTENSIONS
|
326
|
-
except Exception:
|
327
|
-
return False
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|