rpa-suite 1.5.8__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rpa_suite/core/iris.py ADDED
@@ -0,0 +1,327 @@
1
+ # rpa_suite/core/iris.py
2
+ """
3
+ Iris (OCR-IA) module for document conversion using DocLing.
4
+
5
+ This module provides a simplified interface for converting documents
6
+ into various formats, optimized for RPA automation use.
7
+ """
8
+
9
+ from enum import Enum
10
+ from pathlib import Path
11
+ from typing import Any, Dict, List, Optional, Union
12
+
13
+ # imports internal
14
+ from rpa_suite.functions._printer import alert_print, error_print, success_print
15
+
16
+
17
class ExportFormat(Enum):
    """
    Output formats that a converted document can be exported to.

    Each member's value is the lowercase name of the format. Members are
    declared in the order in which they are listed when the available
    formats are enumerated (for example in error messages).
    """

    # Markdown text.
    MARKDOWN = "markdown"
    # Python dictionary representation of the document.
    DICT = "dict"
    # DocTags markup.
    DOCTAGS = "doctags"
    # HTML markup.
    HTML = "html"
    # Plain text.
    TEXT = "text"
25
+
26
+
27
class IrisError(Exception):
    """
    Error raised by the Iris document converter.

    Carries no extra state beyond the standard ``Exception`` arguments; it
    exists so callers can catch Iris failures separately from other errors.
    """
31
+
32
+
33
class Iris:
    """
    Iris (OCR-IA): document converter built on the DocLing library.

    This class provides a simplified interface for converting documents
    in various formats (PDF, images, text) to structured formats such as
    Markdown, HTML, plain text, among others.

    Attributes:
        engine: Instance of DocLing's DocumentConverter.
        last_result: Last successful conversion result (None until a
            conversion succeeds).
        display_message: If True, progress/success messages are printed
            while initializing the engine and converting documents.

    Example:
        >>> iris = Iris()
        >>> content = iris.read_document("document.pdf", ExportFormat.MARKDOWN)
        >>> print(content)
    """

    # Supported file extensions (matched case-insensitively against the suffix).
    # NOTE(review): confirm every extension listed here is actually accepted by
    # the installed DocLing version.
    SUPPORTED_EXTENSIONS = {
        '.pdf', '.txt', '.docx', '.doc', '.png', '.jpg', '.jpeg',
        '.tiff', '.bmp', '.webp', '.pptx', '.xlsx'
    }

    # Name of the document method that produces each export format. Only the
    # requested method is looked up (lazily, via getattr), so a document that
    # lacks an unrelated exporter can still be exported to the other formats.
    _EXPORT_METHOD_NAMES = {
        ExportFormat.MARKDOWN: "export_to_markdown",
        ExportFormat.DICT: "export_to_dict",
        ExportFormat.DOCTAGS: "export_to_doctags",
        ExportFormat.HTML: "export_to_html",
        ExportFormat.TEXT: "export_to_text",
    }

    def __init__(self, display_message: bool = False) -> None:
        """
        Initializes the Iris class with the document converter.

        Args:
            display_message: If True, prints progress/success messages.

        Raises:
            IrisError: If the DocLing library is not installed.
        """
        self._engine: Optional[Any] = None
        self._last_result: Optional[Any] = None
        self.display_message: bool = display_message
        self._initialize_engine()

    def _initialize_engine(self) -> None:
        """
        Initializes the DocumentConverter engine.

        DocLing is imported here rather than at module import time, so a
        missing installation is reported as an IrisError when an Iris
        instance is created.

        Raises:
            IrisError: If the DocLing library is not available.
        """
        try:
            from docling.document_converter import DocumentConverter
            self._engine = DocumentConverter()
            if self.display_message:
                success_print("Iris engine initialized successfully")
        except ImportError as e:
            error_msg = (
                "The 'docling' library is not installed. "
                "Run: python -m pip install docling"
            )
            error_print(f"Iris - {error_msg}")
            error_print(f"Error importing DocLing: {e}")
            raise IrisError(error_msg) from e

    @property
    def engine(self) -> Any:
        """Returns the DocumentConverter engine instance."""
        return self._engine

    @property
    def last_result(self) -> Optional[Any]:
        """Returns the last successful conversion result, or None if there is none."""
        return self._last_result

    @staticmethod
    def _resolve_export_format(export_format: Union[ExportFormat, str]) -> ExportFormat:
        """
        Normalizes an export format to an ExportFormat member.

        Args:
            export_format: An ExportFormat member, or the value of one
                (for example "markdown").

        Returns:
            The matching ExportFormat member.

        Raises:
            IrisError: If the value does not correspond to any ExportFormat.
        """
        if isinstance(export_format, ExportFormat):
            return export_format

        try:
            return ExportFormat(export_format)
        except ValueError as e:
            available_formats = ", ".join(fmt.value for fmt in ExportFormat)
            raise IrisError(
                f"Format '{export_format}' is not supported. "
                f"Available formats: {available_formats}"
            ) from e

    def _validate_file_path(self, file_path: Union[str, Path]) -> Path:
        """
        Validates the file path and returns a Path object.

        Args:
            file_path: Path to the file.

        Returns:
            Path: Validated Path object.

        Raises:
            IrisError: If the value is not a path, the file does not exist,
                the path is not a regular file, or the extension is not in
                SUPPORTED_EXTENSIONS.
        """
        try:
            path_obj = Path(file_path)
        except TypeError as e:
            # Path() rejects non path-like values (e.g. None); report that as
            # the documented IrisError instead of leaking a TypeError.
            raise IrisError(f"Invalid file path: {file_path!r}") from e

        if not path_obj.exists():
            raise IrisError(f"File not found: {file_path}")

        if not path_obj.is_file():
            raise IrisError(f"Path does not point to a file: {file_path}")

        if path_obj.suffix.lower() not in self.SUPPORTED_EXTENSIONS:
            supported = ", ".join(sorted(self.SUPPORTED_EXTENSIONS))
            raise IrisError(
                f"Extension '{path_obj.suffix}' is not supported. "
                f"Supported extensions: {supported}"
            )

        return path_obj

    def _convert_document(self, file_path: Path) -> Any:
        """
        Converts the document using DocumentConverter.

        On success the result is also stored and exposed via ``last_result``.

        Args:
            file_path: Path to the file.

        Returns:
            Result of the DocLing conversion.

        Raises:
            IrisError: If the conversion fails.
        """
        try:
            if self.display_message:
                success_print(f"Starting conversion of file: {file_path}")
            result = self._engine.convert(str(file_path))
            self._last_result = result
            if self.display_message:
                success_print("Conversion completed successfully")
            return result
        except Exception as e:
            error_msg = f"Error converting document '{file_path}': {e}"
            # Reported once; the same message is carried by the raised error.
            error_print(f"Iris - {error_msg}")
            raise IrisError(error_msg) from e

    def _export_to_format(self, document: Any, export_format: ExportFormat) -> Any:
        """
        Exports the document to the specified format.

        Args:
            document: Document converted by DocLing.
            export_format: Desired export format (an ExportFormat member or
                its string value).

        Returns:
            Document in the specified format.

        Raises:
            IrisError: If the format is not supported, the document does not
                provide the matching export method, or the export fails.
        """
        resolved_format = self._resolve_export_format(export_format)

        method_name = self._EXPORT_METHOD_NAMES.get(resolved_format)
        if method_name is None:
            available_formats = ", ".join(fmt.value for fmt in ExportFormat)
            raise IrisError(
                f"Format '{resolved_format.value}' is not supported. "
                f"Available formats: {available_formats}"
            )

        export_method = getattr(document, method_name, None)
        if not callable(export_method):
            error_msg = (
                f"Error exporting to format '{resolved_format.value}': "
                f"document does not provide '{method_name}'"
            )
            error_print(error_msg)
            raise IrisError(error_msg)

        # Only the export call itself is guarded, so an error raised inside the
        # exporter (including a KeyError) is reported as an export failure and
        # never mistaken for an unsupported format.
        try:
            return export_method()
        except Exception as e:
            error_msg = f"Error exporting to format '{resolved_format.value}': {e}"
            error_print(error_msg)
            raise IrisError(error_msg) from e

    def read_document(
        self,
        file_path: Union[str, Path],
        export_format: ExportFormat = ExportFormat.MARKDOWN,
        verbose: bool = False,
    ) -> Optional[Any]:
        """
        Reads and converts a document to the specified format.

        Args:
            file_path: Path to the document file.
            export_format: Desired export format (an ExportFormat member or
                its string value).
            verbose: If True, displays success messages.

        Returns:
            Document converted to the specified format. Failures are
            reported by raising IrisError, not by returning None.

        Raises:
            IrisError: If an error occurs during validation, conversion, or export.

        Example:
            >>> iris = Iris()
            >>> content = iris.read_document("doc.pdf", ExportFormat.TEXT)
            >>> print(content)
        """
        try:
            # File validation
            validated_path = self._validate_file_path(file_path)

            # Format validation happens before the conversion so an invalid
            # format is rejected without paying for the conversion first.
            resolved_format = self._resolve_export_format(export_format)

            # Document conversion
            conversion_result = self._convert_document(validated_path)

            # Conversion result check
            if not conversion_result or not hasattr(conversion_result, 'document'):
                raise IrisError("Invalid conversion result or document not found")

            # Export to desired format
            formatted_result = self._export_to_format(
                conversion_result.document,
                resolved_format
            )

            if verbose:
                success_print("Iris - Conversion completed successfully")
                success_print(
                    f"Document '{validated_path.name}' converted to '{resolved_format.value}'"
                )

            return formatted_result

        except IrisError:
            # Re-raise exceptions from the class itself
            raise
        except Exception as e:
            error_msg = f"Unexpected error while processing document: {e}"
            # Reported once; the same message is carried by the raised error.
            error_print(f"Iris - {error_msg}")
            raise IrisError(error_msg) from e

    def read_multiple_documents(
        self,
        file_paths: List[Union[str, Path]],
        export_format: ExportFormat = ExportFormat.MARKDOWN,
        verbose: bool = False,
    ) -> Dict[str, Optional[Any]]:
        """
        Reads and converts multiple documents.

        A failure on one file does not stop the batch: that file's entry is
        set to None and processing continues with the next file.

        Args:
            file_paths: File paths to convert (any iterable of paths).
            export_format: Desired export format.
            verbose: If True, displays detailed messages. Failure messages
                are printed whenever verbose is True; success and summary
                messages additionally require ``display_message`` to be True.

        Returns:
            Dictionary with the file name (last path component) as key and
            the converted content as value, or None for files that failed.
            Files that share the same name in different directories map to
            the same key, so the later one overwrites the earlier one.

        Example:
            >>> iris = Iris()
            >>> files = ["doc1.pdf", "doc2.txt"]
            >>> results = iris.read_multiple_documents(files, ExportFormat.TEXT)
            >>> for filename, content in results.items():
            ...     print(f"{filename}: {len(content) if content else 0} characters")
        """
        results: Dict[str, Optional[Any]] = {}
        successful_conversions = 0
        total_files = 0

        for file_path in file_paths:
            total_files += 1

            # Derive the result key up front. A value that is not path-like
            # (e.g. None) must not abort the whole batch, so fall back to its
            # string form; read_document then reports it as an IrisError.
            try:
                filename = Path(file_path).name
            except TypeError:
                filename = str(file_path)

            try:
                content = self.read_document(file_path, export_format, verbose=False)
            except IrisError as e:
                results[filename] = None
                if verbose:
                    error_print(f"Iris - Error converting '{filename}': {e}")
                    alert_print(f"Failed to convert '{filename}': {e}")
                continue

            results[filename] = content
            successful_conversions += 1

            if verbose and self.display_message:
                success_print(f"Iris - '{filename}' converted successfully")

        if verbose and self.display_message:
            success_print(
                f"Iris - Processing completed: {successful_conversions}/{total_files} "
                f"files converted successfully"
            )

        return results

    def get_supported_extensions(self) -> List[str]:
        """
        Returns the list of supported file extensions.

        Returns:
            Sorted list of supported extensions.
        """
        return sorted(self.SUPPORTED_EXTENSIONS)

    def is_file_supported(self, file_path: Union[str, Path]) -> bool:
        """
        Checks if a file is supported by the class.

        Only the extension is inspected (case-insensitively); the file is
        not required to exist.

        Args:
            file_path: Path to the file.

        Returns:
            True if the file is supported, False otherwise (including when
            the value cannot be interpreted as a path).
        """
        try:
            path_obj = Path(file_path)
        except (TypeError, ValueError):
            return False
        return path_obj.suffix.lower() in self.SUPPORTED_EXTENSIONS
rpa_suite/core/log.py CHANGED
@@ -3,12 +3,14 @@
3
3
  # imports internal
4
4
  from rpa_suite.functions._printer import error_print, alert_print, success_print
5
5
 
6
- # imports external
6
+ # imports third party
7
7
  from loguru import logger
8
8
 
9
- # imports third-party
9
+ # imports standard
10
10
  from typing import Optional as Op
11
- import sys, os, inspect
11
+ import sys
12
+ import os
13
+ import inspect
12
14
 
13
15
 
14
16
  class Filters:
@@ -83,13 +85,13 @@ class Log:
83
85
 
84
86
  try:
85
87
  os.makedirs(self.full_path, exist_ok=True)
86
- if display_message: success_print(f"Diretório:'{self.full_path}' foi criado com sucesso.")
88
+ if display_message:
89
+ success_print(f"Diretório:'{self.full_path}' foi criado com sucesso.")
87
90
  except FileExistsError:
88
- if display_message: alert_print(f"Diretório:'{self.full_path}' já existe.")
91
+ if display_message:
92
+ alert_print(f"Diretório:'{self.full_path}' já existe.")
89
93
  except PermissionError:
90
- alert_print(
91
- f"Permissão negada: não é possível criar o diretório '{self.full_path}'."
92
- )
94
+ alert_print(f"Permissão negada: não é possível criar o diretório '{self.full_path}'.")
93
95
 
94
96
  new_filter = None
95
97
  if filter_words is not None:
@@ -104,9 +106,7 @@ class Log:
104
106
  formatter = CustomFormatter()
105
107
 
106
108
  if new_filter:
107
- self.logger.add(
108
- file_handler, filter=new_filter, level="DEBUG", format=log_format
109
- )
109
+ self.logger.add(file_handler, filter=new_filter, level="DEBUG", format=log_format)
110
110
  else:
111
111
  self.logger.add(file_handler, level="DEBUG", format=log_format)
112
112
 
@@ -115,9 +115,7 @@ class Log:
115
115
  return file_handler
116
116
 
117
117
  except Exception as e:
118
- error_print(
119
- f"Houve um erro durante a execução da função: {self.config_logger.__name__}! Error: {str(e)}."
120
- )
118
+ error_print(f"Houve um erro durante a execução da função: {self.config_logger.__name__}! Error: {str(e)}.")
121
119
  return None
122
120
 
123
121
  def _log(self, level: str, msg: str):
@@ -128,16 +126,16 @@ class Log:
128
126
  # Find the first frame that's not from this log.py file
129
127
  frame = inspect.currentframe()
130
128
  current_file = os.path.normpath(__file__)
131
-
129
+
132
130
  while frame:
133
131
  frame = frame.f_back
134
132
  if frame and os.path.normpath(frame.f_code.co_filename) != current_file:
135
133
  break
136
-
134
+
137
135
  if not frame:
138
136
  # Fallback if we can't find external caller
139
137
  frame = inspect.currentframe().f_back.f_back
140
-
138
+
141
139
  full_path_filename = frame.f_code.co_filename
142
140
 
143
141
  # Normalize path to use os.sep
@@ -157,9 +155,7 @@ class Log:
157
155
  def log_start_run_debug(self, msg_start_loggin: str) -> None:
158
156
  try:
159
157
  with open(self.file_handler, "a") as log_file:
160
- log_file.write(
161
- "\n"
162
- ) # Add a blank line before logging the start message
158
+ log_file.write("\n") # Add a blank line before logging the start message
163
159
  self._log("DEBUG", msg_start_loggin)
164
160
  except Exception as e:
165
161
  error_print(
@@ -1,6 +1,6 @@
1
1
  # rpa_suite/core/parallel.py
2
2
 
3
- # imports third-party
3
+ # imports standard
4
4
  from multiprocessing import Process, Manager
5
5
  from typing import Any, Callable, Dict, Optional, TypeVar, Generic
6
6
  import time
@@ -177,9 +177,7 @@ class ParallelRunner(Generic[T]):
177
177
  return False
178
178
  return self._process.is_alive()
179
179
 
180
- def get_result(
181
- self, timeout: Optional[float] = 60, terminate_on_timeout: bool = True
182
- ) -> Dict[str, Any]:
180
+ def get_result(self, timeout: Optional[float] = 60, terminate_on_timeout: bool = True) -> Dict[str, Any]:
183
181
  """
184
182
  Retrieves the result of the parallel execution.
185
183
 
@@ -234,27 +232,19 @@ class ParallelRunner(Generic[T]):
234
232
 
235
233
  # Debug - mostra o dicionário compartilhado
236
234
  if self.display_message:
237
- print(
238
- f"[Processo Principal] Dicionário compartilhado: {dict(self._result_dict)}"
239
- )
235
+ print(f"[Processo Principal] Dicionário compartilhado: {dict(self._result_dict)}")
240
236
 
241
237
  # Verifica se o processo terminou ou se atingiu o timeout
242
238
  if self._process.is_alive():
243
239
  if terminate_on_timeout:
244
240
  self._process.terminate()
245
- self._process.join(
246
- timeout=1
247
- ) # Pequeno timeout para garantir que o processo termine
241
+ self._process.join(timeout=1) # Pequeno timeout para garantir que o processo termine
248
242
  result["terminated"] = True
249
243
  result["success"] = False
250
- result["error"] = (
251
- f"Operação cancelada por timeout após {execution_time:.2f} segundos"
252
- )
244
+ result["error"] = f"Operação cancelada por timeout após {execution_time:.2f} segundos"
253
245
  else:
254
246
  result["success"] = False
255
- result["error"] = (
256
- f"Operação ainda em execução após {execution_time:.2f} segundos"
257
- )
247
+ result["error"] = f"Operação ainda em execução após {execution_time:.2f} segundos"
258
248
  else:
259
249
  # Processo terminou normalmente - verificamos o status
260
250
  status = self._result_dict.get("status", "unknown")
@@ -266,9 +256,7 @@ class ParallelRunner(Generic[T]):
266
256
  result["result"] = self._result_dict["result"]
267
257
  else:
268
258
  result["success"] = False
269
- result["error"] = (
270
- "Resultado não encontrado no dicionário compartilhado"
271
- )
259
+ result["error"] = "Resultado não encontrado no dicionário compartilhado"
272
260
  else:
273
261
  result["success"] = False
274
262
  result["error"] = self._result_dict.get("error", "Erro desconhecido")
@@ -276,9 +264,7 @@ class ParallelRunner(Generic[T]):
276
264
  result["traceback"] = self._result_dict["traceback"]
277
265
 
278
266
  # Finaliza o Manager se o processo terminou e não estamos mais esperando resultado
279
- if not self._process.is_alive() and (
280
- result.get("success", False) or result.get("terminated", False)
281
- ):
267
+ if not self._process.is_alive() and (result.get("success", False) or result.get("terminated", False)):
282
268
  self._cleanup()
283
269
 
284
270
  return result
rpa_suite/core/print.py CHANGED
@@ -1,6 +1,6 @@
1
1
  # rpa_suite/core/print.py
2
2
 
3
- # imports external
3
+ # imports third party
4
4
  from colorama import Fore
5
5
 
6
6
 
@@ -126,9 +126,7 @@ class Print:
126
126
  """
127
127
  print(f"{color}{string_text}{Colors.default}", end=ending)
128
128
 
129
- def magenta_print(
130
- self, string_text: str, color=Colors.magenta, ending="\n"
131
- ) -> None:
129
+ def magenta_print(self, string_text: str, color=Colors.magenta, ending="\n") -> None:
132
130
  """
133
131
  Print customized with the color Magenta \n
134
132
 
@@ -162,9 +160,7 @@ class Print:
162
160
  """
163
161
  print(f"{color}{string_text}{Colors.default}", end=ending)
164
162
 
165
- def print_call_fn(
166
- self, string_text: str, color=Colors.call_fn, ending="\n"
167
- ) -> None:
163
+ def print_call_fn(self, string_text: str, color=Colors.call_fn, ending="\n") -> None:
168
164
  """
169
165
  Print customized for function called (log) \n
170
166
  Color: Magenta Light
@@ -182,9 +178,7 @@ class Print:
182
178
  """
183
179
  print(f"{color}{string_text}{Colors.default}", end=ending)
184
180
 
185
- def print_retur_fn(
186
- self, string_text: str, color=Colors.retur_fn, ending="\n"
187
- ) -> None:
181
+ def print_retur_fn(self, string_text: str, color=Colors.retur_fn, ending="\n") -> None:
188
182
  """
189
183
  Print customized for function return (log) \n
190
184
  Color: Yellow Light
rpa_suite/core/regex.py CHANGED
@@ -1,11 +1,11 @@
1
1
  # rpa_suite/core/regex.py
2
2
 
3
+ # imports standard
4
+ import re
5
+
3
6
  # imports internal
4
7
  from rpa_suite.functions._printer import error_print, success_print
5
8
 
6
- # imports third-party
7
- import re
8
-
9
9
 
10
10
  class Regex:
11
11
  """
@@ -1,11 +1,11 @@
1
1
  # rpa_suite/core/mail_validator.py
2
2
 
3
+ # imports third party
4
+ import email_validator
5
+
3
6
  # imports internal
4
7
  from rpa_suite.functions._printer import error_print, success_print
5
8
 
6
- # imports external
7
- import email_validator
8
-
9
9
 
10
10
  class Validate:
11
11
  """
@@ -200,15 +200,11 @@ class Validate:
200
200
  else:
201
201
  words_lowercase = [word.lower() for word in origin_words]
202
202
  searched_word_lower = searched_word.lower()
203
- result["number_occurrences"] = words_lowercase.count(
204
- searched_word_lower
205
- )
203
+ result["number_occurrences"] = words_lowercase.count(searched_word_lower)
206
204
  result["is_found"] = result["number_occurrences"] > 0
207
205
 
208
206
  except Exception as e:
209
- return error_print(
210
- f"Unable to complete the search: {searched_word}. Error: {str(e)}"
211
- )
207
+ return error_print(f"Unable to complete the search: {searched_word}. Error: {str(e)}")
212
208
 
213
209
  elif search_by == "string":
214
210
  try:
@@ -218,20 +214,14 @@ class Validate:
218
214
  else:
219
215
  origin_text_lower = origin_text.lower()
220
216
  searched_word_lower = searched_word.lower()
221
- result["number_occurrences"] = origin_text_lower.count(
222
- searched_word_lower
223
- )
217
+ result["number_occurrences"] = origin_text_lower.count(searched_word_lower)
224
218
  result["is_found"] = result["number_occurrences"] > 0
225
219
 
226
220
  except Exception as e:
227
- return error_print(
228
- f"Unable to complete the search: {searched_word}. Error: {str(e)}"
229
- )
221
+ return error_print(f"Unable to complete the search: {searched_word}. Error: {str(e)}")
230
222
 
231
223
  except Exception as e:
232
- return error_print(
233
- f"Unable to search for: {searched_word}. Error: {str(e)}"
234
- )
224
+ return error_print(f"Unable to search for: {searched_word}. Error: {str(e)}")
235
225
 
236
226
  # Postprocessing
237
227
  if result["is_found"]:
@@ -76,16 +76,12 @@ def __create_ss_dir(
76
76
  except PermissionError:
77
77
  result["success"] = False
78
78
  result["path_created"] = None
79
- alert_print(
80
- f"Permissão negada: não é possível criar o diretório '{full_path}'."
81
- )
79
+ alert_print(f"Permissão negada: não é possível criar o diretório '{full_path}'.")
82
80
 
83
81
  except Exception as e:
84
82
  result["success"] = False
85
83
  result["path_created"] = None
86
- error_print(
87
- f"Error capturing current path to create screenshots directory! Error: {str(e)}"
88
- )
84
+ error_print(f"Error capturing current path to create screenshots directory! Error: {str(e)}")
89
85
 
90
86
  finally:
91
87
  return result
@@ -1,3 +1,3 @@
1
1
  # rpa_suite/functions/__init__.py
2
2
 
3
- __version__ = '1.5.5'
3
+ __version__ = "1.5.5"