io4it 2.1.0.8__tar.gz → 2.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. {io4it-2.1.0.8 → io4it-2.1.1}/PKG-INFO +1 -1
  2. {io4it-2.1.0.8 → io4it-2.1.1}/io4it.egg-info/PKG-INFO +1 -1
  3. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/widgets/OWDoclingMarkdownizerSimple.py +31 -4
  4. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/widgets/OWOfficeNormalizer.py +27 -4
  5. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/widgets/OWPdfType.py +48 -5
  6. {io4it-2.1.0.8 → io4it-2.1.1}/setup.py +1 -1
  7. {io4it-2.1.0.8 → io4it-2.1.1}/io4it.egg-info/SOURCES.txt +0 -0
  8. {io4it-2.1.0.8 → io4it-2.1.1}/io4it.egg-info/dependency_links.txt +0 -0
  9. {io4it-2.1.0.8 → io4it-2.1.1}/io4it.egg-info/entry_points.txt +0 -0
  10. {io4it-2.1.0.8 → io4it-2.1.1}/io4it.egg-info/namespace_packages.txt +0 -0
  11. {io4it-2.1.0.8 → io4it-2.1.1}/io4it.egg-info/requires.txt +0 -0
  12. {io4it-2.1.0.8 → io4it-2.1.1}/io4it.egg-info/top_level.txt +0 -0
  13. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/__init__.py +0 -0
  14. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/ocr_function/__init__.py +0 -0
  15. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/ocr_function/word_converter.py +0 -0
  16. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/utils/__init__.py +0 -0
  17. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/utils/mail.py +0 -0
  18. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/utils/offuscation_basique.py +0 -0
  19. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/utils/pool_exec_utils.py +0 -0
  20. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/utils/utils_md.py +0 -0
  21. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/widgets/OWChatGpt.py +0 -0
  22. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/widgets/OWDeep_Search.py +0 -0
  23. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/widgets/OWExportMarkdown.py +0 -0
  24. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/widgets/OWMarkdownLoader.py +0 -0
  25. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/widgets/OWMarkdownizer.py +0 -0
  26. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/widgets/OWProcessPoolExecutor.py +0 -0
  27. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/widgets/OWS3Uploader.py +0 -0
  28. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/widgets/OWS3downloader.py +0 -0
  29. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/widgets/OWS3list.py +0 -0
  30. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/widgets/OWSpeechToText.py +0 -0
  31. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/widgets/OWmailLoader.py +0 -0
  32. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/widgets/OWmailSender.py +0 -0
  33. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/widgets/OWwordpdf2docx.py +0 -0
  34. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/widgets/__init__.py +0 -0
  35. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/widgets/designer/__init__.py +0 -0
  36. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/widgets/designer/nogui.ui +0 -0
  37. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/widgets/designer/ow_file_ext_selector.ui +0 -0
  38. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/widgets/designer/owchatgpt.ui +0 -0
  39. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/widgets/designer/owdeepsearch.ui +0 -0
  40. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/widgets/designer/owdoclingmarkdownizersimple.ui +0 -0
  41. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/widgets/designer/owexportmarkdown.ui +0 -0
  42. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/widgets/designer/owmailloader.ui +0 -0
  43. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/widgets/designer/owmailsender.ui +0 -0
  44. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/widgets/designer/owmarkdownizer.ui +0 -0
  45. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/widgets/designer/owmarkdownloader.ui +0 -0
  46. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/widgets/designer/owofficenormalizer.ui +0 -0
  47. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/widgets/designer/owpdftype.ui +0 -0
  48. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/widgets/designer/owprocesspoolexecutor.ui +0 -0
  49. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/widgets/designer/owspeechtotext.ui +0 -0
  50. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/widgets/designer/wordpdf2docx.ui +0 -0
  51. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/widgets/icons/__init__.py +0 -0
  52. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/widgets/icons/chatgpt.png +0 -0
  53. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/widgets/icons/check_pdf.png +0 -0
  54. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/widgets/icons/deepsearch.svg +0 -0
  55. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/widgets/icons/dep_md_old.png +0 -0
  56. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/widgets/icons/download.png +0 -0
  57. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/widgets/icons/export_md.png +0 -0
  58. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/widgets/icons/file_extensor.png +0 -0
  59. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/widgets/icons/list_aws.png +0 -0
  60. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/widgets/icons/load_md.png +0 -0
  61. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/widgets/icons/mail_loader.png +0 -0
  62. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/widgets/icons/mail_writer.png +0 -0
  63. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/widgets/icons/md.png +0 -0
  64. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/widgets/icons/office_normalizer.png +0 -0
  65. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/widgets/icons/process_pool_executor.png +0 -0
  66. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/widgets/icons/speech_to_text.png +0 -0
  67. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/widgets/icons/upload.png +0 -0
  68. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/widgets/icons/visualizationer.png +0 -0
  69. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/widgets/icons/wordpdf2docx.png +0 -0
  70. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/IO4IT/widgets/icons_dev/__init__.py +0 -0
  71. {io4it-2.1.0.8 → io4it-2.1.1}/orangecontrib/__init__.py +0 -0
  72. {io4it-2.1.0.8 → io4it-2.1.1}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: io4it
3
- Version: 2.1.0.8
3
+ Version: 2.1.1
4
4
  Home-page:
5
5
  Author:
6
6
  Author-email:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: io4it
3
- Version: 2.1.0.8
3
+ Version: 2.1.1
4
4
  Home-page:
5
5
  Author:
6
6
  Author-email:
@@ -8,6 +8,9 @@ from Orange.widgets import widget
8
8
  from Orange.widgets.utils.signals import Input, Output
9
9
  from Orange.data import Domain, StringVariable, Table, DiscreteVariable
10
10
 
11
+ # --- Ajouts pour l'écriture Excel ---
12
+ from openpyxl import Workbook
13
+
11
14
  # --- Docling (unique lib utilisée pour la conversion) ---
12
15
  from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend
13
16
  from docling.datamodel.base_models import InputFormat
@@ -208,9 +211,30 @@ class OWDoclingMarkdownizerSimple(widget.OWWidget):
208
211
  self.Outputs.status_data.send(status_table)
209
212
 
210
213
  def _run_conversion(self, files, progress_callback):
211
- """Main function to run the conversion, supports sequential and parallel modes."""
212
214
  results = []
213
215
 
216
+ # Gère le chemin du fichier Excel
217
+ base_name = "conversion_results" # Nom de base pour le fichier Excel
218
+ # Définir le chemin du dossier de sortie de Docling
219
+ if files:
220
+ first_file_path = Path(files[0])
221
+ out_dir = first_file_path.parent / "conversion_markdown"
222
+ else:
223
+ out_dir = Path.cwd() / "conversion_markdown"
224
+
225
+ excel_path = out_dir / f"{base_name}.xlsx"
226
+ counter = 1
227
+ while excel_path.exists():
228
+ excel_path = out_dir / f"{base_name}_{counter}.xlsx"
229
+ counter += 1
230
+
231
+ # Initialise le classeur et la feuille Excel
232
+ wb = Workbook()
233
+ ws = wb.active
234
+ ws.title = "Conversion Results"
235
+ headers = ["input_path", "output_md", "status", "duration_sec", "message"]
236
+ ws.append(headers)
237
+
214
238
  if self.external_executor is None:
215
239
  # --- Mode simple séquentiel ---
216
240
  for i, p in enumerate(files):
@@ -220,8 +244,8 @@ class OWDoclingMarkdownizerSimple(widget.OWWidget):
220
244
 
221
245
  row = _convert_one_file(path_str)
222
246
  results.append(row)
223
-
224
- # Mise à jour du tableau avec le résultat et envoi immédiat
247
+ ws.append(row)
248
+ wb.save(excel_path)
225
249
  self.status_update_signal.emit([row[0], row[2], row[4]])
226
250
 
227
251
  progress_callback((i + 1) / len(files) * 100)
@@ -237,13 +261,16 @@ class OWDoclingMarkdownizerSimple(widget.OWWidget):
237
261
  try:
238
262
  row = fut.result()
239
263
  results.append(row)
240
- # Mise à jour du résultat de la future et envoi
264
+ ws.append(row)
265
+ wb.save(excel_path)
241
266
  self.status_update_signal.emit([row[0], row[2], row[4]])
242
267
  except Exception as e:
243
268
  # Gestion des erreurs de la future et envoi
244
269
  row = [file_path_str, str((Path(file_path_str).parent / 'a_md' / f"{Path(file_path_str).stem}.md")),
245
270
  "nok", "0.00", f"FutureError: {e}"]
246
271
  results.append(row)
272
+ ws.append(row)
273
+ wb.save(excel_path)
247
274
  self.status_update_signal.emit([row[0], "nok", f"FutureError: {e}"])
248
275
 
249
276
  progress_callback(i / len(files) * 100)
@@ -9,6 +9,9 @@ from Orange.widgets import widget
9
9
  from Orange.widgets.utils.signals import Input, Output
10
10
  from Orange.data import Domain, StringVariable, Table, DiscreteVariable
11
11
 
12
+ # --- Ajout pour l'écriture Excel ---
13
+ from openpyxl import Workbook
14
+
12
15
  if "site-packages/Orange/widgets" in os.path.dirname(os.path.abspath(__file__)).replace("\\", "/"):
13
16
  from Orange.widgets.orangecontrib.IO4IT.utils import utils_md
14
17
  from Orange.widgets.orangecontrib.AAIT.utils.import_uic import uic
@@ -20,7 +23,7 @@ else:
20
23
  class OWOfficeNormalizer(widget.OWWidget):
21
24
  name = "Office Normalizer"
22
25
  description = "Convertit .doc→.docx et .ppt→.pptx via COM (Windows + Office)"
23
- category = "AAIT - LLM INTEGRATION"
26
+ category = "IO4IT"
24
27
  icon = "icons/office_normalizer.png"
25
28
  if "site-packages/Orange/widgets" in os.path.dirname(os.path.abspath(__file__)).replace("\\", "/"):
26
29
  icon = "icons_dev/office_normalizer.png"
@@ -44,7 +47,7 @@ class OWOfficeNormalizer(widget.OWWidget):
44
47
  self.data = None
45
48
  self.autorun = True
46
49
  self.result = None
47
- self.processed_statuses = [] # To store incremental status data
50
+ self.processed_statuses = []
48
51
  self.post_initialized()
49
52
 
50
53
  @Inputs.data
@@ -67,7 +70,7 @@ class OWOfficeNormalizer(widget.OWWidget):
67
70
  return
68
71
 
69
72
  self.progressBarInit()
70
- self.processed_statuses = [] # Reset status list for a new run
73
+ self.processed_statuses = []
71
74
  self.Outputs.status_data.send(None)
72
75
 
73
76
  # Process files directly without a separate thread
@@ -91,6 +94,21 @@ class OWOfficeNormalizer(widget.OWWidget):
91
94
  output_base_dir = common_path / "office_normalisation"
92
95
  output_base_dir.mkdir(parents=True, exist_ok=True)
93
96
 
97
+ # Gère le nom du fichier Excel avec incrémentation
98
+ base_name = "normalization_results"
99
+ excel_path = output_base_dir / f"{base_name}.xlsx"
100
+ counter = 1
101
+ while excel_path.exists():
102
+ excel_path = output_base_dir / f"{base_name}_{counter}.xlsx"
103
+ counter += 1
104
+
105
+ # Initialise le classeur Excel
106
+ wb = Workbook()
107
+ ws = wb.active
108
+ ws.title = "Normalization Results"
109
+ headers = ["src_path", "dst_path", "status", "details"]
110
+ ws.append(headers)
111
+
94
112
  for i, path_str in enumerate(file_paths):
95
113
  self.progressBarSet(i / total_files * 100)
96
114
 
@@ -136,7 +154,12 @@ class OWOfficeNormalizer(widget.OWWidget):
136
154
  details = f"error: {error_msg}"
137
155
  status_text = f"ko: {details}"
138
156
 
139
- # Append to the final results list
157
+ # Ajoute la ligne de résultat à la table Excel et la sauvegarde
158
+ result_row = [path_str, dst_path, status_short, details]
159
+ ws.append(result_row)
160
+ wb.save(excel_path)
161
+
162
+ # Append to the final results list for Orange table
140
163
  rows.append([path_str, dst_path, status_text])
141
164
 
142
165
  # Append to the status update list and send the incremental table
@@ -9,6 +9,9 @@ from Orange.data import Domain, StringVariable, Table, DiscreteVariable
9
9
  from Orange.widgets import widget
10
10
  from Orange.widgets.utils.signals import Input, Output
11
11
 
12
+ # --- Ajout pour l'écriture Excel ---
13
+ from openpyxl import Workbook
14
+
12
15
  # Les imports sont adaptés pour correspondre au style de l'autre script
13
16
  if "site-packages/Orange/widgets" in os.path.dirname(os.path.abspath(__file__)).replace("\\", "/"):
14
17
  from Orange.widgets.orangecontrib.IO4IT.utils import utils_md
@@ -23,7 +26,7 @@ else:
23
26
  class OWPdfType(widget.OWWidget):
24
27
  name = "PDF Type"
25
28
  description = "Checks if a PDF is text-based or image-based"
26
- category = "AAIT - LLM INTEGRATION"
29
+ category = "IO4IT"
27
30
  icon = "icons/check_pdf.png"
28
31
  if "site-packages/Orange/widgets" in os.path.dirname(os.path.abspath(__file__)).replace("\\", "/"):
29
32
  icon = "icons_dev/check_pdf.png"
@@ -99,7 +102,32 @@ class OWPdfType(widget.OWWidget):
99
102
 
100
103
  def _process_pdfs(self, in_data: Table, progress_callback: callable, status_callback: callable) -> tuple[
101
104
  Table | None, Table | None]:
105
+
106
+ # Extraction des chemins de fichiers avant de commencer le traitement
102
107
  paths = [str(x) for x in in_data.get_column("file_path")]
108
+
109
+ # --- Gérer le nom du fichier Excel avec incrémentation ---
110
+ excel_output_dir = Path.cwd() / "pdf_check_results"
111
+ if paths:
112
+ first_file_path = Path(paths[0])
113
+ excel_output_dir = first_file_path.parent / "pdf_check_results"
114
+
115
+ excel_output_dir.mkdir(parents=True, exist_ok=True)
116
+
117
+ base_name = "pdf_check_results"
118
+ excel_path = excel_output_dir / f"{base_name}.xlsx"
119
+ counter = 1
120
+ while excel_path.exists():
121
+ excel_path = excel_output_dir / f"{base_name}_{counter}.xlsx"
122
+ counter += 1
123
+
124
+ # --- Initialiser le classeur Excel ---
125
+ wb = Workbook()
126
+ ws = wb.active
127
+ ws.title = "PDF Check Results"
128
+ headers = ["file_path", "status", "details"]
129
+ ws.append(headers)
130
+
103
131
  text_indices = []
104
132
  image_indices = []
105
133
 
@@ -108,21 +136,36 @@ class OWPdfType(widget.OWWidget):
108
136
  progress_callback(i / total_files * 100)
109
137
 
110
138
  fp = Path(p)
139
+ result_row = [p, "", ""] # Initialisation de la ligne de résultat
111
140
 
112
141
  if not fp.exists() or fp.suffix.lower() != ".pdf":
113
- status_callback([p, "ko", "Invalid file or not a PDF"])
142
+ result_row[1] = "ko"
143
+ result_row[2] = "Invalid file or not a PDF"
144
+ status_callback(result_row)
145
+ ws.append(result_row)
146
+ wb.save(excel_path)
114
147
  continue
115
148
 
116
149
  try:
117
150
  is_text = utils_md.is_pdf_text_based(fp)
118
151
  if is_text:
119
152
  text_indices.append(i)
120
- status_callback([p, "ok", "Text-based PDF"])
153
+ result_row[1] = "ok"
154
+ result_row[2] = "Text-based PDF"
121
155
  else:
122
156
  image_indices.append(i)
123
- status_callback([p, "ok", "Image-based PDF"])
157
+ result_row[1] = "ok"
158
+ result_row[2] = "Image-based PDF"
159
+
160
+ status_callback(result_row)
161
+ ws.append(result_row)
162
+ wb.save(excel_path)
124
163
  except Exception as e:
125
- status_callback([p, "ko", f"Error: {str(e)}"])
164
+ result_row[1] = "ko"
165
+ result_row[2] = f"Error: {str(e)}"
166
+ status_callback(result_row)
167
+ ws.append(result_row)
168
+ wb.save(excel_path)
126
169
 
127
170
  progress_callback(100)
128
171
 
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
2
2
 
3
3
  # Configuration
4
4
  NAME = "io4it"
5
- VERSION = "2.1.0.8"
5
+ VERSION = "2.1.1"
6
6
 
7
7
  INSTALL_REQUIRES = [
8
8
  "pylatexenc",
File without changes
File without changes