io4it 2.1.2.1__tar.gz → 2.1.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. io4it-2.1.3/PKG-INFO +7 -0
  2. io4it-2.1.3/io4it.egg-info/PKG-INFO +7 -0
  3. {io4it-2.1.2.1 → io4it-2.1.3}/io4it.egg-info/SOURCES.txt +0 -1
  4. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/OWChatGpt.py +45 -44
  5. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/OWMarkdownizer.py +1 -0
  6. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/OWOfficeNormalizer.py +46 -103
  7. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/OWPdfType.py +35 -23
  8. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/designer/owmailsender.ui +31 -96
  9. io4it-2.1.3/orangecontrib/IO4IT/widgets/designer/owofficenormalizer.ui +86 -0
  10. {io4it-2.1.2.1 → io4it-2.1.3}/setup.py +1 -1
  11. io4it-2.1.2.1/PKG-INFO +0 -30
  12. io4it-2.1.2.1/io4it.egg-info/PKG-INFO +0 -30
  13. io4it-2.1.2.1/orangecontrib/IO4IT/widgets/designer/OWmailSender.py +0 -155
  14. io4it-2.1.2.1/orangecontrib/IO4IT/widgets/designer/owofficenormalizer.ui +0 -124
  15. {io4it-2.1.2.1 → io4it-2.1.3}/io4it.egg-info/dependency_links.txt +0 -0
  16. {io4it-2.1.2.1 → io4it-2.1.3}/io4it.egg-info/entry_points.txt +0 -0
  17. {io4it-2.1.2.1 → io4it-2.1.3}/io4it.egg-info/namespace_packages.txt +0 -0
  18. {io4it-2.1.2.1 → io4it-2.1.3}/io4it.egg-info/requires.txt +0 -0
  19. {io4it-2.1.2.1 → io4it-2.1.3}/io4it.egg-info/top_level.txt +0 -0
  20. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/__init__.py +0 -0
  21. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/ocr_function/__init__.py +0 -0
  22. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/ocr_function/word_converter.py +0 -0
  23. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/utils/__init__.py +0 -0
  24. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/utils/mail.py +0 -0
  25. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/utils/offuscation_basique.py +0 -0
  26. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/utils/pool_exec_utils.py +0 -0
  27. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/utils/utils_md.py +0 -0
  28. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/OWDeep_Search.py +0 -0
  29. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/OWDoclingMarkdownizerSimple.py +0 -0
  30. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/OWExportMarkdown.py +0 -0
  31. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/OWExtractTablesDocxToCSV.py +0 -0
  32. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/OWInboxMailMonitoring.py +0 -0
  33. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/OWMarkdownLoader.py +0 -0
  34. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/OWProcessPoolExecutor.py +0 -0
  35. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/OWS3Uploader.py +0 -0
  36. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/OWS3downloader.py +0 -0
  37. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/OWS3list.py +0 -0
  38. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/OWSpeechToText.py +0 -0
  39. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/OWmailLoader.py +0 -0
  40. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/OWmailSender.py +0 -0
  41. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/OWwordpdf2docx.py +0 -0
  42. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/__init__.py +0 -0
  43. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/designer/__init__.py +0 -0
  44. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/designer/nogui.ui +0 -0
  45. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/designer/ow_file_ext_selector.ui +0 -0
  46. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/designer/owchatgpt.ui +0 -0
  47. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/designer/owdeepsearch.ui +0 -0
  48. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/designer/owdoclingasr.ui +0 -0
  49. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/designer/owdoclingmarkdownizersimple.ui +0 -0
  50. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/designer/owdocxtocsv.ui +0 -0
  51. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/designer/owexportmarkdown.ui +0 -0
  52. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/designer/owinboxmailmonitoring.ui +0 -0
  53. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/designer/owmailloader.ui +0 -0
  54. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/designer/owmarkdownizer.ui +0 -0
  55. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/designer/owmarkdownloader.ui +0 -0
  56. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/designer/owpdftype.ui +0 -0
  57. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/designer/owprocesspoolexecutor.ui +0 -0
  58. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/designer/owspeechtotext.ui +0 -0
  59. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/designer/owvisualizationer.ui +0 -0
  60. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/designer/wordpdf2docx.ui +0 -0
  61. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/icons/__init__.py +0 -0
  62. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/icons/chatgpt.png +0 -0
  63. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/icons/check_pdf.png +0 -0
  64. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/icons/deepsearch.svg +0 -0
  65. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/icons/dep_md_old.png +0 -0
  66. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/icons/download.png +0 -0
  67. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/icons/export_md.png +0 -0
  68. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/icons/extract_table.png +0 -0
  69. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/icons/file_extensor.png +0 -0
  70. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/icons/list_aws.png +0 -0
  71. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/icons/load_md.png +0 -0
  72. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/icons/mail_loader.png +0 -0
  73. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/icons/mail_writer.png +0 -0
  74. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/icons/md.png +0 -0
  75. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/icons/monitor-email.svg +0 -0
  76. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/icons/office_normalizer.png +0 -0
  77. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/icons/process_pool_executor.png +0 -0
  78. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/icons/speech_to_text.png +0 -0
  79. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/icons/upload.png +0 -0
  80. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/icons/visualizationer.png +0 -0
  81. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/icons/wordpdf2docx.png +0 -0
  82. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/icons_dev/__init__.py +0 -0
  83. {io4it-2.1.2.1 → io4it-2.1.3}/orangecontrib/__init__.py +0 -0
  84. {io4it-2.1.2.1 → io4it-2.1.3}/setup.cfg +0 -0
io4it-2.1.3/PKG-INFO ADDED
@@ -0,0 +1,7 @@
1
+ Metadata-Version: 2.1
2
+ Name: io4it
3
+ Version: 2.1.3
4
+ Home-page:
5
+ Author:
6
+ Author-email:
7
+ Keywords: orange3 add-on
io4it-2.1.3/io4it.egg-info/PKG-INFO ADDED
@@ -0,0 +1,7 @@
1
+ Metadata-Version: 2.1
2
+ Name: io4it
3
+ Version: 2.1.3
4
+ Home-page:
5
+ Author:
6
+ Author-email:
7
+ Keywords: orange3 add-on
@@ -34,7 +34,6 @@ orangecontrib/IO4IT/widgets/OWmailLoader.py
34
34
  orangecontrib/IO4IT/widgets/OWmailSender.py
35
35
  orangecontrib/IO4IT/widgets/OWwordpdf2docx.py
36
36
  orangecontrib/IO4IT/widgets/__init__.py
37
- orangecontrib/IO4IT/widgets/designer/OWmailSender.py
38
37
  orangecontrib/IO4IT/widgets/designer/__init__.py
39
38
  orangecontrib/IO4IT/widgets/designer/nogui.ui
40
39
  orangecontrib/IO4IT/widgets/designer/ow_file_ext_selector.ui
@@ -2,7 +2,7 @@ import os
2
2
  import sys
3
3
  import base64
4
4
  import ast
5
- from openai import OpenAI
5
+ import openai
6
6
  import Orange
7
7
  from Orange.data import StringVariable
8
8
  from Orange.widgets.widget import OWWidget, Input, Output
@@ -98,53 +98,30 @@ class ChatGpt(OWWidget):
98
98
 
99
99
  def generate_answers(self):
100
100
  try:
101
- client = OpenAI(api_key=self.api_keys)
102
- system_content = []
103
- if getattr(self, "system_prompt", None):
104
- system_content = [{"type": "input_text", "text": str(self.system_prompt)}]
105
-
106
- user_content = []
107
- if isinstance(self.prompt, list):
108
- user_content.extend(self.prompt)
109
- else:
110
- user_content.append({"type": "input_text", "text": str(self.prompt)})
111
-
112
- if getattr(self, "image_paths", None):
113
- # normalize image_paths to a list
114
- if isinstance(self.image_paths, str):
115
- self.image_paths = ast.literal_eval(self.image_paths)
116
-
117
- for img_path in self.image_paths:
118
- filename = os.path.basename(img_path)
119
- user_content.append({"type": "input_text", "text": f"Photo : {filename}"})
120
-
121
- with open(img_path, "rb") as f:
122
- b64_img = base64.b64encode(f.read()).decode("utf-8")
123
-
124
- mime = "image/png" if filename.lower().endswith(".png") else "image/jpeg"
125
- user_content.append({
126
- "type": "input_image",
127
- "image_url": f"data:{mime};base64,{b64_img}",
128
- })
129
- response = client.responses.create(
130
- model=self.model,
131
- input=[
132
- {"role": "system", "content": system_content},
133
- {"role": "user", "content": user_content},
134
- ],
135
- max_output_tokens=self.max_tokens,
136
- # temperature=self.temperature,
137
- )
138
- self.text_response = response.output_text
139
-
140
-
141
- if self.text_response is None:
142
- self.error("No response from model.")
143
-
101
+ openai.api_key = self.api_keys
102
+ response = openai.chat.completions.create(
103
+ model=self.model,
104
+ messages=[
105
+ {
106
+ "role": "system",
107
+ "content": self.system_prompt
108
+ },
109
+ {
110
+ "role": "user",
111
+ "content": self.prompt
112
+ }
113
+ ],
114
+ max_tokens=self.max_tokens,
115
+ temperature=self.temperature
116
+ )
117
+ self.text_response = response.choices[0].message.content
144
118
  except Exception as e:
145
119
  print(e)
146
120
  self.error(f"Error: {e}")
147
121
  return
122
+ if self.text_response is None:
123
+ self.error("No response from chatgpt.")
124
+
148
125
 
149
126
  def run(self):
150
127
  self.error("")
@@ -160,6 +137,30 @@ class ChatGpt(OWWidget):
160
137
  self.error("No api keys provided.")
161
138
  return
162
139
 
140
+ # si on relance la génération par le bouton le prompt est déjà rempli (déjà une liste)
141
+ # mais on peut changer le model, la temp ou le nombre max de token
142
+ if isinstance(self.prompt, list):
143
+ self.prompt = [{"type": "text", "text": self.prompt}]
144
+ if self.image_paths is not None and self.image_paths != []:
145
+ if type(self.image_paths) == str:
146
+ self.image_paths = ast.literal_eval(self.image_paths)
147
+ for img_path in self.image_paths:
148
+ filename = os.path.basename(img_path)
149
+
150
+ # Ajoute une mention de l’image avant
151
+ self.prompt.append({
152
+ "type": "text",
153
+ "text": f"Photo : {filename}"
154
+ })
155
+
156
+ with open(img_path, "rb") as f:
157
+ b64_img = base64.b64encode(f.read()).decode("utf-8")
158
+ self.prompt.append({
159
+ "type": "image_url",
160
+ "image_url": {
161
+ "url": f"data:image/jpeg;base64,{b64_img}"
162
+ }
163
+ })
163
164
  self.progressBarInit()
164
165
  self.thread = thread_management.Thread(self.generate_answers)
165
166
  self.thread.progress.connect(self.handle_progress)
@@ -16,6 +16,7 @@ import easyocr
16
16
 
17
17
  from AnyQt.QtCore import QThread, pyqtSignal
18
18
  from AnyQt.QtWidgets import QApplication, QLabel, QSpinBox, QTextEdit, QPushButton
19
+ from AnyQt import uic
19
20
 
20
21
  from Orange.widgets import widget
21
22
  from Orange.widgets.utils.signals import Input, Output
@@ -2,18 +2,15 @@ import os
2
2
  import sys
3
3
  from pathlib import Path
4
4
  import shutil
5
- import comtypes
5
+
6
6
  from AnyQt.QtWidgets import QApplication
7
7
  from Orange.widgets import widget
8
8
  from Orange.widgets.utils.signals import Input, Output
9
9
  from Orange.data import Domain, StringVariable, Table, DiscreteVariable
10
+
11
+ # --- Ajout pour l'écriture Excel ---
10
12
  from openpyxl import Workbook
11
- import docx
12
- import aspose.words as aw
13
- import multiprocessing
14
- import queue
15
13
 
16
- # Les imports sont adaptés pour correspondre au style de l'autre script
17
14
  if "site-packages/Orange/widgets" in os.path.dirname(os.path.abspath(__file__)).replace("\\", "/"):
18
15
  from Orange.widgets.orangecontrib.IO4IT.utils import utils_md
19
16
  from Orange.widgets.orangecontrib.AAIT.utils.import_uic import uic
@@ -22,24 +19,6 @@ else:
22
19
  from orangecontrib.AAIT.utils.import_uic import uic
23
20
 
24
21
 
25
- def _convert_file_process(src_path: Path, dst_dir: Path, file_type: str, result_queue: multiprocessing.Queue):
26
- """
27
- Fonction de conversion exécutée dans un processus séparé.
28
- Place le résultat (statut, chemin, détails) dans une file d'attente.
29
- """
30
- try:
31
- if file_type == "doc":
32
- dst = utils_md.convert_doc_to_docx(src_path, dst_dir)
33
- result_queue.put(("ok", str(dst), "doc->docx"))
34
- elif file_type == "ppt":
35
- dst = utils_md.convert_ppt_to_pptx(src_path, dst_dir)
36
- result_queue.put(("ok", str(dst), "ppt->pptx"))
37
- except comtypes.COMError:
38
- result_queue.put(("ko", "", "conversion failed: COM error"))
39
- except Exception as e:
40
- result_queue.put(("ko", "", f"conversion failed: {e}"))
41
-
42
-
43
22
  class OWOfficeNormalizer(widget.OWWidget):
44
23
  name = "Office Normalizer"
45
24
  description = "Convertit .doc→.docx et .ppt→.pptx via COM (Windows + Office)"
@@ -68,11 +47,6 @@ class OWOfficeNormalizer(widget.OWWidget):
68
47
  self.autorun = True
69
48
  self.result = None
70
49
  self.processed_statuses = []
71
-
72
- # Connecter la case à cocher pour activer/désactiver le spinbox
73
- self.checkBox_timeout.toggled.connect(self.spinBox_timeout.setEnabled)
74
- self.spinBox_timeout.setEnabled(self.checkBox_timeout.isChecked())
75
-
76
50
  self.post_initialized()
77
51
 
78
52
  @Inputs.data
@@ -98,41 +72,13 @@ class OWOfficeNormalizer(widget.OWWidget):
98
72
  self.processed_statuses = []
99
73
  self.Outputs.status_data.send(None)
100
74
 
101
- # Déterminer la valeur du timeout
102
- self.timeout_value = None
103
- if self.checkBox_timeout.isChecked():
104
- self.timeout_value = self.spinBox_timeout.value()
105
-
75
+ # Process files directly without a separate thread
106
76
  result_table = self._normalize_files(self.data)
107
77
 
78
+ # Send the final results to the primary output
108
79
  self.Outputs.data.send(result_table)
109
80
  self.progressBarFinished()
110
81
 
111
- def _check_file_status(self, file_path: Path):
112
- """
113
- Vérifie si un fichier est accessible, non corrompu et non protégé par un mot de passe.
114
- Retourne un tuple : (statut_court, détails)
115
- """
116
- if not file_path.exists():
117
- return "ko", "not found"
118
- try:
119
- with open(file_path, 'rb'):
120
- pass
121
- except IOError as e:
122
- return "ko", f"locked or permission denied: {e}"
123
- try:
124
- file_info = aw.FileFormatUtil.detect_file_format(str(file_path))
125
- if file_info.is_encrypted:
126
- return "ko", "password protected"
127
- except Exception:
128
- pass
129
- if file_path.suffix.lower() == ".docx":
130
- try:
131
- docx.Document(file_path)
132
- except Exception:
133
- return "ko", "corrupted"
134
- return "ok", "ready"
135
-
136
82
  def _normalize_files(self, in_data: Table) -> Table:
137
83
  rows = []
138
84
  file_paths = [str(x) for x in in_data.get_column("file_path")]
@@ -147,6 +93,7 @@ class OWOfficeNormalizer(widget.OWWidget):
147
93
  output_base_dir = common_path / "office_normalisation"
148
94
  output_base_dir.mkdir(parents=True, exist_ok=True)
149
95
 
96
+ # Gère le nom du fichier Excel avec incrémentation
150
97
  base_name = "normalization_results"
151
98
  excel_path = output_base_dir / f"{base_name}.xlsx"
152
99
  counter = 1
@@ -154,6 +101,7 @@ class OWOfficeNormalizer(widget.OWWidget):
154
101
  excel_path = output_base_dir / f"{base_name}_{counter}.xlsx"
155
102
  counter += 1
156
103
 
104
+ # Initialise le classeur Excel
157
105
  wb = Workbook()
158
106
  ws = wb.active
159
107
  ws.title = "Normalization Results"
@@ -162,72 +110,67 @@ class OWOfficeNormalizer(widget.OWWidget):
162
110
 
163
111
  for i, path_str in enumerate(file_paths):
164
112
  self.progressBarSet(i / total_files * 100)
113
+
165
114
  src = Path(path_str)
166
115
  dst_path = ""
167
- status_short, details = self._check_file_status(src)
168
-
169
- if status_short == "ok":
116
+ status_text = ""
117
+ status_short = ""
118
+ details = ""
119
+
120
+ if not src.exists():
121
+ status_short = "ko"
122
+ details = "not found"
123
+ status_text = f"ko: {details}"
124
+ else:
170
125
  try:
171
- if src.suffix.lower() == ".docx":
172
- dst_dir = output_base_dir / src.parent.relative_to(common_path)
173
- dst_dir.mkdir(parents=True, exist_ok=True)
174
- dst = dst_dir / src.name
175
- shutil.copy(src, dst)
176
- dst_path = str(dst)
177
- details = "docx - unchanged"
178
-
179
- elif src.suffix.lower() in [".doc", ".ppt"]:
180
- dst_dir = output_base_dir / src.parent.relative_to(common_path)
181
- dst_dir.mkdir(parents=True, exist_ok=True)
182
-
183
- result_queue = multiprocessing.Queue()
184
- p = multiprocessing.Process(
185
- target=_convert_file_process,
186
- args=(src, dst_dir, src.suffix.lower().lstrip("."), result_queue)
187
- )
188
- p.start()
189
-
190
- try:
191
- # Utilisation de la valeur de timeout sélectionnée
192
- p.join(timeout=self.timeout_value)
193
-
194
- if p.is_alive():
195
- p.terminate()
196
- status_short = "ko"
197
- details = "conversion timed out"
198
- else:
199
- status_short, dst_path, details = result_queue.get(timeout=1)
200
- except queue.Empty:
201
- status_short = "ko"
202
- details = "conversion process failed silently"
203
- except Exception as e:
204
- status_short = "ko"
205
- details = f"conversion failed: {e}"
126
+ relative_path_from_common = src.parent.relative_to(common_path)
127
+ dst_dir = output_base_dir / relative_path_from_common
128
+ dst_dir.mkdir(parents=True, exist_ok=True)
206
129
 
130
+ if src.suffix.lower() == ".doc":
131
+ dst = utils_md.convert_doc_to_docx(src, dst_dir)
132
+ dst_path = str(dst)
133
+ status_short = "ok"
134
+ details = "doc->docx"
135
+ status_text = f"ok: {details}"
136
+ elif src.suffix.lower() == ".ppt":
137
+ dst = utils_md.convert_ppt_to_pptx(src, dst_dir)
138
+ dst_path = str(dst)
139
+ status_short = "ok"
140
+ details = "ppt->pptx"
141
+ status_text = f"ok: {details}"
207
142
  else:
208
- dst_dir = output_base_dir / src.parent.relative_to(common_path)
209
- dst_dir.mkdir(parents=True, exist_ok=True)
210
143
  dst = dst_dir / src.name
211
144
  if not dst.exists():
212
145
  shutil.copy(src, dst)
213
146
  dst_path = str(dst)
147
+ status_short = "ok"
214
148
  details = "unchanged"
215
-
149
+ status_text = f"ok: {details}"
216
150
  except Exception as e:
151
+ error_msg = str(e)
217
152
  status_short = "ko"
218
- details = f"error: {e}"
153
+ details = f"error: {error_msg}"
154
+ status_text = f"ko: {details}"
219
155
 
156
+ # Ajoute la ligne de résultat à la table Excel et la sauvegarde
220
157
  result_row = [path_str, dst_path, status_short, details]
221
158
  ws.append(result_row)
222
159
  wb.save(excel_path)
223
- rows.append([path_str, dst_path, status_short])
224
160
 
161
+ # Append to the final results list for Orange table
162
+ rows.append([path_str, dst_path, status_text])
163
+
164
+ # Append to the status update list and send the incremental table
225
165
  self.processed_statuses.append([path_str, status_short, details])
226
166
  self._send_status_table()
227
167
 
168
+ # This is crucial for UI updates, including the progress bar
228
169
  QApplication.processEvents()
229
170
 
230
171
  self.progressBarSet(100)
172
+
173
+ # Create and return the final output table
231
174
  domain = Domain([], metas=[
232
175
  StringVariable("src_path"),
233
176
  StringVariable("dst_path"),
@@ -236,6 +179,7 @@ class OWOfficeNormalizer(widget.OWWidget):
236
179
  return Table.from_list(domain, rows)
237
180
 
238
181
  def _send_status_table(self):
182
+ """Sends an incremental table to the status_data output."""
239
183
  domain = Domain([], metas=[
240
184
  StringVariable("src_path"),
241
185
  DiscreteVariable("status", values=["ok", "ko"]),
@@ -252,7 +196,6 @@ class OWOfficeNormalizer(widget.OWWidget):
252
196
 
253
197
 
254
198
  if __name__ == "__main__":
255
- multiprocessing.freeze_support()
256
199
  app = QApplication(sys.argv)
257
200
  my_widget = OWOfficeNormalizer()
258
201
  my_widget.show()
@@ -10,7 +10,6 @@ from Orange.widgets.utils.signals import Input, Output
10
10
 
11
11
  # --- Ajout pour l'écriture Excel ---
12
12
  from openpyxl import Workbook
13
- import pypdf
14
13
 
15
14
  # Les imports sont adaptés pour correspondre au style de l'autre script
16
15
  if "site-packages/Orange/widgets" in os.path.dirname(os.path.abspath(__file__)).replace("\\", "/"):
@@ -57,7 +56,7 @@ class OWPdfType(widget.OWWidget):
57
56
  self.thread = None
58
57
  self.autorun = True
59
58
  self.result = None
60
- self.processed_statuses = []
59
+ self.processed_statuses = [] # List to accumulate statuses
61
60
  self.post_initialized()
62
61
 
63
62
  @Inputs.data
@@ -74,7 +73,6 @@ class OWPdfType(widget.OWWidget):
74
73
  self.Outputs.text_data.send(None)
75
74
  self.Outputs.image_data.send(None)
76
75
  self.Outputs.status_data.send(None)
77
- QApplication.quit()
78
76
  return
79
77
 
80
78
  self.error("")
@@ -82,17 +80,19 @@ class OWPdfType(widget.OWWidget):
82
80
  self.data.domain["file_path"]
83
81
  except KeyError:
84
82
  self.error("You need a 'file_path' column in input data.")
85
- QApplication.quit()
86
83
  return
87
84
 
88
85
  if type(self.data.domain["file_path"]).__name__ != 'StringVariable':
89
86
  self.error("'file_path' column needs to be a Text.")
90
- QApplication.quit()
91
87
  return
92
88
 
93
89
  self.progressBarInit()
94
- self.processed_statuses = []
90
+ self.processed_statuses = [] # Reset status list for a new run
91
+
92
+ # Connect the internal status update signal to a new handler
95
93
  self.status_update_signal.connect(self.handle_status_update)
94
+
95
+ # Pass the status update signal's emit method to the thread
96
96
  self.thread = Thread(self._process_pdfs, self.data, status_callback=self.status_update_signal.emit)
97
97
  self.thread.progress.connect(self.handle_progress)
98
98
  self.thread.result.connect(self.handle_result)
@@ -102,8 +102,10 @@ class OWPdfType(widget.OWWidget):
102
102
  def _process_pdfs(self, in_data: Table, progress_callback: callable, status_callback: callable) -> tuple[
103
103
  Table | None, Table | None]:
104
104
 
105
+ # Extraction des chemins de fichiers avant de commencer le traitement
105
106
  paths = [str(x) for x in in_data.get_column("file_path")]
106
107
 
108
+ # --- Gérer le nom du fichier Excel avec incrémentation ---
107
109
  excel_output_dir = Path.cwd() / "pdf_check_results"
108
110
  if paths:
109
111
  first_file_path = Path(paths[0])
@@ -118,6 +120,7 @@ class OWPdfType(widget.OWWidget):
118
120
  excel_path = excel_output_dir / f"{base_name}_{counter}.xlsx"
119
121
  counter += 1
120
122
 
123
+ # --- Initialiser le classeur Excel ---
121
124
  wb = Workbook()
122
125
  ws = wb.active
123
126
  ws.title = "PDF Check Results"
@@ -130,8 +133,9 @@ class OWPdfType(widget.OWWidget):
130
133
  total_files = len(paths)
131
134
  for i, p in enumerate(paths):
132
135
  progress_callback(i / total_files * 100)
136
+
133
137
  fp = Path(p)
134
- result_row = [p, "", ""]
138
+ result_row = [p, "", ""] # Initialisation de la ligne de résultat
135
139
 
136
140
  if not fp.exists() or fp.suffix.lower() != ".pdf":
137
141
  result_row[1] = "ko"
@@ -142,56 +146,64 @@ class OWPdfType(widget.OWWidget):
142
146
  continue
143
147
 
144
148
  try:
145
- # Vérification de l'attribut is_encrypted de PyPDF avant de traiter le fichier.
146
- reader = pypdf.PdfReader(fp)
147
- if reader.is_encrypted:
148
- result_row[1] = "ko"
149
- result_row[2] = "Error: PDF is password protected."
149
+ is_text = utils_md.is_pdf_text_based(fp)
150
+ if is_text:
151
+ text_indices.append(i)
152
+ result_row[1] = "ok"
153
+ result_row[2] = "Text-based PDF"
150
154
  else:
151
- is_text = utils_md.is_pdf_text_based(fp)
152
- if is_text:
153
- text_indices.append(i)
154
- result_row[1] = "ok"
155
- result_row[2] = "Text-based PDF"
156
- else:
157
- image_indices.append(i)
158
- result_row[1] = "ok"
159
- result_row[2] = "Image-based PDF"
155
+ image_indices.append(i)
156
+ result_row[1] = "ok"
157
+ result_row[2] = "Image-based PDF"
158
+
159
+ status_callback(result_row)
160
+ ws.append(result_row)
161
+ wb.save(excel_path)
160
162
  except Exception as e:
161
163
  result_row[1] = "ko"
162
164
  result_row[2] = f"Error: {str(e)}"
163
- finally:
164
165
  status_callback(result_row)
165
166
  ws.append(result_row)
166
167
  wb.save(excel_path)
167
168
 
168
169
  progress_callback(100)
169
170
 
171
+ # Create table for text PDFs
170
172
  if not text_indices:
171
173
  text_table = None
172
174
  else:
173
175
  text_table = in_data[text_indices]
174
176
 
177
+ # Create table for image PDFs
175
178
  if not image_indices:
176
179
  image_table = None
177
180
  else:
178
181
  image_table = in_data[image_indices]
179
182
 
183
+ # The final result is still returned here
180
184
  return text_table, image_table
181
185
 
182
186
  def handle_progress(self, value: float) -> None:
183
187
  self.progressBarSet(value)
184
188
 
185
189
  def handle_status_update(self, new_status: list):
190
+ """
191
+ Receives a single status update from the thread, appends it to the list,
192
+ and sends a new, updated status table.
193
+ """
186
194
  self.processed_statuses.append(new_status)
195
+
196
+ # Correct Domain creation: move "file_path" to metas
187
197
  status_domain = Domain(
188
- [],
198
+ [], # The variables list should be empty
189
199
  metas=[
190
200
  StringVariable("file_path"),
191
201
  DiscreteVariable("status", values=["ok", "ko"]),
192
202
  StringVariable("details")
193
203
  ]
194
204
  )
205
+
206
+ # Now, the data is correctly structured for the new domain
195
207
  status_table = Table.from_list(status_domain, self.processed_statuses)
196
208
  self.Outputs.status_data.send(status_table)
197
209