io4it-2.1.3.tar.gz → io4it-2.1.5.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. {io4it-2.1.3 → io4it-2.1.5}/PKG-INFO +1 -1
  2. {io4it-2.1.3 → io4it-2.1.5}/io4it.egg-info/PKG-INFO +1 -1
  3. {io4it-2.1.3 → io4it-2.1.5}/io4it.egg-info/requires.txt +5 -1
  4. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/OWChatGpt.py +43 -43
  5. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/OWDoclingMarkdownizerSimple.py +8 -9
  6. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/OWMarkdownizer.py +0 -1
  7. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/OWOfficeNormalizer.py +103 -46
  8. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/OWPdfType.py +23 -35
  9. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/OWS3Uploader.py +15 -5
  10. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/OWS3downloader.py +12 -1
  11. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/designer/owmailsender.ui +96 -31
  12. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/designer/owofficenormalizer.ui +40 -2
  13. {io4it-2.1.3 → io4it-2.1.5}/setup.py +7 -3
  14. {io4it-2.1.3 → io4it-2.1.5}/io4it.egg-info/SOURCES.txt +0 -0
  15. {io4it-2.1.3 → io4it-2.1.5}/io4it.egg-info/dependency_links.txt +0 -0
  16. {io4it-2.1.3 → io4it-2.1.5}/io4it.egg-info/entry_points.txt +0 -0
  17. {io4it-2.1.3 → io4it-2.1.5}/io4it.egg-info/namespace_packages.txt +0 -0
  18. {io4it-2.1.3 → io4it-2.1.5}/io4it.egg-info/top_level.txt +0 -0
  19. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/__init__.py +0 -0
  20. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/ocr_function/__init__.py +0 -0
  21. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/ocr_function/word_converter.py +0 -0
  22. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/utils/__init__.py +0 -0
  23. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/utils/mail.py +0 -0
  24. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/utils/offuscation_basique.py +0 -0
  25. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/utils/pool_exec_utils.py +0 -0
  26. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/utils/utils_md.py +0 -0
  27. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/OWDeep_Search.py +0 -0
  28. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/OWExportMarkdown.py +0 -0
  29. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/OWExtractTablesDocxToCSV.py +0 -0
  30. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/OWInboxMailMonitoring.py +0 -0
  31. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/OWMarkdownLoader.py +0 -0
  32. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/OWProcessPoolExecutor.py +0 -0
  33. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/OWS3list.py +0 -0
  34. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/OWSpeechToText.py +0 -0
  35. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/OWmailLoader.py +0 -0
  36. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/OWmailSender.py +0 -0
  37. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/OWwordpdf2docx.py +0 -0
  38. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/__init__.py +0 -0
  39. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/designer/__init__.py +0 -0
  40. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/designer/nogui.ui +0 -0
  41. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/designer/ow_file_ext_selector.ui +0 -0
  42. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/designer/owchatgpt.ui +0 -0
  43. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/designer/owdeepsearch.ui +0 -0
  44. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/designer/owdoclingasr.ui +0 -0
  45. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/designer/owdoclingmarkdownizersimple.ui +0 -0
  46. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/designer/owdocxtocsv.ui +0 -0
  47. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/designer/owexportmarkdown.ui +0 -0
  48. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/designer/owinboxmailmonitoring.ui +0 -0
  49. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/designer/owmailloader.ui +0 -0
  50. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/designer/owmarkdownizer.ui +0 -0
  51. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/designer/owmarkdownloader.ui +0 -0
  52. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/designer/owpdftype.ui +0 -0
  53. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/designer/owprocesspoolexecutor.ui +0 -0
  54. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/designer/owspeechtotext.ui +0 -0
  55. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/designer/owvisualizationer.ui +0 -0
  56. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/designer/wordpdf2docx.ui +0 -0
  57. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/icons/__init__.py +0 -0
  58. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/icons/chatgpt.png +0 -0
  59. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/icons/check_pdf.png +0 -0
  60. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/icons/deepsearch.svg +0 -0
  61. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/icons/dep_md_old.png +0 -0
  62. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/icons/download.png +0 -0
  63. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/icons/export_md.png +0 -0
  64. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/icons/extract_table.png +0 -0
  65. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/icons/file_extensor.png +0 -0
  66. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/icons/list_aws.png +0 -0
  67. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/icons/load_md.png +0 -0
  68. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/icons/mail_loader.png +0 -0
  69. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/icons/mail_writer.png +0 -0
  70. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/icons/md.png +0 -0
  71. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/icons/monitor-email.svg +0 -0
  72. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/icons/office_normalizer.png +0 -0
  73. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/icons/process_pool_executor.png +0 -0
  74. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/icons/speech_to_text.png +0 -0
  75. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/icons/upload.png +0 -0
  76. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/icons/visualizationer.png +0 -0
  77. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/icons/wordpdf2docx.png +0 -0
  78. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/IO4IT/widgets/icons_dev/__init__.py +0 -0
  79. {io4it-2.1.3 → io4it-2.1.5}/orangecontrib/__init__.py +0 -0
  80. {io4it-2.1.3 → io4it-2.1.5}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: io4it
3
- Version: 2.1.3
3
+ Version: 2.1.5
4
4
  Home-page:
5
5
  Author:
6
6
  Author-email:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: io4it
3
- Version: 2.1.3
3
+ Version: 2.1.5
4
4
  Home-page:
5
5
  Author:
6
6
  Author-email:
@@ -1,3 +1,6 @@
1
+ torchvision==2.23.0
2
+ torchaudio==2.8.0
3
+ torch==2.8.0
1
4
  pylatexenc
2
5
  docopt
3
6
  boto3
@@ -7,7 +10,7 @@ docling-core==2.26.3
7
10
  speechbrain
8
11
  whisper
9
12
  whisper-openai
10
- pyannote.audio
13
+ pyannote.audio==3.4.0
11
14
  pyannote-core
12
15
  pypandoc
13
16
  pypandoc-binary
@@ -19,3 +22,4 @@ doc2docx
19
22
  msal
20
23
  exchangelib
21
24
  CATEGORIT
25
+ comtypes
@@ -2,7 +2,7 @@ import os
2
2
  import sys
3
3
  import base64
4
4
  import ast
5
- import openai
5
+ from openai import OpenAI
6
6
  import Orange
7
7
  from Orange.data import StringVariable
8
8
  from Orange.widgets.widget import OWWidget, Input, Output
@@ -98,30 +98,54 @@ class ChatGpt(OWWidget):
98
98
 
99
99
  def generate_answers(self):
100
100
  try:
101
- openai.api_key = self.api_keys
102
- response = openai.chat.completions.create(
101
+ client = OpenAI(api_key=self.api_keys)
102
+ system_content = []
103
+ if getattr(self, "system_prompt", None):
104
+ system_content = [{"type": "input_text", "text": str(self.system_prompt)}]
105
+
106
+ user_content = []
107
+ if isinstance(self.prompt, list):
108
+ user_content.extend(self.prompt)
109
+ else:
110
+ user_content.append({"type": "input_text", "text": str(self.prompt)})
111
+
112
+ if getattr(self, "image_paths", None):
113
+ # normalize image_paths to a list
114
+ if isinstance(self.image_paths, str):
115
+ self.image_paths = ast.literal_eval(self.image_paths)
116
+
117
+ for img_path in self.image_paths:
118
+ filename = os.path.basename(img_path)
119
+ user_content.append({"type": "input_text", "text": f"Photo : {filename}"})
120
+
121
+ with open(img_path, "rb") as f:
122
+ b64_img = base64.b64encode(f.read()).decode("utf-8")
123
+
124
+ mime = "image/png" if filename.lower().endswith(".png") else "image/jpeg"
125
+ user_content.append({
126
+ "type": "input_image",
127
+ "image_url": f"data:{mime};base64,{b64_img}",
128
+ })
129
+
130
+ response = client.responses.create(
103
131
  model=self.model,
104
- messages=[
105
- {
106
- "role": "system",
107
- "content": self.system_prompt
108
- },
109
- {
110
- "role": "user",
111
- "content": self.prompt
112
- }
132
+ input=[
133
+ {"role": "system", "content": system_content},
134
+ {"role": "user", "content": user_content},
113
135
  ],
114
- max_tokens=self.max_tokens,
115
- temperature=self.temperature
136
+ #max_output_tokens=self.max_tokens,
137
+ # temperature=self.temperature,
116
138
  )
117
- self.text_response = response.choices[0].message.content
139
+ self.text_response = response.output_text
140
+
141
+
142
+
143
+ if self.text_response is None:
144
+ self.error("No response from model.")
145
+
118
146
  except Exception as e:
119
- print(e)
120
147
  self.error(f"Error: {e}")
121
148
  return
122
- if self.text_response is None:
123
- self.error("No response from chatgpt.")
124
-
125
149
 
126
150
  def run(self):
127
151
  self.error("")
@@ -137,30 +161,6 @@ class ChatGpt(OWWidget):
137
161
  self.error("No api keys provided.")
138
162
  return
139
163
 
140
- # si on relance la génération par le bouton le prompt est déjà rempli (déjà une liste)
141
- # mais on peut changer le model, la temp ou le nombre max de token
142
- if isinstance(self.prompt, list):
143
- self.prompt = [{"type": "text", "text": self.prompt}]
144
- if self.image_paths is not None and self.image_paths != []:
145
- if type(self.image_paths) == str:
146
- self.image_paths = ast.literal_eval(self.image_paths)
147
- for img_path in self.image_paths:
148
- filename = os.path.basename(img_path)
149
-
150
- # Ajoute une mention de l’image avant
151
- self.prompt.append({
152
- "type": "text",
153
- "text": f"Photo : {filename}"
154
- })
155
-
156
- with open(img_path, "rb") as f:
157
- b64_img = base64.b64encode(f.read()).decode("utf-8")
158
- self.prompt.append({
159
- "type": "image_url",
160
- "image_url": {
161
- "url": f"data:image/jpeg;base64,{b64_img}"
162
- }
163
- })
164
164
  self.progressBarInit()
165
165
  self.thread = thread_management.Thread(self.generate_answers)
166
166
  self.thread.progress.connect(self.handle_progress)
@@ -42,13 +42,14 @@ def _convert_one_file(file_path_str: str):
42
42
  out_dir = src.parent / "conversion_markdown"
43
43
  out_dir.mkdir(parents=True, exist_ok=True)
44
44
  out_md = out_dir / f"{src.stem}.md"
45
+ out_md_str = str(out_md) # Valeur par défaut, sera modifiée si "nok" pour clarité
45
46
 
46
47
  # Si déjà converti : on ne refait pas
47
48
  if out_md.exists():
48
49
  status = "ok"
49
50
  message = "existant: deja converti"
50
51
  duration = time.time() - t0
51
- return [str(src), str(out_md), status, f"{duration:.2f}", message]
52
+ return [str(src), out_md_str, status, f"{duration:.2f}", message]
52
53
 
53
54
  try:
54
55
  # Docling minimal config (inspiré du snippet)
@@ -72,18 +73,15 @@ def _convert_one_file(file_path_str: str):
72
73
  except Exception as e:
73
74
  status = "nok"
74
75
  message = f"{type(e).__name__}: {e}"
75
- # on écrit quand même un trace .md
76
- try:
77
- out_md.write_text(f"[Erreur conversion] {message}", encoding="utf-8")
78
- except Exception:
79
- pass
76
+ # Lignes d'écriture du fichier de trace .md supprimées ici
77
+ out_md_str = "" # Indique qu'aucun fichier de sortie n'a été créé.
80
78
 
81
79
  duration = time.time() - t0
82
- return [str(src), str(out_md), status, f"{duration:.2f}", message]
80
+ return [str(src), out_md_str, status, f"{duration:.2f}", message]
83
81
 
84
82
 
85
83
  class OWDoclingMarkdownizerSimple(widget.OWWidget):
86
- name = "Docling To Markdown"
84
+ name = "Docling To Markdown - v1"
87
85
  description = "Convert DOCX/PPTX/PDF to Markdown via Docling"
88
86
  icon = "icons/md.png"
89
87
  if "site-packages/Orange/widgets" in os.path.dirname(os.path.abspath(__file__)).replace("\\", "/"):
@@ -266,7 +264,8 @@ class OWDoclingMarkdownizerSimple(widget.OWWidget):
266
264
  self.status_update_signal.emit([row[0], row[2], row[4]])
267
265
  except Exception as e:
268
266
  # Gestion des erreurs de la future et envoi
269
- row = [file_path_str, str((Path(file_path_str).parent / 'a_md' / f"{Path(file_path_str).stem}.md")),
267
+ # Note : Dans ce cas, output_md est vide, car _convert_one_file renvoie ""
268
+ row = [file_path_str, "",
270
269
  "nok", "0.00", f"FutureError: {e}"]
271
270
  results.append(row)
272
271
  ws.append(row)
@@ -16,7 +16,6 @@ import easyocr
16
16
 
17
17
  from AnyQt.QtCore import QThread, pyqtSignal
18
18
  from AnyQt.QtWidgets import QApplication, QLabel, QSpinBox, QTextEdit, QPushButton
19
- from AnyQt import uic
20
19
 
21
20
  from Orange.widgets import widget
22
21
  from Orange.widgets.utils.signals import Input, Output
@@ -2,15 +2,18 @@ import os
2
2
  import sys
3
3
  from pathlib import Path
4
4
  import shutil
5
-
5
+ import comtypes
6
6
  from AnyQt.QtWidgets import QApplication
7
7
  from Orange.widgets import widget
8
8
  from Orange.widgets.utils.signals import Input, Output
9
9
  from Orange.data import Domain, StringVariable, Table, DiscreteVariable
10
-
11
- # --- Ajout pour l'écriture Excel ---
12
10
  from openpyxl import Workbook
11
+ import docx
12
+ import aspose.words as aw
13
+ import multiprocessing
14
+ import queue
13
15
 
16
+ # Les imports sont adaptés pour correspondre au style de l'autre script
14
17
  if "site-packages/Orange/widgets" in os.path.dirname(os.path.abspath(__file__)).replace("\\", "/"):
15
18
  from Orange.widgets.orangecontrib.IO4IT.utils import utils_md
16
19
  from Orange.widgets.orangecontrib.AAIT.utils.import_uic import uic
@@ -19,6 +22,24 @@ else:
19
22
  from orangecontrib.AAIT.utils.import_uic import uic
20
23
 
21
24
 
25
+ def _convert_file_process(src_path: Path, dst_dir: Path, file_type: str, result_queue: multiprocessing.Queue):
26
+ """
27
+ Fonction de conversion exécutée dans un processus séparé.
28
+ Place le résultat (statut, chemin, détails) dans une file d'attente.
29
+ """
30
+ try:
31
+ if file_type == "doc":
32
+ dst = utils_md.convert_doc_to_docx(src_path, dst_dir)
33
+ result_queue.put(("ok", str(dst), "doc->docx"))
34
+ elif file_type == "ppt":
35
+ dst = utils_md.convert_ppt_to_pptx(src_path, dst_dir)
36
+ result_queue.put(("ok", str(dst), "ppt->pptx"))
37
+ except comtypes.COMError:
38
+ result_queue.put(("ko", "", "conversion failed: COM error"))
39
+ except Exception as e:
40
+ result_queue.put(("ko", "", f"conversion failed: {e}"))
41
+
42
+
22
43
  class OWOfficeNormalizer(widget.OWWidget):
23
44
  name = "Office Normalizer"
24
45
  description = "Convertit .doc→.docx et .ppt→.pptx via COM (Windows + Office)"
@@ -47,6 +68,11 @@ class OWOfficeNormalizer(widget.OWWidget):
47
68
  self.autorun = True
48
69
  self.result = None
49
70
  self.processed_statuses = []
71
+
72
+ # Connecter la case à cocher pour activer/désactiver le spinbox
73
+ self.checkBox_timeout.toggled.connect(self.spinBox_timeout.setEnabled)
74
+ self.spinBox_timeout.setEnabled(self.checkBox_timeout.isChecked())
75
+
50
76
  self.post_initialized()
51
77
 
52
78
  @Inputs.data
@@ -72,13 +98,41 @@ class OWOfficeNormalizer(widget.OWWidget):
72
98
  self.processed_statuses = []
73
99
  self.Outputs.status_data.send(None)
74
100
 
75
- # Process files directly without a separate thread
101
+ # Déterminer la valeur du timeout
102
+ self.timeout_value = None
103
+ if self.checkBox_timeout.isChecked():
104
+ self.timeout_value = self.spinBox_timeout.value()
105
+
76
106
  result_table = self._normalize_files(self.data)
77
107
 
78
- # Send the final results to the primary output
79
108
  self.Outputs.data.send(result_table)
80
109
  self.progressBarFinished()
81
110
 
111
+ def _check_file_status(self, file_path: Path):
112
+ """
113
+ Vérifie si un fichier est accessible, non corrompu et non protégé par un mot de passe.
114
+ Retourne un tuple : (statut_court, détails)
115
+ """
116
+ if not file_path.exists():
117
+ return "ko", "not found"
118
+ try:
119
+ with open(file_path, 'rb'):
120
+ pass
121
+ except IOError as e:
122
+ return "ko", f"locked or permission denied: {e}"
123
+ try:
124
+ file_info = aw.FileFormatUtil.detect_file_format(str(file_path))
125
+ if file_info.is_encrypted:
126
+ return "ko", "password protected"
127
+ except Exception:
128
+ pass
129
+ if file_path.suffix.lower() == ".docx":
130
+ try:
131
+ docx.Document(file_path)
132
+ except Exception:
133
+ return "ko", "corrupted"
134
+ return "ok", "ready"
135
+
82
136
  def _normalize_files(self, in_data: Table) -> Table:
83
137
  rows = []
84
138
  file_paths = [str(x) for x in in_data.get_column("file_path")]
@@ -93,7 +147,6 @@ class OWOfficeNormalizer(widget.OWWidget):
93
147
  output_base_dir = common_path / "office_normalisation"
94
148
  output_base_dir.mkdir(parents=True, exist_ok=True)
95
149
 
96
- # Gère le nom du fichier Excel avec incrémentation
97
150
  base_name = "normalization_results"
98
151
  excel_path = output_base_dir / f"{base_name}.xlsx"
99
152
  counter = 1
@@ -101,7 +154,6 @@ class OWOfficeNormalizer(widget.OWWidget):
101
154
  excel_path = output_base_dir / f"{base_name}_{counter}.xlsx"
102
155
  counter += 1
103
156
 
104
- # Initialise le classeur Excel
105
157
  wb = Workbook()
106
158
  ws = wb.active
107
159
  ws.title = "Normalization Results"
@@ -110,67 +162,72 @@ class OWOfficeNormalizer(widget.OWWidget):
110
162
 
111
163
  for i, path_str in enumerate(file_paths):
112
164
  self.progressBarSet(i / total_files * 100)
113
-
114
165
  src = Path(path_str)
115
166
  dst_path = ""
116
- status_text = ""
117
- status_short = ""
118
- details = ""
119
-
120
- if not src.exists():
121
- status_short = "ko"
122
- details = "not found"
123
- status_text = f"ko: {details}"
124
- else:
125
- try:
126
- relative_path_from_common = src.parent.relative_to(common_path)
127
- dst_dir = output_base_dir / relative_path_from_common
128
- dst_dir.mkdir(parents=True, exist_ok=True)
167
+ status_short, details = self._check_file_status(src)
129
168
 
130
- if src.suffix.lower() == ".doc":
131
- dst = utils_md.convert_doc_to_docx(src, dst_dir)
132
- dst_path = str(dst)
133
- status_short = "ok"
134
- details = "doc->docx"
135
- status_text = f"ok: {details}"
136
- elif src.suffix.lower() == ".ppt":
137
- dst = utils_md.convert_ppt_to_pptx(src, dst_dir)
169
+ if status_short == "ok":
170
+ try:
171
+ if src.suffix.lower() == ".docx":
172
+ dst_dir = output_base_dir / src.parent.relative_to(common_path)
173
+ dst_dir.mkdir(parents=True, exist_ok=True)
174
+ dst = dst_dir / src.name
175
+ shutil.copy(src, dst)
138
176
  dst_path = str(dst)
139
- status_short = "ok"
140
- details = "ppt->pptx"
141
- status_text = f"ok: {details}"
177
+ details = "docx - unchanged"
178
+
179
+ elif src.suffix.lower() in [".doc", ".ppt"]:
180
+ dst_dir = output_base_dir / src.parent.relative_to(common_path)
181
+ dst_dir.mkdir(parents=True, exist_ok=True)
182
+
183
+ result_queue = multiprocessing.Queue()
184
+ p = multiprocessing.Process(
185
+ target=_convert_file_process,
186
+ args=(src, dst_dir, src.suffix.lower().lstrip("."), result_queue)
187
+ )
188
+ p.start()
189
+
190
+ try:
191
+ # Utilisation de la valeur de timeout sélectionnée
192
+ p.join(timeout=self.timeout_value)
193
+
194
+ if p.is_alive():
195
+ p.terminate()
196
+ status_short = "ko"
197
+ details = "conversion timed out"
198
+ else:
199
+ status_short, dst_path, details = result_queue.get(timeout=1)
200
+ except queue.Empty:
201
+ status_short = "ko"
202
+ details = "conversion process failed silently"
203
+ except Exception as e:
204
+ status_short = "ko"
205
+ details = f"conversion failed: {e}"
206
+
142
207
  else:
208
+ dst_dir = output_base_dir / src.parent.relative_to(common_path)
209
+ dst_dir.mkdir(parents=True, exist_ok=True)
143
210
  dst = dst_dir / src.name
144
211
  if not dst.exists():
145
212
  shutil.copy(src, dst)
146
213
  dst_path = str(dst)
147
- status_short = "ok"
148
214
  details = "unchanged"
149
- status_text = f"ok: {details}"
215
+
150
216
  except Exception as e:
151
- error_msg = str(e)
152
217
  status_short = "ko"
153
- details = f"error: {error_msg}"
154
- status_text = f"ko: {details}"
218
+ details = f"error: {e}"
155
219
 
156
- # Ajoute la ligne de résultat à la table Excel et la sauvegarde
157
220
  result_row = [path_str, dst_path, status_short, details]
158
221
  ws.append(result_row)
159
222
  wb.save(excel_path)
223
+ rows.append([path_str, dst_path, status_short])
160
224
 
161
- # Append to the final results list for Orange table
162
- rows.append([path_str, dst_path, status_text])
163
-
164
- # Append to the status update list and send the incremental table
165
225
  self.processed_statuses.append([path_str, status_short, details])
166
226
  self._send_status_table()
167
227
 
168
- # This is crucial for UI updates, including the progress bar
169
228
  QApplication.processEvents()
170
229
 
171
230
  self.progressBarSet(100)
172
-
173
- # Create and return the final output table
174
231
  domain = Domain([], metas=[
175
232
  StringVariable("src_path"),
176
233
  StringVariable("dst_path"),
@@ -179,7 +236,6 @@ class OWOfficeNormalizer(widget.OWWidget):
179
236
  return Table.from_list(domain, rows)
180
237
 
181
238
  def _send_status_table(self):
182
- """Sends an incremental table to the status_data output."""
183
239
  domain = Domain([], metas=[
184
240
  StringVariable("src_path"),
185
241
  DiscreteVariable("status", values=["ok", "ko"]),
@@ -196,6 +252,7 @@ class OWOfficeNormalizer(widget.OWWidget):
196
252
 
197
253
 
198
254
  if __name__ == "__main__":
255
+ multiprocessing.freeze_support()
199
256
  app = QApplication(sys.argv)
200
257
  my_widget = OWOfficeNormalizer()
201
258
  my_widget.show()
@@ -10,6 +10,7 @@ from Orange.widgets.utils.signals import Input, Output
10
10
 
11
11
  # --- Ajout pour l'écriture Excel ---
12
12
  from openpyxl import Workbook
13
+ import pypdf
13
14
 
14
15
  # Les imports sont adaptés pour correspondre au style de l'autre script
15
16
  if "site-packages/Orange/widgets" in os.path.dirname(os.path.abspath(__file__)).replace("\\", "/"):
@@ -56,7 +57,7 @@ class OWPdfType(widget.OWWidget):
56
57
  self.thread = None
57
58
  self.autorun = True
58
59
  self.result = None
59
- self.processed_statuses = [] # List to accumulate statuses
60
+ self.processed_statuses = []
60
61
  self.post_initialized()
61
62
 
62
63
  @Inputs.data
@@ -73,6 +74,7 @@ class OWPdfType(widget.OWWidget):
73
74
  self.Outputs.text_data.send(None)
74
75
  self.Outputs.image_data.send(None)
75
76
  self.Outputs.status_data.send(None)
77
+ QApplication.quit()
76
78
  return
77
79
 
78
80
  self.error("")
@@ -80,19 +82,17 @@ class OWPdfType(widget.OWWidget):
80
82
  self.data.domain["file_path"]
81
83
  except KeyError:
82
84
  self.error("You need a 'file_path' column in input data.")
85
+ QApplication.quit()
83
86
  return
84
87
 
85
88
  if type(self.data.domain["file_path"]).__name__ != 'StringVariable':
86
89
  self.error("'file_path' column needs to be a Text.")
90
+ QApplication.quit()
87
91
  return
88
92
 
89
93
  self.progressBarInit()
90
- self.processed_statuses = [] # Reset status list for a new run
91
-
92
- # Connect the internal status update signal to a new handler
94
+ self.processed_statuses = []
93
95
  self.status_update_signal.connect(self.handle_status_update)
94
-
95
- # Pass the status update signal's emit method to the thread
96
96
  self.thread = Thread(self._process_pdfs, self.data, status_callback=self.status_update_signal.emit)
97
97
  self.thread.progress.connect(self.handle_progress)
98
98
  self.thread.result.connect(self.handle_result)
@@ -102,10 +102,8 @@ class OWPdfType(widget.OWWidget):
102
102
  def _process_pdfs(self, in_data: Table, progress_callback: callable, status_callback: callable) -> tuple[
103
103
  Table | None, Table | None]:
104
104
 
105
- # Extraction des chemins de fichiers avant de commencer le traitement
106
105
  paths = [str(x) for x in in_data.get_column("file_path")]
107
106
 
108
- # --- Gérer le nom du fichier Excel avec incrémentation ---
109
107
  excel_output_dir = Path.cwd() / "pdf_check_results"
110
108
  if paths:
111
109
  first_file_path = Path(paths[0])
@@ -120,7 +118,6 @@ class OWPdfType(widget.OWWidget):
120
118
  excel_path = excel_output_dir / f"{base_name}_{counter}.xlsx"
121
119
  counter += 1
122
120
 
123
- # --- Initialiser le classeur Excel ---
124
121
  wb = Workbook()
125
122
  ws = wb.active
126
123
  ws.title = "PDF Check Results"
@@ -133,9 +130,8 @@ class OWPdfType(widget.OWWidget):
133
130
  total_files = len(paths)
134
131
  for i, p in enumerate(paths):
135
132
  progress_callback(i / total_files * 100)
136
-
137
133
  fp = Path(p)
138
- result_row = [p, "", ""] # Initialisation de la ligne de résultat
134
+ result_row = [p, "", ""]
139
135
 
140
136
  if not fp.exists() or fp.suffix.lower() != ".pdf":
141
137
  result_row[1] = "ko"
@@ -146,64 +142,56 @@ class OWPdfType(widget.OWWidget):
146
142
  continue
147
143
 
148
144
  try:
149
- is_text = utils_md.is_pdf_text_based(fp)
150
- if is_text:
151
- text_indices.append(i)
152
- result_row[1] = "ok"
153
- result_row[2] = "Text-based PDF"
145
+ # Vérification de l'attribut is_encrypted de PyPDF avant de traiter le fichier.
146
+ reader = pypdf.PdfReader(fp)
147
+ if reader.is_encrypted:
148
+ result_row[1] = "ko"
149
+ result_row[2] = "Error: PDF is password protected."
154
150
  else:
155
- image_indices.append(i)
156
- result_row[1] = "ok"
157
- result_row[2] = "Image-based PDF"
158
-
159
- status_callback(result_row)
160
- ws.append(result_row)
161
- wb.save(excel_path)
151
+ is_text = utils_md.is_pdf_text_based(fp)
152
+ if is_text:
153
+ text_indices.append(i)
154
+ result_row[1] = "ok"
155
+ result_row[2] = "Text-based PDF"
156
+ else:
157
+ image_indices.append(i)
158
+ result_row[1] = "ok"
159
+ result_row[2] = "Image-based PDF"
162
160
  except Exception as e:
163
161
  result_row[1] = "ko"
164
162
  result_row[2] = f"Error: {str(e)}"
163
+ finally:
165
164
  status_callback(result_row)
166
165
  ws.append(result_row)
167
166
  wb.save(excel_path)
168
167
 
169
168
  progress_callback(100)
170
169
 
171
- # Create table for text PDFs
172
170
  if not text_indices:
173
171
  text_table = None
174
172
  else:
175
173
  text_table = in_data[text_indices]
176
174
 
177
- # Create table for image PDFs
178
175
  if not image_indices:
179
176
  image_table = None
180
177
  else:
181
178
  image_table = in_data[image_indices]
182
179
 
183
- # The final result is still returned here
184
180
  return text_table, image_table
185
181
 
186
182
  def handle_progress(self, value: float) -> None:
187
183
  self.progressBarSet(value)
188
184
 
189
185
  def handle_status_update(self, new_status: list):
190
- """
191
- Receives a single status update from the thread, appends it to the list,
192
- and sends a new, updated status table.
193
- """
194
186
  self.processed_statuses.append(new_status)
195
-
196
- # Correct Domain creation: move "file_path" to metas
197
187
  status_domain = Domain(
198
- [], # The variables list should be empty
188
+ [],
199
189
  metas=[
200
190
  StringVariable("file_path"),
201
191
  DiscreteVariable("status", values=["ok", "ko"]),
202
192
  StringVariable("details")
203
193
  ]
204
194
  )
205
-
206
- # Now, the data is correctly structured for the new domain
207
195
  status_table = Table.from_list(status_domain, self.processed_statuses)
208
196
  self.Outputs.status_data.send(status_table)
209
197
 
@@ -3,7 +3,7 @@ import boto3
3
3
  from Orange.widgets.widget import OWWidget, Input, Output
4
4
  from Orange.widgets.settings import Setting
5
5
  from Orange.widgets import gui
6
- from AnyQt.QtWidgets import QLineEdit, QFileDialog
6
+ from AnyQt.QtWidgets import QLineEdit, QFileDialog, QApplication
7
7
  from Orange.data import Table
8
8
 
9
9
 
@@ -15,7 +15,6 @@ class OWS3FileDownloader(OWWidget):
15
15
  icon = "icons_dev/upload.png"
16
16
  priority = 20
17
17
  category = "AAIT - API"
18
-
19
18
  # Paramètres utilisateur
20
19
  access_key = Setting("")
21
20
  secret_key = Setting("")
@@ -92,15 +91,26 @@ class OWS3FileDownloader(OWWidget):
92
91
  s3 = session.client("s3")
93
92
  files = os.listdir(self.download_path) # Liste tout (fichiers + dossiers)
94
93
  files_only = [f for f in files if os.path.isfile(os.path.join(self.download_path, f))]
95
- for file in files_only:
96
- s3.upload_file(self.download_path + "/" + file, self.bucket_name, file)
94
+ print("files_only ::: ", files_only)
95
+ #for file in files_only:
96
+ # s3.upload_file(self.download_path + "/" + file, self.bucket_name, file)
97
97
  self.information("Upload terminé !")
98
98
  self.Outputs.data.send(self.data)
99
99
 
100
100
 
101
-
102
101
  except Exception as e:
103
102
  print(e)
104
103
  self.error(str(e))
105
104
 
106
105
 
106
+ if __name__ == "__main__":
107
+ import sys
108
+
109
+ app = QApplication(sys.argv)
110
+ window = OWS3FileDownloader()
111
+ window.show()
112
+
113
+ if hasattr(app, "exec"):
114
+ sys.exit(app.exec())
115
+ else:
116
+ sys.exit(app.exec_())
@@ -3,7 +3,7 @@ import boto3
3
3
  from Orange.widgets.widget import OWWidget, Input
4
4
  from Orange.widgets.settings import Setting
5
5
  from Orange.widgets import gui
6
- from AnyQt.QtWidgets import QLineEdit, QFileDialog
6
+ from AnyQt.QtWidgets import QLineEdit, QFileDialog, QApplication
7
7
  from Orange.data import Table
8
8
 
9
9
  class OWS3FileDownloader(OWWidget):
@@ -93,3 +93,14 @@ class OWS3FileDownloader(OWWidget):
93
93
  self.error(str(e))
94
94
 
95
95
 
96
+ if __name__ == "__main__":
97
+ import sys
98
+
99
+ app = QApplication(sys.argv)
100
+ window = OWS3FileDownloader()
101
+ window.show()
102
+
103
+ if hasattr(app, "exec"):
104
+ sys.exit(app.exec())
105
+ else:
106
+ sys.exit(app.exec_())
@@ -16,7 +16,7 @@
16
16
  <widget class="QGroupBox" name="groupBox">
17
17
  <property name="geometry">
18
18
  <rect>
19
- <x>20</x>
19
+ <x>30</x>
20
20
  <y>40</y>
21
21
  <width>661</width>
22
22
  <height>361</height>
@@ -28,9 +28,9 @@
28
28
  <widget class="QGroupBox" name="groupBox_2">
29
29
  <property name="geometry">
30
30
  <rect>
31
- <x>150</x>
31
+ <x>120</x>
32
32
  <y>88</y>
33
- <width>111</width>
33
+ <width>101</width>
34
34
  <height>251</height>
35
35
  </rect>
36
36
  </property>
@@ -41,9 +41,9 @@
41
41
  <widget class="QGroupBox" name="groupBox_3">
42
42
  <property name="geometry">
43
43
  <rect>
44
- <x>260</x>
44
+ <x>220</x>
45
45
  <y>88</y>
46
- <width>121</width>
46
+ <width>111</width>
47
47
  <height>251</height>
48
48
  </rect>
49
49
  </property>
@@ -54,35 +54,22 @@
54
54
  <widget class="QGroupBox" name="groupBox_4">
55
55
  <property name="geometry">
56
56
  <rect>
57
- <x>380</x>
57
+ <x>330</x>
58
58
  <y>88</y>
59
- <width>120</width>
59
+ <width>111</width>
60
60
  <height>251</height>
61
61
  </rect>
62
62
  </property>
63
63
  <property name="title">
64
64
  <string/>
65
65
  </property>
66
- <widget class="QLabel" name="label_5">
67
- <property name="geometry">
68
- <rect>
69
- <x>40</x>
70
- <y>30</y>
71
- <width>35</width>
72
- <height>10</height>
73
- </rect>
74
- </property>
75
- <property name="text">
76
- <string>here</string>
77
- </property>
78
- </widget>
79
66
  </widget>
80
67
  <widget class="QGroupBox" name="groupBox_5">
81
68
  <property name="geometry">
82
69
  <rect>
83
- <x>150</x>
70
+ <x>20</x>
84
71
  <y>88</y>
85
- <width>351</width>
72
+ <width>621</width>
86
73
  <height>21</height>
87
74
  </rect>
88
75
  </property>
@@ -92,38 +79,51 @@
92
79
  <widget class="QLabel" name="label_3">
93
80
  <property name="geometry">
94
81
  <rect>
95
- <x>40</x>
82
+ <x>130</x>
96
83
  <y>3</y>
97
84
  <width>61</width>
98
85
  <height>16</height>
99
86
  </rect>
100
87
  </property>
101
88
  <property name="text">
102
- <string>content</string>
89
+ <string>Sender</string>
103
90
  </property>
104
91
  </widget>
105
92
  <widget class="QLabel" name="label_4">
106
93
  <property name="geometry">
107
94
  <rect>
108
- <x>140</x>
95
+ <x>230</x>
109
96
  <y>3</y>
110
97
  <width>71</width>
111
98
  <height>16</height>
112
99
  </rect>
113
100
  </property>
114
101
  <property name="text">
115
- <string>metadata</string>
102
+ <string>Receiver</string>
116
103
  </property>
117
104
  </widget>
118
105
  <widget class="QLabel" name="label_6">
119
106
  <property name="geometry">
120
107
  <rect>
121
- <x>270</x>
108
+ <x>350</x>
122
109
  <y>3</y>
123
110
  <width>71</width>
124
111
  <height>16</height>
125
112
  </rect>
126
113
  </property>
114
+ <property name="text">
115
+ <string>Copy</string>
116
+ </property>
117
+ </widget>
118
+ <widget class="QLabel" name="label_5">
119
+ <property name="geometry">
120
+ <rect>
121
+ <x>30</x>
122
+ <y>4</y>
123
+ <width>61</width>
124
+ <height>16</height>
125
+ </rect>
126
+ </property>
127
127
  <property name="text">
128
128
  <string>Mail path</string>
129
129
  </property>
@@ -132,14 +132,79 @@
132
132
  <widget class="QLabel" name="label">
133
133
  <property name="geometry">
134
134
  <rect>
135
- <x>260</x>
136
- <y>38</y>
137
- <width>261</width>
135
+ <x>130</x>
136
+ <y>40</y>
137
+ <width>411</width>
138
138
  <height>31</height>
139
139
  </rect>
140
140
  </property>
141
141
  <property name="text">
142
- <string>Read &quot;Mail path&quot; column</string>
142
+ <string>Read &quot;Mail path&quot; column, need [&quot;Sender&quot;, &quot;Receiver&quot;, &quot;Copy&quot;, &quot;Priority&quot;, &quot;Title&quot;, &quot;Answer&quot;]</string>
143
+ </property>
144
+ </widget>
145
+ <widget class="QGroupBox" name="groupBox_7">
146
+ <property name="geometry">
147
+ <rect>
148
+ <x>440</x>
149
+ <y>88</y>
150
+ <width>101</width>
151
+ <height>251</height>
152
+ </rect>
153
+ </property>
154
+ <property name="title">
155
+ <string/>
156
+ </property>
157
+ <widget class="QLabel" name="label_7">
158
+ <property name="geometry">
159
+ <rect>
160
+ <x>30</x>
161
+ <y>3</y>
162
+ <width>61</width>
163
+ <height>16</height>
164
+ </rect>
165
+ </property>
166
+ <property name="text">
167
+ <string>Priority</string>
168
+ </property>
169
+ </widget>
170
+ </widget>
171
+ <widget class="QGroupBox" name="groupBox_6">
172
+ <property name="geometry">
173
+ <rect>
174
+ <x>540</x>
175
+ <y>88</y>
176
+ <width>101</width>
177
+ <height>251</height>
178
+ </rect>
179
+ </property>
180
+ <property name="title">
181
+ <string/>
182
+ </property>
183
+ <widget class="QLabel" name="label_8">
184
+ <property name="geometry">
185
+ <rect>
186
+ <x>36</x>
187
+ <y>3</y>
188
+ <width>71</width>
189
+ <height>16</height>
190
+ </rect>
191
+ </property>
192
+ <property name="text">
193
+ <string>Answer</string>
194
+ </property>
195
+ </widget>
196
+ </widget>
197
+ <widget class="QGroupBox" name="groupBox_8">
198
+ <property name="geometry">
199
+ <rect>
200
+ <x>20</x>
201
+ <y>88</y>
202
+ <width>101</width>
203
+ <height>251</height>
204
+ </rect>
205
+ </property>
206
+ <property name="title">
207
+ <string/>
143
208
  </property>
144
209
  </widget>
145
210
  </widget>
@@ -31,7 +31,7 @@
31
31
  <x>10</x>
32
32
  <y>20</y>
33
33
  <width>431</width>
34
- <height>151</height>
34
+ <height>121</height>
35
35
  </rect>
36
36
  </property>
37
37
  <property name="text">
@@ -47,6 +47,44 @@
47
47
  <bool>true</bool>
48
48
  </property>
49
49
  </widget>
50
+ <widget class="QSpinBox" name="spinBox_timeout">
51
+ <property name="enabled">
52
+ <bool>false</bool>
53
+ </property>
54
+ <property name="geometry">
55
+ <rect>
56
+ <x>160</x>
57
+ <y>150</y>
58
+ <width>81</width>
59
+ <height>22</height>
60
+ </rect>
61
+ </property>
62
+ <property name="minimum">
63
+ <number>1</number>
64
+ </property>
65
+ <property name="maximum">
66
+ <number>240</number>
67
+ </property>
68
+ <property name="value">
69
+ <number>60</number>
70
+ </property>
71
+ </widget>
72
+ <widget class="QCheckBox" name="checkBox_timeout">
73
+ <property name="geometry">
74
+ <rect>
75
+ <x>10</x>
76
+ <y>150</y>
77
+ <width>141</width>
78
+ <height>20</height>
79
+ </rect>
80
+ </property>
81
+ <property name="text">
82
+ <string>Enable Timeout (s):</string>
83
+ </property>
84
+ <property name="checked">
85
+ <bool>true</bool>
86
+ </property>
87
+ </widget>
50
88
  </widget>
51
89
  <widget class="QCheckBox" name="checkBox_send">
52
90
  <property name="enabled">
@@ -83,4 +121,4 @@
83
121
  </widget>
84
122
  <resources/>
85
123
  <connections/>
86
- </ui>
124
+ </ui>
@@ -2,9 +2,12 @@ from setuptools import setup, find_packages
2
2
 
3
3
  # Configuration
4
4
  NAME = "io4it"
5
- VERSION = "2.1.3"
5
+ VERSION = "2.1.5"
6
6
 
7
7
  INSTALL_REQUIRES = [
8
+ "torchvision==2.23.0",
9
+ "torchaudio==2.8.0",
10
+ "torch==2.8.0",
8
11
  "pylatexenc",
9
12
  "docopt",
10
13
  "boto3",
@@ -14,7 +17,7 @@ INSTALL_REQUIRES = [
14
17
  "speechbrain",
15
18
  "whisper",
16
19
  "whisper-openai",
17
- "pyannote.audio",
20
+ "pyannote.audio==3.4.0",
18
21
  "pyannote-core",
19
22
  "pypandoc",
20
23
  "pypandoc-binary",
@@ -25,7 +28,8 @@ INSTALL_REQUIRES = [
25
28
  "doc2docx",
26
29
  "msal",
27
30
  "exchangelib",
28
- "CATEGORIT"
31
+ "CATEGORIT",
32
+ "comtypes"
29
33
  ]
30
34
 
31
35
  AUTHOR = ""
File without changes
File without changes
File without changes