io4it 3.0.2.1__tar.gz → 3.0.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93)
  1. {io4it-3.0.2.1 → io4it-3.0.3}/PKG-INFO +1 -1
  2. {io4it-3.0.2.1 → io4it-3.0.3}/io4it.egg-info/PKG-INFO +1 -1
  3. {io4it-3.0.2.1 → io4it-3.0.3}/io4it.egg-info/SOURCES.txt +1 -0
  4. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/utils/utils_md.py +9 -4
  5. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/OWExportMarkdown.py +24 -24
  6. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/OWPdfType.py +68 -31
  7. io4it-3.0.3/orangecontrib/IO4IT/widgets/designer/owpdftype.ui +131 -0
  8. {io4it-3.0.2.1 → io4it-3.0.3}/setup.py +1 -1
  9. io4it-3.0.2.1/orangecontrib/IO4IT/widgets/designer/owpdftype.ui +0 -86
  10. {io4it-3.0.2.1 → io4it-3.0.3}/io4it.egg-info/dependency_links.txt +0 -0
  11. {io4it-3.0.2.1 → io4it-3.0.3}/io4it.egg-info/entry_points.txt +0 -0
  12. {io4it-3.0.2.1 → io4it-3.0.3}/io4it.egg-info/namespace_packages.txt +0 -0
  13. {io4it-3.0.2.1 → io4it-3.0.3}/io4it.egg-info/requires.txt +0 -0
  14. {io4it-3.0.2.1 → io4it-3.0.3}/io4it.egg-info/top_level.txt +0 -0
  15. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/__init__.py +0 -0
  16. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/ocr_function/__init__.py +0 -0
  17. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/ocr_function/word_converter.py +0 -0
  18. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/utils/__init__.py +0 -0
  19. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/utils/config.json +0 -0
  20. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/utils/keys_manager.py +0 -0
  21. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/utils/mail.py +0 -0
  22. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/utils/pool_exec_utils.py +0 -0
  23. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/utils/secret_manager.py +0 -0
  24. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/OWChatGpt.py +0 -0
  25. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/OWDeep_Search.py +0 -0
  26. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/OWDoclingASR.py +0 -0
  27. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/OWDoclingToMarkdown.py +0 -0
  28. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/OWExtractTablesDocxToXlsx.py +0 -0
  29. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/OWInboxMailMonitoring.py +0 -0
  30. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/OWMD2HTML.py +0 -0
  31. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/OWMarkdownLoader.py +0 -0
  32. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/OWMarkdownizer.py +0 -0
  33. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/OWOfficeNormalizer.py +0 -0
  34. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/OWParserHTML.py +0 -0
  35. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/OWProcessPoolExecutor.py +0 -0
  36. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/OWS3Uploader.py +0 -0
  37. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/OWS3downloader.py +0 -0
  38. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/OWS3list.py +0 -0
  39. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/OWSpeechToText.py +0 -0
  40. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/OWWebSearch.py +0 -0
  41. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/OWmailLoader.py +0 -0
  42. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/OWmailSender.py +0 -0
  43. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/OWwordpdf2docx.py +0 -0
  44. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/__init__.py +0 -0
  45. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/designer/__init__.py +0 -0
  46. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/designer/nogui.ui +0 -0
  47. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/designer/ow_file_ext_selector.ui +0 -0
  48. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/designer/owchatgpt.ui +0 -0
  49. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/designer/owdeepsearch.ui +0 -0
  50. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/designer/owdoclingasr.ui +0 -0
  51. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/designer/owdoclingtomarkdown.ui +0 -0
  52. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/designer/owdocxtoxlsx.ui +0 -0
  53. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/designer/owexportmarkdown.ui +0 -0
  54. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/designer/owinboxmailmonitoring.ui +0 -0
  55. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/designer/owmailloader.ui +0 -0
  56. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/designer/owmailsender.ui +0 -0
  57. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/designer/owmarkdownizer.ui +0 -0
  58. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/designer/owmarkdownloader.ui +0 -0
  59. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/designer/owmd2html.ui +0 -0
  60. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/designer/owofficenormalizer.ui +0 -0
  61. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/designer/owparserhtml.ui +0 -0
  62. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/designer/owprocesspoolexecutor.ui +0 -0
  63. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/designer/owspeechtotext.ui +0 -0
  64. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/designer/owvisualizationer.ui +0 -0
  65. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/designer/owwebsearch.ui +0 -0
  66. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/designer/wordpdf2docx.ui +0 -0
  67. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/icons/__init__.py +0 -0
  68. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/icons/chatgpt.png +0 -0
  69. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/icons/check_pdf.png +0 -0
  70. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/icons/deepsearch.svg +0 -0
  71. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/icons/dep_md_old.png +0 -0
  72. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/icons/download.png +0 -0
  73. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/icons/export_md.png +0 -0
  74. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/icons/extract_table.png +0 -0
  75. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/icons/file_extensor.png +0 -0
  76. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/icons/html.png +0 -0
  77. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/icons/list_aws.png +0 -0
  78. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/icons/load_md.png +0 -0
  79. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/icons/mail_loader.png +0 -0
  80. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/icons/mail_writer.png +0 -0
  81. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/icons/md.png +0 -0
  82. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/icons/monitor-email.svg +0 -0
  83. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/icons/office_normalizer.png +0 -0
  84. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/icons/owmd2html.svg +0 -0
  85. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/icons/process_pool_executor.png +0 -0
  86. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/icons/speech_to_text.png +0 -0
  87. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/icons/upload.png +0 -0
  88. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/icons/visualizationer.png +0 -0
  89. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/icons/websearch.png +0 -0
  90. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/icons/wordpdf2docx.png +0 -0
  91. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/IO4IT/widgets/icons_dev/__init__.py +0 -0
  92. {io4it-3.0.2.1 → io4it-3.0.3}/orangecontrib/__init__.py +0 -0
  93. {io4it-3.0.2.1 → io4it-3.0.3}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: io4it
3
- Version: 3.0.2.1
3
+ Version: 3.0.3
4
4
  Home-page:
5
5
  Author:
6
6
  Author-email:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: io4it
3
- Version: 3.0.2.1
3
+ Version: 3.0.3
4
4
  Home-page:
5
5
  Author:
6
6
  Author-email:
@@ -1,3 +1,4 @@
1
+ setup.cfg
1
2
  setup.py
2
3
  io4it.egg-info/PKG-INFO
3
4
  io4it.egg-info/SOURCES.txt
@@ -1,6 +1,7 @@
1
1
  import re, hashlib, secrets, urllib.parse, logging, numpy as np
2
2
  from pathlib import Path
3
3
  import fitz # PyMuPDF
4
+ import win32com.client
4
5
 
5
6
  try:
6
7
  import easyocr
@@ -88,7 +89,6 @@ def _make_powerpoint_invisible(ppt):
88
89
  pass
89
90
 
90
91
  def convert_doc_to_docx(src: Path, out_dir: Path) -> Path:
91
- import win32com.client
92
92
  out_dir.mkdir(parents=True, exist_ok=True)
93
93
  if src.suffix.lower() != ".doc":
94
94
  return src if src.suffix.lower() == ".docx" else src
@@ -108,7 +108,7 @@ def convert_doc_to_docx(src: Path, out_dir: Path) -> Path:
108
108
  word.Quit()
109
109
 
110
110
  def convert_ppt_to_pptx(src: Path, out_dir: Path) -> Path:
111
- import win32com.client
111
+
112
112
  out_dir.mkdir(parents=True, exist_ok=True)
113
113
  if src.suffix.lower() != ".ppt":
114
114
  return src if src.suffix.lower() == ".pptx" else src
@@ -128,7 +128,6 @@ def convert_ppt_to_pptx(src: Path, out_dir: Path) -> Path:
128
128
  ppt.Quit()
129
129
 
130
130
  def docx_to_pdf(src_docx: Path, out_dir: Path) -> Path:
131
- import win32com.client
132
131
  out_dir.mkdir(parents=True, exist_ok=True)
133
132
  pdf_path = out_dir / (src_docx.stem + ".pdf")
134
133
  if pdf_path.exists() and pdf_path.stat().st_size > 0:
@@ -149,7 +148,6 @@ def docx_to_pdf(src_docx: Path, out_dir: Path) -> Path:
149
148
  word.Quit()
150
149
 
151
150
  def pptx_to_pdf(src_pptx: Path, out_dir: Path) -> Path:
152
- import win32com.client
153
151
  out_dir.mkdir(parents=True, exist_ok=True)
154
152
  pdf_path = out_dir / (src_pptx.stem + ".pdf")
155
153
  if pdf_path.exists() and pdf_path.stat().st_size > 0:
@@ -165,3 +163,10 @@ def pptx_to_pdf(src_pptx: Path, out_dir: Path) -> Path:
165
163
  return pdf_path
166
164
  finally:
167
165
  ppt.Quit()
166
+
167
+ def is_word_installed():
168
+ try:
169
+ win32com.client.Dispatch("Word.Application")
170
+ return True
171
+ except Exception:
172
+ return False
@@ -19,8 +19,10 @@ from docx2pdf import convert
19
19
  # Chargement UI
20
20
  if "site-packages/Orange/widgets" in os.path.dirname(os.path.abspath(__file__)).replace("\\", "/"):
21
21
  from Orange.widgets.orangecontrib.AAIT.utils.import_uic import uic
22
+ from Orange.widgets.orangecontrib.IO4IT.utils import utils_md
22
23
  else:
23
24
  from orangecontrib.AAIT.utils.import_uic import uic
25
+ from orangecontrib.IO4IT.utils import utils_md
24
26
 
25
27
 
26
28
  class OWExportMarkdown(widget.OWWidget):
@@ -164,32 +166,30 @@ class OWExportMarkdown(widget.OWWidget):
164
166
  tmp_md = tmp.name
165
167
 
166
168
  try:
167
- # DOCX
168
- pypandoc.convert_file(tmp_md, to="docx", format="gfm-yaml_metadata_block", outputfile=docx_out)
169
- self.ajouter_en_tete_pied_docx(
170
- docx_out,
171
- "Rapport - Orange AI",
172
- "Page générée automatiquement - Ne pas diffuser"
173
- )
174
-
175
- # PPTX
176
- pypandoc.convert_file(tmp_md, to="pptx", format="gfm-yaml_metadata_block", outputfile=pptx_out)
177
- self.ajouter_entete_pied_pptx(
178
- pptx_out,
179
- "Orange AI – Présentation",
180
- "Page générée automatiquement"
181
- )
182
-
183
- # PDF (docx -> pdf) avec chemin de sortie exact
169
+ if utils_md.is_word_installed():
170
+ pypandoc.convert_file(tmp_md, to="docx", format="gfm-yaml_metadata_block", outputfile=docx_out)
171
+ self.ajouter_en_tete_pied_docx(
172
+ docx_out,
173
+ "Rapport - Orange AI",
174
+ "Page générée automatiquement - Ne pas diffuser"
175
+ )
176
+
177
+ # PPTX
178
+ pypandoc.convert_file(tmp_md, to="pptx", format="gfm-yaml_metadata_block", outputfile=pptx_out)
179
+ self.ajouter_entete_pied_pptx(
180
+ pptx_out,
181
+ "Orange AI – Présentation",
182
+ "Page générée automatiquement"
183
+ )
184
+ else:
185
+ raise Exception("Word non détecté")
186
+
187
+ except Exception:
184
188
  try:
185
- convert(docx_out, pdf_out)
189
+ pypandoc.convert_file(tmp_md, to="pdf", outputfile=pdf_out)
186
190
  except Exception:
187
- # fallback pandoc->pdf (si LaTeX dispo)
188
- try:
189
- pypandoc.convert_file(tmp_md, to="pdf", outputfile=pdf_out)
190
- except Exception:
191
- self.error(f"Échec conversion PDF pour la ligne {i+1}.")
192
- pdf_out = ""
191
+ self.error(f"Échec conversion PDF pour la ligne {i + 1}.")
192
+ pdf_out = ""
193
193
  finally:
194
194
  try:
195
195
  os.remove(tmp_md)
@@ -1,12 +1,13 @@
1
1
  import os
2
2
  import sys
3
3
  from pathlib import Path
4
-
5
- from AnyQt.QtWidgets import QApplication
6
4
  from AnyQt.QtCore import pyqtSignal
7
5
  from Orange.data import Domain, StringVariable, Table, DiscreteVariable
8
- from Orange.widgets import widget
6
+
7
+ import fitz # PyMuPDF
8
+ from AnyQt.QtWidgets import QApplication, QCheckBox, QPushButton
9
9
  from Orange.widgets.utils.signals import Input, Output
10
+ from Orange.widgets.settings import Setting
10
11
 
11
12
  # --- Ajout pour l'écriture Excel ---
12
13
  from openpyxl import Workbook
@@ -15,14 +16,14 @@ from openpyxl import Workbook
15
16
  if "site-packages/Orange/widgets" in os.path.dirname(os.path.abspath(__file__)).replace("\\", "/"):
16
17
  from Orange.widgets.orangecontrib.IO4IT.utils import utils_md
17
18
  from Orange.widgets.orangecontrib.AAIT.utils.thread_management import Thread
18
- from Orange.widgets.orangecontrib.AAIT.utils.import_uic import uic
19
+ from Orange.widgets.orangecontrib.AAIT.utils import base_widget
19
20
  else:
20
21
  from orangecontrib.IO4IT.utils import utils_md
21
22
  from orangecontrib.AAIT.utils.thread_management import Thread
22
- from orangecontrib.AAIT.utils.import_uic import uic
23
+ from orangecontrib.AAIT.utils import base_widget
23
24
 
24
25
 
25
- class OWPdfType(widget.OWWidget):
26
+ class OWPdfType(base_widget.BaseListWidget):
26
27
  name = "PDF Type"
27
28
  description = "Checks if a PDF is text-based or image-based"
28
29
  category = "AAIT - TOOLBOX"
@@ -32,6 +33,10 @@ class OWPdfType(widget.OWWidget):
32
33
  gui = os.path.join(os.path.dirname(os.path.abspath(__file__)), "designer/owpdftype.ui")
33
34
  want_control_area = False
34
35
  priority = 1002
36
+ recursive = Setting("False")
37
+
38
+ # Settings
39
+ selected_column_name = Setting("content")
35
40
 
36
41
  MAX_PAGES_TO_CHECK = 5
37
42
 
@@ -49,8 +54,8 @@ class OWPdfType(widget.OWWidget):
49
54
  def __init__(self):
50
55
  super().__init__()
51
56
  self.setFixedWidth(470)
52
- self.setFixedHeight(300)
53
- uic.loadUi(self.gui, self)
57
+ self.setFixedHeight(470)
58
+ #uic.loadUi(self.gui, self)
54
59
 
55
60
  self.data = None
56
61
  self.thread = None
@@ -59,9 +64,22 @@ class OWPdfType(widget.OWWidget):
59
64
  self.processed_statuses = []
60
65
  self.post_initialized()
61
66
 
67
+ self.comboBox = self.findChild(QCheckBox, 'checkBox_send')
68
+ self.pushButton_run = self.findChild(QPushButton, 'pushButton_send')
69
+
70
+
71
+ if self.recursive == "True":
72
+ self.comboBox.setChecked(True)
73
+
74
+ self.comboBox.stateChanged.connect(self.on_checkbox_toggled)
75
+ self.pushButton_run.clicked.connect(self.run)
76
+
62
77
  @Inputs.data
63
78
  def set_data(self, in_data: Table | None):
64
79
  self.data = in_data
80
+ if self.data:
81
+ self.var_selector.add_variables(self.data.domain)
82
+ self.var_selector.select_variable_by_name(self.selected_column_name)
65
83
  if self.autorun:
66
84
  self.run()
67
85
 
@@ -76,14 +94,14 @@ class OWPdfType(widget.OWWidget):
76
94
  return
77
95
 
78
96
  self.error("")
79
- try:
80
- self.data.domain["file_path"]
81
- except KeyError:
82
- self.error("You need a 'file_path' column in input data.")
97
+
98
+ # Verification of in_data
99
+ if not self.selected_column_name in self.data.domain:
100
+ self.warning(f'Previously selected column "{self.selected_column_name}" does not exist in your data.')
83
101
  return
84
102
 
85
- if type(self.data.domain["file_path"]).__name__ != 'StringVariable':
86
- self.error("'file_path' column needs to be a Text.")
103
+ if not isinstance(self.data.domain[self.selected_column_name], StringVariable):
104
+ self.error('You must select a text variable.')
87
105
  return
88
106
 
89
107
  self.progressBarInit()
@@ -99,17 +117,14 @@ class OWPdfType(widget.OWWidget):
99
117
  self.thread.finish.connect(self.handle_finish)
100
118
  self.thread.start()
101
119
 
102
- def _process_pdfs(self, in_data: Table, progress_callback: callable, status_callback: callable) -> tuple[
103
- Table | None, Table | None]:
104
-
105
- # Extraction des chemins de fichiers avant de commencer le traitement
106
- paths = [str(x) for x in in_data.get_column("file_path")]
120
+ def _process_pdfs(self, in_data: Table, progress_callback: callable, status_callback: callable):
121
+ paths = [str(x) for x in in_data.get_column(self.selected_column_name)]
107
122
 
123
+ # Dossier Excel
108
124
  excel_output_dir = Path.cwd() / "pdf_check_results"
109
125
  if paths:
110
126
  first_file_path = Path(paths[0])
111
127
  excel_output_dir = first_file_path.parent / "pdf_check_results"
112
-
113
128
  excel_output_dir.mkdir(parents=True, exist_ok=True)
114
129
 
115
130
  base_name = "pdf_check_results"
@@ -122,7 +137,7 @@ class OWPdfType(widget.OWWidget):
122
137
  wb = Workbook()
123
138
  ws = wb.active
124
139
  ws.title = "PDF Check Results"
125
- headers = ["file_path", "status", "details"]
140
+ headers = [self.selected_column_name, "status", "details"]
126
141
  ws.append(headers)
127
142
 
128
143
  text_indices = []
@@ -142,6 +157,28 @@ class OWPdfType(widget.OWWidget):
142
157
  wb.save(excel_path)
143
158
  continue
144
159
 
160
+ # 🔒 --- Vérification verrouillage via PyMuPDF ---
161
+ try:
162
+ doc = fitz.open(str(fp))
163
+
164
+ if doc.is_encrypted:
165
+ result_row[1] = "ko"
166
+ result_row[2] = "Locked/Encrypted PDF"
167
+ status_callback(result_row)
168
+ ws.append(result_row)
169
+ wb.save(excel_path)
170
+ continue
171
+
172
+ except Exception as e:
173
+ # PyMuPDF lève une exception si le PDF est trop protégé
174
+ result_row[1] = "ko"
175
+ result_row[2] = f"Cannot open PDF (possibly locked): {str(e)}"
176
+ status_callback(result_row)
177
+ ws.append(result_row)
178
+ wb.save(excel_path)
179
+ continue
180
+
181
+ # --- Détection texte / image via utils_md ---
145
182
  try:
146
183
  is_text = utils_md.is_pdf_text_based(fp)
147
184
  if is_text:
@@ -165,15 +202,8 @@ class OWPdfType(widget.OWWidget):
165
202
 
166
203
  progress_callback(100)
167
204
 
168
- if not text_indices:
169
- text_table = None
170
- else:
171
- text_table = in_data[text_indices]
172
-
173
- if not image_indices:
174
- image_table = None
175
- else:
176
- image_table = in_data[image_indices]
205
+ text_table = in_data[text_indices] if text_indices else None
206
+ image_table = in_data[image_indices] if image_indices else None
177
207
 
178
208
  return text_table, image_table
179
209
 
@@ -191,7 +221,7 @@ class OWPdfType(widget.OWWidget):
191
221
  status_domain = Domain(
192
222
  [], # The variables list should be empty
193
223
  metas=[
194
- StringVariable("file_path"),
224
+ StringVariable(self.selected_column_name),
195
225
  DiscreteVariable("status", values=["ok", "ko"]),
196
226
  StringVariable("details")
197
227
  ]
@@ -216,6 +246,13 @@ class OWPdfType(widget.OWWidget):
216
246
  def post_initialized(self):
217
247
  pass
218
248
 
249
+ def on_checkbox_toggled(self,state):
250
+ self.recursive = "True"
251
+ if state==0:
252
+ self.recursive = "False"
253
+ if self.data is not None:
254
+ self.run()
255
+
219
256
 
220
257
  if __name__ == "__main__":
221
258
  app = QApplication(sys.argv)
@@ -0,0 +1,131 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <ui version="4.0">
3
+ <class>Form</class>
4
+ <widget class="QWidget" name="Form">
5
+ <property name="geometry">
6
+ <rect>
7
+ <x>0</x>
8
+ <y>0</y>
9
+ <width>474</width>
10
+ <height>460</height>
11
+ </rect>
12
+ </property>
13
+ <property name="windowTitle">
14
+ <string>Form</string>
15
+ </property>
16
+ <widget class="QGroupBox" name="groupBox">
17
+ <property name="geometry">
18
+ <rect>
19
+ <x>10</x>
20
+ <y>30</y>
21
+ <width>451</width>
22
+ <height>181</height>
23
+ </rect>
24
+ </property>
25
+ <property name="title">
26
+ <string>Parameters</string>
27
+ </property>
28
+ <widget class="QLabel" name="Description">
29
+ <property name="geometry">
30
+ <rect>
31
+ <x>10</x>
32
+ <y>40</y>
33
+ <width>431</width>
34
+ <height>101</height>
35
+ </rect>
36
+ </property>
37
+ <property name="text">
38
+ <string>This widget analyze PDF file paths and determines if each PDF is text-based or image-based, adding a &quot;type&quot; meta column to the output data.</string>
39
+ </property>
40
+ <property name="textFormat">
41
+ <enum>Qt::AutoText</enum>
42
+ </property>
43
+ <property name="alignment">
44
+ <set>Qt::AlignLeading|Qt::AlignLeft|Qt::AlignVCenter</set>
45
+ </property>
46
+ <property name="wordWrap">
47
+ <bool>true</bool>
48
+ </property>
49
+ </widget>
50
+ </widget>
51
+ <widget class="QPushButton" name="pushButton_send">
52
+ <property name="enabled">
53
+ <bool>false</bool>
54
+ </property>
55
+ <property name="geometry">
56
+ <rect>
57
+ <x>170</x>
58
+ <y>410</y>
59
+ <width>291</width>
60
+ <height>31</height>
61
+ </rect>
62
+ </property>
63
+ <property name="text">
64
+ <string>Run</string>
65
+ </property>
66
+ </widget>
67
+ <widget class="QCheckBox" name="checkBox_send">
68
+ <property name="enabled">
69
+ <bool>false</bool>
70
+ </property>
71
+ <property name="geometry">
72
+ <rect>
73
+ <x>20</x>
74
+ <y>410</y>
75
+ <width>131</width>
76
+ <height>16</height>
77
+ </rect>
78
+ </property>
79
+ <property name="text">
80
+ <string>Auto send data</string>
81
+ </property>
82
+ </widget>
83
+ <widget class="QGroupBox" name="groupBox_3">
84
+ <property name="geometry">
85
+ <rect>
86
+ <x>10</x>
87
+ <y>230</y>
88
+ <width>451</width>
89
+ <height>161</height>
90
+ </rect>
91
+ </property>
92
+ <property name="title">
93
+ <string>Column selection</string>
94
+ </property>
95
+ <widget class="QLabel" name="Description_4">
96
+ <property name="geometry">
97
+ <rect>
98
+ <x>10</x>
99
+ <y>30</y>
100
+ <width>411</width>
101
+ <height>61</height>
102
+ </rect>
103
+ </property>
104
+ <property name="text">
105
+ <string/>
106
+ </property>
107
+ <property name="textFormat">
108
+ <enum>Qt::AutoText</enum>
109
+ </property>
110
+ <property name="alignment">
111
+ <set>Qt::AlignLeading|Qt::AlignLeft|Qt::AlignVCenter</set>
112
+ </property>
113
+ <property name="wordWrap">
114
+ <bool>true</bool>
115
+ </property>
116
+ </widget>
117
+ <widget class="QWidget" name="placeholder" native="true">
118
+ <property name="geometry">
119
+ <rect>
120
+ <x>0</x>
121
+ <y>20</y>
122
+ <width>431</width>
123
+ <height>141</height>
124
+ </rect>
125
+ </property>
126
+ </widget>
127
+ </widget>
128
+ </widget>
129
+ <resources/>
130
+ <connections/>
131
+ </ui>
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
2
2
 
3
3
  # Configuration
4
4
  NAME = "io4it"
5
- VERSION = "3.0.2.1"
5
+ VERSION = "3.0.3"
6
6
 
7
7
  INSTALL_REQUIRES = [
8
8
  "torchvision==0.23.0",
@@ -1,86 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <ui version="4.0">
3
- <class>Form</class>
4
- <widget class="QWidget" name="Form">
5
- <property name="geometry">
6
- <rect>
7
- <x>0</x>
8
- <y>0</y>
9
- <width>470</width>
10
- <height>305</height>
11
- </rect>
12
- </property>
13
- <property name="windowTitle">
14
- <string>Form</string>
15
- </property>
16
- <widget class="QGroupBox" name="groupBox">
17
- <property name="geometry">
18
- <rect>
19
- <x>10</x>
20
- <y>30</y>
21
- <width>451</width>
22
- <height>181</height>
23
- </rect>
24
- </property>
25
- <property name="title">
26
- <string>Parameters</string>
27
- </property>
28
- <widget class="QLabel" name="Description">
29
- <property name="geometry">
30
- <rect>
31
- <x>10</x>
32
- <y>40</y>
33
- <width>431</width>
34
- <height>101</height>
35
- </rect>
36
- </property>
37
- <property name="text">
38
- <string>This widget analyzes a column named &quot;file_path&quot; containing PDF file paths and determines if each PDF is text-based or image-based, adding a &quot;type&quot; meta column to the output data.</string>
39
- </property>
40
- <property name="textFormat">
41
- <enum>Qt::AutoText</enum>
42
- </property>
43
- <property name="alignment">
44
- <set>Qt::AlignLeading|Qt::AlignLeft|Qt::AlignVCenter</set>
45
- </property>
46
- <property name="wordWrap">
47
- <bool>true</bool>
48
- </property>
49
- </widget>
50
- </widget>
51
- <widget class="QPushButton" name="pushButton_send">
52
- <property name="enabled">
53
- <bool>false</bool>
54
- </property>
55
- <property name="geometry">
56
- <rect>
57
- <x>170</x>
58
- <y>244</y>
59
- <width>291</width>
60
- <height>31</height>
61
- </rect>
62
- </property>
63
- <property name="text">
64
- <string>Run</string>
65
- </property>
66
- </widget>
67
- <widget class="QCheckBox" name="checkBox_send">
68
- <property name="enabled">
69
- <bool>false</bool>
70
- </property>
71
- <property name="geometry">
72
- <rect>
73
- <x>10</x>
74
- <y>250</y>
75
- <width>131</width>
76
- <height>16</height>
77
- </rect>
78
- </property>
79
- <property name="text">
80
- <string>Auto send data</string>
81
- </property>
82
- </widget>
83
- </widget>
84
- <resources/>
85
- <connections/>
86
- </ui>
File without changes
File without changes