io4it 3.0.2.1__tar.gz → 3.0.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {io4it-3.0.2.1 → io4it-3.0.2.2}/PKG-INFO +1 -1
- {io4it-3.0.2.1 → io4it-3.0.2.2}/io4it.egg-info/PKG-INFO +1 -1
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/utils/utils_md.py +9 -4
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/OWExportMarkdown.py +24 -24
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/OWPdfType.py +68 -31
- io4it-3.0.2.2/orangecontrib/IO4IT/widgets/designer/owpdftype.ui +131 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/setup.py +1 -1
- io4it-3.0.2.1/orangecontrib/IO4IT/widgets/designer/owpdftype.ui +0 -86
- {io4it-3.0.2.1 → io4it-3.0.2.2}/io4it.egg-info/SOURCES.txt +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/io4it.egg-info/dependency_links.txt +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/io4it.egg-info/entry_points.txt +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/io4it.egg-info/namespace_packages.txt +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/io4it.egg-info/requires.txt +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/io4it.egg-info/top_level.txt +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/__init__.py +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/ocr_function/__init__.py +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/ocr_function/word_converter.py +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/utils/__init__.py +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/utils/config.json +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/utils/keys_manager.py +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/utils/mail.py +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/utils/pool_exec_utils.py +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/utils/secret_manager.py +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/OWChatGpt.py +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/OWDeep_Search.py +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/OWDoclingASR.py +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/OWDoclingToMarkdown.py +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/OWExtractTablesDocxToXlsx.py +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/OWInboxMailMonitoring.py +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/OWMD2HTML.py +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/OWMarkdownLoader.py +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/OWMarkdownizer.py +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/OWOfficeNormalizer.py +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/OWParserHTML.py +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/OWProcessPoolExecutor.py +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/OWS3Uploader.py +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/OWS3downloader.py +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/OWS3list.py +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/OWSpeechToText.py +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/OWWebSearch.py +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/OWmailLoader.py +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/OWmailSender.py +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/OWwordpdf2docx.py +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/__init__.py +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/designer/__init__.py +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/designer/nogui.ui +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/designer/ow_file_ext_selector.ui +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/designer/owchatgpt.ui +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/designer/owdeepsearch.ui +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/designer/owdoclingasr.ui +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/designer/owdoclingtomarkdown.ui +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/designer/owdocxtoxlsx.ui +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/designer/owexportmarkdown.ui +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/designer/owinboxmailmonitoring.ui +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/designer/owmailloader.ui +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/designer/owmailsender.ui +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/designer/owmarkdownizer.ui +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/designer/owmarkdownloader.ui +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/designer/owmd2html.ui +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/designer/owofficenormalizer.ui +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/designer/owparserhtml.ui +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/designer/owprocesspoolexecutor.ui +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/designer/owspeechtotext.ui +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/designer/owvisualizationer.ui +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/designer/owwebsearch.ui +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/designer/wordpdf2docx.ui +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/icons/__init__.py +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/icons/chatgpt.png +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/icons/check_pdf.png +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/icons/deepsearch.svg +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/icons/dep_md_old.png +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/icons/download.png +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/icons/export_md.png +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/icons/extract_table.png +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/icons/file_extensor.png +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/icons/html.png +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/icons/list_aws.png +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/icons/load_md.png +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/icons/mail_loader.png +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/icons/mail_writer.png +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/icons/md.png +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/icons/monitor-email.svg +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/icons/office_normalizer.png +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/icons/owmd2html.svg +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/icons/process_pool_executor.png +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/icons/speech_to_text.png +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/icons/upload.png +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/icons/visualizationer.png +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/icons/websearch.png +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/icons/wordpdf2docx.png +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/icons_dev/__init__.py +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/__init__.py +0 -0
- {io4it-3.0.2.1 → io4it-3.0.2.2}/setup.cfg +0 -0
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import re, hashlib, secrets, urllib.parse, logging, numpy as np
|
|
2
2
|
from pathlib import Path
|
|
3
3
|
import fitz # PyMuPDF
|
|
4
|
+
import win32com.client
|
|
4
5
|
|
|
5
6
|
try:
|
|
6
7
|
import easyocr
|
|
@@ -88,7 +89,6 @@ def _make_powerpoint_invisible(ppt):
|
|
|
88
89
|
pass
|
|
89
90
|
|
|
90
91
|
def convert_doc_to_docx(src: Path, out_dir: Path) -> Path:
|
|
91
|
-
import win32com.client
|
|
92
92
|
out_dir.mkdir(parents=True, exist_ok=True)
|
|
93
93
|
if src.suffix.lower() != ".doc":
|
|
94
94
|
return src if src.suffix.lower() == ".docx" else src
|
|
@@ -108,7 +108,7 @@ def convert_doc_to_docx(src: Path, out_dir: Path) -> Path:
|
|
|
108
108
|
word.Quit()
|
|
109
109
|
|
|
110
110
|
def convert_ppt_to_pptx(src: Path, out_dir: Path) -> Path:
|
|
111
|
-
|
|
111
|
+
|
|
112
112
|
out_dir.mkdir(parents=True, exist_ok=True)
|
|
113
113
|
if src.suffix.lower() != ".ppt":
|
|
114
114
|
return src if src.suffix.lower() == ".pptx" else src
|
|
@@ -128,7 +128,6 @@ def convert_ppt_to_pptx(src: Path, out_dir: Path) -> Path:
|
|
|
128
128
|
ppt.Quit()
|
|
129
129
|
|
|
130
130
|
def docx_to_pdf(src_docx: Path, out_dir: Path) -> Path:
|
|
131
|
-
import win32com.client
|
|
132
131
|
out_dir.mkdir(parents=True, exist_ok=True)
|
|
133
132
|
pdf_path = out_dir / (src_docx.stem + ".pdf")
|
|
134
133
|
if pdf_path.exists() and pdf_path.stat().st_size > 0:
|
|
@@ -149,7 +148,6 @@ def docx_to_pdf(src_docx: Path, out_dir: Path) -> Path:
|
|
|
149
148
|
word.Quit()
|
|
150
149
|
|
|
151
150
|
def pptx_to_pdf(src_pptx: Path, out_dir: Path) -> Path:
|
|
152
|
-
import win32com.client
|
|
153
151
|
out_dir.mkdir(parents=True, exist_ok=True)
|
|
154
152
|
pdf_path = out_dir / (src_pptx.stem + ".pdf")
|
|
155
153
|
if pdf_path.exists() and pdf_path.stat().st_size > 0:
|
|
@@ -165,3 +163,10 @@ def pptx_to_pdf(src_pptx: Path, out_dir: Path) -> Path:
|
|
|
165
163
|
return pdf_path
|
|
166
164
|
finally:
|
|
167
165
|
ppt.Quit()
|
|
166
|
+
|
|
167
|
+
def is_word_installed():
|
|
168
|
+
try:
|
|
169
|
+
win32com.client.Dispatch("Word.Application")
|
|
170
|
+
return True
|
|
171
|
+
except Exception:
|
|
172
|
+
return False
|
|
@@ -19,8 +19,10 @@ from docx2pdf import convert
|
|
|
19
19
|
# Chargement UI
|
|
20
20
|
if "site-packages/Orange/widgets" in os.path.dirname(os.path.abspath(__file__)).replace("\\", "/"):
|
|
21
21
|
from Orange.widgets.orangecontrib.AAIT.utils.import_uic import uic
|
|
22
|
+
from Orange.widgets.orangecontrib.IO4IT.utils import utils_md
|
|
22
23
|
else:
|
|
23
24
|
from orangecontrib.AAIT.utils.import_uic import uic
|
|
25
|
+
from orangecontrib.IO4IT.utils import utils_md
|
|
24
26
|
|
|
25
27
|
|
|
26
28
|
class OWExportMarkdown(widget.OWWidget):
|
|
@@ -164,32 +166,30 @@ class OWExportMarkdown(widget.OWWidget):
|
|
|
164
166
|
tmp_md = tmp.name
|
|
165
167
|
|
|
166
168
|
try:
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
169
|
+
if utils_md.is_word_installed():
|
|
170
|
+
pypandoc.convert_file(tmp_md, to="docx", format="gfm-yaml_metadata_block", outputfile=docx_out)
|
|
171
|
+
self.ajouter_en_tete_pied_docx(
|
|
172
|
+
docx_out,
|
|
173
|
+
"Rapport - Orange AI",
|
|
174
|
+
"Page générée automatiquement - Ne pas diffuser"
|
|
175
|
+
)
|
|
176
|
+
|
|
177
|
+
# PPTX
|
|
178
|
+
pypandoc.convert_file(tmp_md, to="pptx", format="gfm-yaml_metadata_block", outputfile=pptx_out)
|
|
179
|
+
self.ajouter_entete_pied_pptx(
|
|
180
|
+
pptx_out,
|
|
181
|
+
"Orange AI – Présentation",
|
|
182
|
+
"Page générée automatiquement"
|
|
183
|
+
)
|
|
184
|
+
else:
|
|
185
|
+
raise Exception("Word non détecté")
|
|
186
|
+
|
|
187
|
+
except Exception:
|
|
184
188
|
try:
|
|
185
|
-
|
|
189
|
+
pypandoc.convert_file(tmp_md, to="pdf", outputfile=pdf_out)
|
|
186
190
|
except Exception:
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
pypandoc.convert_file(tmp_md, to="pdf", outputfile=pdf_out)
|
|
190
|
-
except Exception:
|
|
191
|
-
self.error(f"Échec conversion PDF pour la ligne {i+1}.")
|
|
192
|
-
pdf_out = ""
|
|
191
|
+
self.error(f"Échec conversion PDF pour la ligne {i + 1}.")
|
|
192
|
+
pdf_out = ""
|
|
193
193
|
finally:
|
|
194
194
|
try:
|
|
195
195
|
os.remove(tmp_md)
|
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
import os
|
|
2
2
|
import sys
|
|
3
3
|
from pathlib import Path
|
|
4
|
-
|
|
5
|
-
from AnyQt.QtWidgets import QApplication
|
|
6
4
|
from AnyQt.QtCore import pyqtSignal
|
|
7
5
|
from Orange.data import Domain, StringVariable, Table, DiscreteVariable
|
|
8
|
-
|
|
6
|
+
|
|
7
|
+
import fitz # PyMuPDF
|
|
8
|
+
from AnyQt.QtWidgets import QApplication, QCheckBox, QPushButton
|
|
9
9
|
from Orange.widgets.utils.signals import Input, Output
|
|
10
|
+
from Orange.widgets.settings import Setting
|
|
10
11
|
|
|
11
12
|
# --- Ajout pour l'écriture Excel ---
|
|
12
13
|
from openpyxl import Workbook
|
|
@@ -15,14 +16,14 @@ from openpyxl import Workbook
|
|
|
15
16
|
if "site-packages/Orange/widgets" in os.path.dirname(os.path.abspath(__file__)).replace("\\", "/"):
|
|
16
17
|
from Orange.widgets.orangecontrib.IO4IT.utils import utils_md
|
|
17
18
|
from Orange.widgets.orangecontrib.AAIT.utils.thread_management import Thread
|
|
18
|
-
from Orange.widgets.orangecontrib.AAIT.utils
|
|
19
|
+
from Orange.widgets.orangecontrib.AAIT.utils import base_widget
|
|
19
20
|
else:
|
|
20
21
|
from orangecontrib.IO4IT.utils import utils_md
|
|
21
22
|
from orangecontrib.AAIT.utils.thread_management import Thread
|
|
22
|
-
from orangecontrib.AAIT.utils
|
|
23
|
+
from orangecontrib.AAIT.utils import base_widget
|
|
23
24
|
|
|
24
25
|
|
|
25
|
-
class OWPdfType(
|
|
26
|
+
class OWPdfType(base_widget.BaseListWidget):
|
|
26
27
|
name = "PDF Type"
|
|
27
28
|
description = "Checks if a PDF is text-based or image-based"
|
|
28
29
|
category = "AAIT - TOOLBOX"
|
|
@@ -32,6 +33,10 @@ class OWPdfType(widget.OWWidget):
|
|
|
32
33
|
gui = os.path.join(os.path.dirname(os.path.abspath(__file__)), "designer/owpdftype.ui")
|
|
33
34
|
want_control_area = False
|
|
34
35
|
priority = 1002
|
|
36
|
+
recursive = Setting("False")
|
|
37
|
+
|
|
38
|
+
# Settings
|
|
39
|
+
selected_column_name = Setting("content")
|
|
35
40
|
|
|
36
41
|
MAX_PAGES_TO_CHECK = 5
|
|
37
42
|
|
|
@@ -49,8 +54,8 @@ class OWPdfType(widget.OWWidget):
|
|
|
49
54
|
def __init__(self):
|
|
50
55
|
super().__init__()
|
|
51
56
|
self.setFixedWidth(470)
|
|
52
|
-
self.setFixedHeight(
|
|
53
|
-
uic.loadUi(self.gui, self)
|
|
57
|
+
self.setFixedHeight(470)
|
|
58
|
+
#uic.loadUi(self.gui, self)
|
|
54
59
|
|
|
55
60
|
self.data = None
|
|
56
61
|
self.thread = None
|
|
@@ -59,9 +64,22 @@ class OWPdfType(widget.OWWidget):
|
|
|
59
64
|
self.processed_statuses = []
|
|
60
65
|
self.post_initialized()
|
|
61
66
|
|
|
67
|
+
self.comboBox = self.findChild(QCheckBox, 'checkBox_send')
|
|
68
|
+
self.pushButton_run = self.findChild(QPushButton, 'pushButton_send')
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
if self.recursive == "True":
|
|
72
|
+
self.comboBox.setChecked(True)
|
|
73
|
+
|
|
74
|
+
self.comboBox.stateChanged.connect(self.on_checkbox_toggled)
|
|
75
|
+
self.pushButton_run.clicked.connect(self.run)
|
|
76
|
+
|
|
62
77
|
@Inputs.data
|
|
63
78
|
def set_data(self, in_data: Table | None):
|
|
64
79
|
self.data = in_data
|
|
80
|
+
if self.data:
|
|
81
|
+
self.var_selector.add_variables(self.data.domain)
|
|
82
|
+
self.var_selector.select_variable_by_name(self.selected_column_name)
|
|
65
83
|
if self.autorun:
|
|
66
84
|
self.run()
|
|
67
85
|
|
|
@@ -76,14 +94,14 @@ class OWPdfType(widget.OWWidget):
|
|
|
76
94
|
return
|
|
77
95
|
|
|
78
96
|
self.error("")
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
self.
|
|
97
|
+
|
|
98
|
+
# Verification of in_data
|
|
99
|
+
if not self.selected_column_name in self.data.domain:
|
|
100
|
+
self.warning(f'Previously selected column "{self.selected_column_name}" does not exist in your data.')
|
|
83
101
|
return
|
|
84
102
|
|
|
85
|
-
if
|
|
86
|
-
self.error(
|
|
103
|
+
if not isinstance(self.data.domain[self.selected_column_name], StringVariable):
|
|
104
|
+
self.error('You must select a text variable.')
|
|
87
105
|
return
|
|
88
106
|
|
|
89
107
|
self.progressBarInit()
|
|
@@ -99,17 +117,14 @@ class OWPdfType(widget.OWWidget):
|
|
|
99
117
|
self.thread.finish.connect(self.handle_finish)
|
|
100
118
|
self.thread.start()
|
|
101
119
|
|
|
102
|
-
def _process_pdfs(self, in_data: Table, progress_callback: callable, status_callback: callable)
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
# Extraction des chemins de fichiers avant de commencer le traitement
|
|
106
|
-
paths = [str(x) for x in in_data.get_column("file_path")]
|
|
120
|
+
def _process_pdfs(self, in_data: Table, progress_callback: callable, status_callback: callable):
|
|
121
|
+
paths = [str(x) for x in in_data.get_column(self.selected_column_name)]
|
|
107
122
|
|
|
123
|
+
# Dossier Excel
|
|
108
124
|
excel_output_dir = Path.cwd() / "pdf_check_results"
|
|
109
125
|
if paths:
|
|
110
126
|
first_file_path = Path(paths[0])
|
|
111
127
|
excel_output_dir = first_file_path.parent / "pdf_check_results"
|
|
112
|
-
|
|
113
128
|
excel_output_dir.mkdir(parents=True, exist_ok=True)
|
|
114
129
|
|
|
115
130
|
base_name = "pdf_check_results"
|
|
@@ -122,7 +137,7 @@ class OWPdfType(widget.OWWidget):
|
|
|
122
137
|
wb = Workbook()
|
|
123
138
|
ws = wb.active
|
|
124
139
|
ws.title = "PDF Check Results"
|
|
125
|
-
headers = [
|
|
140
|
+
headers = [self.selected_column_name, "status", "details"]
|
|
126
141
|
ws.append(headers)
|
|
127
142
|
|
|
128
143
|
text_indices = []
|
|
@@ -142,6 +157,28 @@ class OWPdfType(widget.OWWidget):
|
|
|
142
157
|
wb.save(excel_path)
|
|
143
158
|
continue
|
|
144
159
|
|
|
160
|
+
# 🔒 --- Vérification verrouillage via PyMuPDF ---
|
|
161
|
+
try:
|
|
162
|
+
doc = fitz.open(str(fp))
|
|
163
|
+
|
|
164
|
+
if doc.is_encrypted:
|
|
165
|
+
result_row[1] = "ko"
|
|
166
|
+
result_row[2] = "Locked/Encrypted PDF"
|
|
167
|
+
status_callback(result_row)
|
|
168
|
+
ws.append(result_row)
|
|
169
|
+
wb.save(excel_path)
|
|
170
|
+
continue
|
|
171
|
+
|
|
172
|
+
except Exception as e:
|
|
173
|
+
# PyMuPDF lève une exception si le PDF est trop protégé
|
|
174
|
+
result_row[1] = "ko"
|
|
175
|
+
result_row[2] = f"Cannot open PDF (possibly locked): {str(e)}"
|
|
176
|
+
status_callback(result_row)
|
|
177
|
+
ws.append(result_row)
|
|
178
|
+
wb.save(excel_path)
|
|
179
|
+
continue
|
|
180
|
+
|
|
181
|
+
# --- Détection texte / image via utils_md ---
|
|
145
182
|
try:
|
|
146
183
|
is_text = utils_md.is_pdf_text_based(fp)
|
|
147
184
|
if is_text:
|
|
@@ -165,15 +202,8 @@ class OWPdfType(widget.OWWidget):
|
|
|
165
202
|
|
|
166
203
|
progress_callback(100)
|
|
167
204
|
|
|
168
|
-
if
|
|
169
|
-
|
|
170
|
-
else:
|
|
171
|
-
text_table = in_data[text_indices]
|
|
172
|
-
|
|
173
|
-
if not image_indices:
|
|
174
|
-
image_table = None
|
|
175
|
-
else:
|
|
176
|
-
image_table = in_data[image_indices]
|
|
205
|
+
text_table = in_data[text_indices] if text_indices else None
|
|
206
|
+
image_table = in_data[image_indices] if image_indices else None
|
|
177
207
|
|
|
178
208
|
return text_table, image_table
|
|
179
209
|
|
|
@@ -191,7 +221,7 @@ class OWPdfType(widget.OWWidget):
|
|
|
191
221
|
status_domain = Domain(
|
|
192
222
|
[], # The variables list should be empty
|
|
193
223
|
metas=[
|
|
194
|
-
StringVariable(
|
|
224
|
+
StringVariable(self.selected_column_name),
|
|
195
225
|
DiscreteVariable("status", values=["ok", "ko"]),
|
|
196
226
|
StringVariable("details")
|
|
197
227
|
]
|
|
@@ -216,6 +246,13 @@ class OWPdfType(widget.OWWidget):
|
|
|
216
246
|
def post_initialized(self):
|
|
217
247
|
pass
|
|
218
248
|
|
|
249
|
+
def on_checkbox_toggled(self,state):
|
|
250
|
+
self.recursive = "True"
|
|
251
|
+
if state==0:
|
|
252
|
+
self.recursive = "False"
|
|
253
|
+
if self.data is not None:
|
|
254
|
+
self.run()
|
|
255
|
+
|
|
219
256
|
|
|
220
257
|
if __name__ == "__main__":
|
|
221
258
|
app = QApplication(sys.argv)
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
2
|
+
<ui version="4.0">
|
|
3
|
+
<class>Form</class>
|
|
4
|
+
<widget class="QWidget" name="Form">
|
|
5
|
+
<property name="geometry">
|
|
6
|
+
<rect>
|
|
7
|
+
<x>0</x>
|
|
8
|
+
<y>0</y>
|
|
9
|
+
<width>474</width>
|
|
10
|
+
<height>460</height>
|
|
11
|
+
</rect>
|
|
12
|
+
</property>
|
|
13
|
+
<property name="windowTitle">
|
|
14
|
+
<string>Form</string>
|
|
15
|
+
</property>
|
|
16
|
+
<widget class="QGroupBox" name="groupBox">
|
|
17
|
+
<property name="geometry">
|
|
18
|
+
<rect>
|
|
19
|
+
<x>10</x>
|
|
20
|
+
<y>30</y>
|
|
21
|
+
<width>451</width>
|
|
22
|
+
<height>181</height>
|
|
23
|
+
</rect>
|
|
24
|
+
</property>
|
|
25
|
+
<property name="title">
|
|
26
|
+
<string>Parameters</string>
|
|
27
|
+
</property>
|
|
28
|
+
<widget class="QLabel" name="Description">
|
|
29
|
+
<property name="geometry">
|
|
30
|
+
<rect>
|
|
31
|
+
<x>10</x>
|
|
32
|
+
<y>40</y>
|
|
33
|
+
<width>431</width>
|
|
34
|
+
<height>101</height>
|
|
35
|
+
</rect>
|
|
36
|
+
</property>
|
|
37
|
+
<property name="text">
|
|
38
|
+
<string>This widget analyze PDF file paths and determines if each PDF is text-based or image-based, adding a "type" meta column to the output data.</string>
|
|
39
|
+
</property>
|
|
40
|
+
<property name="textFormat">
|
|
41
|
+
<enum>Qt::AutoText</enum>
|
|
42
|
+
</property>
|
|
43
|
+
<property name="alignment">
|
|
44
|
+
<set>Qt::AlignLeading|Qt::AlignLeft|Qt::AlignVCenter</set>
|
|
45
|
+
</property>
|
|
46
|
+
<property name="wordWrap">
|
|
47
|
+
<bool>true</bool>
|
|
48
|
+
</property>
|
|
49
|
+
</widget>
|
|
50
|
+
</widget>
|
|
51
|
+
<widget class="QPushButton" name="pushButton_send">
|
|
52
|
+
<property name="enabled">
|
|
53
|
+
<bool>false</bool>
|
|
54
|
+
</property>
|
|
55
|
+
<property name="geometry">
|
|
56
|
+
<rect>
|
|
57
|
+
<x>170</x>
|
|
58
|
+
<y>410</y>
|
|
59
|
+
<width>291</width>
|
|
60
|
+
<height>31</height>
|
|
61
|
+
</rect>
|
|
62
|
+
</property>
|
|
63
|
+
<property name="text">
|
|
64
|
+
<string>Run</string>
|
|
65
|
+
</property>
|
|
66
|
+
</widget>
|
|
67
|
+
<widget class="QCheckBox" name="checkBox_send">
|
|
68
|
+
<property name="enabled">
|
|
69
|
+
<bool>false</bool>
|
|
70
|
+
</property>
|
|
71
|
+
<property name="geometry">
|
|
72
|
+
<rect>
|
|
73
|
+
<x>20</x>
|
|
74
|
+
<y>410</y>
|
|
75
|
+
<width>131</width>
|
|
76
|
+
<height>16</height>
|
|
77
|
+
</rect>
|
|
78
|
+
</property>
|
|
79
|
+
<property name="text">
|
|
80
|
+
<string>Auto send data</string>
|
|
81
|
+
</property>
|
|
82
|
+
</widget>
|
|
83
|
+
<widget class="QGroupBox" name="groupBox_3">
|
|
84
|
+
<property name="geometry">
|
|
85
|
+
<rect>
|
|
86
|
+
<x>10</x>
|
|
87
|
+
<y>230</y>
|
|
88
|
+
<width>451</width>
|
|
89
|
+
<height>161</height>
|
|
90
|
+
</rect>
|
|
91
|
+
</property>
|
|
92
|
+
<property name="title">
|
|
93
|
+
<string>Column selection</string>
|
|
94
|
+
</property>
|
|
95
|
+
<widget class="QLabel" name="Description_4">
|
|
96
|
+
<property name="geometry">
|
|
97
|
+
<rect>
|
|
98
|
+
<x>10</x>
|
|
99
|
+
<y>30</y>
|
|
100
|
+
<width>411</width>
|
|
101
|
+
<height>61</height>
|
|
102
|
+
</rect>
|
|
103
|
+
</property>
|
|
104
|
+
<property name="text">
|
|
105
|
+
<string/>
|
|
106
|
+
</property>
|
|
107
|
+
<property name="textFormat">
|
|
108
|
+
<enum>Qt::AutoText</enum>
|
|
109
|
+
</property>
|
|
110
|
+
<property name="alignment">
|
|
111
|
+
<set>Qt::AlignLeading|Qt::AlignLeft|Qt::AlignVCenter</set>
|
|
112
|
+
</property>
|
|
113
|
+
<property name="wordWrap">
|
|
114
|
+
<bool>true</bool>
|
|
115
|
+
</property>
|
|
116
|
+
</widget>
|
|
117
|
+
<widget class="QWidget" name="placeholder" native="true">
|
|
118
|
+
<property name="geometry">
|
|
119
|
+
<rect>
|
|
120
|
+
<x>0</x>
|
|
121
|
+
<y>20</y>
|
|
122
|
+
<width>431</width>
|
|
123
|
+
<height>141</height>
|
|
124
|
+
</rect>
|
|
125
|
+
</property>
|
|
126
|
+
</widget>
|
|
127
|
+
</widget>
|
|
128
|
+
</widget>
|
|
129
|
+
<resources/>
|
|
130
|
+
<connections/>
|
|
131
|
+
</ui>
|
|
@@ -1,86 +0,0 @@
|
|
|
1
|
-
<?xml version="1.0" encoding="UTF-8"?>
|
|
2
|
-
<ui version="4.0">
|
|
3
|
-
<class>Form</class>
|
|
4
|
-
<widget class="QWidget" name="Form">
|
|
5
|
-
<property name="geometry">
|
|
6
|
-
<rect>
|
|
7
|
-
<x>0</x>
|
|
8
|
-
<y>0</y>
|
|
9
|
-
<width>470</width>
|
|
10
|
-
<height>305</height>
|
|
11
|
-
</rect>
|
|
12
|
-
</property>
|
|
13
|
-
<property name="windowTitle">
|
|
14
|
-
<string>Form</string>
|
|
15
|
-
</property>
|
|
16
|
-
<widget class="QGroupBox" name="groupBox">
|
|
17
|
-
<property name="geometry">
|
|
18
|
-
<rect>
|
|
19
|
-
<x>10</x>
|
|
20
|
-
<y>30</y>
|
|
21
|
-
<width>451</width>
|
|
22
|
-
<height>181</height>
|
|
23
|
-
</rect>
|
|
24
|
-
</property>
|
|
25
|
-
<property name="title">
|
|
26
|
-
<string>Parameters</string>
|
|
27
|
-
</property>
|
|
28
|
-
<widget class="QLabel" name="Description">
|
|
29
|
-
<property name="geometry">
|
|
30
|
-
<rect>
|
|
31
|
-
<x>10</x>
|
|
32
|
-
<y>40</y>
|
|
33
|
-
<width>431</width>
|
|
34
|
-
<height>101</height>
|
|
35
|
-
</rect>
|
|
36
|
-
</property>
|
|
37
|
-
<property name="text">
|
|
38
|
-
<string>This widget analyzes a column named "file_path" containing PDF file paths and determines if each PDF is text-based or image-based, adding a "type" meta column to the output data.</string>
|
|
39
|
-
</property>
|
|
40
|
-
<property name="textFormat">
|
|
41
|
-
<enum>Qt::AutoText</enum>
|
|
42
|
-
</property>
|
|
43
|
-
<property name="alignment">
|
|
44
|
-
<set>Qt::AlignLeading|Qt::AlignLeft|Qt::AlignVCenter</set>
|
|
45
|
-
</property>
|
|
46
|
-
<property name="wordWrap">
|
|
47
|
-
<bool>true</bool>
|
|
48
|
-
</property>
|
|
49
|
-
</widget>
|
|
50
|
-
</widget>
|
|
51
|
-
<widget class="QPushButton" name="pushButton_send">
|
|
52
|
-
<property name="enabled">
|
|
53
|
-
<bool>false</bool>
|
|
54
|
-
</property>
|
|
55
|
-
<property name="geometry">
|
|
56
|
-
<rect>
|
|
57
|
-
<x>170</x>
|
|
58
|
-
<y>244</y>
|
|
59
|
-
<width>291</width>
|
|
60
|
-
<height>31</height>
|
|
61
|
-
</rect>
|
|
62
|
-
</property>
|
|
63
|
-
<property name="text">
|
|
64
|
-
<string>Run</string>
|
|
65
|
-
</property>
|
|
66
|
-
</widget>
|
|
67
|
-
<widget class="QCheckBox" name="checkBox_send">
|
|
68
|
-
<property name="enabled">
|
|
69
|
-
<bool>false</bool>
|
|
70
|
-
</property>
|
|
71
|
-
<property name="geometry">
|
|
72
|
-
<rect>
|
|
73
|
-
<x>10</x>
|
|
74
|
-
<y>250</y>
|
|
75
|
-
<width>131</width>
|
|
76
|
-
<height>16</height>
|
|
77
|
-
</rect>
|
|
78
|
-
</property>
|
|
79
|
-
<property name="text">
|
|
80
|
-
<string>Auto send data</string>
|
|
81
|
-
</property>
|
|
82
|
-
</widget>
|
|
83
|
-
</widget>
|
|
84
|
-
<resources/>
|
|
85
|
-
<connections/>
|
|
86
|
-
</ui>
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/designer/ow_file_ext_selector.ui
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/designer/owinboxmailmonitoring.ui
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{io4it-3.0.2.1 → io4it-3.0.2.2}/orangecontrib/IO4IT/widgets/designer/owprocesspoolexecutor.ui
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|