io4it 2.1.2__tar.gz → 2.1.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {io4it-2.1.2 → io4it-2.1.3}/PKG-INFO +1 -1
- {io4it-2.1.2 → io4it-2.1.3}/io4it.egg-info/PKG-INFO +1 -1
- {io4it-2.1.2 → io4it-2.1.3}/io4it.egg-info/SOURCES.txt +2 -0
- io4it-2.1.3/orangecontrib/IO4IT/widgets/OWExtractTablesDocxToCSV.py +261 -0
- io4it-2.1.3/orangecontrib/IO4IT/widgets/designer/owdocxtocsv.ui +99 -0
- {io4it-2.1.2 → io4it-2.1.3}/setup.py +1 -1
- {io4it-2.1.2 → io4it-2.1.3}/io4it.egg-info/dependency_links.txt +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/io4it.egg-info/entry_points.txt +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/io4it.egg-info/namespace_packages.txt +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/io4it.egg-info/requires.txt +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/io4it.egg-info/top_level.txt +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/__init__.py +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/ocr_function/__init__.py +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/ocr_function/word_converter.py +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/utils/__init__.py +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/utils/mail.py +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/utils/offuscation_basique.py +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/utils/pool_exec_utils.py +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/utils/utils_md.py +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/OWChatGpt.py +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/OWDeep_Search.py +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/OWDoclingMarkdownizerSimple.py +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/OWExportMarkdown.py +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/OWInboxMailMonitoring.py +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/OWMarkdownLoader.py +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/OWMarkdownizer.py +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/OWOfficeNormalizer.py +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/OWPdfType.py +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/OWProcessPoolExecutor.py +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/OWS3Uploader.py +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/OWS3downloader.py +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/OWS3list.py +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/OWSpeechToText.py +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/OWmailLoader.py +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/OWmailSender.py +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/OWwordpdf2docx.py +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/__init__.py +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/designer/__init__.py +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/designer/nogui.ui +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/designer/ow_file_ext_selector.ui +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/designer/owchatgpt.ui +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/designer/owdeepsearch.ui +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/designer/owdoclingasr.ui +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/designer/owdoclingmarkdownizersimple.ui +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/designer/owexportmarkdown.ui +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/designer/owinboxmailmonitoring.ui +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/designer/owmailloader.ui +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/designer/owmailsender.ui +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/designer/owmarkdownizer.ui +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/designer/owmarkdownloader.ui +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/designer/owofficenormalizer.ui +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/designer/owpdftype.ui +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/designer/owprocesspoolexecutor.ui +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/designer/owspeechtotext.ui +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/designer/owvisualizationer.ui +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/designer/wordpdf2docx.ui +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/icons/__init__.py +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/icons/chatgpt.png +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/icons/check_pdf.png +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/icons/deepsearch.svg +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/icons/dep_md_old.png +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/icons/download.png +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/icons/export_md.png +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/icons/extract_table.png +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/icons/file_extensor.png +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/icons/list_aws.png +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/icons/load_md.png +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/icons/mail_loader.png +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/icons/mail_writer.png +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/icons/md.png +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/icons/monitor-email.svg +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/icons/office_normalizer.png +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/icons/process_pool_executor.png +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/icons/speech_to_text.png +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/icons/upload.png +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/icons/visualizationer.png +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/icons/wordpdf2docx.png +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/icons_dev/__init__.py +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/orangecontrib/__init__.py +0 -0
- {io4it-2.1.2 → io4it-2.1.3}/setup.cfg +0 -0
|
@@ -19,6 +19,7 @@ orangecontrib/IO4IT/widgets/OWChatGpt.py
|
|
|
19
19
|
orangecontrib/IO4IT/widgets/OWDeep_Search.py
|
|
20
20
|
orangecontrib/IO4IT/widgets/OWDoclingMarkdownizerSimple.py
|
|
21
21
|
orangecontrib/IO4IT/widgets/OWExportMarkdown.py
|
|
22
|
+
orangecontrib/IO4IT/widgets/OWExtractTablesDocxToCSV.py
|
|
22
23
|
orangecontrib/IO4IT/widgets/OWInboxMailMonitoring.py
|
|
23
24
|
orangecontrib/IO4IT/widgets/OWMarkdownLoader.py
|
|
24
25
|
orangecontrib/IO4IT/widgets/OWMarkdownizer.py
|
|
@@ -40,6 +41,7 @@ orangecontrib/IO4IT/widgets/designer/owchatgpt.ui
|
|
|
40
41
|
orangecontrib/IO4IT/widgets/designer/owdeepsearch.ui
|
|
41
42
|
orangecontrib/IO4IT/widgets/designer/owdoclingasr.ui
|
|
42
43
|
orangecontrib/IO4IT/widgets/designer/owdoclingmarkdownizersimple.ui
|
|
44
|
+
orangecontrib/IO4IT/widgets/designer/owdocxtocsv.ui
|
|
43
45
|
orangecontrib/IO4IT/widgets/designer/owexportmarkdown.ui
|
|
44
46
|
orangecontrib/IO4IT/widgets/designer/owinboxmailmonitoring.ui
|
|
45
47
|
orangecontrib/IO4IT/widgets/designer/owmailloader.ui
|
|
@@ -0,0 +1,261 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import sys
|
|
3
|
+
import docx
|
|
4
|
+
import pandas as pd
|
|
5
|
+
import re
|
|
6
|
+
# Removed: import json
|
|
7
|
+
|
|
8
|
+
from AnyQt.QtWidgets import QApplication, QPushButton
|
|
9
|
+
from Orange.widgets import widget
|
|
10
|
+
from Orange.widgets.utils.signals import Input, Output
|
|
11
|
+
from Orange.data import Domain, StringVariable, Table, DiscreteVariable
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
if "site-packages/Orange/widgets" in os.path.dirname(os.path.abspath(__file__)).replace("\\", "/"):
|
|
15
|
+
from Orange.widgets.orangecontrib.AAIT.utils.import_uic import uic
|
|
16
|
+
from Orange.widgets.orangecontrib.AAIT.utils.initialize_from_ini import apply_modification_from_python_file
|
|
17
|
+
else:
|
|
18
|
+
from orangecontrib.AAIT.utils.import_uic import uic
|
|
19
|
+
from orangecontrib.AAIT.utils.initialize_from_ini import apply_modification_from_python_file
|
|
20
|
+
|
|
21
|
+
@apply_modification_from_python_file(filepath_original_widget=__file__)
class OWExtractTablesDocxToCSV(widget.OWWidget):
    """Orange widget that exports the tables of Word (.docx) documents to XLSX.

    For every path found in the ``file_path`` column of the input table, each
    Word table of the document is written to its own ``.xlsx`` file inside a
    ``<document name>_tables_data`` folder created next to the document.
    """
    name = "Docx to XLSX Tables (Full Tables)"
    description = "Extrait toutes les tables de documents Word et les sauvegarde en fichiers XLSX distincts"
    category = "AAIT - TOOLBOX"
    icon = "icons/extract_table.png"
    if "site-packages/Orange/widgets" in os.path.dirname(os.path.abspath(__file__)).replace("\\", "/"):
        icon = "icons_dev/extract_table.png"
    gui = os.path.join(os.path.dirname(os.path.abspath(__file__)), "designer/owdocxtocsv.ui")
    want_control_area = False
    priority = 1005

    class Inputs:
        data = Input("Files Table", Table)

    class Outputs:
        data = Output("Processed Files Table", Table)
        status_data = Output("Status Table", Table)

    def __init__(self):
        """Load the Designer form, wire its controls and reset the widget state."""
        super().__init__()
        try:
            uic.loadUi(self.gui, self)
        except Exception as e:
            self.warning(f"Impossible de charger le fichier UI. {e}")
            # Without a form there is no checkbox to listen to; the widget
            # keeps working with its default options.
            self.checkBox_alpha_headers = None
            self.gui = None

        # Run button. The form shipped with this widget names the button
        # "pushButton_send", so fall back to that name when "pushButton_run"
        # does not exist; otherwise the button would never be connected.
        # NOTE(review): the form also ships this button disabled; its
        # enabled state is deliberately left untouched here.
        run_button = self.findChild(QPushButton, "pushButton_run")
        if run_button is None:
            run_button = self.findChild(QPushButton, "pushButton_send")
        self.pushButton_run = run_button
        if run_button is not None:
            run_button.clicked.connect(self.run)

        self.data = None
        self.autorun = True
        self.processed_statuses = []
        self.use_alpha_headers = False

        checkbox = getattr(self, "checkBox_alpha_headers", None)
        if self.gui and checkbox is not None:
            # Start from the state shown in the form, then follow changes.
            self.use_alpha_headers = bool(checkbox.isChecked())
            checkbox.stateChanged.connect(self._update_alpha_headers_state)

        self.post_initialized()

    def _update_alpha_headers_state(self, state):
        """Mirror the "alphabetical headers" checkbox state into the option flag."""
        self.use_alpha_headers = bool(state)

    @Inputs.data
    def set_data(self, in_data: Table | None):
        """Store the incoming table and process it right away when autorun is on."""
        self.data = in_data
        if self.autorun:
            self.run()

    def run(self):
        """Process every file listed in the input table and publish the results.

        Sends ``None`` on both outputs when there is no input, or when the
        input has no ``file_path`` column (an error is shown in that case).
        """
        if self.data is None:
            self.Outputs.data.send(None)
            self.Outputs.status_data.send(None)
            return

        self.error("")
        try:
            self.data.domain["file_path"]
        except KeyError:
            self.error("Le tableau d'entrée doit contenir une colonne 'file_path'.")
            self.Outputs.data.send(None)
            self.Outputs.status_data.send(None)
            return

        self.progressBarInit()
        try:
            self.processed_statuses = []
            self.Outputs.status_data.send(None)

            result_rows = self._process_files(self.data)

            output_domain = Domain([], metas=[
                StringVariable("src_path"),
                StringVariable("output_dir_path"),
                StringVariable("status")
            ])
            result_table = Table.from_list(output_domain, result_rows)
            self.Outputs.data.send(result_table)
        finally:
            # Always close the progress bar, even when processing raised.
            self.progressBarFinished()

    def _process_files(self, in_data: Table) -> list:
        """Process each path of the ``file_path`` column.

        Returns one ``[src_path, output_dir_path, "status: details"]`` row per
        input path. The status output is refreshed after every file.
        """
        result_rows = []
        file_paths = [str(x) for x in in_data.get_column("file_path")]
        total_files = len(file_paths)

        for i, full_path in enumerate(file_paths):
            self.progressBarSet((i + 1) / total_files * 100)

            status_short, details, output_dir_path = self._process_one_file(full_path)

            self.processed_statuses.append([full_path, status_short, details])
            self._send_status_table()
            result_rows.append([full_path, output_dir_path, f"{status_short}: {details}"])

            QApplication.processEvents()

        return result_rows

    def _process_one_file(self, full_path):
        """Return ``(status, details, output_dir_path)`` for a single input path."""
        if not full_path.lower().endswith('.docx'):
            return "skipped", "Fichier ignoré : n'est pas un fichier .docx.", "N/A"

        try:
            tables_found, output_dir_path = self._extraire_et_convertir(full_path)
        except FileNotFoundError:
            return "ko", "Fichier non trouvé.", ""
        except Exception as e:
            return "ko", f"Une erreur inattendue est survenue : {e}", ""

        if tables_found > 0:
            details = f"{tables_found} table(s) extraite(s) et convertie(s) en XLSX."
            return "ok", details, output_dir_path
        return "ko", "Aucune table valide trouvée.", output_dir_path

    def _extraire_et_convertir(self, docx_path):
        """Extract the tables of a Word document and save each one as XLSX.

        Returns ``(number_of_tables_saved, output_directory_path)``.

        Raises:
            FileNotFoundError: if ``docx_path`` is not an existing file.
        """
        # Checked explicitly so that a missing file is reported as such.
        if not os.path.isfile(docx_path):
            raise FileNotFoundError(docx_path)

        dir_name, file_name = os.path.split(docx_path)
        base_name, _ = os.path.splitext(file_name)
        output_dir = os.path.join(dir_name, base_name + '_tables_data')

        # Open the document before creating the output folder, so that an
        # unreadable document does not leave an empty folder behind.
        doc = docx.Document(docx_path)
        os.makedirs(output_dir, exist_ok=True)

        total_tables_found = 0
        for table_index, table in enumerate(doc.tables, start=1):
            # Whole table, no splitting: one list of stripped cell texts per row.
            raw_data = [[cell.text.strip() for cell in row.cells] for row in table.rows]

            # None means the table holds no non-empty row.
            df = self._create_dataframe(raw_data)
            if df is None:
                continue

            # 'a' is used as the suffix of the file name.
            table_name = f"table_{table_index}_a"
            # Only count tables whose file was actually written.
            if self._save_sub_table(df, output_dir, table_name):
                total_tables_found += 1

        return total_tables_found, output_dir

    @staticmethod
    def _column_letters(index):
        """Return the spreadsheet-style label of a 0-based column (A..Z, AA, AB, ...)."""
        letters = ""
        remaining = index + 1
        while remaining > 0:
            remaining, offset = divmod(remaining - 1, 26)
            letters = chr(ord('A') + offset) + letters
        return letters

    def _create_dataframe(self, data):
        """Build a DataFrame from raw table rows (lists of cell strings).

        Rows without any non-blank cell are dropped and short rows are padded
        with empty strings. Returns ``None`` when no row is left.
        """
        data = [list(row) for row in data if row and any(cell.strip() for cell in row)]
        if not data:
            return None

        max_cols = max(len(row) for row in data)
        data = [row + [''] * (max_cols - len(row)) for row in data]

        if self.use_alpha_headers or len(data) == 1:
            # Alphabetical headers: every row is data. Also used when there
            # is a single row, which would otherwise be consumed as header.
            headers = [self._column_letters(j) for j in range(max_cols)]
            df = pd.DataFrame(data, columns=headers)
        else:
            # Standard case: first row is the header, the rest is data.
            df = pd.DataFrame(data[1:], columns=data[0])

        df.columns = df.columns.astype(str)
        return df

    def _save_sub_table(self, df, output_dir, table_full_name):
        """Write ``df`` to ``<output_dir>/<table_full_name>.xlsx``.

        Returns ``True`` on success. On failure a widget warning is shown and
        ``False`` is returned.
        """
        output_xlsx_path = os.path.join(output_dir, f"{table_full_name}.xlsx")
        try:
            df.to_excel(output_xlsx_path, index=False, engine='openpyxl')
        except Exception as e:
            self.warning(f"Impossible de sauvegarder la table '{table_full_name}' en format XLSX : {e}")
            return False
        return True

    def _send_status_table(self):
        """Publish the statuses collected so far on the status output."""
        domain = Domain([], metas=[
            StringVariable("src_path"),
            DiscreteVariable("status", values=["ok", "ko", "skipped"]),
            StringVariable("details")
        ])
        status_table = Table.from_list(domain, self.processed_statuses)
        self.Outputs.status_data.send(status_table)

    def post_initialized(self):
        """Hook called at the end of ``__init__``; does nothing here."""
        pass
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
if __name__ == "__main__":
    # Stand-alone launch: create a Qt application, show the widget and
    # run the event loop.
    qt_app = QApplication(sys.argv)
    standalone_widget = OWExtractTablesDocxToCSV()
    standalone_widget.show()
    qt_app.exec()
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
2
|
+
<ui version="4.0">
|
|
3
|
+
<class>Form</class>
|
|
4
|
+
<widget class="QWidget" name="Form">
|
|
5
|
+
<property name="geometry">
|
|
6
|
+
<rect>
|
|
7
|
+
<x>0</x>
|
|
8
|
+
<y>0</y>
|
|
9
|
+
<width>476</width>
|
|
10
|
+
<height>311</height>
|
|
11
|
+
</rect>
|
|
12
|
+
</property>
|
|
13
|
+
<property name="windowTitle">
|
|
14
|
+
<string>Docx to XLSX Tables</string>
|
|
15
|
+
</property>
|
|
16
|
+
<widget class="QGroupBox" name="groupBox">
|
|
17
|
+
<property name="geometry">
|
|
18
|
+
<rect>
|
|
19
|
+
<x>10</x>
|
|
20
|
+
<y>30</y>
|
|
21
|
+
<width>451</width>
|
|
22
|
+
<height>201</height>
|
|
23
|
+
</rect>
|
|
24
|
+
</property>
|
|
25
|
+
<property name="title">
|
|
26
|
+
<string>Parameters</string>
|
|
27
|
+
</property>
|
|
28
|
+
<widget class="QLabel" name="Description">
|
|
29
|
+
<property name="geometry">
|
|
30
|
+
<rect>
|
|
31
|
+
<x>10</x>
|
|
32
|
+
<y>20</y>
|
|
33
|
+
<width>431</width>
|
|
34
|
+
<height>121</height>
|
|
35
|
+
</rect>
|
|
36
|
+
</property>
|
|
37
|
+
<property name="text">
|
|
38
|
+
<string>This widget extracts each table from a Microsoft Word file (.docx) and saves it as a separate .xlsx file. All output files are placed in a dedicated subfolder named after the source document (with the suffix "_tables_data"), next to that document.</string>
|
|
39
|
+
</property>
|
|
40
|
+
<property name="textFormat">
|
|
41
|
+
<enum>Qt::AutoText</enum>
|
|
42
|
+
</property>
|
|
43
|
+
<property name="alignment">
|
|
44
|
+
<set>Qt::AlignLeading|Qt::AlignLeft|Qt::AlignVCenter</set>
|
|
45
|
+
</property>
|
|
46
|
+
<property name="wordWrap">
|
|
47
|
+
<bool>true</bool>
|
|
48
|
+
</property>
|
|
49
|
+
</widget>
|
|
50
|
+
<widget class="QCheckBox" name="checkBox_alpha_headers">
|
|
51
|
+
<property name="geometry">
|
|
52
|
+
<rect>
|
|
53
|
+
<x>10</x>
|
|
54
|
+
<y>150</y>
|
|
55
|
+
<width>431</width>
|
|
56
|
+
<height>16</height>
|
|
57
|
+
</rect>
|
|
58
|
+
</property>
|
|
59
|
+
<property name="text">
|
|
60
|
+
<string>Generate alphabetical column headers (A, B, C...)</string>
|
|
61
|
+
</property>
|
|
62
|
+
</widget>
|
|
63
|
+
</widget>
|
|
64
|
+
<widget class="QCheckBox" name="checkBox_send">
|
|
65
|
+
<property name="enabled">
|
|
66
|
+
<bool>false</bool>
|
|
67
|
+
</property>
|
|
68
|
+
<property name="geometry">
|
|
69
|
+
<rect>
|
|
70
|
+
<x>10</x>
|
|
71
|
+
<y>256</y>
|
|
72
|
+
<width>131</width>
|
|
73
|
+
<height>16</height>
|
|
74
|
+
</rect>
|
|
75
|
+
</property>
|
|
76
|
+
<property name="text">
|
|
77
|
+
<string>Auto send data</string>
|
|
78
|
+
</property>
|
|
79
|
+
</widget>
|
|
80
|
+
<widget class="QPushButton" name="pushButton_send">
|
|
81
|
+
<property name="enabled">
|
|
82
|
+
<bool>false</bool>
|
|
83
|
+
</property>
|
|
84
|
+
<property name="geometry">
|
|
85
|
+
<rect>
|
|
86
|
+
<x>160</x>
|
|
87
|
+
<y>250</y>
|
|
88
|
+
<width>301</width>
|
|
89
|
+
<height>31</height>
|
|
90
|
+
</rect>
|
|
91
|
+
</property>
|
|
92
|
+
<property name="text">
|
|
93
|
+
<string>Run</string>
|
|
94
|
+
</property>
|
|
95
|
+
</widget>
|
|
96
|
+
</widget>
|
|
97
|
+
<resources/>
|
|
98
|
+
<connections/>
|
|
99
|
+
</ui>
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{io4it-2.1.2 → io4it-2.1.3}/orangecontrib/IO4IT/widgets/designer/owdoclingmarkdownizersimple.ui
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|