io4it 2.1.1.4__tar.gz → 2.1.2.1__tar.gz

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (81)
  1. {io4it-2.1.1.4 → io4it-2.1.2.1}/PKG-INFO +4 -7
  2. {io4it-2.1.1.4 → io4it-2.1.2.1}/io4it.egg-info/PKG-INFO +4 -7
  3. {io4it-2.1.1.4 → io4it-2.1.2.1}/io4it.egg-info/SOURCES.txt +4 -0
  4. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/utils/mail.py +1 -1
  5. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/OWChatGpt.py +44 -45
  6. io4it-2.1.2.1/orangecontrib/IO4IT/widgets/OWExtractTablesDocxToCSV.py +261 -0
  7. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/OWInboxMailMonitoring.py +1 -1
  8. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/OWMarkdownizer.py +0 -1
  9. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/OWOfficeNormalizer.py +103 -46
  10. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/OWPdfType.py +23 -35
  11. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/OWS3Uploader.py +4 -3
  12. io4it-2.1.2.1/orangecontrib/IO4IT/widgets/designer/OWmailSender.py +155 -0
  13. io4it-2.1.1.4/orangecontrib/IO4IT/widgets/designer/owofficenormalizer.ui → io4it-2.1.2.1/orangecontrib/IO4IT/widgets/designer/owdocxtocsv.ui +17 -4
  14. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/designer/owmailsender.ui +96 -31
  15. io4it-2.1.2.1/orangecontrib/IO4IT/widgets/designer/owofficenormalizer.ui +124 -0
  16. io4it-2.1.2.1/orangecontrib/IO4IT/widgets/icons/extract_table.png +0 -0
  17. {io4it-2.1.1.4 → io4it-2.1.2.1}/setup.py +1 -1
  18. {io4it-2.1.1.4 → io4it-2.1.2.1}/io4it.egg-info/dependency_links.txt +0 -0
  19. {io4it-2.1.1.4 → io4it-2.1.2.1}/io4it.egg-info/entry_points.txt +0 -0
  20. {io4it-2.1.1.4 → io4it-2.1.2.1}/io4it.egg-info/namespace_packages.txt +0 -0
  21. {io4it-2.1.1.4 → io4it-2.1.2.1}/io4it.egg-info/requires.txt +0 -0
  22. {io4it-2.1.1.4 → io4it-2.1.2.1}/io4it.egg-info/top_level.txt +0 -0
  23. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/__init__.py +0 -0
  24. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/ocr_function/__init__.py +0 -0
  25. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/ocr_function/word_converter.py +0 -0
  26. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/utils/__init__.py +0 -0
  27. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/utils/offuscation_basique.py +0 -0
  28. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/utils/pool_exec_utils.py +0 -0
  29. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/utils/utils_md.py +0 -0
  30. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/OWDeep_Search.py +0 -0
  31. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/OWDoclingMarkdownizerSimple.py +0 -0
  32. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/OWExportMarkdown.py +0 -0
  33. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/OWMarkdownLoader.py +0 -0
  34. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/OWProcessPoolExecutor.py +0 -0
  35. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/OWS3downloader.py +0 -0
  36. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/OWS3list.py +0 -0
  37. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/OWSpeechToText.py +0 -0
  38. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/OWmailLoader.py +0 -0
  39. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/OWmailSender.py +0 -0
  40. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/OWwordpdf2docx.py +0 -0
  41. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/__init__.py +0 -0
  42. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/designer/__init__.py +0 -0
  43. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/designer/nogui.ui +0 -0
  44. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/designer/ow_file_ext_selector.ui +0 -0
  45. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/designer/owchatgpt.ui +0 -0
  46. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/designer/owdeepsearch.ui +0 -0
  47. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/designer/owdoclingasr.ui +0 -0
  48. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/designer/owdoclingmarkdownizersimple.ui +0 -0
  49. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/designer/owexportmarkdown.ui +0 -0
  50. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/designer/owinboxmailmonitoring.ui +0 -0
  51. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/designer/owmailloader.ui +0 -0
  52. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/designer/owmarkdownizer.ui +0 -0
  53. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/designer/owmarkdownloader.ui +0 -0
  54. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/designer/owpdftype.ui +0 -0
  55. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/designer/owprocesspoolexecutor.ui +0 -0
  56. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/designer/owspeechtotext.ui +0 -0
  57. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/designer/owvisualizationer.ui +0 -0
  58. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/designer/wordpdf2docx.ui +0 -0
  59. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/icons/__init__.py +0 -0
  60. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/icons/chatgpt.png +0 -0
  61. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/icons/check_pdf.png +0 -0
  62. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/icons/deepsearch.svg +0 -0
  63. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/icons/dep_md_old.png +0 -0
  64. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/icons/download.png +0 -0
  65. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/icons/export_md.png +0 -0
  66. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/icons/file_extensor.png +0 -0
  67. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/icons/list_aws.png +0 -0
  68. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/icons/load_md.png +0 -0
  69. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/icons/mail_loader.png +0 -0
  70. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/icons/mail_writer.png +0 -0
  71. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/icons/md.png +0 -0
  72. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/icons/monitor-email.svg +0 -0
  73. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/icons/office_normalizer.png +0 -0
  74. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/icons/process_pool_executor.png +0 -0
  75. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/icons/speech_to_text.png +0 -0
  76. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/icons/upload.png +0 -0
  77. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/icons/visualizationer.png +0 -0
  78. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/icons/wordpdf2docx.png +0 -0
  79. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/IO4IT/widgets/icons_dev/__init__.py +0 -0
  80. {io4it-2.1.1.4 → io4it-2.1.2.1}/orangecontrib/__init__.py +0 -0
  81. {io4it-2.1.1.4 → io4it-2.1.2.1}/setup.cfg +0 -0
@@ -1,13 +1,10 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: io4it
3
- Version: 2.1.1.4
4
- Summary: UNKNOWN
3
+ Version: 2.1.2.1
5
4
  Home-page:
6
5
  Author:
7
6
  Author-email:
8
- License: UNKNOWN
9
7
  Keywords: orange3 add-on
10
- Platform: UNKNOWN
11
8
  Requires-Dist: pylatexenc
12
9
  Requires-Dist: docopt
13
10
  Requires-Dist: boto3
@@ -29,5 +26,5 @@ Requires-Dist: doc2docx
29
26
  Requires-Dist: msal
30
27
  Requires-Dist: exchangelib
31
28
  Requires-Dist: CATEGORIT
32
-
33
- UNKNOWN
29
+ Dynamic: keywords
30
+ Dynamic: requires-dist
@@ -1,13 +1,10 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: io4it
3
- Version: 2.1.1.4
4
- Summary: UNKNOWN
3
+ Version: 2.1.2.1
5
4
  Home-page:
6
5
  Author:
7
6
  Author-email:
8
- License: UNKNOWN
9
7
  Keywords: orange3 add-on
10
- Platform: UNKNOWN
11
8
  Requires-Dist: pylatexenc
12
9
  Requires-Dist: docopt
13
10
  Requires-Dist: boto3
@@ -29,5 +26,5 @@ Requires-Dist: doc2docx
29
26
  Requires-Dist: msal
30
27
  Requires-Dist: exchangelib
31
28
  Requires-Dist: CATEGORIT
32
-
33
- UNKNOWN
29
+ Dynamic: keywords
30
+ Dynamic: requires-dist
@@ -19,6 +19,7 @@ orangecontrib/IO4IT/widgets/OWChatGpt.py
19
19
  orangecontrib/IO4IT/widgets/OWDeep_Search.py
20
20
  orangecontrib/IO4IT/widgets/OWDoclingMarkdownizerSimple.py
21
21
  orangecontrib/IO4IT/widgets/OWExportMarkdown.py
22
+ orangecontrib/IO4IT/widgets/OWExtractTablesDocxToCSV.py
22
23
  orangecontrib/IO4IT/widgets/OWInboxMailMonitoring.py
23
24
  orangecontrib/IO4IT/widgets/OWMarkdownLoader.py
24
25
  orangecontrib/IO4IT/widgets/OWMarkdownizer.py
@@ -33,6 +34,7 @@ orangecontrib/IO4IT/widgets/OWmailLoader.py
33
34
  orangecontrib/IO4IT/widgets/OWmailSender.py
34
35
  orangecontrib/IO4IT/widgets/OWwordpdf2docx.py
35
36
  orangecontrib/IO4IT/widgets/__init__.py
37
+ orangecontrib/IO4IT/widgets/designer/OWmailSender.py
36
38
  orangecontrib/IO4IT/widgets/designer/__init__.py
37
39
  orangecontrib/IO4IT/widgets/designer/nogui.ui
38
40
  orangecontrib/IO4IT/widgets/designer/ow_file_ext_selector.ui
@@ -40,6 +42,7 @@ orangecontrib/IO4IT/widgets/designer/owchatgpt.ui
40
42
  orangecontrib/IO4IT/widgets/designer/owdeepsearch.ui
41
43
  orangecontrib/IO4IT/widgets/designer/owdoclingasr.ui
42
44
  orangecontrib/IO4IT/widgets/designer/owdoclingmarkdownizersimple.ui
45
+ orangecontrib/IO4IT/widgets/designer/owdocxtocsv.ui
43
46
  orangecontrib/IO4IT/widgets/designer/owexportmarkdown.ui
44
47
  orangecontrib/IO4IT/widgets/designer/owinboxmailmonitoring.ui
45
48
  orangecontrib/IO4IT/widgets/designer/owmailloader.ui
@@ -59,6 +62,7 @@ orangecontrib/IO4IT/widgets/icons/deepsearch.svg
59
62
  orangecontrib/IO4IT/widgets/icons/dep_md_old.png
60
63
  orangecontrib/IO4IT/widgets/icons/download.png
61
64
  orangecontrib/IO4IT/widgets/icons/export_md.png
65
+ orangecontrib/IO4IT/widgets/icons/extract_table.png
62
66
  orangecontrib/IO4IT/widgets/icons/file_extensor.png
63
67
  orangecontrib/IO4IT/widgets/icons/list_aws.png
64
68
  orangecontrib/IO4IT/widgets/icons/load_md.png
@@ -9,7 +9,7 @@ from email.message import EmailMessage
9
9
  import mimetypes
10
10
 
11
11
  from exchangelib import Credentials, HTMLBody, Message, Mailbox
12
- from exchangelib.protocol import BaseProtocol, NoVerifyHTTPAdapter
12
+ #from exchangelib.protocol import BaseProtocol, NoVerifyHTTPAdapter
13
13
  from exchangelib import OAuth2Credentials, Identity, Configuration, Account, DELEGATE
14
14
 
15
15
 
@@ -2,7 +2,7 @@ import os
2
2
  import sys
3
3
  import base64
4
4
  import ast
5
- import openai
5
+ from openai import OpenAI
6
6
  import Orange
7
7
  from Orange.data import StringVariable
8
8
  from Orange.widgets.widget import OWWidget, Input, Output
@@ -98,30 +98,53 @@ class ChatGpt(OWWidget):
98
98
 
99
99
  def generate_answers(self):
100
100
  try:
101
- openai.api_key = self.api_keys
102
- response = openai.chat.completions.create(
103
- model=self.model,
104
- messages=[
105
- {
106
- "role": "system",
107
- "content": self.system_prompt
108
- },
109
- {
110
- "role": "user",
111
- "content": self.prompt
112
- }
113
- ],
114
- max_tokens=self.max_tokens,
115
- temperature=self.temperature
116
- )
117
- self.text_response = response.choices[0].message.content
101
+ client = OpenAI(api_key=self.api_keys)
102
+ system_content = []
103
+ if getattr(self, "system_prompt", None):
104
+ system_content = [{"type": "input_text", "text": str(self.system_prompt)}]
105
+
106
+ user_content = []
107
+ if isinstance(self.prompt, list):
108
+ user_content.extend(self.prompt)
109
+ else:
110
+ user_content.append({"type": "input_text", "text": str(self.prompt)})
111
+
112
+ if getattr(self, "image_paths", None):
113
+ # normalize image_paths to a list
114
+ if isinstance(self.image_paths, str):
115
+ self.image_paths = ast.literal_eval(self.image_paths)
116
+
117
+ for img_path in self.image_paths:
118
+ filename = os.path.basename(img_path)
119
+ user_content.append({"type": "input_text", "text": f"Photo : {filename}"})
120
+
121
+ with open(img_path, "rb") as f:
122
+ b64_img = base64.b64encode(f.read()).decode("utf-8")
123
+
124
+ mime = "image/png" if filename.lower().endswith(".png") else "image/jpeg"
125
+ user_content.append({
126
+ "type": "input_image",
127
+ "image_url": f"data:{mime};base64,{b64_img}",
128
+ })
129
+ response = client.responses.create(
130
+ model=self.model,
131
+ input=[
132
+ {"role": "system", "content": system_content},
133
+ {"role": "user", "content": user_content},
134
+ ],
135
+ max_output_tokens=self.max_tokens,
136
+ # temperature=self.temperature,
137
+ )
138
+ self.text_response = response.output_text
139
+
140
+
141
+ if self.text_response is None:
142
+ self.error("No response from model.")
143
+
118
144
  except Exception as e:
119
145
  print(e)
120
146
  self.error(f"Error: {e}")
121
147
  return
122
- if self.text_response is None:
123
- self.error("No response from chatgpt.")
124
-
125
148
 
126
149
  def run(self):
127
150
  self.error("")
@@ -137,30 +160,6 @@ class ChatGpt(OWWidget):
137
160
  self.error("No api keys provided.")
138
161
  return
139
162
 
140
- # si on relance la génération par le bouton le prompt est déjà rempli (déjà une liste)
141
- # mais on peut changer le model, la temp ou le nombre max de token
142
- if isinstance(self.prompt, list):
143
- self.prompt = [{"type": "text", "text": self.prompt}]
144
- if self.image_paths is not None and self.image_paths != []:
145
- if type(self.image_paths) == str:
146
- self.image_paths = ast.literal_eval(self.image_paths)
147
- for img_path in self.image_paths:
148
- filename = os.path.basename(img_path)
149
-
150
- # Ajoute une mention de l’image avant
151
- self.prompt.append({
152
- "type": "text",
153
- "text": f"Photo : {filename}"
154
- })
155
-
156
- with open(img_path, "rb") as f:
157
- b64_img = base64.b64encode(f.read()).decode("utf-8")
158
- self.prompt.append({
159
- "type": "image_url",
160
- "image_url": {
161
- "url": f"data:image/jpeg;base64,{b64_img}"
162
- }
163
- })
164
163
  self.progressBarInit()
165
164
  self.thread = thread_management.Thread(self.generate_answers)
166
165
  self.thread.progress.connect(self.handle_progress)
@@ -0,0 +1,261 @@
1
+ import os
2
+ import sys
3
+ import docx
4
+ import pandas as pd
5
+ import re
6
+ # Removed: import json
7
+
8
+ from AnyQt.QtWidgets import QApplication, QPushButton
9
+ from Orange.widgets import widget
10
+ from Orange.widgets.utils.signals import Input, Output
11
+ from Orange.data import Domain, StringVariable, Table, DiscreteVariable
12
+
13
+
14
+ if "site-packages/Orange/widgets" in os.path.dirname(os.path.abspath(__file__)).replace("\\", "/"):
15
+ from Orange.widgets.orangecontrib.AAIT.utils.import_uic import uic
16
+ from Orange.widgets.orangecontrib.AAIT.utils.initialize_from_ini import apply_modification_from_python_file
17
+ else:
18
+ from orangecontrib.AAIT.utils.import_uic import uic
19
+ from orangecontrib.AAIT.utils.initialize_from_ini import apply_modification_from_python_file
20
+
21
+ @apply_modification_from_python_file(filepath_original_widget=__file__)
22
+ class OWExtractTablesDocxToCSV(widget.OWWidget):
23
+ """
24
+ Orange Widget qui extrait les tableaux de documents Word (.docx) et les sauvegarde
25
+ en fichiers XLSX distincts (une table Word = un fichier XLSX).
26
+ """
27
+ name = "Docx to XLSX Tables (Full Tables)"
28
+ description = "Extrait toutes les tables de documents Word et les sauvegarde en fichiers XLSX distincts"
29
+ category = "AAIT - TOOLBOX"
30
+ icon = "icons/extract_table.png"
31
+ if "site-packages/Orange/widgets" in os.path.dirname(os.path.abspath(__file__)).replace("\\", "/"):
32
+ icon = "icons_dev/extract_table.png"
33
+ gui = os.path.join(os.path.dirname(os.path.abspath(__file__)), "designer/owdocxtocsv.ui")
34
+ want_control_area = False
35
+ priority = 1005
36
+
37
+ class Inputs:
38
+ data = Input("Files Table", Table)
39
+
40
+ class Outputs:
41
+ data = Output("Processed Files Table", Table)
42
+ status_data = Output("Status Table", Table)
43
+
44
+ def __init__(self):
45
+ super().__init__()
46
+ try:
47
+ uic.loadUi(self.gui, self)
48
+ except Exception as e:
49
+ self.warning(f"Impossible de charger le fichier UI. {e}")
50
+
51
+ class DummyCheckbox:
52
+ def stateChanged(self, *args): pass
53
+
54
+ self.checkBox_alpha_headers = DummyCheckbox()
55
+ self.gui = None
56
+
57
+ # Connexion du bouton d'exécution
58
+ self.pushButton_run = self.findChild(QPushButton, "pushButton_run")
59
+ if self.pushButton_run:
60
+ self.pushButton_run.clicked.connect(self.run)
61
+
62
+ self.data = None
63
+ self.autorun = True
64
+ self.processed_statuses = []
65
+ self.use_alpha_headers = False
66
+ if self.gui:
67
+ self.checkBox_alpha_headers.stateChanged.connect(self._update_alpha_headers_state)
68
+
69
+ self.post_initialized()
70
+
71
+ def _update_alpha_headers_state(self, state):
72
+ self.use_alpha_headers = bool(state)
73
+
74
+ @Inputs.data
75
+ def set_data(self, in_data: Table | None):
76
+ self.data = in_data
77
+ if self.autorun:
78
+ self.run()
79
+
80
+ def run(self):
81
+ if self.data is None:
82
+ self.Outputs.data.send(None)
83
+ self.Outputs.status_data.send(None)
84
+ return
85
+
86
+ self.error("")
87
+ try:
88
+ self.data.domain["file_path"]
89
+ except KeyError:
90
+ self.error("Le tableau d'entrée doit contenir une colonne 'file_path'.")
91
+ self.Outputs.data.send(None)
92
+ self.Outputs.status_data.send(None)
93
+ return
94
+
95
+ self.progressBarInit()
96
+ self.processed_statuses = []
97
+ self.Outputs.status_data.send(None)
98
+
99
+ result_rows = self._process_files(self.data)
100
+
101
+ output_domain = Domain([], metas=[
102
+ StringVariable("src_path"),
103
+ StringVariable("output_dir_path"),
104
+ StringVariable("status")
105
+ ])
106
+ result_table = Table.from_list(output_domain, result_rows)
107
+ self.Outputs.data.send(result_table)
108
+
109
+ self.progressBarFinished()
110
+
111
+ def _process_files(self, in_data: Table) -> list:
112
+ result_rows = []
113
+ file_paths = [str(x) for x in in_data.get_column("file_path")]
114
+ total_files = len(file_paths)
115
+
116
+ if not file_paths:
117
+ return []
118
+
119
+ for i, full_path in enumerate(file_paths):
120
+ self.progressBarSet((i + 1) / total_files * 100)
121
+
122
+ status_short = "ko"
123
+ details = "traitement échoué"
124
+ output_dir_path = ""
125
+
126
+ if not full_path.lower().endswith('.docx'):
127
+ status_short = "skipped"
128
+ details = "Fichier ignoré : n'est pas un fichier .docx."
129
+ output_dir_path = "N/A"
130
+ self.processed_statuses.append([full_path, status_short, details])
131
+ self._send_status_table()
132
+ result_rows.append([full_path, output_dir_path, f"{status_short}: {details}"])
133
+ QApplication.processEvents()
134
+ continue
135
+
136
+ try:
137
+ tables_found, output_dir_path = self._extraire_et_convertir(full_path)
138
+
139
+ if tables_found > 0:
140
+ status_short = "ok"
141
+ details = f"{tables_found} table(s) extraite(s) et convertie(s) en XLSX."
142
+ else:
143
+ status_short = "ko"
144
+ details = "Aucune table valide trouvée."
145
+
146
+ except FileNotFoundError:
147
+ details = "Fichier non trouvé."
148
+ except Exception as e:
149
+ details = f"Une erreur inattendue est survenue : {e}"
150
+
151
+ self.processed_statuses.append([full_path, status_short, details])
152
+ self._send_status_table()
153
+
154
+ result_rows.append([full_path, output_dir_path, f"{status_short}: {details}"])
155
+
156
+ QApplication.processEvents()
157
+
158
+ return result_rows
159
+
160
+ def _extraire_et_convertir(self, docx_path):
161
+ """
162
+ Extrait les tableaux d'un document Word et sauvegarde chaque table entière en XLSX.
163
+ Retourne (nombre_de_tables_trouvées, chemin_dossier_sortie).
164
+ """
165
+ dir_name, file_name = os.path.split(docx_path)
166
+ base_name, _ = os.path.splitext(file_name)
167
+
168
+ output_dir = os.path.join(dir_name, base_name + '_tables_data')
169
+ os.makedirs(output_dir, exist_ok=True)
170
+
171
+ doc = docx.Document(docx_path)
172
+ total_tables_found = 0
173
+
174
+ for i, table in enumerate(doc.tables):
175
+ raw_data = []
176
+ for row in table.rows:
177
+ # Extraction des données de toute la table
178
+ row_data = [cell.text.strip() for cell in row.cells]
179
+ raw_data.append(row_data)
180
+
181
+ # S'assurer qu'il y a des données non vides dans la table
182
+ if not raw_data or not any(row for row in raw_data):
183
+ continue
184
+
185
+ # --- PAS DE DÉCOUPAGE ---
186
+
187
+ table_index = i + 1
188
+ # Utilisation de 'a' comme suffixe pour le nom de fichier
189
+ table_name = f"table_{table_index}_a"
190
+
191
+ df = self._create_dataframe(raw_data)
192
+
193
+ if df is not None:
194
+ self._save_sub_table(df, output_dir, table_name)
195
+ total_tables_found += 1
196
+
197
+ return total_tables_found, output_dir
198
+
199
+ def _create_dataframe(self, data):
200
+ """
201
+ Crée le DataFrame à partir des lignes brutes.
202
+ """
203
+ # Nettoyer les lignes vides
204
+ data = [row for row in data if row and any(cell.strip() for cell in row)]
205
+ if not data:
206
+ return None
207
+
208
+ max_cols = max(len(row) for row in data)
209
+ data = [row + [''] * (max_cols - len(row)) for row in data]
210
+
211
+ if self.use_alpha_headers:
212
+ # Cas A : En-têtes alphabétiques. Toutes les lignes sont des données.
213
+ headers = [chr(ord('A') + j) for j in range(max_cols)]
214
+ df = pd.DataFrame(data, columns=headers)
215
+ else:
216
+ # Cas B : Première ligne comme en-tête.
217
+
218
+ if len(data) == 1:
219
+ # Si le segment n'a qu'une seule ligne, on utilise des en-têtes alphabétiques.
220
+ headers = [chr(ord('A') + j) for j in range(max_cols)]
221
+ df = pd.DataFrame(data, columns=headers)
222
+ else:
223
+ # Cas standard : première ligne = en-tête, reste = données.
224
+ headers = data[0]
225
+ data_rows = data[1:]
226
+
227
+ min_cols = min(len(headers), max_cols)
228
+
229
+ df = pd.DataFrame(data_rows, columns=headers[:min_cols])
230
+
231
+ df.columns = df.columns.astype(str)
232
+
233
+ return df
234
+
235
+ def _save_sub_table(self, df, output_dir, table_full_name):
236
+ """Sauvegarde le DataFrame exclusivement en XLSX."""
237
+
238
+ output_xlsx_path = os.path.join(output_dir, f"{table_full_name}.xlsx")
239
+ try:
240
+ df.to_excel(output_xlsx_path, index=False, engine='openpyxl')
241
+ except Exception as e:
242
+ self.warning(f"Impossible de sauvegarder la table '{table_full_name}' en format XLSX : {e}")
243
+
244
+ def _send_status_table(self):
245
+ domain = Domain([], metas=[
246
+ StringVariable("src_path"),
247
+ DiscreteVariable("status", values=["ok", "ko", "skipped"]),
248
+ StringVariable("details")
249
+ ])
250
+ status_table = Table.from_list(domain, self.processed_statuses)
251
+ self.Outputs.status_data.send(status_table)
252
+
253
+ def post_initialized(self):
254
+ pass
255
+
256
+
257
+ if __name__ == "__main__":
258
+ app = QApplication(sys.argv)
259
+ my_widget = OWExtractTablesDocxToCSV()
260
+ my_widget.show()
261
+ app.exec()
@@ -1,7 +1,7 @@
1
1
  import os
2
2
  import sys
3
3
  import Orange.data
4
- from AnyQt.QtWidgets import QPushButton, QApplication, QRadioButton, QComboBox,QCheckBox,QLineEdit
4
+ from AnyQt.QtWidgets import QPushButton, QApplication, QRadioButton, QComboBox
5
5
  from Orange.widgets import widget
6
6
  from Orange.widgets.utils.signals import Input, Output
7
7
  from Orange.widgets.settings import Setting
@@ -16,7 +16,6 @@ import easyocr
16
16
 
17
17
  from AnyQt.QtCore import QThread, pyqtSignal
18
18
  from AnyQt.QtWidgets import QApplication, QLabel, QSpinBox, QTextEdit, QPushButton
19
- from AnyQt import uic
20
19
 
21
20
  from Orange.widgets import widget
22
21
  from Orange.widgets.utils.signals import Input, Output