io4it 2.1.0.4__tar.gz → 2.1.0.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. io4it-2.1.0.5/PKG-INFO +7 -0
  2. io4it-2.1.0.5/io4it.egg-info/PKG-INFO +7 -0
  3. {io4it-2.1.0.4 → io4it-2.1.0.5}/io4it.egg-info/SOURCES.txt +15 -2
  4. {io4it-2.1.0.4 → io4it-2.1.0.5}/orangecontrib/IO4IT/ocr_function/word_converter.py +12 -12
  5. {io4it-2.1.0.4 → io4it-2.1.0.5}/orangecontrib/IO4IT/widgets/OWDeep_Search.py +1 -1
  6. io4it-2.1.0.5/orangecontrib/IO4IT/widgets/OWDoclingMarkdownizerSimple.py +281 -0
  7. {io4it-2.1.0.4 → io4it-2.1.0.5}/orangecontrib/IO4IT/widgets/OWExportMarkdown.py +1 -1
  8. io4it-2.1.0.5/orangecontrib/IO4IT/widgets/OWMarkdownLoader.py +112 -0
  9. {io4it-2.1.0.4 → io4it-2.1.0.5}/orangecontrib/IO4IT/widgets/OWMarkdownizer.py +2 -2
  10. io4it-2.1.0.5/orangecontrib/IO4IT/widgets/OWOfficeNormalizer.py +183 -0
  11. io4it-2.1.0.5/orangecontrib/IO4IT/widgets/OWPdfType.py +193 -0
  12. io4it-2.1.0.5/orangecontrib/IO4IT/widgets/OWProcessPoolExecutor.py +87 -0
  13. io4it-2.1.0.5/orangecontrib/IO4IT/widgets/designer/owdoclingmarkdownizersimple.ui +54 -0
  14. io4it-2.1.0.5/orangecontrib/IO4IT/widgets/designer/owmarkdownloader.ui +54 -0
  15. io4it-2.1.0.5/orangecontrib/IO4IT/widgets/designer/owofficenormalizer.ui +54 -0
  16. io4it-2.1.0.5/orangecontrib/IO4IT/widgets/designer/owpdftype.ui +54 -0
  17. io4it-2.1.0.5/orangecontrib/IO4IT/widgets/designer/owprocesspoolexecutor.ui +100 -0
  18. {io4it-2.1.0.4 → io4it-2.1.0.5}/orangecontrib/IO4IT/widgets/designer/wordpdf2docx.ui +16 -0
  19. io4it-2.1.0.5/orangecontrib/IO4IT/widgets/icons/check_pdf.png +0 -0
  20. io4it-2.1.0.5/orangecontrib/IO4IT/widgets/icons/load_md.png +0 -0
  21. io4it-2.1.0.5/orangecontrib/IO4IT/widgets/icons/md_old.png +0 -0
  22. io4it-2.1.0.5/orangecontrib/IO4IT/widgets/icons/office_normalizer.png +0 -0
  23. io4it-2.1.0.5/orangecontrib/IO4IT/widgets/icons/process_pool_executor.png +0 -0
  24. {io4it-2.1.0.4 → io4it-2.1.0.5}/setup.py +1 -1
  25. io4it-2.1.0.4/PKG-INFO +0 -33
  26. io4it-2.1.0.4/io4it.egg-info/PKG-INFO +0 -33
  27. io4it-2.1.0.4/orangecontrib/IO4IT/widgets/designer/chart.html +0 -281
  28. io4it-2.1.0.4/orangecontrib/IO4IT/widgets/designer/owvisualizationer.ui +0 -56
  29. {io4it-2.1.0.4 → io4it-2.1.0.5}/io4it.egg-info/dependency_links.txt +0 -0
  30. {io4it-2.1.0.4 → io4it-2.1.0.5}/io4it.egg-info/entry_points.txt +0 -0
  31. {io4it-2.1.0.4 → io4it-2.1.0.5}/io4it.egg-info/namespace_packages.txt +0 -0
  32. {io4it-2.1.0.4 → io4it-2.1.0.5}/io4it.egg-info/requires.txt +0 -0
  33. {io4it-2.1.0.4 → io4it-2.1.0.5}/io4it.egg-info/top_level.txt +0 -0
  34. {io4it-2.1.0.4 → io4it-2.1.0.5}/orangecontrib/IO4IT/__init__.py +0 -0
  35. {io4it-2.1.0.4 → io4it-2.1.0.5}/orangecontrib/IO4IT/ocr_function/__init__.py +0 -0
  36. {io4it-2.1.0.4 → io4it-2.1.0.5}/orangecontrib/IO4IT/utils/__init__.py +0 -0
  37. {io4it-2.1.0.4 → io4it-2.1.0.5}/orangecontrib/IO4IT/utils/mail.py +0 -0
  38. {io4it-2.1.0.4 → io4it-2.1.0.5}/orangecontrib/IO4IT/utils/offuscation_basique.py +0 -0
  39. {io4it-2.1.0.4 → io4it-2.1.0.5}/orangecontrib/IO4IT/utils/pool_exec_utils.py +0 -0
  40. {io4it-2.1.0.4 → io4it-2.1.0.5}/orangecontrib/IO4IT/utils/utils_md.py +0 -0
  41. {io4it-2.1.0.4 → io4it-2.1.0.5}/orangecontrib/IO4IT/widgets/OWChatGpt.py +0 -0
  42. {io4it-2.1.0.4 → io4it-2.1.0.5}/orangecontrib/IO4IT/widgets/OWS3Uploader.py +0 -0
  43. {io4it-2.1.0.4 → io4it-2.1.0.5}/orangecontrib/IO4IT/widgets/OWS3downloader.py +0 -0
  44. {io4it-2.1.0.4 → io4it-2.1.0.5}/orangecontrib/IO4IT/widgets/OWS3list.py +0 -0
  45. {io4it-2.1.0.4 → io4it-2.1.0.5}/orangecontrib/IO4IT/widgets/OWSpeechToText.py +0 -0
  46. {io4it-2.1.0.4 → io4it-2.1.0.5}/orangecontrib/IO4IT/widgets/OWmailLoader.py +0 -0
  47. {io4it-2.1.0.4 → io4it-2.1.0.5}/orangecontrib/IO4IT/widgets/OWmailSender.py +0 -0
  48. {io4it-2.1.0.4 → io4it-2.1.0.5}/orangecontrib/IO4IT/widgets/OWwordpdf2docx.py +0 -0
  49. {io4it-2.1.0.4 → io4it-2.1.0.5}/orangecontrib/IO4IT/widgets/__init__.py +0 -0
  50. {io4it-2.1.0.4 → io4it-2.1.0.5}/orangecontrib/IO4IT/widgets/designer/__init__.py +0 -0
  51. {io4it-2.1.0.4 → io4it-2.1.0.5}/orangecontrib/IO4IT/widgets/designer/nogui.ui +0 -0
  52. {io4it-2.1.0.4 → io4it-2.1.0.5}/orangecontrib/IO4IT/widgets/designer/ow_file_ext_selector.ui +0 -0
  53. {io4it-2.1.0.4 → io4it-2.1.0.5}/orangecontrib/IO4IT/widgets/designer/owchatgpt.ui +0 -0
  54. {io4it-2.1.0.4 → io4it-2.1.0.5}/orangecontrib/IO4IT/widgets/designer/owdeepsearch.ui +0 -0
  55. {io4it-2.1.0.4 → io4it-2.1.0.5}/orangecontrib/IO4IT/widgets/designer/owexportmarkdown.ui +0 -0
  56. {io4it-2.1.0.4 → io4it-2.1.0.5}/orangecontrib/IO4IT/widgets/designer/owmailloader.ui +0 -0
  57. {io4it-2.1.0.4 → io4it-2.1.0.5}/orangecontrib/IO4IT/widgets/designer/owmailsender.ui +0 -0
  58. {io4it-2.1.0.4 → io4it-2.1.0.5}/orangecontrib/IO4IT/widgets/designer/owmarkdownizer.ui +0 -0
  59. {io4it-2.1.0.4 → io4it-2.1.0.5}/orangecontrib/IO4IT/widgets/designer/owspeechtotext.ui +0 -0
  60. {io4it-2.1.0.4 → io4it-2.1.0.5}/orangecontrib/IO4IT/widgets/icons/__init__.py +0 -0
  61. {io4it-2.1.0.4 → io4it-2.1.0.5}/orangecontrib/IO4IT/widgets/icons/chatgpt.png +0 -0
  62. {io4it-2.1.0.4 → io4it-2.1.0.5}/orangecontrib/IO4IT/widgets/icons/deepsearch.svg +0 -0
  63. {io4it-2.1.0.4 → io4it-2.1.0.5}/orangecontrib/IO4IT/widgets/icons/download.png +0 -0
  64. {io4it-2.1.0.4 → io4it-2.1.0.5}/orangecontrib/IO4IT/widgets/icons/export_md.png +0 -0
  65. {io4it-2.1.0.4 → io4it-2.1.0.5}/orangecontrib/IO4IT/widgets/icons/file_extensor.png +0 -0
  66. {io4it-2.1.0.4 → io4it-2.1.0.5}/orangecontrib/IO4IT/widgets/icons/list_aws.png +0 -0
  67. {io4it-2.1.0.4 → io4it-2.1.0.5}/orangecontrib/IO4IT/widgets/icons/mail_loader.png +0 -0
  68. {io4it-2.1.0.4 → io4it-2.1.0.5}/orangecontrib/IO4IT/widgets/icons/mail_writer.png +0 -0
  69. {io4it-2.1.0.4 → io4it-2.1.0.5}/orangecontrib/IO4IT/widgets/icons/md.png +0 -0
  70. {io4it-2.1.0.4 → io4it-2.1.0.5}/orangecontrib/IO4IT/widgets/icons/speech_to_text.png +0 -0
  71. {io4it-2.1.0.4 → io4it-2.1.0.5}/orangecontrib/IO4IT/widgets/icons/upload.png +0 -0
  72. {io4it-2.1.0.4 → io4it-2.1.0.5}/orangecontrib/IO4IT/widgets/icons/visualizationer.png +0 -0
  73. {io4it-2.1.0.4 → io4it-2.1.0.5}/orangecontrib/IO4IT/widgets/icons/wordpdf2docx.png +0 -0
  74. {io4it-2.1.0.4 → io4it-2.1.0.5}/orangecontrib/IO4IT/widgets/icons_dev/__init__.py +0 -0
  75. {io4it-2.1.0.4 → io4it-2.1.0.5}/orangecontrib/__init__.py +0 -0
  76. {io4it-2.1.0.4 → io4it-2.1.0.5}/setup.cfg +0 -0
io4it-2.1.0.5/PKG-INFO ADDED
@@ -0,0 +1,7 @@
1
+ Metadata-Version: 2.1
2
+ Name: io4it
3
+ Version: 2.1.0.5
4
+ Home-page:
5
+ Author:
6
+ Author-email:
7
+ Keywords: orange3 add-on
@@ -0,0 +1,7 @@
1
+ Metadata-Version: 2.1
2
+ Name: io4it
3
+ Version: 2.1.0.5
4
+ Home-page:
5
+ Author:
6
+ Author-email:
7
+ Keywords: orange3 add-on
@@ -17,8 +17,13 @@ orangecontrib/IO4IT/utils/pool_exec_utils.py
17
17
  orangecontrib/IO4IT/utils/utils_md.py
18
18
  orangecontrib/IO4IT/widgets/OWChatGpt.py
19
19
  orangecontrib/IO4IT/widgets/OWDeep_Search.py
20
+ orangecontrib/IO4IT/widgets/OWDoclingMarkdownizerSimple.py
20
21
  orangecontrib/IO4IT/widgets/OWExportMarkdown.py
22
+ orangecontrib/IO4IT/widgets/OWMarkdownLoader.py
21
23
  orangecontrib/IO4IT/widgets/OWMarkdownizer.py
24
+ orangecontrib/IO4IT/widgets/OWOfficeNormalizer.py
25
+ orangecontrib/IO4IT/widgets/OWPdfType.py
26
+ orangecontrib/IO4IT/widgets/OWProcessPoolExecutor.py
22
27
  orangecontrib/IO4IT/widgets/OWS3Uploader.py
23
28
  orangecontrib/IO4IT/widgets/OWS3downloader.py
24
29
  orangecontrib/IO4IT/widgets/OWS3list.py
@@ -28,28 +33,36 @@ orangecontrib/IO4IT/widgets/OWmailSender.py
28
33
  orangecontrib/IO4IT/widgets/OWwordpdf2docx.py
29
34
  orangecontrib/IO4IT/widgets/__init__.py
30
35
  orangecontrib/IO4IT/widgets/designer/__init__.py
31
- orangecontrib/IO4IT/widgets/designer/chart.html
32
36
  orangecontrib/IO4IT/widgets/designer/nogui.ui
33
37
  orangecontrib/IO4IT/widgets/designer/ow_file_ext_selector.ui
34
38
  orangecontrib/IO4IT/widgets/designer/owchatgpt.ui
35
39
  orangecontrib/IO4IT/widgets/designer/owdeepsearch.ui
40
+ orangecontrib/IO4IT/widgets/designer/owdoclingmarkdownizersimple.ui
36
41
  orangecontrib/IO4IT/widgets/designer/owexportmarkdown.ui
37
42
  orangecontrib/IO4IT/widgets/designer/owmailloader.ui
38
43
  orangecontrib/IO4IT/widgets/designer/owmailsender.ui
39
44
  orangecontrib/IO4IT/widgets/designer/owmarkdownizer.ui
45
+ orangecontrib/IO4IT/widgets/designer/owmarkdownloader.ui
46
+ orangecontrib/IO4IT/widgets/designer/owofficenormalizer.ui
47
+ orangecontrib/IO4IT/widgets/designer/owpdftype.ui
48
+ orangecontrib/IO4IT/widgets/designer/owprocesspoolexecutor.ui
40
49
  orangecontrib/IO4IT/widgets/designer/owspeechtotext.ui
41
- orangecontrib/IO4IT/widgets/designer/owvisualizationer.ui
42
50
  orangecontrib/IO4IT/widgets/designer/wordpdf2docx.ui
43
51
  orangecontrib/IO4IT/widgets/icons/__init__.py
44
52
  orangecontrib/IO4IT/widgets/icons/chatgpt.png
53
+ orangecontrib/IO4IT/widgets/icons/check_pdf.png
45
54
  orangecontrib/IO4IT/widgets/icons/deepsearch.svg
46
55
  orangecontrib/IO4IT/widgets/icons/download.png
47
56
  orangecontrib/IO4IT/widgets/icons/export_md.png
48
57
  orangecontrib/IO4IT/widgets/icons/file_extensor.png
49
58
  orangecontrib/IO4IT/widgets/icons/list_aws.png
59
+ orangecontrib/IO4IT/widgets/icons/load_md.png
50
60
  orangecontrib/IO4IT/widgets/icons/mail_loader.png
51
61
  orangecontrib/IO4IT/widgets/icons/mail_writer.png
52
62
  orangecontrib/IO4IT/widgets/icons/md.png
63
+ orangecontrib/IO4IT/widgets/icons/md_old.png
64
+ orangecontrib/IO4IT/widgets/icons/office_normalizer.png
65
+ orangecontrib/IO4IT/widgets/icons/process_pool_executor.png
53
66
  orangecontrib/IO4IT/widgets/icons/speech_to_text.png
54
67
  orangecontrib/IO4IT/widgets/icons/upload.png
55
68
  orangecontrib/IO4IT/widgets/icons/visualizationer.png
@@ -16,7 +16,7 @@ def enable_long_path(path):
16
16
  """Simplifie la gestion des chemins longs sous Windows."""
17
17
  return pathlib.Path(r"\\?\\" + str(path))
18
18
 
19
- def convert_pdf_structure(input_dir: str, output_dir: str,ignore_exsting_out_put=False,progress_callback=None):
19
+ def convert_pdf_structure(input_dir: str, output_dir: str,ignore_exsting_out_put=False,forceBasicConvertion=False,progress_callback=None):
20
20
  """
21
21
  return a string with log in case of error
22
22
  Recursively lists all .pdf and .PDF files in the input directory,
@@ -60,7 +60,7 @@ def convert_pdf_structure(input_dir: str, output_dir: str,ignore_exsting_out_put
60
60
  continue
61
61
 
62
62
 
63
- if 0!= convert_pdf_with_temp(str(pdf_file),str(new_file_path)):#convert_pdf_with_temp #convert_pdf_to_docx
63
+ if 0!= convert_pdf_with_temp(str(pdf_file),str(new_file_path),forceBasicConvertion):#convert_pdf_with_temp #convert_pdf_to_docx
64
64
  if error_log!="":
65
65
  error_log+="\n"
66
66
  error_log+="error -> "+str(pdf_file)
@@ -259,7 +259,7 @@ def write_two_strings_to_file(file_path: str,string1: str, string2: str):
259
259
  return 0
260
260
 
261
261
 
262
- def convert_pdf_with_temp(temp_pdf, output_path):
262
+ def convert_pdf_with_temp(temp_pdf, output_path,forceBasicConvertion=False):
263
263
  """
264
264
  Copie le PDF source dans un dossier temporaire, le convertit en DOCX,
265
265
  puis copie le fichier résultant vers le chemin de sortie spécifié,
@@ -294,15 +294,15 @@ def convert_pdf_with_temp(temp_pdf, output_path):
294
294
  # Copie du fichier source vers le dossier temporaire
295
295
  shutil.copy2(pdf_path, temp_pdf)
296
296
  wait_for_file_access(temp_pdf)
297
-
298
- if is_pdf_a4(temp_pdf)==False:
299
- temp_pdf2 = os.path.join(dest_dir, "input_totoA4.pdf")
300
- if 0!=convert_pdf_to_a4(temp_pdf,temp_pdf2):
301
- print("erreur au resize du pdf")
302
- return 1
303
- temp_pdf=temp_pdf2
304
- wait_for_file_access(temp_pdf)
305
- time.sleep(1)
297
+ if forceBasicConvertion==False:
298
+ if is_pdf_a4(temp_pdf)==False:
299
+ temp_pdf2 = os.path.join(dest_dir, "input_totoA4.pdf")
300
+ if 0!=convert_pdf_to_a4(temp_pdf,temp_pdf2):
301
+ print("erreur au resize du pdf")
302
+ return 1
303
+ temp_pdf=temp_pdf2
304
+ wait_for_file_access(temp_pdf)
305
+ time.sleep(1)
306
306
  result=0
307
307
  # Conversion du PDF en DOCX
308
308
  for _ in range(4):
@@ -30,7 +30,7 @@ class OWDeep_Search(widget.OWWidget):
30
30
  category = "AAIT - LLM INTEGRATION"
31
31
  icon = "icons/deepsearch.svg"
32
32
  if "site-packages/Orange/widgets" in os.path.dirname(os.path.abspath(__file__)).replace("\\", "/"):
33
- icon = "icons_dev/owqueryllm.svg"
33
+ icon = "icons_dev/deepsearch.svg"
34
34
  gui = os.path.join(os.path.dirname(os.path.abspath(__file__)), "designer/owdeepsearch.ui")
35
35
  want_control_area = True
36
36
  priority = 1089
@@ -0,0 +1,281 @@
1
+ import os, time, datetime
2
+ from pathlib import Path
3
+ from concurrent.futures import as_completed
4
+
5
+ from AnyQt.QtWidgets import QLabel, QTextEdit
6
+ from AnyQt.QtCore import pyqtSignal
7
+ from Orange.widgets import widget
8
+ from Orange.widgets.utils.signals import Input, Output
9
+ from Orange.data import Domain, StringVariable, Table, DiscreteVariable
10
+
11
+ # --- Docling (unique lib utilisée pour la conversion) ---
12
+ from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend
13
+ from docling.datamodel.base_models import InputFormat
14
+ from docling.document_converter import (
15
+ DocumentConverter,
16
+ PdfFormatOption,
17
+ WordFormatOption,
18
+ )
19
+ from docling.pipeline.simple_pipeline import SimplePipeline
20
+ from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline
21
+
22
+ # --- Orange contrib Imports ---
23
+ if "site-packages/Orange/widgets" in os.path.dirname(os.path.abspath(__file__)).replace("\\", "/"):
24
+ from Orange.widgets.orangecontrib.AAIT.utils.thread_management import Thread
25
+ from Orange.widgets.orangecontrib.AAIT.utils.import_uic import uic
26
+ else:
27
+ from orangecontrib.AAIT.utils.thread_management import Thread
28
+ from orangecontrib.AAIT.utils.import_uic import uic
29
+
30
+
31
+ # --------- worker stateless : convertit 1 fichier avec Docling ----------
32
+ def _convert_one_file(file_path_str: str):
33
+ """Convertit un fichier (PDF/DOCX/PPTX) en Markdown via Docling.
34
+ Écrit <parent>/a_md/<stem>.md et renvoie [input_path, output_md, status, duration_sec, message].
35
+ Pensé pour être appelé soit directement, soit via ProcessPoolExecutor.
36
+ """
37
+ t0 = time.time()
38
+ src = Path(file_path_str)
39
+ out_dir = src.parent / "conversion_markdown"
40
+ out_dir.mkdir(parents=True, exist_ok=True)
41
+ out_md = out_dir / f"{src.stem}.md"
42
+
43
+ # Si déjà converti : on ne refait pas
44
+ if out_md.exists():
45
+ status = "ok"
46
+ message = "existant: deja converti"
47
+ duration = time.time() - t0
48
+ return [str(src), str(out_md), status, f"{duration:.2f}", message]
49
+
50
+ try:
51
+ # Docling minimal config (inspiré du snippet)
52
+ doc_converter = DocumentConverter(
53
+ allowed_formats=[InputFormat.PDF, InputFormat.DOCX, InputFormat.PPTX],
54
+ format_options={
55
+ InputFormat.PDF: PdfFormatOption(
56
+ pipeline_cls=StandardPdfPipeline,
57
+ backend=PyPdfiumDocumentBackend
58
+ ),
59
+ InputFormat.DOCX: WordFormatOption(
60
+ pipeline_cls=SimplePipeline
61
+ ),
62
+ # PPTX: pas d'option spécifique; géré par défaut
63
+ },
64
+ )
65
+ doc = doc_converter.convert(str(src)).document
66
+ md = doc.export_to_markdown()
67
+ out_md.write_text(md, encoding="utf-8")
68
+ status, message = "ok", ""
69
+ except Exception as e:
70
+ status = "nok"
71
+ message = f"{type(e).__name__}: {e}"
72
+ # on écrit quand même un trace .md
73
+ try:
74
+ out_md.write_text(f"[Erreur conversion] {message}", encoding="utf-8")
75
+ except Exception:
76
+ pass
77
+
78
+ duration = time.time() - t0
79
+ return [str(src), str(out_md), status, f"{duration:.2f}", message]
80
+
81
+
82
class OWDoclingMarkdownizerSimple(widget.OWWidget):
    """Orange widget converting PDF/DOCX/PPTX files to Markdown via Docling.

    Inputs:
        Files Table: table with a ``file_path`` column listing the files.
        ProcessPoolExecutor: optional executor; when connected, files are
            submitted to it, otherwise they are converted one by one.

    Outputs:
        Markdown Table: one row per file with
            ``input_path, output_md, status, duration_sec, message``.
        Status Table: per-file status, re-sent after every status change.
    """

    name = "Docling To Markdown"
    description = "Convert DOCX/PPTX/PDF to Markdown via Docling"
    icon = "icons/md.png"
    if "site-packages/Orange/widgets" in os.path.dirname(os.path.abspath(__file__)).replace("\\", "/"):
        icon = "icons_dev/md.png"
    gui = os.path.join(os.path.dirname(os.path.abspath(__file__)), "designer/owdoclingmarkdownizersimple.ui")
    want_main_area = False
    want_control_area = True
    priority = 1004

    # Carries [path, status, message] from the worker thread to the widget.
    status_update_signal = pyqtSignal(list)

    class Inputs:
        data = Input("Files Table", Table)
        executor = Input("ProcessPoolExecutor", object)

    class Outputs:
        data = Output("Markdown Table", Table)
        status_data = Output("Status Table", Table)

    def __init__(self):
        super().__init__()
        self.data = None
        self.external_executor = None
        self.thread = None
        self.result = None
        self.exec_info = QLabel("Exécution: séquentielle (aucun executor connecté).", self)
        # Maps file path -> [status, message] for the status table.
        self.processed_statuses = {}

        uic.loadUi(self.gui, self)

        # Connect exactly once. Connecting on every run would stack duplicate
        # connections, so each update would be handled (and the status table
        # re-sent) once per previous run.
        self.status_update_signal.connect(self.handle_status_update)

        self.error("")
        self.warning("")

    @Inputs.data
    def set_data(self, in_data: Table | None):
        """Store the input table and start a conversion when it is usable."""
        self.data = in_data
        self.error("")
        self.warning("")

        if not in_data:
            self.Outputs.data.send(None)
            self.Outputs.status_data.send(None)
            return

        try:
            _ = in_data.domain["file_path"]
        except Exception:
            self.error('Colonne "file_path" (Text) requise.')
            self.Outputs.data.send(None)
            self.Outputs.status_data.send(None)
            return

        self._convert_now()

    @Inputs.executor
    def set_executor(self, executor_obj):
        """Remember the optional external executor and update the info label."""
        self.external_executor = executor_obj
        if executor_obj is not None:
            self.exec_info.setText("Exécution: via ProcessPoolExecutor externe (parallèle).")
        else:
            self.exec_info.setText("Exécution: séquentielle (aucun executor connecté).")

    def _convert_now(self):
        """Collect the supported files and convert them in a worker thread."""
        if self.thread is not None and self.thread.isRunning():
            self.thread.safe_quit()

        if not self.data:
            self.Outputs.data.send(None)
            self.Outputs.status_data.send(None)
            return

        # Read the paths before starting the progress bar so that an early
        # exit never leaves the bar initialised without being finished.
        try:
            paths = [Path(str(x)) for x in self.data.get_column("file_path")]
        except Exception as e:
            self.error(f"Lecture de 'file_path' impossible: {e}")
            self.Outputs.data.send(None)
            self.Outputs.status_data.send(None)
            return

        # Keep only existing files with a supported extension.
        files = [p for p in paths if p.exists() and p.suffix.lower() in (".pdf", ".docx", ".pptx")]
        if not files:
            self.Outputs.data.send(None)
            self.Outputs.status_data.send(None)
            return

        self.progressBarInit()

        # Every file starts as "pending"; publish that initial state.
        self.processed_statuses = {str(p): ["pending", ""] for p in files}
        self.update_status_output()

        self.thread = Thread(self._run_conversion, files)
        self.thread.progress.connect(self.handle_progress)
        self.thread.result.connect(self.handle_result)
        self.thread.finish.connect(self.handle_finish)
        self.thread.start()

    def update_status_output(self):
        """Build the status table from ``processed_statuses`` and send it."""
        status_domain = Domain(
            [],  # No attributes: every column is a meta.
            metas=[
                StringVariable("input_path"),
                DiscreteVariable("status", values=["pending", "in_progress", "ok", "nok"]),
                StringVariable("message"),
            ],
        )

        # One flat row per file, in domain order: input_path, status, message.
        status_rows = [
            [path_str, status, message]
            for path_str, (status, message) in self.processed_statuses.items()
        ]

        status_table = Table.from_list(status_domain, status_rows)
        self.Outputs.status_data.send(status_table)

    def _run_conversion(self, files, progress_callback):
        """Convert *files* sequentially or through the external executor.

        Args:
            files: non-empty list of paths to convert.
            progress_callback: called with the completion percentage.

        Returns:
            Table with one row per file:
            ``input_path, output_md, status, duration_sec, message``.
        """
        results = []
        total = len(files)

        if self.external_executor is None:
            # --- Simple sequential mode ---
            for i, p in enumerate(files):
                path_str = str(p)
                self.status_update_signal.emit([path_str, "in_progress", ""])

                row = _convert_one_file(path_str)
                results.append(row)

                # Publish the outcome for this file immediately.
                self.status_update_signal.emit([row[0], row[2], row[4]])

                progress_callback((i + 1) / total * 100)
        else:
            # --- Parallel mode through the external executor ---
            fut_map = {self.external_executor.submit(_convert_one_file, str(p)): str(p) for p in files}

            for i, fut in enumerate(as_completed(fut_map), 1):
                file_path_str = fut_map[fut]
                # Kept for parity with the sequential mode; the final status
                # for this file is emitted right after.
                self.status_update_signal.emit([file_path_str, "in_progress", ""])

                try:
                    row = fut.result()
                    results.append(row)
                    self.status_update_signal.emit([row[0], row[2], row[4]])
                except Exception as e:
                    # The worker itself failed: report the output path that
                    # _convert_one_file would have used.
                    src = Path(file_path_str)
                    expected_md = src.parent / "conversion_markdown" / f"{src.stem}.md"
                    row = [file_path_str, str(expected_md), "nok", "0.00", f"FutureError: {e}"]
                    results.append(row)
                    self.status_update_signal.emit([row[0], "nok", f"FutureError: {e}"])

                progress_callback(i / total * 100)

        # The final result is the table built from all rows.
        domain = Domain([], metas=[
            StringVariable("input_path"),
            StringVariable("output_md"),
            StringVariable("status"),
            StringVariable("duration_sec"),
            StringVariable("message"),
        ])
        final_table = Table.from_list(domain, results)
        return final_table

    def handle_progress(self, value: float) -> None:
        """Forward the worker progress (percentage) to the progress bar."""
        self.progressBarSet(value)

    def handle_status_update(self, status_info: list):
        """Record one ``[path, status, message]`` update and re-send the table."""
        path_str, status, message = status_info
        self.processed_statuses[path_str] = [status, message]
        self.update_status_output()

    def handle_result(self, result: Table):
        """Store and send the final table; send None if sending fails."""
        try:
            self.result = result
            self.Outputs.data.send(result)
        except Exception as e:
            print("An error occurred when sending out_data:", e)
            self.Outputs.data.send(None)

    def handle_finish(self):
        """Close the progress bar once the worker thread has finished."""
        self.progressBarFinished()
@@ -29,7 +29,7 @@ class OWExportMarkdown(widget.OWWidget):
29
29
  icon = "icons/export_md.png"
30
30
  if "site-packages/Orange/widgets" in os.path.dirname(os.path.abspath(__file__)).replace("\\", "/"):
31
31
  icon = "icons_dev/export_md.png"
32
- want_control_area = True
32
+ want_control_area = False
33
33
  priority = 9999
34
34
  category = "AAIT - TOOLBOX"
35
35
 
@@ -0,0 +1,112 @@
1
+ # ow_markdown_loader.py
2
+ import os
3
+ from pathlib import Path
4
+ import numpy as np
5
+
6
+ from AnyQt.QtWidgets import QLabel, QCheckBox
7
+ from Orange.widgets import widget
8
+ from Orange.widgets.utils.signals import Input, Output
9
+ from Orange.data import Domain, StringVariable, Table
10
+
11
+ if "site-packages/Orange/widgets" in os.path.dirname(os.path.abspath(__file__)).replace("\\", "/"):
12
+ from Orange.widgets.orangecontrib.IO4IT.utils import utils_md
13
+ else:
14
+ from orangecontrib.IO4IT.utils import utils_md
15
+
16
+
17
class OWMarkdownLoader(widget.OWWidget):
    """Orange widget loading the Markdown files found in a folder.

    Inputs:
        Data: table whose ``input_dir`` column gives the folder to scan
            (only the first row is used).

    Outputs:
        Markdown Files: table with ``file_path`` and ``content`` metas.
        Data: the input table, passed through unchanged.
    """

    name = "Markdown Loader"
    description = "Charge tous les fichiers Markdown d’un dossier (récursif)"
    icon = "icons/load_md.png"
    if "site-packages/Orange/widgets" in os.path.dirname(os.path.abspath(__file__)).replace("\\", "/"):
        icon = "icons_dev/load_md.png"
    gui = os.path.join(os.path.dirname(os.path.abspath(__file__)), "designer/owmarkdownloader.ui")
    want_control_area = False
    priority = 1001

    class Inputs:
        data = Input("Data", Table)

    class Outputs:
        md_files = Output("Markdown Files", Table)  # -> (file_path, content)
        data = Output("Data", Table)  # passthrough of the input

    def __init__(self):
        super().__init__()
        self.in_data = None
        self.input_dir = None
        self.recursive = True

        # Minimal UI: a label showing the folder and a "recursive" checkbox.
        self.label = QLabel(self)
        self.checkbox = QCheckBox("Recherche récursive", self)
        self.checkbox.setChecked(True)
        self.checkbox.stateChanged.connect(self._on_recursive_toggled)

        self.layout().addWidget(self.label)
        self.layout().addWidget(self.checkbox)
        self.warning("")

    def _on_recursive_toggled(self, _state):
        """Update the recursive flag and reload if a folder is known."""
        self.recursive = self.checkbox.isChecked()
        if self.input_dir:
            self._produce()

    @Inputs.data
    def set_data(self, in_data: Table | None):
        """Pass the input through and load the Markdown files it points to."""
        self.in_data = in_data
        self.warning("")

        # Forget the folder of the previous input. Otherwise, after the input
        # is removed or becomes invalid, toggling the checkbox would reload
        # files from a folder that is no longer requested.
        self.input_dir = None
        self.label.setText("")

        # Always emit the passthrough (even when None).
        self.Outputs.data.send(in_data)

        if not in_data:
            # Nothing to load: emit an empty table on "Markdown Files".
            self.Outputs.md_files.send(self._empty_md_table())
            return

        # Look up the 'input_dir' column and take the first folder.
        try:
            _ = in_data.domain["input_dir"]
            input_dir = str(in_data.get_column("input_dir")[0])
        except Exception:
            self.warning('"input_dir" (Text) est requis en entrée')
            self.Outputs.md_files.send(self._empty_md_table())
            return

        self.input_dir = input_dir
        self.label.setText(f"Dossier : {self.input_dir}")
        self._produce()

    @staticmethod
    def _md_domain() -> Domain:
        """Return the domain shared by every "Markdown Files" table."""
        return Domain([], metas=[StringVariable("file_path"), StringVariable("content")])

    def _empty_md_table(self) -> Table:
        """Return a "Markdown Files" table with zero rows."""
        X = np.empty((0, 0))
        metas = np.empty((0, 2), dtype=object)
        return Table.from_numpy(self._md_domain(), X, metas=metas)

    def _produce(self):
        """Scan ``input_dir`` for ``*.md`` files and send them as a table."""
        if not self.input_dir:
            # No folder known: nothing to scan.
            self.Outputs.md_files.send(self._empty_md_table())
            return

        base = Path(self.input_dir)
        patterns = ["*.md"]
        paths = []

        for patt in patterns:
            if self.recursive:
                paths.extend(base.rglob(patt))
            else:
                paths.extend(base.glob(patt))

        md_rows = []
        for p in sorted(set(paths)):
            try:
                md_rows.append([str(p), utils_md.try_read_text(p)])
            except Exception:
                # Unreadable file: keep the row with an empty content.
                md_rows.append([str(p), ""])

        if not md_rows:
            self.Outputs.md_files.send(self._empty_md_table())
            return

        # Build the table for "Markdown Files".
        X = np.empty((len(md_rows), 0))
        metas = np.array(md_rows, dtype=object)
        md_table = Table.from_numpy(self._md_domain(), X, metas=metas)

        self.Outputs.md_files.send(md_table)
        # The passthrough was already sent in set_data; it is not re-sent here.
@@ -522,9 +522,9 @@ class MarkdownConversionThread(QThread):
522
522
  class FileProcessorApp(widget.OWWidget):
523
523
  name = "Markdownizer"
524
524
  description = "Convert PDFs, DOCX, PPTX to Markdown (texte seul & word only)"
525
- icon = "icons/md.png"
525
+ icon = "icons/md_old.png"
526
526
  if "site-packages/Orange/widgets" in os.path.dirname(os.path.abspath(__file__)).replace("\\", "/"):
527
- icon = "icons_dev/md.png"
527
+ icon = "icons_dev/md_old.png"
528
528
  want_control_area = False
529
529
  priority = 1001
530
530
  gui = os.path.join(os.path.dirname(os.path.abspath(__file__)), "designer/owmarkdownizer.ui")