io4it 2.1.0.4__tar.gz → 2.1.0.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. io4it-2.1.0.6/PKG-INFO +7 -0
  2. io4it-2.1.0.6/io4it.egg-info/PKG-INFO +7 -0
  3. {io4it-2.1.0.4 → io4it-2.1.0.6}/io4it.egg-info/SOURCES.txt +16 -2
  4. {io4it-2.1.0.4 → io4it-2.1.0.6}/orangecontrib/IO4IT/ocr_function/word_converter.py +12 -12
  5. {io4it-2.1.0.4 → io4it-2.1.0.6}/orangecontrib/IO4IT/widgets/OWDeep_Search.py +1 -1
  6. io4it-2.1.0.6/orangecontrib/IO4IT/widgets/OWDoclingMarkdownizerSimple.py +280 -0
  7. {io4it-2.1.0.4 → io4it-2.1.0.6}/orangecontrib/IO4IT/widgets/OWExportMarkdown.py +1 -1
  8. io4it-2.1.0.6/orangecontrib/IO4IT/widgets/OWMarkdownLoader.py +114 -0
  9. {io4it-2.1.0.4 → io4it-2.1.0.6}/orangecontrib/IO4IT/widgets/OWMarkdownizer.py +2 -2
  10. io4it-2.1.0.6/orangecontrib/IO4IT/widgets/OWOfficeNormalizer.py +183 -0
  11. io4it-2.1.0.6/orangecontrib/IO4IT/widgets/OWPdfType.py +193 -0
  12. io4it-2.1.0.6/orangecontrib/IO4IT/widgets/OWProcessPoolExecutor.py +87 -0
  13. io4it-2.1.0.6/orangecontrib/IO4IT/widgets/designer/owdoclingmarkdownizersimple.ui +54 -0
  14. io4it-2.1.0.6/orangecontrib/IO4IT/widgets/designer/owmarkdownloader.ui +54 -0
  15. io4it-2.1.0.6/orangecontrib/IO4IT/widgets/designer/owofficenormalizer.ui +54 -0
  16. io4it-2.1.0.6/orangecontrib/IO4IT/widgets/designer/owpdftype.ui +54 -0
  17. io4it-2.1.0.6/orangecontrib/IO4IT/widgets/designer/owprocesspoolexecutor.ui +100 -0
  18. {io4it-2.1.0.4 → io4it-2.1.0.6}/orangecontrib/IO4IT/widgets/designer/wordpdf2docx.ui +16 -0
  19. io4it-2.1.0.6/orangecontrib/IO4IT/widgets/icons/check_pdf.png +0 -0
  20. io4it-2.1.0.6/orangecontrib/IO4IT/widgets/icons/dep_md_old.png +0 -0
  21. io4it-2.1.0.6/orangecontrib/IO4IT/widgets/icons/load_md.png +0 -0
  22. io4it-2.1.0.6/orangecontrib/IO4IT/widgets/icons/office_normalizer.png +0 -0
  23. io4it-2.1.0.6/orangecontrib/IO4IT/widgets/icons/process_pool_executor.png +0 -0
  24. {io4it-2.1.0.4 → io4it-2.1.0.6}/setup.py +1 -1
  25. io4it-2.1.0.4/PKG-INFO +0 -33
  26. io4it-2.1.0.4/io4it.egg-info/PKG-INFO +0 -33
  27. io4it-2.1.0.4/orangecontrib/IO4IT/widgets/designer/chart.html +0 -281
  28. io4it-2.1.0.4/orangecontrib/IO4IT/widgets/designer/owvisualizationer.ui +0 -56
  29. {io4it-2.1.0.4 → io4it-2.1.0.6}/io4it.egg-info/dependency_links.txt +0 -0
  30. {io4it-2.1.0.4 → io4it-2.1.0.6}/io4it.egg-info/entry_points.txt +0 -0
  31. {io4it-2.1.0.4 → io4it-2.1.0.6}/io4it.egg-info/namespace_packages.txt +0 -0
  32. {io4it-2.1.0.4 → io4it-2.1.0.6}/io4it.egg-info/requires.txt +0 -0
  33. {io4it-2.1.0.4 → io4it-2.1.0.6}/io4it.egg-info/top_level.txt +0 -0
  34. {io4it-2.1.0.4 → io4it-2.1.0.6}/orangecontrib/IO4IT/__init__.py +0 -0
  35. {io4it-2.1.0.4 → io4it-2.1.0.6}/orangecontrib/IO4IT/ocr_function/__init__.py +0 -0
  36. {io4it-2.1.0.4 → io4it-2.1.0.6}/orangecontrib/IO4IT/utils/__init__.py +0 -0
  37. {io4it-2.1.0.4 → io4it-2.1.0.6}/orangecontrib/IO4IT/utils/mail.py +0 -0
  38. {io4it-2.1.0.4 → io4it-2.1.0.6}/orangecontrib/IO4IT/utils/offuscation_basique.py +0 -0
  39. {io4it-2.1.0.4 → io4it-2.1.0.6}/orangecontrib/IO4IT/utils/pool_exec_utils.py +0 -0
  40. {io4it-2.1.0.4 → io4it-2.1.0.6}/orangecontrib/IO4IT/utils/utils_md.py +0 -0
  41. {io4it-2.1.0.4 → io4it-2.1.0.6}/orangecontrib/IO4IT/widgets/OWChatGpt.py +0 -0
  42. {io4it-2.1.0.4 → io4it-2.1.0.6}/orangecontrib/IO4IT/widgets/OWS3Uploader.py +0 -0
  43. {io4it-2.1.0.4 → io4it-2.1.0.6}/orangecontrib/IO4IT/widgets/OWS3downloader.py +0 -0
  44. {io4it-2.1.0.4 → io4it-2.1.0.6}/orangecontrib/IO4IT/widgets/OWS3list.py +0 -0
  45. {io4it-2.1.0.4 → io4it-2.1.0.6}/orangecontrib/IO4IT/widgets/OWSpeechToText.py +0 -0
  46. {io4it-2.1.0.4 → io4it-2.1.0.6}/orangecontrib/IO4IT/widgets/OWmailLoader.py +0 -0
  47. {io4it-2.1.0.4 → io4it-2.1.0.6}/orangecontrib/IO4IT/widgets/OWmailSender.py +0 -0
  48. {io4it-2.1.0.4 → io4it-2.1.0.6}/orangecontrib/IO4IT/widgets/OWwordpdf2docx.py +0 -0
  49. {io4it-2.1.0.4 → io4it-2.1.0.6}/orangecontrib/IO4IT/widgets/__init__.py +0 -0
  50. {io4it-2.1.0.4 → io4it-2.1.0.6}/orangecontrib/IO4IT/widgets/designer/__init__.py +0 -0
  51. {io4it-2.1.0.4 → io4it-2.1.0.6}/orangecontrib/IO4IT/widgets/designer/nogui.ui +0 -0
  52. {io4it-2.1.0.4 → io4it-2.1.0.6}/orangecontrib/IO4IT/widgets/designer/ow_file_ext_selector.ui +0 -0
  53. {io4it-2.1.0.4 → io4it-2.1.0.6}/orangecontrib/IO4IT/widgets/designer/owchatgpt.ui +0 -0
  54. {io4it-2.1.0.4 → io4it-2.1.0.6}/orangecontrib/IO4IT/widgets/designer/owdeepsearch.ui +0 -0
  55. {io4it-2.1.0.4 → io4it-2.1.0.6}/orangecontrib/IO4IT/widgets/designer/owexportmarkdown.ui +0 -0
  56. {io4it-2.1.0.4 → io4it-2.1.0.6}/orangecontrib/IO4IT/widgets/designer/owmailloader.ui +0 -0
  57. {io4it-2.1.0.4 → io4it-2.1.0.6}/orangecontrib/IO4IT/widgets/designer/owmailsender.ui +0 -0
  58. {io4it-2.1.0.4 → io4it-2.1.0.6}/orangecontrib/IO4IT/widgets/designer/owmarkdownizer.ui +0 -0
  59. {io4it-2.1.0.4 → io4it-2.1.0.6}/orangecontrib/IO4IT/widgets/designer/owspeechtotext.ui +0 -0
  60. {io4it-2.1.0.4 → io4it-2.1.0.6}/orangecontrib/IO4IT/widgets/icons/__init__.py +0 -0
  61. {io4it-2.1.0.4 → io4it-2.1.0.6}/orangecontrib/IO4IT/widgets/icons/chatgpt.png +0 -0
  62. {io4it-2.1.0.4 → io4it-2.1.0.6}/orangecontrib/IO4IT/widgets/icons/deepsearch.svg +0 -0
  63. {io4it-2.1.0.4 → io4it-2.1.0.6}/orangecontrib/IO4IT/widgets/icons/download.png +0 -0
  64. {io4it-2.1.0.4 → io4it-2.1.0.6}/orangecontrib/IO4IT/widgets/icons/export_md.png +0 -0
  65. {io4it-2.1.0.4 → io4it-2.1.0.6}/orangecontrib/IO4IT/widgets/icons/file_extensor.png +0 -0
  66. {io4it-2.1.0.4 → io4it-2.1.0.6}/orangecontrib/IO4IT/widgets/icons/list_aws.png +0 -0
  67. {io4it-2.1.0.4 → io4it-2.1.0.6}/orangecontrib/IO4IT/widgets/icons/mail_loader.png +0 -0
  68. {io4it-2.1.0.4 → io4it-2.1.0.6}/orangecontrib/IO4IT/widgets/icons/mail_writer.png +0 -0
  69. {io4it-2.1.0.4 → io4it-2.1.0.6}/orangecontrib/IO4IT/widgets/icons/md.png +0 -0
  70. {io4it-2.1.0.4 → io4it-2.1.0.6}/orangecontrib/IO4IT/widgets/icons/speech_to_text.png +0 -0
  71. {io4it-2.1.0.4 → io4it-2.1.0.6}/orangecontrib/IO4IT/widgets/icons/upload.png +0 -0
  72. {io4it-2.1.0.4 → io4it-2.1.0.6}/orangecontrib/IO4IT/widgets/icons/visualizationer.png +0 -0
  73. {io4it-2.1.0.4 → io4it-2.1.0.6}/orangecontrib/IO4IT/widgets/icons/wordpdf2docx.png +0 -0
  74. {io4it-2.1.0.4 → io4it-2.1.0.6}/orangecontrib/IO4IT/widgets/icons_dev/__init__.py +0 -0
  75. {io4it-2.1.0.4 → io4it-2.1.0.6}/orangecontrib/__init__.py +0 -0
  76. {io4it-2.1.0.4 → io4it-2.1.0.6}/setup.cfg +0 -0
io4it-2.1.0.6/PKG-INFO ADDED
@@ -0,0 +1,7 @@
1
+ Metadata-Version: 2.1
2
+ Name: io4it
3
+ Version: 2.1.0.6
4
+ Home-page:
5
+ Author:
6
+ Author-email:
7
+ Keywords: orange3 add-on
io4it-2.1.0.6/io4it.egg-info/PKG-INFO ADDED
@@ -0,0 +1,7 @@
1
+ Metadata-Version: 2.1
2
+ Name: io4it
3
+ Version: 2.1.0.6
4
+ Home-page:
5
+ Author:
6
+ Author-email:
7
+ Keywords: orange3 add-on
{io4it-2.1.0.4 → io4it-2.1.0.6}/io4it.egg-info/SOURCES.txt CHANGED
@@ -1,3 +1,4 @@
1
+ setup.cfg
1
2
  setup.py
2
3
  io4it.egg-info/PKG-INFO
3
4
  io4it.egg-info/SOURCES.txt
@@ -17,8 +18,13 @@ orangecontrib/IO4IT/utils/pool_exec_utils.py
17
18
  orangecontrib/IO4IT/utils/utils_md.py
18
19
  orangecontrib/IO4IT/widgets/OWChatGpt.py
19
20
  orangecontrib/IO4IT/widgets/OWDeep_Search.py
21
+ orangecontrib/IO4IT/widgets/OWDoclingMarkdownizerSimple.py
20
22
  orangecontrib/IO4IT/widgets/OWExportMarkdown.py
23
+ orangecontrib/IO4IT/widgets/OWMarkdownLoader.py
21
24
  orangecontrib/IO4IT/widgets/OWMarkdownizer.py
25
+ orangecontrib/IO4IT/widgets/OWOfficeNormalizer.py
26
+ orangecontrib/IO4IT/widgets/OWPdfType.py
27
+ orangecontrib/IO4IT/widgets/OWProcessPoolExecutor.py
22
28
  orangecontrib/IO4IT/widgets/OWS3Uploader.py
23
29
  orangecontrib/IO4IT/widgets/OWS3downloader.py
24
30
  orangecontrib/IO4IT/widgets/OWS3list.py
@@ -28,28 +34,36 @@ orangecontrib/IO4IT/widgets/OWmailSender.py
28
34
  orangecontrib/IO4IT/widgets/OWwordpdf2docx.py
29
35
  orangecontrib/IO4IT/widgets/__init__.py
30
36
  orangecontrib/IO4IT/widgets/designer/__init__.py
31
- orangecontrib/IO4IT/widgets/designer/chart.html
32
37
  orangecontrib/IO4IT/widgets/designer/nogui.ui
33
38
  orangecontrib/IO4IT/widgets/designer/ow_file_ext_selector.ui
34
39
  orangecontrib/IO4IT/widgets/designer/owchatgpt.ui
35
40
  orangecontrib/IO4IT/widgets/designer/owdeepsearch.ui
41
+ orangecontrib/IO4IT/widgets/designer/owdoclingmarkdownizersimple.ui
36
42
  orangecontrib/IO4IT/widgets/designer/owexportmarkdown.ui
37
43
  orangecontrib/IO4IT/widgets/designer/owmailloader.ui
38
44
  orangecontrib/IO4IT/widgets/designer/owmailsender.ui
39
45
  orangecontrib/IO4IT/widgets/designer/owmarkdownizer.ui
46
+ orangecontrib/IO4IT/widgets/designer/owmarkdownloader.ui
47
+ orangecontrib/IO4IT/widgets/designer/owofficenormalizer.ui
48
+ orangecontrib/IO4IT/widgets/designer/owpdftype.ui
49
+ orangecontrib/IO4IT/widgets/designer/owprocesspoolexecutor.ui
40
50
  orangecontrib/IO4IT/widgets/designer/owspeechtotext.ui
41
- orangecontrib/IO4IT/widgets/designer/owvisualizationer.ui
42
51
  orangecontrib/IO4IT/widgets/designer/wordpdf2docx.ui
43
52
  orangecontrib/IO4IT/widgets/icons/__init__.py
44
53
  orangecontrib/IO4IT/widgets/icons/chatgpt.png
54
+ orangecontrib/IO4IT/widgets/icons/check_pdf.png
45
55
  orangecontrib/IO4IT/widgets/icons/deepsearch.svg
56
+ orangecontrib/IO4IT/widgets/icons/dep_md_old.png
46
57
  orangecontrib/IO4IT/widgets/icons/download.png
47
58
  orangecontrib/IO4IT/widgets/icons/export_md.png
48
59
  orangecontrib/IO4IT/widgets/icons/file_extensor.png
49
60
  orangecontrib/IO4IT/widgets/icons/list_aws.png
61
+ orangecontrib/IO4IT/widgets/icons/load_md.png
50
62
  orangecontrib/IO4IT/widgets/icons/mail_loader.png
51
63
  orangecontrib/IO4IT/widgets/icons/mail_writer.png
52
64
  orangecontrib/IO4IT/widgets/icons/md.png
65
+ orangecontrib/IO4IT/widgets/icons/office_normalizer.png
66
+ orangecontrib/IO4IT/widgets/icons/process_pool_executor.png
53
67
  orangecontrib/IO4IT/widgets/icons/speech_to_text.png
54
68
  orangecontrib/IO4IT/widgets/icons/upload.png
55
69
  orangecontrib/IO4IT/widgets/icons/visualizationer.png
{io4it-2.1.0.4 → io4it-2.1.0.6}/orangecontrib/IO4IT/ocr_function/word_converter.py CHANGED
@@ -16,7 +16,7 @@ def enable_long_path(path):
16
16
  """Simplifie la gestion des chemins longs sous Windows."""
17
17
  return pathlib.Path(r"\\?\\" + str(path))
18
18
 
19
- def convert_pdf_structure(input_dir: str, output_dir: str,ignore_exsting_out_put=False,progress_callback=None):
19
+ def convert_pdf_structure(input_dir: str, output_dir: str,ignore_exsting_out_put=False,forceBasicConvertion=False,progress_callback=None):
20
20
  """
21
21
  return a string with log in case of error
22
22
  Recursively lists all .pdf and .PDF files in the input directory,
@@ -60,7 +60,7 @@ def convert_pdf_structure(input_dir: str, output_dir: str,ignore_exsting_out_put
60
60
  continue
61
61
 
62
62
 
63
- if 0!= convert_pdf_with_temp(str(pdf_file),str(new_file_path)):#convert_pdf_with_temp #convert_pdf_to_docx
63
+ if 0!= convert_pdf_with_temp(str(pdf_file),str(new_file_path),forceBasicConvertion):#convert_pdf_with_temp #convert_pdf_to_docx
64
64
  if error_log!="":
65
65
  error_log+="\n"
66
66
  error_log+="error -> "+str(pdf_file)
@@ -259,7 +259,7 @@ def write_two_strings_to_file(file_path: str,string1: str, string2: str):
259
259
  return 0
260
260
 
261
261
 
262
- def convert_pdf_with_temp(temp_pdf, output_path):
262
+ def convert_pdf_with_temp(temp_pdf, output_path,forceBasicConvertion=False):
263
263
  """
264
264
  Copie le PDF source dans un dossier temporaire, le convertit en DOCX,
265
265
  puis copie le fichier résultant vers le chemin de sortie spécifié,
@@ -294,15 +294,15 @@ def convert_pdf_with_temp(temp_pdf, output_path):
294
294
  # Copie du fichier source vers le dossier temporaire
295
295
  shutil.copy2(pdf_path, temp_pdf)
296
296
  wait_for_file_access(temp_pdf)
297
-
298
- if is_pdf_a4(temp_pdf)==False:
299
- temp_pdf2 = os.path.join(dest_dir, "input_totoA4.pdf")
300
- if 0!=convert_pdf_to_a4(temp_pdf,temp_pdf2):
301
- print("erreur au resize du pdf")
302
- return 1
303
- temp_pdf=temp_pdf2
304
- wait_for_file_access(temp_pdf)
305
- time.sleep(1)
297
+ if forceBasicConvertion==False:
298
+ if is_pdf_a4(temp_pdf)==False:
299
+ temp_pdf2 = os.path.join(dest_dir, "input_totoA4.pdf")
300
+ if 0!=convert_pdf_to_a4(temp_pdf,temp_pdf2):
301
+ print("erreur au resize du pdf")
302
+ return 1
303
+ temp_pdf=temp_pdf2
304
+ wait_for_file_access(temp_pdf)
305
+ time.sleep(1)
306
306
  result=0
307
307
  # Conversion du PDF en DOCX
308
308
  for _ in range(4):
{io4it-2.1.0.4 → io4it-2.1.0.6}/orangecontrib/IO4IT/widgets/OWDeep_Search.py CHANGED
@@ -30,7 +30,7 @@ class OWDeep_Search(widget.OWWidget):
30
30
  category = "AAIT - LLM INTEGRATION"
31
31
  icon = "icons/deepsearch.svg"
32
32
  if "site-packages/Orange/widgets" in os.path.dirname(os.path.abspath(__file__)).replace("\\", "/"):
33
- icon = "icons_dev/owqueryllm.svg"
33
+ icon = "icons_dev/deepsearch.svg"
34
34
  gui = os.path.join(os.path.dirname(os.path.abspath(__file__)), "designer/owdeepsearch.ui")
35
35
  want_control_area = True
36
36
  priority = 1089
io4it-2.1.0.6/orangecontrib/IO4IT/widgets/OWDoclingMarkdownizerSimple.py ADDED
@@ -0,0 +1,280 @@
1
+ import os, time, datetime
2
+ from pathlib import Path
3
+ from concurrent.futures import as_completed
4
+
5
+ from AnyQt.QtWidgets import QLabel, QTextEdit
6
+ from AnyQt.QtCore import pyqtSignal
7
+ from Orange.widgets import widget
8
+ from Orange.widgets.utils.signals import Input, Output
9
+ from Orange.data import Domain, StringVariable, Table, DiscreteVariable
10
+
11
+ # --- Docling (unique lib utilisée pour la conversion) ---
12
+ from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend
13
+ from docling.datamodel.base_models import InputFormat
14
+ from docling.document_converter import (
15
+ DocumentConverter,
16
+ PdfFormatOption,
17
+ WordFormatOption,
18
+ )
19
+ from docling.pipeline.simple_pipeline import SimplePipeline
20
+ from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline
21
+
22
+ # --- Orange contrib Imports ---
23
+ if "site-packages/Orange/widgets" in os.path.dirname(os.path.abspath(__file__)).replace("\\", "/"):
24
+ from Orange.widgets.orangecontrib.AAIT.utils.thread_management import Thread
25
+ from Orange.widgets.orangecontrib.AAIT.utils.import_uic import uic
26
+ else:
27
+ from orangecontrib.AAIT.utils.thread_management import Thread
28
+ from orangecontrib.AAIT.utils.import_uic import uic
29
+
30
+
31
+ # --------- worker stateless : convertit 1 fichier avec Docling ----------
32
+ def _convert_one_file(file_path_str: str):
33
+ """Convertit un fichier (PDF/DOCX/PPTX) en Markdown via Docling.
34
+ Écrit <parent>/a_md/<stem>.md et renvoie [input_path, output_md, status, duration_sec, message].
35
+ Pensé pour être appelé soit directement, soit via ProcessPoolExecutor.
36
+ """
37
+ t0 = time.time()
38
+ src = Path(file_path_str)
39
+ out_dir = src.parent / "conversion_markdown"
40
+ out_dir.mkdir(parents=True, exist_ok=True)
41
+ out_md = out_dir / f"{src.stem}.md"
42
+
43
+ # Si déjà converti : on ne refait pas
44
+ if out_md.exists():
45
+ status = "ok"
46
+ message = "existant: deja converti"
47
+ duration = time.time() - t0
48
+ return [str(src), str(out_md), status, f"{duration:.2f}", message]
49
+
50
+ try:
51
+ # Docling minimal config (inspiré du snippet)
52
+ doc_converter = DocumentConverter(
53
+ allowed_formats=[InputFormat.PDF, InputFormat.DOCX, InputFormat.PPTX],
54
+ format_options={
55
+ InputFormat.PDF: PdfFormatOption(
56
+ pipeline_cls=StandardPdfPipeline,
57
+ backend=PyPdfiumDocumentBackend
58
+ ),
59
+ InputFormat.DOCX: WordFormatOption(
60
+ pipeline_cls=SimplePipeline
61
+ ),
62
+ # PPTX: pas d'option spécifique; géré par défaut
63
+ },
64
+ )
65
+ doc = doc_converter.convert(str(src)).document
66
+ md = doc.export_to_markdown()
67
+ out_md.write_text(md, encoding="utf-8")
68
+ status, message = "ok", ""
69
+ except Exception as e:
70
+ status = "nok"
71
+ message = f"{type(e).__name__}: {e}"
72
+ # on écrit quand même un trace .md
73
+ try:
74
+ out_md.write_text(f"[Erreur conversion] {message}", encoding="utf-8")
75
+ except Exception:
76
+ pass
77
+
78
+ duration = time.time() - t0
79
+ return [str(src), str(out_md), status, f"{duration:.2f}", message]
80
+
81
+
82
+ class OWDoclingMarkdownizerSimple(widget.OWWidget):
83
+ name = "Docling To Markdown"
84
+ description = "Convert DOCX/PPTX/PDF to Markdown via Docling"
85
+ icon = "icons/md.png"
86
+ if "site-packages/Orange/widgets" in os.path.dirname(os.path.abspath(__file__)).replace("\\", "/"):
87
+ icon = "icons_dev/md.png"
88
+ gui = os.path.join(os.path.dirname(os.path.abspath(__file__)), "designer/owdoclingmarkdownizersimple.ui")
89
+ want_main_area = False
90
+ want_control_area = True
91
+ priority = 1004
92
+
93
+ status_update_signal = pyqtSignal(list)
94
+
95
+ class Inputs:
96
+ data = Input("Files Table", Table)
97
+ executor = Input("ProcessPoolExecutor", object)
98
+
99
+ class Outputs:
100
+ data = Output("Markdown Table", Table)
101
+ status_data = Output("Status Table", Table)
102
+
103
+ def __init__(self):
104
+ super().__init__()
105
+ self.data = None
106
+ self.external_executor = None
107
+ self.thread = None
108
+ self.result = None
109
+ self.exec_info = QLabel("Exécution: séquentielle (aucun executor connecté).", self)
110
+ self.processed_statuses = {} # Dictionary to accumulate statuses for each file
111
+
112
+ uic.loadUi(self.gui, self)
113
+
114
+ self.error("")
115
+ self.warning("")
116
+
117
+ @Inputs.data
118
+ def set_data(self, in_data: Table | None):
119
+ self.data = in_data
120
+ self.error("")
121
+ self.warning("")
122
+
123
+ if not in_data:
124
+ self.Outputs.data.send(None)
125
+ self.Outputs.status_data.send(None)
126
+ return
127
+
128
+ try:
129
+ _ = in_data.domain["file_path"]
130
+ except Exception:
131
+ self.error('Colonne "file_path" (Text) requise.')
132
+ self.Outputs.data.send(None)
133
+ self.Outputs.status_data.send(None)
134
+ return
135
+
136
+ self._convert_now()
137
+
138
+ @Inputs.executor
139
+ def set_executor(self, executor_obj):
140
+ self.external_executor = executor_obj
141
+ if executor_obj is not None:
142
+ self.exec_info.setText("Exécution: via ProcessPoolExecutor externe (parallèle).")
143
+ else:
144
+ self.exec_info.setText("Exécution: séquentielle (aucun executor connecté).")
145
+
146
+ def _convert_now(self):
147
+ if self.thread is not None and self.thread.isRunning():
148
+ self.thread.safe_quit()
149
+
150
+ if not self.data:
151
+ self.Outputs.data.send(None)
152
+ self.Outputs.status_data.send(None)
153
+ return
154
+
155
+ # Start progress bar
156
+ self.progressBarInit()
157
+
158
+ # Récupère les chemins et filtre par extensions supportées
159
+ try:
160
+ paths = [Path(str(x)) for x in self.data.get_column("file_path")]
161
+ except Exception as e:
162
+ self.error(f"Lecture de 'file_path' impossible: {e}")
163
+ self.Outputs.data.send(None)
164
+ self.Outputs.status_data.send(None)
165
+ return
166
+
167
+ files = [p for p in paths if p.exists() and p.suffix.lower() in (".pdf", ".docx", ".pptx")]
168
+ if not files:
169
+ self.Outputs.data.send(None)
170
+ self.Outputs.status_data.send(None)
171
+ self.progressBarFinished()
172
+ return
173
+
174
+ # Initialize status dictionary for incremental updates
175
+ self.processed_statuses = {str(p): ["pending", ""] for p in files}
176
+ self.update_status_output()
177
+
178
+ # Connect internal signal
179
+ self.status_update_signal.connect(self.handle_status_update)
180
+
181
+ # Connect and start thread for the main function
182
+ self.thread = Thread(self._run_conversion, files)
183
+ self.thread.progress.connect(self.handle_progress)
184
+ self.thread.result.connect(self.handle_result)
185
+ self.thread.finish.connect(self.handle_finish)
186
+ self.thread.start()
187
+
188
+ def update_status_output(self):
189
+ """Helper function to create and send the status table."""
190
+ status_domain = Domain(
191
+ [], # This list must be empty because the table has no attributes.
192
+ metas=[
193
+ StringVariable("input_path"),
194
+ DiscreteVariable("status", values=["pending", "in_progress", "ok", "nok"]),
195
+ StringVariable("message"),
196
+ ],
197
+ )
198
+
199
+ status_rows = []
200
+ for path_str, status_info in self.processed_statuses.items():
201
+ status, message = status_info
202
+ # Orange's Table.from_list expects a flat list of values matching the domain's order.
203
+ # The row should contain the values for input_path, status, and message.
204
+ # The status_info is a list [status, message]. We need to prepend the path_str.
205
+ status_rows.append([path_str, status, message])
206
+
207
+ status_table = Table.from_list(status_domain, status_rows)
208
+ self.Outputs.status_data.send(status_table)
209
+
210
+ def _run_conversion(self, files, progress_callback):
211
+ """Main function to run the conversion, supports sequential and parallel modes."""
212
+ results = []
213
+
214
+ if self.external_executor is None:
215
+ # --- Mode simple séquentiel ---
216
+ for i, p in enumerate(files):
217
+ path_str = str(p)
218
+ # on met à jour le statut en "in_progress" et on envoie
219
+ self.status_update_signal.emit([path_str, "in_progress", ""])
220
+
221
+ row = _convert_one_file(path_str)
222
+ results.append(row)
223
+
224
+ # Mise à jour du tableau avec le résultat et envoi immédiat
225
+ self.status_update_signal.emit([row[0], row[2], row[4]])
226
+
227
+ progress_callback((i + 1) / len(files) * 100)
228
+ else:
229
+ # --- Mode parallèle via executor externe ---
230
+ fut_map = {self.external_executor.submit(_convert_one_file, str(p)): str(p) for p in files}
231
+
232
+ for i, fut in enumerate(as_completed(fut_map), 1):
233
+ file_path_str = fut_map[fut]
234
+ # on met à jour le statut en "in_progress" et on envoie
235
+ self.status_update_signal.emit([file_path_str, "in_progress", ""])
236
+
237
+ try:
238
+ row = fut.result()
239
+ results.append(row)
240
+ # Mise à jour du résultat de la future et envoi
241
+ self.status_update_signal.emit([row[0], row[2], row[4]])
242
+ except Exception as e:
243
+ # Gestion des erreurs de la future et envoi
244
+ row = [file_path_str, str((Path(file_path_str).parent / 'a_md' / f"{Path(file_path_str).stem}.md")),
245
+ "nok", "0.00", f"FutureError: {e}"]
246
+ results.append(row)
247
+ self.status_update_signal.emit([row[0], "nok", f"FutureError: {e}"])
248
+
249
+ progress_callback(i / len(files) * 100)
250
+
251
+ # The final result is the table built from all results
252
+ domain = Domain([], metas=[
253
+ StringVariable("input_path"),
254
+ StringVariable("output_md"),
255
+ StringVariable("status"),
256
+ StringVariable("duration_sec"),
257
+ StringVariable("message"),
258
+ ])
259
+ final_table = Table.from_list(domain, results)
260
+ return final_table
261
+
262
+ def handle_progress(self, value: float) -> None:
263
+ self.progressBarSet(value)
264
+
265
+ def handle_status_update(self, status_info: list):
266
+ """Receives a single status update and updates the internal dictionary and the output."""
267
+ path_str, status, message = status_info
268
+ self.processed_statuses[path_str] = [status, message]
269
+ self.update_status_output()
270
+
271
+ def handle_result(self, result: Table):
272
+ try:
273
+ self.result = result
274
+ self.Outputs.data.send(result)
275
+ except Exception as e:
276
+ print("An error occurred when sending out_data:", e)
277
+ self.Outputs.data.send(None)
278
+
279
+ def handle_finish(self):
280
+ self.progressBarFinished()
{io4it-2.1.0.4 → io4it-2.1.0.6}/orangecontrib/IO4IT/widgets/OWExportMarkdown.py CHANGED
@@ -29,7 +29,7 @@ class OWExportMarkdown(widget.OWWidget):
29
29
  icon = "icons/export_md.png"
30
30
  if "site-packages/Orange/widgets" in os.path.dirname(os.path.abspath(__file__)).replace("\\", "/"):
31
31
  icon = "icons_dev/export_md.png"
32
- want_control_area = True
32
+ want_control_area = False
33
33
  priority = 9999
34
34
  category = "AAIT - TOOLBOX"
35
35
 
io4it-2.1.0.6/orangecontrib/IO4IT/widgets/OWMarkdownLoader.py ADDED
@@ -0,0 +1,114 @@
1
+ import os
2
+ from pathlib import Path
3
+ import numpy as np
4
+
5
+ from Orange.widgets import widget
6
+ from Orange.widgets.utils.signals import Input, Output
7
+ from Orange.data import Domain, StringVariable, Table
8
+ from AnyQt.QtWidgets import QCheckBox
9
+
10
+ try:
11
+ from Orange.widgets.orangecontrib.AAIT.utils.import_uic import uic
12
+ from range.widgets.orangecontrib.IO4IT.utils import utils_md
13
+ except ImportError:
14
+ from orangecontrib.IO4IT.utils import utils_md
15
+ from orangecontrib.AAIT.utils.import_uic import uic
16
+
17
+
18
+ class OWMarkdownLoader(widget.OWWidget):
19
+ name = "Markdown Loader"
20
+ description = "Charge tous les fichiers Markdown d’un dossier (récursif)"
21
+ icon = "icons/load_md.png"
22
+ if "site-packages/Orange/widgets" in os.path.dirname(os.path.abspath(__file__)).replace("\\", "/"):
23
+ icon = "icons_dev/load_md.png"
24
+ gui = os.path.join(os.path.dirname(os.path.abspath(__file__)), "designer/owmarkdownloader.ui")
25
+ want_control_area = False
26
+ priority = 1001
27
+
28
+ class Inputs:
29
+ data = Input("Data", Table)
30
+
31
+ class Outputs:
32
+ md_files = Output("Markdown Files", Table) # -> (file_path, content)
33
+ data = Output("Data", Table) # passthrough de l'entrée
34
+
35
+ def __init__(self):
36
+ super().__init__()
37
+
38
+ self.in_data = None
39
+ self.input_dir = None
40
+ uic.loadUi(self.gui, self)
41
+ self.checkBoxRecursive = self.findChild(QCheckBox, 'checkBoxRecursive')
42
+ # These lines MUST be after super().__init__()
43
+ self.recursive = self.checkBoxRecursive.isChecked()
44
+ self.checkBoxRecursive.stateChanged.connect(self._on_recursive_toggled)
45
+
46
+ self.warning("")
47
+
48
+ def _on_recursive_toggled(self, _state):
49
+ self.recursive = self.checkBoxRecursive.isChecked()
50
+ # If a directory is already set, re-run the production
51
+ if self.input_dir:
52
+ self._produce()
53
+
54
+ @Inputs.data
55
+ def set_data(self, in_data: Table | None):
56
+ self.in_data = in_data
57
+ self.warning("")
58
+
59
+ # Always pass through the input data
60
+ self.Outputs.data.send(in_data)
61
+
62
+ if not in_data:
63
+ # If no input data, send an empty table
64
+ self.Outputs.md_files.send(self._empty_md_table())
65
+ self.Description.setText(
66
+ "This widget loads Markdown files from a folder. The path must be in a column named 'input_dir'.")
67
+ return
68
+
69
+ # Look for the 'input_dir' column and get the first folder
70
+ try:
71
+ input_dir_column = in_data.domain["input_dir"]
72
+ self.input_dir = str(in_data[0, input_dir_column].value)
73
+ except (KeyError, IndexError, AttributeError):
74
+ self.warning('"input_dir" (Text) is required in the input data.')
75
+ self.Outputs.md_files.send(self._empty_md_table())
76
+ self.Description.setText("Error: 'input_dir' (Text) column not found or is empty.")
77
+ return
78
+
79
+ self.Description.setText(f"Dossier : {self.input_dir}")
80
+ self._produce()
81
+
82
+ def _empty_md_table(self) -> Table:
83
+ domain = Domain([], metas=[StringVariable("file_path"), StringVariable("content")])
84
+ X = np.empty((0, 0))
85
+ metas = np.empty((0, 2), dtype=object)
86
+ return Table.from_numpy(domain, X, metas=metas)
87
+
88
+ def _produce(self):
89
+ if not self.input_dir or not os.path.isdir(self.input_dir):
90
+ self.warning(f"Invalid directory path: '{self.input_dir}'")
91
+ self.Outputs.md_files.send(self._empty_md_table())
92
+ return
93
+ base = Path(self.input_dir)
94
+ patterns = ["*.md"]
95
+ paths = []
96
+
97
+ for patt in patterns:
98
+ if self.recursive:
99
+ paths.extend(base.rglob(patt))
100
+ else:
101
+ paths.extend(base.glob(patt))
102
+
103
+ md_rows = []
104
+ for p in sorted(set(paths)):
105
+ try:
106
+ md_rows.append([str(p), utils_md.try_read_text(p)])
107
+ except Exception:
108
+ md_rows.append([str(p), ""])
109
+ domain = Domain([], metas=[StringVariable("file_path"), StringVariable("content")])
110
+ X = np.empty((len(md_rows), 0))
111
+ metas = np.array(md_rows, dtype=object) if md_rows else np.empty((0, 2), dtype=object)
112
+ md_table = Table.from_numpy(domain, X, metas=metas)
113
+
114
+ self.Outputs.md_files.send(md_table)
{io4it-2.1.0.4 → io4it-2.1.0.6}/orangecontrib/IO4IT/widgets/OWMarkdownizer.py CHANGED
@@ -522,9 +522,9 @@ class MarkdownConversionThread(QThread):
522
522
  class FileProcessorApp(widget.OWWidget):
523
523
  name = "Markdownizer"
524
524
  description = "Convert PDFs, DOCX, PPTX to Markdown (texte seul & word only)"
525
- icon = "icons/md.png"
525
+ icon = "icons/dep_md_old.png"
526
526
  if "site-packages/Orange/widgets" in os.path.dirname(os.path.abspath(__file__)).replace("\\", "/"):
527
- icon = "icons_dev/md.png"
527
+ icon = "icons_dev/dep_md_old.png"
528
528
  want_control_area = False
529
529
  priority = 1001
530
530
  gui = os.path.join(os.path.dirname(os.path.abspath(__file__)), "designer/owmarkdownizer.ui")