io4it 3.0.2.2__tar.gz → 3.0.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. {io4it-3.0.2.2 → io4it-3.0.3.1}/PKG-INFO +1 -1
  2. {io4it-3.0.2.2 → io4it-3.0.3.1}/io4it.egg-info/PKG-INFO +1 -1
  3. {io4it-3.0.2.2 → io4it-3.0.3.1}/io4it.egg-info/SOURCES.txt +0 -1
  4. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/OWExportMarkdown.py +1 -1
  5. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/OWExtractTablesDocxToXlsx.py +1 -1
  6. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/OWMarkdownLoader.py +1 -1
  7. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/OWMarkdownizer.py +1 -1
  8. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/OWParserHTML.py +7 -3
  9. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/OWS3Uploader.py +1 -1
  10. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/OWS3downloader.py +1 -1
  11. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/OWS3list.py +1 -1
  12. io4it-3.0.3.1/orangecontrib/IO4IT/widgets/OWWebSearch.py +307 -0
  13. io4it-3.0.3.1/orangecontrib/IO4IT/widgets/designer/owwebsearch.ui +131 -0
  14. {io4it-3.0.2.2 → io4it-3.0.3.1}/setup.py +1 -1
  15. io4it-3.0.2.2/orangecontrib/IO4IT/utils/config.json +0 -45
  16. io4it-3.0.2.2/orangecontrib/IO4IT/widgets/OWWebSearch.py +0 -532
  17. io4it-3.0.2.2/orangecontrib/IO4IT/widgets/designer/owwebsearch.ui +0 -296
  18. {io4it-3.0.2.2 → io4it-3.0.3.1}/io4it.egg-info/dependency_links.txt +0 -0
  19. {io4it-3.0.2.2 → io4it-3.0.3.1}/io4it.egg-info/entry_points.txt +0 -0
  20. {io4it-3.0.2.2 → io4it-3.0.3.1}/io4it.egg-info/namespace_packages.txt +0 -0
  21. {io4it-3.0.2.2 → io4it-3.0.3.1}/io4it.egg-info/requires.txt +0 -0
  22. {io4it-3.0.2.2 → io4it-3.0.3.1}/io4it.egg-info/top_level.txt +0 -0
  23. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/__init__.py +0 -0
  24. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/ocr_function/__init__.py +0 -0
  25. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/ocr_function/word_converter.py +0 -0
  26. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/utils/__init__.py +0 -0
  27. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/utils/keys_manager.py +0 -0
  28. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/utils/mail.py +0 -0
  29. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/utils/pool_exec_utils.py +0 -0
  30. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/utils/secret_manager.py +0 -0
  31. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/utils/utils_md.py +0 -0
  32. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/OWChatGpt.py +0 -0
  33. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/OWDeep_Search.py +0 -0
  34. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/OWDoclingASR.py +0 -0
  35. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/OWDoclingToMarkdown.py +0 -0
  36. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/OWInboxMailMonitoring.py +0 -0
  37. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/OWMD2HTML.py +0 -0
  38. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/OWOfficeNormalizer.py +0 -0
  39. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/OWPdfType.py +0 -0
  40. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/OWProcessPoolExecutor.py +0 -0
  41. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/OWSpeechToText.py +0 -0
  42. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/OWmailLoader.py +0 -0
  43. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/OWmailSender.py +0 -0
  44. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/OWwordpdf2docx.py +0 -0
  45. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/__init__.py +0 -0
  46. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/designer/__init__.py +0 -0
  47. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/designer/nogui.ui +0 -0
  48. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/designer/ow_file_ext_selector.ui +0 -0
  49. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/designer/owchatgpt.ui +0 -0
  50. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/designer/owdeepsearch.ui +0 -0
  51. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/designer/owdoclingasr.ui +0 -0
  52. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/designer/owdoclingtomarkdown.ui +0 -0
  53. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/designer/owdocxtoxlsx.ui +0 -0
  54. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/designer/owexportmarkdown.ui +0 -0
  55. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/designer/owinboxmailmonitoring.ui +0 -0
  56. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/designer/owmailloader.ui +0 -0
  57. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/designer/owmailsender.ui +0 -0
  58. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/designer/owmarkdownizer.ui +0 -0
  59. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/designer/owmarkdownloader.ui +0 -0
  60. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/designer/owmd2html.ui +0 -0
  61. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/designer/owofficenormalizer.ui +0 -0
  62. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/designer/owparserhtml.ui +0 -0
  63. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/designer/owpdftype.ui +0 -0
  64. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/designer/owprocesspoolexecutor.ui +0 -0
  65. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/designer/owspeechtotext.ui +0 -0
  66. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/designer/owvisualizationer.ui +0 -0
  67. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/designer/wordpdf2docx.ui +0 -0
  68. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/icons/__init__.py +0 -0
  69. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/icons/chatgpt.png +0 -0
  70. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/icons/check_pdf.png +0 -0
  71. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/icons/deepsearch.svg +0 -0
  72. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/icons/dep_md_old.png +0 -0
  73. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/icons/download.png +0 -0
  74. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/icons/export_md.png +0 -0
  75. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/icons/extract_table.png +0 -0
  76. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/icons/file_extensor.png +0 -0
  77. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/icons/html.png +0 -0
  78. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/icons/list_aws.png +0 -0
  79. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/icons/load_md.png +0 -0
  80. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/icons/mail_loader.png +0 -0
  81. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/icons/mail_writer.png +0 -0
  82. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/icons/md.png +0 -0
  83. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/icons/monitor-email.svg +0 -0
  84. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/icons/office_normalizer.png +0 -0
  85. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/icons/owmd2html.svg +0 -0
  86. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/icons/process_pool_executor.png +0 -0
  87. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/icons/speech_to_text.png +0 -0
  88. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/icons/upload.png +0 -0
  89. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/icons/visualizationer.png +0 -0
  90. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/icons/websearch.png +0 -0
  91. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/icons/wordpdf2docx.png +0 -0
  92. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/IO4IT/widgets/icons_dev/__init__.py +0 -0
  93. {io4it-3.0.2.2 → io4it-3.0.3.1}/orangecontrib/__init__.py +0 -0
  94. {io4it-3.0.2.2 → io4it-3.0.3.1}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: io4it
3
- Version: 3.0.2.2
3
+ Version: 3.0.3.1
4
4
  Home-page:
5
5
  Author:
6
6
  Author-email:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: io4it
3
- Version: 3.0.2.2
3
+ Version: 3.0.3.1
4
4
  Home-page:
5
5
  Author:
6
6
  Author-email:
@@ -39,7 +39,6 @@ orangecontrib/IO4IT/widgets/OWmailLoader.py
39
39
  orangecontrib/IO4IT/widgets/OWmailSender.py
40
40
  orangecontrib/IO4IT/widgets/OWwordpdf2docx.py
41
41
  orangecontrib/IO4IT/widgets/__init__.py
42
- orangecontrib/IO4IT/widgets/../utils/config.json
43
42
  orangecontrib/IO4IT/widgets/designer/__init__.py
44
43
  orangecontrib/IO4IT/widgets/designer/nogui.ui
45
44
  orangecontrib/IO4IT/widgets/designer/ow_file_ext_selector.ui
@@ -27,7 +27,7 @@ else:
27
27
 
28
28
  class OWExportMarkdown(widget.OWWidget):
29
29
  name = "OWExportMarkdown"
30
- description = "Auto-exporte content→(docx,pptx,pdf) au même path (base + extensions)."
30
+ description = "Automatically export content to DOCX, PPTX, and PDF using the same base path."
31
31
  icon = "icons/export_md.png"
32
32
  if "site-packages/Orange/widgets" in os.path.dirname(os.path.abspath(__file__)).replace("\\", "/"):
33
33
  icon = "icons_dev/export_md.png"
@@ -23,7 +23,7 @@ class OWExtractTablesDocxToXlsx(widget.OWWidget):
23
23
  en fichiers XLSX distincts (une table Word = un fichier XLSX).
24
24
  """
25
25
  name = "Docx to XLSX"
26
- description = "Extrait les tables de documents Word et les sauvegarde en XLSX, avec une option de division (split)."
26
+ description = "Extract tables from Word documents and save them as XLSX, with an optional split feature."
27
27
  category = "AAIT - TOOLBOX"
28
28
  icon = "icons/extract_table.png"
29
29
  if "site-packages/Orange/widgets" in os.path.dirname(os.path.abspath(__file__)).replace("\\", "/"):
@@ -17,7 +17,7 @@ except ImportError:
17
17
 
18
18
  class OWMarkdownLoader(widget.OWWidget):
19
19
  name = "Markdown Loader"
20
- description = "Charge tous les fichiers Markdown d’un dossier (récursif)"
20
+ description = "Load all Markdown files from a folder (recursively)."
21
21
  icon = "icons/load_md.png"
22
22
  if "site-packages/Orange/widgets" in os.path.dirname(os.path.abspath(__file__)).replace("\\", "/"):
23
23
  icon = "icons_dev/load_md.png"
@@ -518,7 +518,7 @@ class MarkdownConversionThread(QThread):
518
518
 
519
519
  class FileProcessorApp(widget.OWWidget):
520
520
  name = "Markdownizer"
521
- description = "Convert PDFs, DOCX, PPTX to Markdown (texte seul & word only)"
521
+ description = "[deprecated]Convert PDFs, DOCX, PPTX to Markdown (ignore image)"
522
522
  icon = "icons/dep_md_old.png"
523
523
  if "site-packages/Orange/widgets" in os.path.dirname(os.path.abspath(__file__)).replace("\\", "/"):
524
524
  icon = "icons_dev/dep_md_old.png"
@@ -7,6 +7,7 @@ import asyncio
7
7
  import aiohttp
8
8
  import html2text
9
9
  from bs4 import BeautifulSoup
10
+ import urllib.request
10
11
 
11
12
  if "site-packages/Orange/widgets" in os.path.dirname(os.path.abspath(__file__)).replace("\\", "/"):
12
13
  from Orange.widgets.orangecontrib.AAIT.utils import thread_management
@@ -112,13 +113,16 @@ class ParseHMTL(OWWidget):
112
113
  'Connection': 'keep-alive',
113
114
  'Upgrade-Insecure-Requests': '1'
114
115
  }
115
- async with session.get(url, headers=headers) as response:
116
+ proxies = urllib.request.getproxies()
117
+ if "http" in proxies:
118
+ proxies = proxies["http"]
119
+ else:
120
+ proxies = None
121
+ async with session.get(url, headers=headers, proxy=proxies) as response:
116
122
  if response.status != 200:
117
123
  raise Exception(f"HTTP {response.status}")
118
-
119
124
  html = await response.text()
120
125
  soup = BeautifulSoup(html, 'html.parser')
121
-
122
126
  meta_desc = ''
123
127
  meta_tag = soup.find('meta', attrs={'name': 'description'})
124
128
  if not meta_tag:
@@ -9,7 +9,7 @@ from Orange.data import Table
9
9
 
10
10
  class OWS3FileDownloader(OWWidget):
11
11
  name = "S3 File Uploader"
12
- description = "Envoies les fichiers listés depuis un répertoire local vers s3."
12
+ description = "Upload the listed files from a local directory to S3."
13
13
  icon = "icons/upload.png"
14
14
  if "site-packages/Orange/widgets" in os.path.dirname(os.path.abspath(__file__)).replace("\\", "/"):
15
15
  icon = "icons_dev/upload.png"
@@ -8,7 +8,7 @@ from Orange.data import Table
8
8
 
9
9
  class OWS3FileDownloader(OWWidget):
10
10
  name = "S3 File Downloader"
11
- description = "Télécharge les fichiers listés depuis S3 vers un répertoire local."
11
+ description = "Download the listed files from S3 to a local directory."
12
12
  icon = "icons/download.png"
13
13
  if "site-packages/Orange/widgets" in os.path.dirname(os.path.abspath(__file__)).replace("\\", "/"):
14
14
  icon = "icons_dev/download.png"
@@ -8,7 +8,7 @@ import os
8
8
 
9
9
  class OWS3FileLister(OWWidget):
10
10
  name = "S3 File Lister"
11
- description = "Liste les fichiers d'un bucket S3 et affiche leurs détails."
11
+ description = "List the files in an S3 bucket and display their details."
12
12
  icon = "icons/list_aws.png"
13
13
  if "site-packages/Orange/widgets" in os.path.dirname(os.path.abspath(__file__)).replace("\\", "/"):
14
14
  icon = "icons_dev/list_aws.png"
@@ -0,0 +1,307 @@
1
+ import os
2
+ import sys
3
+ import Orange
4
+ from Orange.widgets.widget import Input, Output
5
+ from AnyQt.QtWidgets import QApplication, QPushButton
6
+ from Orange.widgets.settings import Setting
7
+ import requests
8
+ from bs4 import BeautifulSoup
9
+ from urllib.parse import quote, urlparse, urljoin, parse_qs
10
+ import unicodedata
11
+ import re
12
+
13
+ if "site-packages/Orange/widgets" in os.path.dirname(os.path.abspath(__file__)).replace("\\", "/"):
14
+ from Orange.widgets.orangecontrib.AAIT.utils import thread_management, base_widget
15
+ from Orange.widgets.orangecontrib.HLIT_dev.remote_server_smb import convert
16
+ else:
17
+ from orangecontrib.AAIT.utils import thread_management, base_widget
18
+ from orangecontrib.HLIT_dev.remote_server_smb import convert
19
+
20
+ class WebSearch(base_widget.BaseListWidget):
21
+ name = "WebSearch"
22
+ description = "Search url website from a query with DDG."
23
+ icon = "icons/websearch.png"
24
+ if "site-packages/Orange/widgets" in os.path.dirname(os.path.abspath(__file__)).replace("\\", "/"):
25
+ icon = "icons_dev/websearch.png"
26
+ priority = 3000
27
+ gui = ""
28
+ want_control_area = False
29
+ category = "AAIT - TOOLBOX"
30
+ gui = os.path.join(os.path.dirname(os.path.abspath(__file__)), "designer/owwebsearch.ui")
31
+ # Settings
32
+ selected_column_name = Setting("content")
33
+
34
+ class Inputs:
35
+ data = Input("Data", Orange.data.Table)
36
+
37
+ @Inputs.data
38
+ def set_data(self, in_data):
39
+ self.data = in_data
40
+ if in_data is None:
41
+ self.Outputs.data.send(None)
42
+ return
43
+ if self.data:
44
+ self.var_selector.add_variables(self.data.domain)
45
+ self.var_selector.select_variable_by_name(self.selected_column_name)
46
+ self.run()
47
+
48
+ class Outputs:
49
+ data = Output("Data", Orange.data.Table)
50
+
51
+
52
def __init__(self):
    """Initialise the widget, fix its window size and wire the Run button."""
    super().__init__()

    # Qt management: the window is given a fixed 480 x 450 size.
    self.setFixedWidth(480)
    self.setFixedHeight(450)

    # The Run button is called 'pushButton_send' in the designer file;
    # clicking it launches a search.
    run_button = self.findChild(QPushButton, 'pushButton_send')
    self.pushButton_run = run_button
    run_button.clicked.connect(self.run)
60
+
61
+
62
def normaliser_texte(self, txt: str) -> str:
    """Return *txt* lower-cased, without accents and without surrounding whitespace.

    A falsy input (``None`` or ``""``) yields the empty string.
    """
    if not txt:
        return ""
    decomposed = unicodedata.normalize("NFD", txt.lower())
    # After NFD decomposition every accent is a separate combining mark;
    # keeping only the non-combining characters removes the accents.
    kept = [ch for ch in decomposed if not unicodedata.combining(ch)]
    return "".join(kept).strip()
72
+
73
def extraire_mots_cles(self, requete: str):
    """Split a query into simple keywords.

    The query is normalised with ``normaliser_texte`` and cut into runs of
    word characters. Tokens that are common French stop words, or that are
    shorter than three characters, are dropped. When nothing survives the
    filter, the unfiltered token list is returned instead.
    """
    stopwords = {"du", "de", "des", "le", "la", "les", "un", "une", "au", "aux", "et", "en", "pour", "sur", "a"}
    tokens = re.findall(r"\w+", self.normaliser_texte(requete))
    keywords = []
    for token in tokens:
        if len(token) > 2 and token not in stopwords:
            keywords.append(token)
    return keywords if keywords else tokens
83
+
84
def recherche_duckduckgo(self, query, max_results=10):
    """Query the DuckDuckGo HTML endpoint and return the result links.

    Args:
        query: Search text. It is converted with ``str()`` before being
            URL-quoted, because ``quote()`` raises ``TypeError`` on
            non-string values such as numbers.
        max_results: Maximum number of result anchors read from the page.

    Returns:
        A list of ``(title, url)`` tuples. The list is empty when the HTTP
        request fails; the error is printed, not raised.
    """
    q = quote(str(query))
    url = f"https://duckduckgo.com/html/?q={q}"
    headers = {"User-Agent": "Mozilla/5.0"}

    try:
        r = requests.get(url, headers=headers, timeout=10)
        r.raise_for_status()
    except requests.RequestException as e:
        print(f"[ERREUR] Problème lors de la requête DuckDuckGo : {e}")
        return []

    soup = BeautifulSoup(r.text, "html.parser")
    resultats = []

    for a in soup.select("a.result__a")[:max_results]:
        titre = a.get_text(strip=True)
        lien = a.get("href", "")

        # DuckDuckGo wraps targets in a redirect: /l/?uddg=<real url>.
        # Compare the host exactly (or as a sub-domain) rather than by
        # substring, so that e.g. "duckduckgo.com.example.org" is not
        # mistaken for DuckDuckGo. A link without a host is relative to the
        # DuckDuckGo page and is unwrapped as well.
        parsed = urlparse(lien)
        host = parsed.hostname or ""
        est_ddg = (
            not host
            or host == "duckduckgo.com"
            or host.endswith(".duckduckgo.com")
        )
        if est_ddg and parsed.path.startswith("/l/"):
            cibles = parse_qs(parsed.query).get("uddg")
            if cibles:
                lien = cibles[0]

        # Relative link -> absolute
        if lien.startswith("/"):
            lien = urljoin("https://duckduckgo.com", lien)

        resultats.append((titre, lien))

    return resultats
117
+
118
def extraire_domaines(self, resultats_search):
    """Return the distinct site roots (``scheme://host``) of the search results.

    Args:
        resultats_search: Iterable of ``(title, url)`` pairs.

    Returns:
        A list of unique roots, in order of first appearance. URLs without
        a network location are ignored; a missing scheme defaults to
        ``https``. URLs that ``urlparse`` rejects are skipped.
    """
    # A dict is used as an ordered set so the output order is deterministic.
    domaines = {}
    for _titre, url in resultats_search:
        try:
            parsed = urlparse(url)
        except ValueError:
            # urlparse raises ValueError on some malformed URLs
            # (e.g. unbalanced IPv6 brackets); such results are skipped.
            continue
        if not parsed.netloc:
            continue
        scheme = parsed.scheme or "https"
        domaines.setdefault(f"{scheme}://{parsed.netloc}", None)
    return list(domaines)
131
+
132
def trouver_flux_rss(self, url_site):
    """Download a page and return the feed URLs it advertises.

    Two sources are scanned: ``<link>`` elements whose ``type`` is an RSS or
    Atom feed, and ``<a>`` elements whose ``href`` contains "rss" or "feed".

    Args:
        url_site: URL of the page to inspect; relative links are resolved
            against it.

    Returns:
        A list of unique absolute URLs, in order of first appearance. The
        list is empty when the page cannot be fetched; the error is printed,
        not raised.
    """
    try:
        r = requests.get(url_site, headers={"User-Agent": "Mozilla/5.0"}, timeout=10)
        r.raise_for_status()
    except requests.RequestException as e:
        print(f"[ERREUR] Impossible d'accéder à {url_site} : {e}")
        return []

    soup = BeautifulSoup(r.text, "html.parser")
    # A dict is used as an ordered set: de-duplicates with a stable order.
    flux = {}

    # Atom feeds are accepted as well as RSS, since the feed reader of this
    # widget handles both <item> and <entry> elements.
    types_flux = ["application/rss+xml", "application/atom+xml"]
    for link in soup.find_all("link", type=types_flux):
        href = link.get("href")
        if href:
            flux.setdefault(urljoin(url_site, href), None)

    for link in soup.find_all("a"):
        href = link.get("href", "")
        if not href:
            continue
        href_norm = href.lower()
        if "rss" in href_norm or "feed" in href_norm:
            flux.setdefault(urljoin(url_site, href), None)

    return list(flux)
158
+
159
def rechercher_articles_dans_flux(self, requete, flux_list, max_results=20):
    """Collect the feed entries whose title or summary mentions the query.

    Args:
        requete: Query text; its keywords come from ``extraire_mots_cles``.
        flux_list: Iterable of feed URLs (RSS or Atom).
        max_results: The search stops as soon as this many articles have
            been collected.

    Returns:
        A list of dicts with the keys ``titre``, ``url``, ``date`` and
        ``source_flux``. Feeds that cannot be downloaded are skipped; the
        error is printed, not raised.
    """
    mots_cles = self.extraire_mots_cles(requete)
    articles = []

    headers = {"User-Agent": "Mozilla/5.0"}

    for flux in flux_list:
        try:
            r = requests.get(flux, headers=headers, timeout=10)
            r.raise_for_status()
        except requests.RequestException as e:
            print(f"[ERREUR] Problème lors de la lecture du flux {flux} : {e}")
            continue

        # Parse the feed as XML
        soup = BeautifulSoup(r.content, "xml")

        # Handles both RSS (<item>) and Atom (<entry>)
        for entry in soup.find_all(["item", "entry"]):
            # Title
            titre_tag = entry.find("title")
            titre = titre_tag.get_text(strip=True) if titre_tag else ""

            # Summary / description
            resume_tag = entry.find("description") or entry.find("summary")
            resume = resume_tag.get_text(strip=True) if resume_tag else ""

            # Link. An Atom entry may carry several <link> elements
            # (rel="self", "replies", ...); the article itself is the
            # rel="alternate" one, so prefer it over the first <link>.
            lien_tag = entry.find("link", rel="alternate") or entry.find("link")
            lien = ""
            if lien_tag:
                if lien_tag.has_attr("href"):
                    # Atom: <link href="...">
                    lien = lien_tag["href"]
                else:
                    # RSS: <link>https://...</link>
                    lien = lien_tag.get_text(strip=True)

            # Date
            date_tag = (
                entry.find("pubDate")
                or entry.find("published")
                or entry.find("updated")
            )
            date = date_tag.get_text(strip=True) if date_tag else "Date inconnue"

            texte_complet = self.normaliser_texte(titre + " " + resume)

            # Keep the entry when at least one keyword occurs in title + summary
            if any(mot in texte_complet for mot in mots_cles):
                articles.append(
                    {
                        "titre": titre,
                        "url": lien,
                        "date": date,
                        "source_flux": flux,
                    }
                )
                if len(articles) >= max_results:
                    return articles

    return articles
220
+
221
def pipeline_veille_requete(self,requete):
    """Run the whole search pipeline for one query.

    Steps: DuckDuckGo search, extraction of the result domains, feed
    discovery on each domain, then keyword search inside the feeds.

    Returns:
        The matching feed articles. When no feed is found, or no feed entry
        matches, the raw search results are returned instead as dicts tagged
        ``"source": "web"``. An empty list is returned when the search itself
        yields nothing.
    """
    resultats = self.recherche_duckduckgo(requete)
    if not resultats:
        print("Aucun résultat trouvé sur DuckDuckGo.")
        return []
    print(resultats)

    flux = []
    for d in self.extraire_domaines(resultats):
        found = self.trouver_flux_rss(d)
        if not found:
            continue
        print(f"Flux trouvés sur {d}:")
        for f in found:
            print(" ->", f)
        flux.extend(found)
    flux = list(set(flux))

    # The feeds are only queried when at least one was discovered.
    articles = self.rechercher_articles_dans_flux(requete, flux) if flux else []
    if articles:
        return articles

    # Fallback: expose the plain web results with the same keys as articles.
    return [
        {"titre": t, "url": u, "date": None, "source_flux": None, "source": "web"}
        for t, u in resultats
    ]
253
+
254
+
255
def run(self):
    """Start a background search for the query held in the selected column.

    The query is the value of the first row of ``selected_column_name``.
    ``None`` is sent on the output when there is no input table or when the
    table has no rows; a warning is shown when the column is missing. The
    search runs ``pipeline_veille_requete`` in a ``thread_management.Thread``
    whose signals are connected to the ``handle_*`` methods.
    """
    self.error("")
    self.warning("")
    if self.data is None:
        self.Outputs.data.send(None)
        return

    if self.selected_column_name not in self.data.domain:
        self.warning(f'Previously selected column "{self.selected_column_name}" does not exist in your data.')
        return

    colonne = self.data.get_column(self.selected_column_name)
    if len(colonne) == 0:
        # Empty table: there is no first row to read the query from.
        self.Outputs.data.send(None)
        return
    self.query = colonne[0]

    self.progressBarInit()
    self.thread = thread_management.Thread(self.pipeline_veille_requete, self.query)
    self.thread.progress.connect(self.handle_progress)
    self.thread.result.connect(self.handle_result)
    self.thread.finish.connect(self.handle_finish)
    self.thread.start()
274
+
275
def handle_progress(self, progress) -> None:
    """Update the progress bar and, when available, the text log.

    Args:
        progress: Indexable pair ``(value, text)``. A non-``None`` value is
            forwarded to the progress bar. A ``None`` text clears the text
            browser; any other text is appended to it.
    """
    value, text = progress[0], progress[1]
    if value is not None:
        self.progressBarSet(value)

    # NOTE(review): neither __init__ nor the accompanying .ui form defines a
    # 'textBrowser' widget, so the log is skipped when the attribute is
    # absent rather than raising AttributeError. Confirm whether the base
    # widget is expected to provide it.
    browser = getattr(self, "textBrowser", None)
    if browser is None:
        return
    if text is None:
        browser.setText("")
    else:
        browser.insertPlainText(text)
284
+
285
def handle_result(self, result):
    """Send the search result on the output as a data table.

    ``None`` is sent when *result* is ``None`` or empty; otherwise the result
    is converted with ``convert.convert_json_implicite_to_data_table``.
    """
    table = None
    if result is not None and len(result) != 0:
        table = convert.convert_json_implicite_to_data_table(result)
    self.Outputs.data.send(table)
291
+
292
def handle_finish(self):
    """Close the progress bar; connected to the worker thread's ``finish`` signal."""
    self.progressBarFinished()
294
+
295
def post_initialized(self):
    """Do nothing; this widget needs no post-initialisation step."""
297
+
298
+
299
if __name__ == "__main__":
    # Stand-alone launch of the widget, outside the Orange canvas.
    app = QApplication(sys.argv)
    my_widget = WebSearch()
    my_widget.show()

    # Use exec() when the Qt binding offers it, the older exec_() otherwise.
    event_loop = app.exec if hasattr(app, "exec") else app.exec_
    sys.exit(event_loop())
@@ -0,0 +1,131 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <ui version="4.0">
3
+ <class>Form</class>
4
+ <widget class="QWidget" name="Form">
5
+ <property name="geometry">
6
+ <rect>
7
+ <x>0</x>
8
+ <y>0</y>
9
+ <width>477</width>
10
+ <height>409</height>
11
+ </rect>
12
+ </property>
13
+ <property name="windowTitle">
14
+ <string>Form</string>
15
+ </property>
16
+ <widget class="QGroupBox" name="groupBox">
17
+ <property name="geometry">
18
+ <rect>
19
+ <x>20</x>
20
+ <y>40</y>
21
+ <width>431</width>
22
+ <height>71</height>
23
+ </rect>
24
+ </property>
25
+ <property name="title">
26
+ <string>Explanation</string>
27
+ </property>
28
+ <widget class="QLabel" name="Description">
29
+ <property name="geometry">
30
+ <rect>
31
+ <x>10</x>
32
+ <y>10</y>
33
+ <width>411</width>
34
+ <height>51</height>
35
+ </rect>
36
+ </property>
37
+ <property name="text">
38
+ <string>This widget takes a query input and return a list of web site related to it .</string>
39
+ </property>
40
+ <property name="textFormat">
41
+ <enum>Qt::AutoText</enum>
42
+ </property>
43
+ <property name="alignment">
44
+ <set>Qt::AlignLeading|Qt::AlignLeft|Qt::AlignVCenter</set>
45
+ </property>
46
+ <property name="wordWrap">
47
+ <bool>true</bool>
48
+ </property>
49
+ </widget>
50
+ </widget>
51
+ <widget class="QPushButton" name="pushButton_send">
52
+ <property name="enabled">
53
+ <bool>true</bool>
54
+ </property>
55
+ <property name="geometry">
56
+ <rect>
57
+ <x>149</x>
58
+ <y>344</y>
59
+ <width>301</width>
60
+ <height>31</height>
61
+ </rect>
62
+ </property>
63
+ <property name="text">
64
+ <string>Run</string>
65
+ </property>
66
+ </widget>
67
+ <widget class="QCheckBox" name="checkBox_send">
68
+ <property name="enabled">
69
+ <bool>false</bool>
70
+ </property>
71
+ <property name="geometry">
72
+ <rect>
73
+ <x>20</x>
74
+ <y>350</y>
75
+ <width>131</width>
76
+ <height>16</height>
77
+ </rect>
78
+ </property>
79
+ <property name="text">
80
+ <string>Auto send data</string>
81
+ </property>
82
+ </widget>
83
+ <widget class="QGroupBox" name="groupBox_3">
84
+ <property name="geometry">
85
+ <rect>
86
+ <x>20</x>
87
+ <y>150</y>
88
+ <width>431</width>
89
+ <height>161</height>
90
+ </rect>
91
+ </property>
92
+ <property name="title">
93
+ <string>Column selection</string>
94
+ </property>
95
+ <widget class="QLabel" name="Description_4">
96
+ <property name="geometry">
97
+ <rect>
98
+ <x>10</x>
99
+ <y>30</y>
100
+ <width>411</width>
101
+ <height>61</height>
102
+ </rect>
103
+ </property>
104
+ <property name="text">
105
+ <string/>
106
+ </property>
107
+ <property name="textFormat">
108
+ <enum>Qt::AutoText</enum>
109
+ </property>
110
+ <property name="alignment">
111
+ <set>Qt::AlignLeading|Qt::AlignLeft|Qt::AlignVCenter</set>
112
+ </property>
113
+ <property name="wordWrap">
114
+ <bool>true</bool>
115
+ </property>
116
+ </widget>
117
+ <widget class="QWidget" name="placeholder" native="true">
118
+ <property name="geometry">
119
+ <rect>
120
+ <x>0</x>
121
+ <y>20</y>
122
+ <width>431</width>
123
+ <height>141</height>
124
+ </rect>
125
+ </property>
126
+ </widget>
127
+ </widget>
128
+ </widget>
129
+ <resources/>
130
+ <connections/>
131
+ </ui>
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
2
2
 
3
3
  # Configuration
4
4
  NAME = "io4it"
5
- VERSION = "3.0.2.2"
5
+ VERSION = "3.0.3.1"
6
6
 
7
7
  INSTALL_REQUIRES = [
8
8
  "torchvision==0.23.0",
@@ -1,45 +0,0 @@
1
- {
2
- "domain_context": {
3
- "prix": ["cours", "cotation", "tarif", "valeur", "coût"],
4
- "marché": ["négoce", "commerce", "trading", "bourse", "mondial", "international"],
5
- "économie": ["économique", "fiscal", "finance", "budget"],
6
- "bourse": ["action", "indice", "trading", "investissement"],
7
-
8
- "porc": ["porcin", "viande porcine", "cochon", "élevage"],
9
- "agriculture": ["agricole", "exploitation", "récolte", "production"],
10
- "élevage": ["bétail", "ferme", "cheptel"],
11
-
12
- "produits mer": ["coquillage", "crustacé", "mollusque", "pêche", "aquaculture"],
13
- "saint jacques": ["coquille", "pectinidé", "noix", "scallop"],
14
- "coquille": ["bivalve", "mollusque", "pêche"],
15
-
16
- "actualités": ["news", "information", "récent", "nouveau"],
17
- "tendances": ["évolution", "dynamique", "orientation", "conjoncture"],
18
- "perspectives": ["prévisions", "anticipations", "outlook", "horizon"],
19
- "conjoncture": ["situation", "contexte", "analyse", "bilan"],
20
-
21
- "scientifique": ["espèce", "taxonomie", "biologie", "distribution", "habitat"],
22
- "pêche": ["fishery", "capture", "exploitation", "stock"],
23
-
24
- "ia": ["intelligence artificielle", "machine learning", "deep learning"],
25
- "technologie": ["tech", "digital", "numérique", "innovation"],
26
- "logiciel": ["software", "application", "programme"],
27
-
28
- "article": ["publication", "presse", "média", "journal"],
29
- "recherche": ["étude", "scientifique", "analyse", "rapport"],
30
- "étude": ["recherche", "analyse", "données", "résultat"],
31
-
32
- "santé": ["médical", "sanitaire", "clinique", "thérapie"],
33
- "maladie": ["pathologie", "syndrome", "infection"],
34
-
35
- "politique": ["gouvernement", "législatif", "élection", "pouvoir"],
36
- "loi": ["législation", "règlement", "juridique", "légal"]
37
- },
38
-
39
- "stop_words": [
40
- "le", "la", "les", "un", "une", "des", "du", "de", "et", "en",
41
- "pour", "dans", "avec", "sur", "par", "ce", "cette", "ces",
42
- "ou", "où", "qui", "que", "quoi", "dont", "quel",
43
- "informations", "disponibles", "concernant", "relatif"
44
- ]
45
- }