pomera-ai-commander 1.1.1 → 1.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (213) hide show
  1. package/LICENSE +21 -21
  2. package/README.md +105 -680
  3. package/bin/pomera-ai-commander.js +62 -62
  4. package/core/__init__.py +65 -65
  5. package/core/app_context.py +482 -482
  6. package/core/async_text_processor.py +421 -421
  7. package/core/backup_manager.py +655 -655
  8. package/core/backup_recovery_manager.py +1199 -1033
  9. package/core/content_hash_cache.py +508 -508
  10. package/core/context_menu.py +313 -313
  11. package/core/data_directory.py +549 -0
  12. package/core/data_validator.py +1066 -1066
  13. package/core/database_connection_manager.py +744 -744
  14. package/core/database_curl_settings_manager.py +608 -608
  15. package/core/database_promera_ai_settings_manager.py +446 -446
  16. package/core/database_schema.py +411 -411
  17. package/core/database_schema_manager.py +395 -395
  18. package/core/database_settings_manager.py +1507 -1507
  19. package/core/database_settings_manager_interface.py +456 -456
  20. package/core/dialog_manager.py +734 -734
  21. package/core/diff_utils.py +239 -0
  22. package/core/efficient_line_numbers.py +540 -510
  23. package/core/error_handler.py +746 -746
  24. package/core/error_service.py +431 -431
  25. package/core/event_consolidator.py +511 -511
  26. package/core/mcp/__init__.py +43 -43
  27. package/core/mcp/find_replace_diff.py +334 -0
  28. package/core/mcp/protocol.py +288 -288
  29. package/core/mcp/schema.py +251 -251
  30. package/core/mcp/server_stdio.py +299 -299
  31. package/core/mcp/tool_registry.py +2699 -2345
  32. package/core/memento.py +275 -0
  33. package/core/memory_efficient_text_widget.py +711 -711
  34. package/core/migration_manager.py +914 -914
  35. package/core/migration_test_suite.py +1085 -1085
  36. package/core/migration_validator.py +1143 -1143
  37. package/core/optimized_find_replace.py +714 -714
  38. package/core/optimized_pattern_engine.py +424 -424
  39. package/core/optimized_search_highlighter.py +552 -552
  40. package/core/performance_monitor.py +674 -674
  41. package/core/persistence_manager.py +712 -712
  42. package/core/progressive_stats_calculator.py +632 -632
  43. package/core/regex_pattern_cache.py +529 -529
  44. package/core/regex_pattern_library.py +350 -350
  45. package/core/search_operation_manager.py +434 -434
  46. package/core/settings_defaults_registry.py +1087 -1087
  47. package/core/settings_integrity_validator.py +1111 -1111
  48. package/core/settings_serializer.py +557 -557
  49. package/core/settings_validator.py +1823 -1823
  50. package/core/smart_stats_calculator.py +709 -709
  51. package/core/statistics_update_manager.py +619 -619
  52. package/core/stats_config_manager.py +858 -858
  53. package/core/streaming_text_handler.py +723 -723
  54. package/core/task_scheduler.py +596 -596
  55. package/core/update_pattern_library.py +168 -168
  56. package/core/visibility_monitor.py +596 -596
  57. package/core/widget_cache.py +498 -498
  58. package/mcp.json +51 -61
  59. package/migrate_data.py +127 -0
  60. package/package.json +64 -57
  61. package/pomera.py +7883 -7482
  62. package/pomera_mcp_server.py +183 -144
  63. package/requirements.txt +33 -0
  64. package/scripts/Dockerfile.alpine +43 -0
  65. package/scripts/Dockerfile.gui-test +54 -0
  66. package/scripts/Dockerfile.linux +43 -0
  67. package/scripts/Dockerfile.test-linux +80 -0
  68. package/scripts/Dockerfile.ubuntu +39 -0
  69. package/scripts/README.md +53 -0
  70. package/scripts/build-all.bat +113 -0
  71. package/scripts/build-docker.bat +53 -0
  72. package/scripts/build-docker.sh +55 -0
  73. package/scripts/build-optimized.bat +101 -0
  74. package/scripts/build.sh +78 -0
  75. package/scripts/docker-compose.test.yml +27 -0
  76. package/scripts/docker-compose.yml +32 -0
  77. package/scripts/postinstall.js +62 -0
  78. package/scripts/requirements-minimal.txt +33 -0
  79. package/scripts/test-linux-simple.bat +28 -0
  80. package/scripts/validate-release-workflow.py +450 -0
  81. package/tools/__init__.py +4 -4
  82. package/tools/ai_tools.py +2891 -2891
  83. package/tools/ascii_art_generator.py +352 -352
  84. package/tools/base64_tools.py +183 -183
  85. package/tools/base_tool.py +511 -511
  86. package/tools/case_tool.py +308 -308
  87. package/tools/column_tools.py +395 -395
  88. package/tools/cron_tool.py +884 -884
  89. package/tools/curl_history.py +600 -600
  90. package/tools/curl_processor.py +1207 -1207
  91. package/tools/curl_settings.py +502 -502
  92. package/tools/curl_tool.py +5467 -5467
  93. package/tools/diff_viewer.py +1817 -1072
  94. package/tools/email_extraction_tool.py +248 -248
  95. package/tools/email_header_analyzer.py +425 -425
  96. package/tools/extraction_tools.py +250 -250
  97. package/tools/find_replace.py +2289 -1750
  98. package/tools/folder_file_reporter.py +1463 -1463
  99. package/tools/folder_file_reporter_adapter.py +480 -480
  100. package/tools/generator_tools.py +1216 -1216
  101. package/tools/hash_generator.py +255 -255
  102. package/tools/html_tool.py +656 -656
  103. package/tools/jsonxml_tool.py +729 -729
  104. package/tools/line_tools.py +419 -419
  105. package/tools/markdown_tools.py +561 -561
  106. package/tools/mcp_widget.py +1417 -1417
  107. package/tools/notes_widget.py +978 -973
  108. package/tools/number_base_converter.py +372 -372
  109. package/tools/regex_extractor.py +571 -571
  110. package/tools/slug_generator.py +310 -310
  111. package/tools/sorter_tools.py +458 -458
  112. package/tools/string_escape_tool.py +392 -392
  113. package/tools/text_statistics_tool.py +365 -365
  114. package/tools/text_wrapper.py +430 -430
  115. package/tools/timestamp_converter.py +421 -421
  116. package/tools/tool_loader.py +710 -710
  117. package/tools/translator_tools.py +522 -522
  118. package/tools/url_link_extractor.py +261 -261
  119. package/tools/url_parser.py +204 -204
  120. package/tools/whitespace_tools.py +355 -355
  121. package/tools/word_frequency_counter.py +146 -146
  122. package/core/__pycache__/__init__.cpython-313.pyc +0 -0
  123. package/core/__pycache__/app_context.cpython-313.pyc +0 -0
  124. package/core/__pycache__/async_text_processor.cpython-313.pyc +0 -0
  125. package/core/__pycache__/backup_manager.cpython-313.pyc +0 -0
  126. package/core/__pycache__/backup_recovery_manager.cpython-313.pyc +0 -0
  127. package/core/__pycache__/content_hash_cache.cpython-313.pyc +0 -0
  128. package/core/__pycache__/context_menu.cpython-313.pyc +0 -0
  129. package/core/__pycache__/data_validator.cpython-313.pyc +0 -0
  130. package/core/__pycache__/database_connection_manager.cpython-313.pyc +0 -0
  131. package/core/__pycache__/database_curl_settings_manager.cpython-313.pyc +0 -0
  132. package/core/__pycache__/database_promera_ai_settings_manager.cpython-313.pyc +0 -0
  133. package/core/__pycache__/database_schema.cpython-313.pyc +0 -0
  134. package/core/__pycache__/database_schema_manager.cpython-313.pyc +0 -0
  135. package/core/__pycache__/database_settings_manager.cpython-313.pyc +0 -0
  136. package/core/__pycache__/database_settings_manager_interface.cpython-313.pyc +0 -0
  137. package/core/__pycache__/dialog_manager.cpython-313.pyc +0 -0
  138. package/core/__pycache__/efficient_line_numbers.cpython-313.pyc +0 -0
  139. package/core/__pycache__/error_handler.cpython-313.pyc +0 -0
  140. package/core/__pycache__/error_service.cpython-313.pyc +0 -0
  141. package/core/__pycache__/event_consolidator.cpython-313.pyc +0 -0
  142. package/core/__pycache__/memory_efficient_text_widget.cpython-313.pyc +0 -0
  143. package/core/__pycache__/migration_manager.cpython-313.pyc +0 -0
  144. package/core/__pycache__/migration_test_suite.cpython-313.pyc +0 -0
  145. package/core/__pycache__/migration_validator.cpython-313.pyc +0 -0
  146. package/core/__pycache__/optimized_find_replace.cpython-313.pyc +0 -0
  147. package/core/__pycache__/optimized_pattern_engine.cpython-313.pyc +0 -0
  148. package/core/__pycache__/optimized_search_highlighter.cpython-313.pyc +0 -0
  149. package/core/__pycache__/performance_monitor.cpython-313.pyc +0 -0
  150. package/core/__pycache__/persistence_manager.cpython-313.pyc +0 -0
  151. package/core/__pycache__/progressive_stats_calculator.cpython-313.pyc +0 -0
  152. package/core/__pycache__/regex_pattern_cache.cpython-313.pyc +0 -0
  153. package/core/__pycache__/regex_pattern_library.cpython-313.pyc +0 -0
  154. package/core/__pycache__/search_operation_manager.cpython-313.pyc +0 -0
  155. package/core/__pycache__/settings_defaults_registry.cpython-313.pyc +0 -0
  156. package/core/__pycache__/settings_integrity_validator.cpython-313.pyc +0 -0
  157. package/core/__pycache__/settings_serializer.cpython-313.pyc +0 -0
  158. package/core/__pycache__/settings_validator.cpython-313.pyc +0 -0
  159. package/core/__pycache__/smart_stats_calculator.cpython-313.pyc +0 -0
  160. package/core/__pycache__/statistics_update_manager.cpython-313.pyc +0 -0
  161. package/core/__pycache__/stats_config_manager.cpython-313.pyc +0 -0
  162. package/core/__pycache__/streaming_text_handler.cpython-313.pyc +0 -0
  163. package/core/__pycache__/task_scheduler.cpython-313.pyc +0 -0
  164. package/core/__pycache__/visibility_monitor.cpython-313.pyc +0 -0
  165. package/core/__pycache__/widget_cache.cpython-313.pyc +0 -0
  166. package/core/mcp/__pycache__/__init__.cpython-313.pyc +0 -0
  167. package/core/mcp/__pycache__/protocol.cpython-313.pyc +0 -0
  168. package/core/mcp/__pycache__/schema.cpython-313.pyc +0 -0
  169. package/core/mcp/__pycache__/server_stdio.cpython-313.pyc +0 -0
  170. package/core/mcp/__pycache__/tool_registry.cpython-313.pyc +0 -0
  171. package/tools/__pycache__/__init__.cpython-313.pyc +0 -0
  172. package/tools/__pycache__/ai_tools.cpython-313.pyc +0 -0
  173. package/tools/__pycache__/ascii_art_generator.cpython-313.pyc +0 -0
  174. package/tools/__pycache__/base64_tools.cpython-313.pyc +0 -0
  175. package/tools/__pycache__/base_tool.cpython-313.pyc +0 -0
  176. package/tools/__pycache__/case_tool.cpython-313.pyc +0 -0
  177. package/tools/__pycache__/column_tools.cpython-313.pyc +0 -0
  178. package/tools/__pycache__/cron_tool.cpython-313.pyc +0 -0
  179. package/tools/__pycache__/curl_history.cpython-313.pyc +0 -0
  180. package/tools/__pycache__/curl_processor.cpython-313.pyc +0 -0
  181. package/tools/__pycache__/curl_settings.cpython-313.pyc +0 -0
  182. package/tools/__pycache__/curl_tool.cpython-313.pyc +0 -0
  183. package/tools/__pycache__/diff_viewer.cpython-313.pyc +0 -0
  184. package/tools/__pycache__/email_extraction_tool.cpython-313.pyc +0 -0
  185. package/tools/__pycache__/email_header_analyzer.cpython-313.pyc +0 -0
  186. package/tools/__pycache__/extraction_tools.cpython-313.pyc +0 -0
  187. package/tools/__pycache__/find_replace.cpython-313.pyc +0 -0
  188. package/tools/__pycache__/folder_file_reporter.cpython-313.pyc +0 -0
  189. package/tools/__pycache__/folder_file_reporter_adapter.cpython-313.pyc +0 -0
  190. package/tools/__pycache__/generator_tools.cpython-313.pyc +0 -0
  191. package/tools/__pycache__/hash_generator.cpython-313.pyc +0 -0
  192. package/tools/__pycache__/html_tool.cpython-313.pyc +0 -0
  193. package/tools/__pycache__/huggingface_helper.cpython-313.pyc +0 -0
  194. package/tools/__pycache__/jsonxml_tool.cpython-313.pyc +0 -0
  195. package/tools/__pycache__/line_tools.cpython-313.pyc +0 -0
  196. package/tools/__pycache__/list_comparator.cpython-313.pyc +0 -0
  197. package/tools/__pycache__/markdown_tools.cpython-313.pyc +0 -0
  198. package/tools/__pycache__/mcp_widget.cpython-313.pyc +0 -0
  199. package/tools/__pycache__/notes_widget.cpython-313.pyc +0 -0
  200. package/tools/__pycache__/number_base_converter.cpython-313.pyc +0 -0
  201. package/tools/__pycache__/regex_extractor.cpython-313.pyc +0 -0
  202. package/tools/__pycache__/slug_generator.cpython-313.pyc +0 -0
  203. package/tools/__pycache__/sorter_tools.cpython-313.pyc +0 -0
  204. package/tools/__pycache__/string_escape_tool.cpython-313.pyc +0 -0
  205. package/tools/__pycache__/text_statistics_tool.cpython-313.pyc +0 -0
  206. package/tools/__pycache__/text_wrapper.cpython-313.pyc +0 -0
  207. package/tools/__pycache__/timestamp_converter.cpython-313.pyc +0 -0
  208. package/tools/__pycache__/tool_loader.cpython-313.pyc +0 -0
  209. package/tools/__pycache__/translator_tools.cpython-313.pyc +0 -0
  210. package/tools/__pycache__/url_link_extractor.cpython-313.pyc +0 -0
  211. package/tools/__pycache__/url_parser.cpython-313.pyc +0 -0
  212. package/tools/__pycache__/whitespace_tools.cpython-313.pyc +0 -0
  213. package/tools/__pycache__/word_frequency_counter.cpython-313.pyc +0 -0
@@ -1,262 +1,262 @@
1
- """
2
- URL and Link Extractor Module - URL extraction utility
3
-
4
- This module provides comprehensive URL and link extraction functionality with UI components
5
- for the Promera AI Commander application.
6
- """
7
-
8
- import tkinter as tk
9
- from tkinter import ttk
10
- import re
11
-
12
-
13
- class URLLinkExtractorProcessor:
14
- """URL and link extractor processor with multiple extraction modes and filtering."""
15
-
16
- @staticmethod
17
- def extract_urls(text, extract_href=False, extract_https=False, extract_any_protocol=False, extract_markdown=False, filter_text=""):
18
- """Extracts URLs and links from text based on selected options."""
19
- urls = set()
20
-
21
- # Extract from HTML href attributes
22
- if extract_href:
23
- href_pattern = r'href=["\']([^"\']+)["\']'
24
- urls.update(re.findall(href_pattern, text))
25
-
26
- # Extract http(s):// URLs
27
- if extract_https:
28
- https_pattern = r'https?://[^\s<>"{}|\\^`\[\]]+'
29
- urls.update(re.findall(https_pattern, text))
30
-
31
- # Extract any protocol:// URLs
32
- if extract_any_protocol:
33
- protocol_pattern = r'\b[a-zA-Z][a-zA-Z0-9+.-]*://[^\s<>"{}|\\^`\[\]]+'
34
- urls.update(re.findall(protocol_pattern, text))
35
-
36
- # Extract markdown links [text](url)
37
- if extract_markdown:
38
- markdown_pattern = r'\[([^\]]+)\]\(([^)]+)\)'
39
- markdown_urls = re.findall(markdown_pattern, text)
40
- urls.update([url for _, url in markdown_urls])
41
-
42
- # If no options selected, extract all
43
- if not any([extract_href, extract_https, extract_any_protocol, extract_markdown]):
44
- # Extract all types
45
- href_pattern = r'href=["\']([^"\']+)["\']'
46
- urls.update(re.findall(href_pattern, text))
47
-
48
- protocol_pattern = r'\b[a-zA-Z][a-zA-Z0-9+.-]*://[^\s<>"{}|\\^`\[\]]+'
49
- urls.update(re.findall(protocol_pattern, text))
50
-
51
- markdown_pattern = r'\[([^\]]+)\]\(([^)]+)\)'
52
- markdown_urls = re.findall(markdown_pattern, text)
53
- urls.update([url for _, url in markdown_urls])
54
-
55
- # Apply filter if provided
56
- if filter_text.strip():
57
- filter_lower = filter_text.lower()
58
- urls = {url for url in urls if filter_lower in url.lower()}
59
-
60
- return '\n'.join(sorted(urls)) if urls else "No URLs found."
61
-
62
- @staticmethod
63
- def process_text(input_text, settings):
64
- """Process text using the current settings."""
65
- return URLLinkExtractorProcessor.extract_urls(
66
- input_text,
67
- settings.get("extract_href", False),
68
- settings.get("extract_https", False),
69
- settings.get("extract_any_protocol", False),
70
- settings.get("extract_markdown", False),
71
- settings.get("filter_text", "")
72
- )
73
-
74
-
75
- class URLLinkExtractorUI:
76
- """UI components for the URL and Link Extractor."""
77
-
78
- def __init__(self, parent, settings, on_setting_change_callback=None, apply_tool_callback=None):
79
- """
80
- Initialize the URL and Link Extractor UI.
81
-
82
- Args:
83
- parent: Parent widget
84
- settings: Dictionary containing tool settings
85
- on_setting_change_callback: Callback function for setting changes
86
- apply_tool_callback: Callback function for applying the tool
87
- """
88
- self.parent = parent
89
- self.settings = settings
90
- self.on_setting_change_callback = on_setting_change_callback
91
- self.apply_tool_callback = apply_tool_callback
92
-
93
- # Initialize UI variables
94
- self.url_extract_href_var = tk.BooleanVar(value=settings.get("extract_href", False))
95
- self.url_extract_https_var = tk.BooleanVar(value=settings.get("extract_https", False))
96
- self.url_extract_any_protocol_var = tk.BooleanVar(value=settings.get("extract_any_protocol", False))
97
- self.url_extract_markdown_var = tk.BooleanVar(value=settings.get("extract_markdown", False))
98
- self.url_filter_var = tk.StringVar(value=settings.get("filter_text", ""))
99
-
100
- self.create_widgets()
101
-
102
- def create_widgets(self):
103
- """Creates the UI widgets for the URL and Link Extractor."""
104
- # Checkboxes for different extraction modes
105
- ttk.Checkbutton(
106
- self.parent,
107
- text='href=""',
108
- variable=self.url_extract_href_var,
109
- command=self._on_setting_change
110
- ).pack(side=tk.LEFT, padx=5)
111
-
112
- ttk.Checkbutton(
113
- self.parent,
114
- text="http(s)://",
115
- variable=self.url_extract_https_var,
116
- command=self._on_setting_change
117
- ).pack(side=tk.LEFT, padx=5)
118
-
119
- ttk.Checkbutton(
120
- self.parent,
121
- text="any protocol ://",
122
- variable=self.url_extract_any_protocol_var,
123
- command=self._on_setting_change
124
- ).pack(side=tk.LEFT, padx=5)
125
-
126
- ttk.Checkbutton(
127
- self.parent,
128
- text="markdown []()",
129
- variable=self.url_extract_markdown_var,
130
- command=self._on_setting_change
131
- ).pack(side=tk.LEFT, padx=5)
132
-
133
- # Filter field
134
- ttk.Label(self.parent, text="Filter:").pack(side=tk.LEFT, padx=(10, 2))
135
- filter_entry = ttk.Entry(self.parent, textvariable=self.url_filter_var, width=15)
136
- filter_entry.pack(side=tk.LEFT, padx=2)
137
- self.url_filter_var.trace_add("write", self._on_filter_change)
138
-
139
- # Extract button
140
- if self.apply_tool_callback:
141
- ttk.Button(
142
- self.parent,
143
- text="Extract",
144
- command=self.apply_tool_callback
145
- ).pack(side=tk.LEFT, padx=10)
146
-
147
- def _on_setting_change(self):
148
- """Handle setting changes."""
149
- if self.on_setting_change_callback:
150
- self.on_setting_change_callback()
151
-
152
- def _on_filter_change(self, *args):
153
- """Handle filter text changes."""
154
- if self.on_setting_change_callback:
155
- self.on_setting_change_callback()
156
-
157
- def get_current_settings(self):
158
- """Get the current settings from the UI."""
159
- return {
160
- "extract_href": self.url_extract_href_var.get(),
161
- "extract_https": self.url_extract_https_var.get(),
162
- "extract_any_protocol": self.url_extract_any_protocol_var.get(),
163
- "extract_markdown": self.url_extract_markdown_var.get(),
164
- "filter_text": self.url_filter_var.get()
165
- }
166
-
167
- def update_settings(self, settings):
168
- """Update the UI with new settings."""
169
- self.url_extract_href_var.set(settings.get("extract_href", False))
170
- self.url_extract_https_var.set(settings.get("extract_https", False))
171
- self.url_extract_any_protocol_var.set(settings.get("extract_any_protocol", False))
172
- self.url_extract_markdown_var.set(settings.get("extract_markdown", False))
173
- self.url_filter_var.set(settings.get("filter_text", ""))
174
-
175
-
176
- class URLLinkExtractor:
177
- """Main URL and Link Extractor class that combines processor and UI functionality."""
178
-
179
- def __init__(self):
180
- self.processor = URLLinkExtractorProcessor()
181
- self.ui = None
182
-
183
- def create_ui(self, parent, settings, on_setting_change_callback=None, apply_tool_callback=None):
184
- """Create and return the UI component."""
185
- self.ui = URLLinkExtractorUI(parent, settings, on_setting_change_callback, apply_tool_callback)
186
- return self.ui
187
-
188
- def process_text(self, input_text, settings):
189
- """Process text using the current settings."""
190
- return self.processor.process_text(input_text, settings)
191
-
192
- def get_default_settings(self):
193
- """Get default settings for the URL and Link Extractor."""
194
- return {
195
- "extract_href": False,
196
- "extract_https": False,
197
- "extract_any_protocol": False,
198
- "extract_markdown": False,
199
- "filter_text": ""
200
- }
201
-
202
-
203
- # Convenience functions for backward compatibility
204
- def extract_urls(text, extract_href=False, extract_https=False, extract_any_protocol=False, extract_markdown=False, filter_text=""):
205
- """Extract URLs with specified options."""
206
- return URLLinkExtractorProcessor.extract_urls(
207
- text, extract_href, extract_https, extract_any_protocol, extract_markdown, filter_text
208
- )
209
-
210
-
211
- def process_url_extraction(input_text, settings):
212
- """Process URL extraction with the specified settings."""
213
- return URLLinkExtractorProcessor.process_text(input_text, settings)
214
-
215
-
216
- # BaseTool-compatible wrapper
217
- try:
218
- from tools.base_tool import BaseTool
219
- from typing import Dict, Any
220
- import tkinter as tk
221
- from tkinter import ttk
222
-
223
- class URLLinkExtractorV2(BaseTool):
224
- """
225
- BaseTool-compatible version of URLLinkExtractor.
226
- """
227
-
228
- TOOL_NAME = "URL and Link Extractor"
229
- TOOL_DESCRIPTION = "Extract URLs and links from text"
230
- TOOL_VERSION = "2.0.0"
231
-
232
- def process_text(self, input_text: str, settings: Dict[str, Any]) -> str:
233
- """Extract URLs from text."""
234
- return URLLinkExtractorProcessor.extract_urls(
235
- input_text,
236
- settings.get("extract_href", False),
237
- settings.get("extract_https", True),
238
- settings.get("extract_any_protocol", False),
239
- settings.get("extract_markdown", False),
240
- settings.get("filter_text", "")
241
- )
242
-
243
- def get_default_settings(self) -> Dict[str, Any]:
244
- return {
245
- "extract_href": False,
246
- "extract_https": True,
247
- "extract_any_protocol": False,
248
- "extract_markdown": False,
249
- "filter_text": ""
250
- }
251
-
252
- def create_ui(self, parent: tk.Widget, settings: Dict[str, Any],
253
- on_change=None, on_apply=None) -> tk.Widget:
254
- """Create UI for URL Link Extractor."""
255
- frame = ttk.Frame(parent)
256
- ttk.Label(frame, text="Extract URLs and links").pack(side=tk.LEFT, padx=5)
257
- if on_apply:
258
- ttk.Button(frame, text="Extract", command=on_apply).pack(side=tk.LEFT, padx=5)
259
- return frame
260
-
261
- except ImportError:
1
+ """
2
+ URL and Link Extractor Module - URL extraction utility
3
+
4
+ This module provides comprehensive URL and link extraction functionality with UI components
5
+ for the Promera AI Commander application.
6
+ """
7
+
8
+ import tkinter as tk
9
+ from tkinter import ttk
10
+ import re
11
+
12
+
13
+ class URLLinkExtractorProcessor:
14
+ """URL and link extractor processor with multiple extraction modes and filtering."""
15
+
16
+ @staticmethod
17
+ def extract_urls(text, extract_href=False, extract_https=False, extract_any_protocol=False, extract_markdown=False, filter_text=""):
18
+ """Extracts URLs and links from text based on selected options."""
19
+ urls = set()
20
+
21
+ # Extract from HTML href attributes
22
+ if extract_href:
23
+ href_pattern = r'href=["\']([^"\']+)["\']'
24
+ urls.update(re.findall(href_pattern, text))
25
+
26
+ # Extract http(s):// URLs
27
+ if extract_https:
28
+ https_pattern = r'https?://[^\s<>"{}|\\^`\[\]]+'
29
+ urls.update(re.findall(https_pattern, text))
30
+
31
+ # Extract any protocol:// URLs
32
+ if extract_any_protocol:
33
+ protocol_pattern = r'\b[a-zA-Z][a-zA-Z0-9+.-]*://[^\s<>"{}|\\^`\[\]]+'
34
+ urls.update(re.findall(protocol_pattern, text))
35
+
36
+ # Extract markdown links [text](url)
37
+ if extract_markdown:
38
+ markdown_pattern = r'\[([^\]]+)\]\(([^)]+)\)'
39
+ markdown_urls = re.findall(markdown_pattern, text)
40
+ urls.update([url for _, url in markdown_urls])
41
+
42
+ # If no options selected, extract all
43
+ if not any([extract_href, extract_https, extract_any_protocol, extract_markdown]):
44
+ # Extract all types
45
+ href_pattern = r'href=["\']([^"\']+)["\']'
46
+ urls.update(re.findall(href_pattern, text))
47
+
48
+ protocol_pattern = r'\b[a-zA-Z][a-zA-Z0-9+.-]*://[^\s<>"{}|\\^`\[\]]+'
49
+ urls.update(re.findall(protocol_pattern, text))
50
+
51
+ markdown_pattern = r'\[([^\]]+)\]\(([^)]+)\)'
52
+ markdown_urls = re.findall(markdown_pattern, text)
53
+ urls.update([url for _, url in markdown_urls])
54
+
55
+ # Apply filter if provided
56
+ if filter_text.strip():
57
+ filter_lower = filter_text.lower()
58
+ urls = {url for url in urls if filter_lower in url.lower()}
59
+
60
+ return '\n'.join(sorted(urls)) if urls else "No URLs found."
61
+
62
+ @staticmethod
63
+ def process_text(input_text, settings):
64
+ """Process text using the current settings."""
65
+ return URLLinkExtractorProcessor.extract_urls(
66
+ input_text,
67
+ settings.get("extract_href", False),
68
+ settings.get("extract_https", False),
69
+ settings.get("extract_any_protocol", False),
70
+ settings.get("extract_markdown", False),
71
+ settings.get("filter_text", "")
72
+ )
73
+
74
+
75
+ class URLLinkExtractorUI:
76
+ """UI components for the URL and Link Extractor."""
77
+
78
+ def __init__(self, parent, settings, on_setting_change_callback=None, apply_tool_callback=None):
79
+ """
80
+ Initialize the URL and Link Extractor UI.
81
+
82
+ Args:
83
+ parent: Parent widget
84
+ settings: Dictionary containing tool settings
85
+ on_setting_change_callback: Callback function for setting changes
86
+ apply_tool_callback: Callback function for applying the tool
87
+ """
88
+ self.parent = parent
89
+ self.settings = settings
90
+ self.on_setting_change_callback = on_setting_change_callback
91
+ self.apply_tool_callback = apply_tool_callback
92
+
93
+ # Initialize UI variables
94
+ self.url_extract_href_var = tk.BooleanVar(value=settings.get("extract_href", False))
95
+ self.url_extract_https_var = tk.BooleanVar(value=settings.get("extract_https", False))
96
+ self.url_extract_any_protocol_var = tk.BooleanVar(value=settings.get("extract_any_protocol", False))
97
+ self.url_extract_markdown_var = tk.BooleanVar(value=settings.get("extract_markdown", False))
98
+ self.url_filter_var = tk.StringVar(value=settings.get("filter_text", ""))
99
+
100
+ self.create_widgets()
101
+
102
+ def create_widgets(self):
103
+ """Creates the UI widgets for the URL and Link Extractor."""
104
+ # Checkboxes for different extraction modes
105
+ ttk.Checkbutton(
106
+ self.parent,
107
+ text='href=""',
108
+ variable=self.url_extract_href_var,
109
+ command=self._on_setting_change
110
+ ).pack(side=tk.LEFT, padx=5)
111
+
112
+ ttk.Checkbutton(
113
+ self.parent,
114
+ text="http(s)://",
115
+ variable=self.url_extract_https_var,
116
+ command=self._on_setting_change
117
+ ).pack(side=tk.LEFT, padx=5)
118
+
119
+ ttk.Checkbutton(
120
+ self.parent,
121
+ text="any protocol ://",
122
+ variable=self.url_extract_any_protocol_var,
123
+ command=self._on_setting_change
124
+ ).pack(side=tk.LEFT, padx=5)
125
+
126
+ ttk.Checkbutton(
127
+ self.parent,
128
+ text="markdown []()",
129
+ variable=self.url_extract_markdown_var,
130
+ command=self._on_setting_change
131
+ ).pack(side=tk.LEFT, padx=5)
132
+
133
+ # Filter field
134
+ ttk.Label(self.parent, text="Filter:").pack(side=tk.LEFT, padx=(10, 2))
135
+ filter_entry = ttk.Entry(self.parent, textvariable=self.url_filter_var, width=15)
136
+ filter_entry.pack(side=tk.LEFT, padx=2)
137
+ self.url_filter_var.trace_add("write", self._on_filter_change)
138
+
139
+ # Extract button
140
+ if self.apply_tool_callback:
141
+ ttk.Button(
142
+ self.parent,
143
+ text="Extract",
144
+ command=self.apply_tool_callback
145
+ ).pack(side=tk.LEFT, padx=10)
146
+
147
+ def _on_setting_change(self):
148
+ """Handle setting changes."""
149
+ if self.on_setting_change_callback:
150
+ self.on_setting_change_callback()
151
+
152
+ def _on_filter_change(self, *args):
153
+ """Handle filter text changes."""
154
+ if self.on_setting_change_callback:
155
+ self.on_setting_change_callback()
156
+
157
+ def get_current_settings(self):
158
+ """Get the current settings from the UI."""
159
+ return {
160
+ "extract_href": self.url_extract_href_var.get(),
161
+ "extract_https": self.url_extract_https_var.get(),
162
+ "extract_any_protocol": self.url_extract_any_protocol_var.get(),
163
+ "extract_markdown": self.url_extract_markdown_var.get(),
164
+ "filter_text": self.url_filter_var.get()
165
+ }
166
+
167
+ def update_settings(self, settings):
168
+ """Update the UI with new settings."""
169
+ self.url_extract_href_var.set(settings.get("extract_href", False))
170
+ self.url_extract_https_var.set(settings.get("extract_https", False))
171
+ self.url_extract_any_protocol_var.set(settings.get("extract_any_protocol", False))
172
+ self.url_extract_markdown_var.set(settings.get("extract_markdown", False))
173
+ self.url_filter_var.set(settings.get("filter_text", ""))
174
+
175
+
176
+ class URLLinkExtractor:
177
+ """Main URL and Link Extractor class that combines processor and UI functionality."""
178
+
179
+ def __init__(self):
180
+ self.processor = URLLinkExtractorProcessor()
181
+ self.ui = None
182
+
183
+ def create_ui(self, parent, settings, on_setting_change_callback=None, apply_tool_callback=None):
184
+ """Create and return the UI component."""
185
+ self.ui = URLLinkExtractorUI(parent, settings, on_setting_change_callback, apply_tool_callback)
186
+ return self.ui
187
+
188
+ def process_text(self, input_text, settings):
189
+ """Process text using the current settings."""
190
+ return self.processor.process_text(input_text, settings)
191
+
192
+ def get_default_settings(self):
193
+ """Get default settings for the URL and Link Extractor."""
194
+ return {
195
+ "extract_href": False,
196
+ "extract_https": False,
197
+ "extract_any_protocol": False,
198
+ "extract_markdown": False,
199
+ "filter_text": ""
200
+ }
201
+
202
+
203
+ # Convenience functions for backward compatibility
204
+ def extract_urls(text, extract_href=False, extract_https=False, extract_any_protocol=False, extract_markdown=False, filter_text=""):
205
+ """Extract URLs with specified options."""
206
+ return URLLinkExtractorProcessor.extract_urls(
207
+ text, extract_href, extract_https, extract_any_protocol, extract_markdown, filter_text
208
+ )
209
+
210
+
211
+ def process_url_extraction(input_text, settings):
212
+ """Process URL extraction with the specified settings."""
213
+ return URLLinkExtractorProcessor.process_text(input_text, settings)
214
+
215
+
216
+ # BaseTool-compatible wrapper
217
+ try:
218
+ from tools.base_tool import BaseTool
219
+ from typing import Dict, Any
220
+ import tkinter as tk
221
+ from tkinter import ttk
222
+
223
+ class URLLinkExtractorV2(BaseTool):
224
+ """
225
+ BaseTool-compatible version of URLLinkExtractor.
226
+ """
227
+
228
+ TOOL_NAME = "URL and Link Extractor"
229
+ TOOL_DESCRIPTION = "Extract URLs and links from text"
230
+ TOOL_VERSION = "2.0.0"
231
+
232
+ def process_text(self, input_text: str, settings: Dict[str, Any]) -> str:
233
+ """Extract URLs from text."""
234
+ return URLLinkExtractorProcessor.extract_urls(
235
+ input_text,
236
+ settings.get("extract_href", False),
237
+ settings.get("extract_https", True),
238
+ settings.get("extract_any_protocol", False),
239
+ settings.get("extract_markdown", False),
240
+ settings.get("filter_text", "")
241
+ )
242
+
243
+ def get_default_settings(self) -> Dict[str, Any]:
244
+ return {
245
+ "extract_href": False,
246
+ "extract_https": True,
247
+ "extract_any_protocol": False,
248
+ "extract_markdown": False,
249
+ "filter_text": ""
250
+ }
251
+
252
+ def create_ui(self, parent: tk.Widget, settings: Dict[str, Any],
253
+ on_change=None, on_apply=None) -> tk.Widget:
254
+ """Create UI for URL Link Extractor."""
255
+ frame = ttk.Frame(parent)
256
+ ttk.Label(frame, text="Extract URLs and links").pack(side=tk.LEFT, padx=5)
257
+ if on_apply:
258
+ ttk.Button(frame, text="Extract", command=on_apply).pack(side=tk.LEFT, padx=5)
259
+ return frame
260
+
261
+ except ImportError:
262
262
  pass