pomera-ai-commander 0.1.0 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (191) hide show
  1. package/LICENSE +21 -21
  2. package/README.md +105 -680
  3. package/bin/pomera-ai-commander.js +62 -62
  4. package/core/__init__.py +65 -65
  5. package/core/app_context.py +482 -482
  6. package/core/async_text_processor.py +421 -421
  7. package/core/backup_manager.py +655 -655
  8. package/core/backup_recovery_manager.py +1033 -1033
  9. package/core/content_hash_cache.py +508 -508
  10. package/core/context_menu.py +313 -313
  11. package/core/data_validator.py +1066 -1066
  12. package/core/database_connection_manager.py +744 -744
  13. package/core/database_curl_settings_manager.py +608 -608
  14. package/core/database_promera_ai_settings_manager.py +446 -446
  15. package/core/database_schema.py +411 -411
  16. package/core/database_schema_manager.py +395 -395
  17. package/core/database_settings_manager.py +1507 -1507
  18. package/core/database_settings_manager_interface.py +456 -456
  19. package/core/dialog_manager.py +734 -734
  20. package/core/efficient_line_numbers.py +510 -510
  21. package/core/error_handler.py +746 -746
  22. package/core/error_service.py +431 -431
  23. package/core/event_consolidator.py +511 -511
  24. package/core/mcp/__init__.py +43 -43
  25. package/core/mcp/protocol.py +288 -288
  26. package/core/mcp/schema.py +251 -251
  27. package/core/mcp/server_stdio.py +299 -299
  28. package/core/mcp/tool_registry.py +2372 -2345
  29. package/core/memory_efficient_text_widget.py +711 -711
  30. package/core/migration_manager.py +914 -914
  31. package/core/migration_test_suite.py +1085 -1085
  32. package/core/migration_validator.py +1143 -1143
  33. package/core/optimized_find_replace.py +714 -714
  34. package/core/optimized_pattern_engine.py +424 -424
  35. package/core/optimized_search_highlighter.py +552 -552
  36. package/core/performance_monitor.py +674 -674
  37. package/core/persistence_manager.py +712 -712
  38. package/core/progressive_stats_calculator.py +632 -632
  39. package/core/regex_pattern_cache.py +529 -529
  40. package/core/regex_pattern_library.py +350 -350
  41. package/core/search_operation_manager.py +434 -434
  42. package/core/settings_defaults_registry.py +1087 -1087
  43. package/core/settings_integrity_validator.py +1111 -1111
  44. package/core/settings_serializer.py +557 -557
  45. package/core/settings_validator.py +1823 -1823
  46. package/core/smart_stats_calculator.py +709 -709
  47. package/core/statistics_update_manager.py +619 -619
  48. package/core/stats_config_manager.py +858 -858
  49. package/core/streaming_text_handler.py +723 -723
  50. package/core/task_scheduler.py +596 -596
  51. package/core/update_pattern_library.py +168 -168
  52. package/core/visibility_monitor.py +596 -596
  53. package/core/widget_cache.py +498 -498
  54. package/mcp.json +51 -61
  55. package/package.json +61 -57
  56. package/pomera.py +7482 -7482
  57. package/pomera_mcp_server.py +183 -144
  58. package/requirements.txt +32 -0
  59. package/tools/__init__.py +4 -4
  60. package/tools/ai_tools.py +2891 -2891
  61. package/tools/ascii_art_generator.py +352 -352
  62. package/tools/base64_tools.py +183 -183
  63. package/tools/base_tool.py +511 -511
  64. package/tools/case_tool.py +308 -308
  65. package/tools/column_tools.py +395 -395
  66. package/tools/cron_tool.py +884 -884
  67. package/tools/curl_history.py +600 -600
  68. package/tools/curl_processor.py +1207 -1207
  69. package/tools/curl_settings.py +502 -502
  70. package/tools/curl_tool.py +5467 -5467
  71. package/tools/diff_viewer.py +1071 -1071
  72. package/tools/email_extraction_tool.py +248 -248
  73. package/tools/email_header_analyzer.py +425 -425
  74. package/tools/extraction_tools.py +250 -250
  75. package/tools/find_replace.py +1750 -1750
  76. package/tools/folder_file_reporter.py +1463 -1463
  77. package/tools/folder_file_reporter_adapter.py +480 -480
  78. package/tools/generator_tools.py +1216 -1216
  79. package/tools/hash_generator.py +255 -255
  80. package/tools/html_tool.py +656 -656
  81. package/tools/jsonxml_tool.py +729 -729
  82. package/tools/line_tools.py +419 -419
  83. package/tools/markdown_tools.py +561 -561
  84. package/tools/mcp_widget.py +1417 -1417
  85. package/tools/notes_widget.py +973 -973
  86. package/tools/number_base_converter.py +372 -372
  87. package/tools/regex_extractor.py +571 -571
  88. package/tools/slug_generator.py +310 -310
  89. package/tools/sorter_tools.py +458 -458
  90. package/tools/string_escape_tool.py +392 -392
  91. package/tools/text_statistics_tool.py +365 -365
  92. package/tools/text_wrapper.py +430 -430
  93. package/tools/timestamp_converter.py +421 -421
  94. package/tools/tool_loader.py +710 -710
  95. package/tools/translator_tools.py +522 -522
  96. package/tools/url_link_extractor.py +261 -261
  97. package/tools/url_parser.py +204 -204
  98. package/tools/whitespace_tools.py +355 -355
  99. package/tools/word_frequency_counter.py +146 -146
  100. package/core/__pycache__/__init__.cpython-313.pyc +0 -0
  101. package/core/__pycache__/app_context.cpython-313.pyc +0 -0
  102. package/core/__pycache__/async_text_processor.cpython-313.pyc +0 -0
  103. package/core/__pycache__/backup_manager.cpython-313.pyc +0 -0
  104. package/core/__pycache__/backup_recovery_manager.cpython-313.pyc +0 -0
  105. package/core/__pycache__/content_hash_cache.cpython-313.pyc +0 -0
  106. package/core/__pycache__/context_menu.cpython-313.pyc +0 -0
  107. package/core/__pycache__/data_validator.cpython-313.pyc +0 -0
  108. package/core/__pycache__/database_connection_manager.cpython-313.pyc +0 -0
  109. package/core/__pycache__/database_curl_settings_manager.cpython-313.pyc +0 -0
  110. package/core/__pycache__/database_promera_ai_settings_manager.cpython-313.pyc +0 -0
  111. package/core/__pycache__/database_schema.cpython-313.pyc +0 -0
  112. package/core/__pycache__/database_schema_manager.cpython-313.pyc +0 -0
  113. package/core/__pycache__/database_settings_manager.cpython-313.pyc +0 -0
  114. package/core/__pycache__/database_settings_manager_interface.cpython-313.pyc +0 -0
  115. package/core/__pycache__/dialog_manager.cpython-313.pyc +0 -0
  116. package/core/__pycache__/efficient_line_numbers.cpython-313.pyc +0 -0
  117. package/core/__pycache__/error_handler.cpython-313.pyc +0 -0
  118. package/core/__pycache__/error_service.cpython-313.pyc +0 -0
  119. package/core/__pycache__/event_consolidator.cpython-313.pyc +0 -0
  120. package/core/__pycache__/memory_efficient_text_widget.cpython-313.pyc +0 -0
  121. package/core/__pycache__/migration_manager.cpython-313.pyc +0 -0
  122. package/core/__pycache__/migration_test_suite.cpython-313.pyc +0 -0
  123. package/core/__pycache__/migration_validator.cpython-313.pyc +0 -0
  124. package/core/__pycache__/optimized_find_replace.cpython-313.pyc +0 -0
  125. package/core/__pycache__/optimized_pattern_engine.cpython-313.pyc +0 -0
  126. package/core/__pycache__/optimized_search_highlighter.cpython-313.pyc +0 -0
  127. package/core/__pycache__/performance_monitor.cpython-313.pyc +0 -0
  128. package/core/__pycache__/persistence_manager.cpython-313.pyc +0 -0
  129. package/core/__pycache__/progressive_stats_calculator.cpython-313.pyc +0 -0
  130. package/core/__pycache__/regex_pattern_cache.cpython-313.pyc +0 -0
  131. package/core/__pycache__/regex_pattern_library.cpython-313.pyc +0 -0
  132. package/core/__pycache__/search_operation_manager.cpython-313.pyc +0 -0
  133. package/core/__pycache__/settings_defaults_registry.cpython-313.pyc +0 -0
  134. package/core/__pycache__/settings_integrity_validator.cpython-313.pyc +0 -0
  135. package/core/__pycache__/settings_serializer.cpython-313.pyc +0 -0
  136. package/core/__pycache__/settings_validator.cpython-313.pyc +0 -0
  137. package/core/__pycache__/smart_stats_calculator.cpython-313.pyc +0 -0
  138. package/core/__pycache__/statistics_update_manager.cpython-313.pyc +0 -0
  139. package/core/__pycache__/stats_config_manager.cpython-313.pyc +0 -0
  140. package/core/__pycache__/streaming_text_handler.cpython-313.pyc +0 -0
  141. package/core/__pycache__/task_scheduler.cpython-313.pyc +0 -0
  142. package/core/__pycache__/visibility_monitor.cpython-313.pyc +0 -0
  143. package/core/__pycache__/widget_cache.cpython-313.pyc +0 -0
  144. package/core/mcp/__pycache__/__init__.cpython-313.pyc +0 -0
  145. package/core/mcp/__pycache__/protocol.cpython-313.pyc +0 -0
  146. package/core/mcp/__pycache__/schema.cpython-313.pyc +0 -0
  147. package/core/mcp/__pycache__/server_stdio.cpython-313.pyc +0 -0
  148. package/core/mcp/__pycache__/tool_registry.cpython-313.pyc +0 -0
  149. package/tools/__pycache__/__init__.cpython-313.pyc +0 -0
  150. package/tools/__pycache__/ai_tools.cpython-313.pyc +0 -0
  151. package/tools/__pycache__/ascii_art_generator.cpython-313.pyc +0 -0
  152. package/tools/__pycache__/base64_tools.cpython-313.pyc +0 -0
  153. package/tools/__pycache__/base_tool.cpython-313.pyc +0 -0
  154. package/tools/__pycache__/case_tool.cpython-313.pyc +0 -0
  155. package/tools/__pycache__/column_tools.cpython-313.pyc +0 -0
  156. package/tools/__pycache__/cron_tool.cpython-313.pyc +0 -0
  157. package/tools/__pycache__/curl_history.cpython-313.pyc +0 -0
  158. package/tools/__pycache__/curl_processor.cpython-313.pyc +0 -0
  159. package/tools/__pycache__/curl_settings.cpython-313.pyc +0 -0
  160. package/tools/__pycache__/curl_tool.cpython-313.pyc +0 -0
  161. package/tools/__pycache__/diff_viewer.cpython-313.pyc +0 -0
  162. package/tools/__pycache__/email_extraction_tool.cpython-313.pyc +0 -0
  163. package/tools/__pycache__/email_header_analyzer.cpython-313.pyc +0 -0
  164. package/tools/__pycache__/extraction_tools.cpython-313.pyc +0 -0
  165. package/tools/__pycache__/find_replace.cpython-313.pyc +0 -0
  166. package/tools/__pycache__/folder_file_reporter.cpython-313.pyc +0 -0
  167. package/tools/__pycache__/folder_file_reporter_adapter.cpython-313.pyc +0 -0
  168. package/tools/__pycache__/generator_tools.cpython-313.pyc +0 -0
  169. package/tools/__pycache__/hash_generator.cpython-313.pyc +0 -0
  170. package/tools/__pycache__/html_tool.cpython-313.pyc +0 -0
  171. package/tools/__pycache__/huggingface_helper.cpython-313.pyc +0 -0
  172. package/tools/__pycache__/jsonxml_tool.cpython-313.pyc +0 -0
  173. package/tools/__pycache__/line_tools.cpython-313.pyc +0 -0
  174. package/tools/__pycache__/list_comparator.cpython-313.pyc +0 -0
  175. package/tools/__pycache__/markdown_tools.cpython-313.pyc +0 -0
  176. package/tools/__pycache__/mcp_widget.cpython-313.pyc +0 -0
  177. package/tools/__pycache__/notes_widget.cpython-313.pyc +0 -0
  178. package/tools/__pycache__/number_base_converter.cpython-313.pyc +0 -0
  179. package/tools/__pycache__/regex_extractor.cpython-313.pyc +0 -0
  180. package/tools/__pycache__/slug_generator.cpython-313.pyc +0 -0
  181. package/tools/__pycache__/sorter_tools.cpython-313.pyc +0 -0
  182. package/tools/__pycache__/string_escape_tool.cpython-313.pyc +0 -0
  183. package/tools/__pycache__/text_statistics_tool.cpython-313.pyc +0 -0
  184. package/tools/__pycache__/text_wrapper.cpython-313.pyc +0 -0
  185. package/tools/__pycache__/timestamp_converter.cpython-313.pyc +0 -0
  186. package/tools/__pycache__/tool_loader.cpython-313.pyc +0 -0
  187. package/tools/__pycache__/translator_tools.cpython-313.pyc +0 -0
  188. package/tools/__pycache__/url_link_extractor.cpython-313.pyc +0 -0
  189. package/tools/__pycache__/url_parser.cpython-313.pyc +0 -0
  190. package/tools/__pycache__/whitespace_tools.cpython-313.pyc +0 -0
  191. package/tools/__pycache__/word_frequency_counter.cpython-313.pyc +0 -0
@@ -1,262 +1,262 @@
1
- """
2
- URL and Link Extractor Module - URL extraction utility
3
-
4
- This module provides comprehensive URL and link extraction functionality with UI components
5
- for the Promera AI Commander application.
6
- """
7
-
8
- import tkinter as tk
9
- from tkinter import ttk
10
- import re
11
-
12
-
13
- class URLLinkExtractorProcessor:
14
- """URL and link extractor processor with multiple extraction modes and filtering."""
15
-
16
- @staticmethod
17
- def extract_urls(text, extract_href=False, extract_https=False, extract_any_protocol=False, extract_markdown=False, filter_text=""):
18
- """Extracts URLs and links from text based on selected options."""
19
- urls = set()
20
-
21
- # Extract from HTML href attributes
22
- if extract_href:
23
- href_pattern = r'href=["\']([^"\']+)["\']'
24
- urls.update(re.findall(href_pattern, text))
25
-
26
- # Extract http(s):// URLs
27
- if extract_https:
28
- https_pattern = r'https?://[^\s<>"{}|\\^`\[\]]+'
29
- urls.update(re.findall(https_pattern, text))
30
-
31
- # Extract any protocol:// URLs
32
- if extract_any_protocol:
33
- protocol_pattern = r'\b[a-zA-Z][a-zA-Z0-9+.-]*://[^\s<>"{}|\\^`\[\]]+'
34
- urls.update(re.findall(protocol_pattern, text))
35
-
36
- # Extract markdown links [text](url)
37
- if extract_markdown:
38
- markdown_pattern = r'\[([^\]]+)\]\(([^)]+)\)'
39
- markdown_urls = re.findall(markdown_pattern, text)
40
- urls.update([url for _, url in markdown_urls])
41
-
42
- # If no options selected, extract all
43
- if not any([extract_href, extract_https, extract_any_protocol, extract_markdown]):
44
- # Extract all types
45
- href_pattern = r'href=["\']([^"\']+)["\']'
46
- urls.update(re.findall(href_pattern, text))
47
-
48
- protocol_pattern = r'\b[a-zA-Z][a-zA-Z0-9+.-]*://[^\s<>"{}|\\^`\[\]]+'
49
- urls.update(re.findall(protocol_pattern, text))
50
-
51
- markdown_pattern = r'\[([^\]]+)\]\(([^)]+)\)'
52
- markdown_urls = re.findall(markdown_pattern, text)
53
- urls.update([url for _, url in markdown_urls])
54
-
55
- # Apply filter if provided
56
- if filter_text.strip():
57
- filter_lower = filter_text.lower()
58
- urls = {url for url in urls if filter_lower in url.lower()}
59
-
60
- return '\n'.join(sorted(urls)) if urls else "No URLs found."
61
-
62
- @staticmethod
63
- def process_text(input_text, settings):
64
- """Process text using the current settings."""
65
- return URLLinkExtractorProcessor.extract_urls(
66
- input_text,
67
- settings.get("extract_href", False),
68
- settings.get("extract_https", False),
69
- settings.get("extract_any_protocol", False),
70
- settings.get("extract_markdown", False),
71
- settings.get("filter_text", "")
72
- )
73
-
74
-
75
- class URLLinkExtractorUI:
76
- """UI components for the URL and Link Extractor."""
77
-
78
- def __init__(self, parent, settings, on_setting_change_callback=None, apply_tool_callback=None):
79
- """
80
- Initialize the URL and Link Extractor UI.
81
-
82
- Args:
83
- parent: Parent widget
84
- settings: Dictionary containing tool settings
85
- on_setting_change_callback: Callback function for setting changes
86
- apply_tool_callback: Callback function for applying the tool
87
- """
88
- self.parent = parent
89
- self.settings = settings
90
- self.on_setting_change_callback = on_setting_change_callback
91
- self.apply_tool_callback = apply_tool_callback
92
-
93
- # Initialize UI variables
94
- self.url_extract_href_var = tk.BooleanVar(value=settings.get("extract_href", False))
95
- self.url_extract_https_var = tk.BooleanVar(value=settings.get("extract_https", False))
96
- self.url_extract_any_protocol_var = tk.BooleanVar(value=settings.get("extract_any_protocol", False))
97
- self.url_extract_markdown_var = tk.BooleanVar(value=settings.get("extract_markdown", False))
98
- self.url_filter_var = tk.StringVar(value=settings.get("filter_text", ""))
99
-
100
- self.create_widgets()
101
-
102
- def create_widgets(self):
103
- """Creates the UI widgets for the URL and Link Extractor."""
104
- # Checkboxes for different extraction modes
105
- ttk.Checkbutton(
106
- self.parent,
107
- text='href=""',
108
- variable=self.url_extract_href_var,
109
- command=self._on_setting_change
110
- ).pack(side=tk.LEFT, padx=5)
111
-
112
- ttk.Checkbutton(
113
- self.parent,
114
- text="http(s)://",
115
- variable=self.url_extract_https_var,
116
- command=self._on_setting_change
117
- ).pack(side=tk.LEFT, padx=5)
118
-
119
- ttk.Checkbutton(
120
- self.parent,
121
- text="any protocol ://",
122
- variable=self.url_extract_any_protocol_var,
123
- command=self._on_setting_change
124
- ).pack(side=tk.LEFT, padx=5)
125
-
126
- ttk.Checkbutton(
127
- self.parent,
128
- text="markdown []()",
129
- variable=self.url_extract_markdown_var,
130
- command=self._on_setting_change
131
- ).pack(side=tk.LEFT, padx=5)
132
-
133
- # Filter field
134
- ttk.Label(self.parent, text="Filter:").pack(side=tk.LEFT, padx=(10, 2))
135
- filter_entry = ttk.Entry(self.parent, textvariable=self.url_filter_var, width=15)
136
- filter_entry.pack(side=tk.LEFT, padx=2)
137
- self.url_filter_var.trace_add("write", self._on_filter_change)
138
-
139
- # Extract button
140
- if self.apply_tool_callback:
141
- ttk.Button(
142
- self.parent,
143
- text="Extract",
144
- command=self.apply_tool_callback
145
- ).pack(side=tk.LEFT, padx=10)
146
-
147
- def _on_setting_change(self):
148
- """Handle setting changes."""
149
- if self.on_setting_change_callback:
150
- self.on_setting_change_callback()
151
-
152
- def _on_filter_change(self, *args):
153
- """Handle filter text changes."""
154
- if self.on_setting_change_callback:
155
- self.on_setting_change_callback()
156
-
157
- def get_current_settings(self):
158
- """Get the current settings from the UI."""
159
- return {
160
- "extract_href": self.url_extract_href_var.get(),
161
- "extract_https": self.url_extract_https_var.get(),
162
- "extract_any_protocol": self.url_extract_any_protocol_var.get(),
163
- "extract_markdown": self.url_extract_markdown_var.get(),
164
- "filter_text": self.url_filter_var.get()
165
- }
166
-
167
- def update_settings(self, settings):
168
- """Update the UI with new settings."""
169
- self.url_extract_href_var.set(settings.get("extract_href", False))
170
- self.url_extract_https_var.set(settings.get("extract_https", False))
171
- self.url_extract_any_protocol_var.set(settings.get("extract_any_protocol", False))
172
- self.url_extract_markdown_var.set(settings.get("extract_markdown", False))
173
- self.url_filter_var.set(settings.get("filter_text", ""))
174
-
175
-
176
- class URLLinkExtractor:
177
- """Main URL and Link Extractor class that combines processor and UI functionality."""
178
-
179
- def __init__(self):
180
- self.processor = URLLinkExtractorProcessor()
181
- self.ui = None
182
-
183
- def create_ui(self, parent, settings, on_setting_change_callback=None, apply_tool_callback=None):
184
- """Create and return the UI component."""
185
- self.ui = URLLinkExtractorUI(parent, settings, on_setting_change_callback, apply_tool_callback)
186
- return self.ui
187
-
188
- def process_text(self, input_text, settings):
189
- """Process text using the current settings."""
190
- return self.processor.process_text(input_text, settings)
191
-
192
- def get_default_settings(self):
193
- """Get default settings for the URL and Link Extractor."""
194
- return {
195
- "extract_href": False,
196
- "extract_https": False,
197
- "extract_any_protocol": False,
198
- "extract_markdown": False,
199
- "filter_text": ""
200
- }
201
-
202
-
203
- # Convenience functions for backward compatibility
204
- def extract_urls(text, extract_href=False, extract_https=False, extract_any_protocol=False, extract_markdown=False, filter_text=""):
205
- """Extract URLs with specified options."""
206
- return URLLinkExtractorProcessor.extract_urls(
207
- text, extract_href, extract_https, extract_any_protocol, extract_markdown, filter_text
208
- )
209
-
210
-
211
- def process_url_extraction(input_text, settings):
212
- """Process URL extraction with the specified settings."""
213
- return URLLinkExtractorProcessor.process_text(input_text, settings)
214
-
215
-
216
- # BaseTool-compatible wrapper
217
- try:
218
- from tools.base_tool import BaseTool
219
- from typing import Dict, Any
220
- import tkinter as tk
221
- from tkinter import ttk
222
-
223
- class URLLinkExtractorV2(BaseTool):
224
- """
225
- BaseTool-compatible version of URLLinkExtractor.
226
- """
227
-
228
- TOOL_NAME = "URL and Link Extractor"
229
- TOOL_DESCRIPTION = "Extract URLs and links from text"
230
- TOOL_VERSION = "2.0.0"
231
-
232
- def process_text(self, input_text: str, settings: Dict[str, Any]) -> str:
233
- """Extract URLs from text."""
234
- return URLLinkExtractorProcessor.extract_urls(
235
- input_text,
236
- settings.get("extract_href", False),
237
- settings.get("extract_https", True),
238
- settings.get("extract_any_protocol", False),
239
- settings.get("extract_markdown", False),
240
- settings.get("filter_text", "")
241
- )
242
-
243
- def get_default_settings(self) -> Dict[str, Any]:
244
- return {
245
- "extract_href": False,
246
- "extract_https": True,
247
- "extract_any_protocol": False,
248
- "extract_markdown": False,
249
- "filter_text": ""
250
- }
251
-
252
- def create_ui(self, parent: tk.Widget, settings: Dict[str, Any],
253
- on_change=None, on_apply=None) -> tk.Widget:
254
- """Create UI for URL Link Extractor."""
255
- frame = ttk.Frame(parent)
256
- ttk.Label(frame, text="Extract URLs and links").pack(side=tk.LEFT, padx=5)
257
- if on_apply:
258
- ttk.Button(frame, text="Extract", command=on_apply).pack(side=tk.LEFT, padx=5)
259
- return frame
260
-
261
- except ImportError:
1
+ """
2
+ URL and Link Extractor Module - URL extraction utility
3
+
4
+ This module provides comprehensive URL and link extraction functionality with UI components
5
+ for the Promera AI Commander application.
6
+ """
7
+
8
+ import tkinter as tk
9
+ from tkinter import ttk
10
+ import re
11
+
12
+
13
+ class URLLinkExtractorProcessor:
14
+ """URL and link extractor processor with multiple extraction modes and filtering."""
15
+
16
+ @staticmethod
17
+ def extract_urls(text, extract_href=False, extract_https=False, extract_any_protocol=False, extract_markdown=False, filter_text=""):
18
+ """Extracts URLs and links from text based on selected options."""
19
+ urls = set()
20
+
21
+ # Extract from HTML href attributes
22
+ if extract_href:
23
+ href_pattern = r'href=["\']([^"\']+)["\']'
24
+ urls.update(re.findall(href_pattern, text))
25
+
26
+ # Extract http(s):// URLs
27
+ if extract_https:
28
+ https_pattern = r'https?://[^\s<>"{}|\\^`\[\]]+'
29
+ urls.update(re.findall(https_pattern, text))
30
+
31
+ # Extract any protocol:// URLs
32
+ if extract_any_protocol:
33
+ protocol_pattern = r'\b[a-zA-Z][a-zA-Z0-9+.-]*://[^\s<>"{}|\\^`\[\]]+'
34
+ urls.update(re.findall(protocol_pattern, text))
35
+
36
+ # Extract markdown links [text](url)
37
+ if extract_markdown:
38
+ markdown_pattern = r'\[([^\]]+)\]\(([^)]+)\)'
39
+ markdown_urls = re.findall(markdown_pattern, text)
40
+ urls.update([url for _, url in markdown_urls])
41
+
42
+ # If no options selected, extract all
43
+ if not any([extract_href, extract_https, extract_any_protocol, extract_markdown]):
44
+ # Extract all types
45
+ href_pattern = r'href=["\']([^"\']+)["\']'
46
+ urls.update(re.findall(href_pattern, text))
47
+
48
+ protocol_pattern = r'\b[a-zA-Z][a-zA-Z0-9+.-]*://[^\s<>"{}|\\^`\[\]]+'
49
+ urls.update(re.findall(protocol_pattern, text))
50
+
51
+ markdown_pattern = r'\[([^\]]+)\]\(([^)]+)\)'
52
+ markdown_urls = re.findall(markdown_pattern, text)
53
+ urls.update([url for _, url in markdown_urls])
54
+
55
+ # Apply filter if provided
56
+ if filter_text.strip():
57
+ filter_lower = filter_text.lower()
58
+ urls = {url for url in urls if filter_lower in url.lower()}
59
+
60
+ return '\n'.join(sorted(urls)) if urls else "No URLs found."
61
+
62
+ @staticmethod
63
+ def process_text(input_text, settings):
64
+ """Process text using the current settings."""
65
+ return URLLinkExtractorProcessor.extract_urls(
66
+ input_text,
67
+ settings.get("extract_href", False),
68
+ settings.get("extract_https", False),
69
+ settings.get("extract_any_protocol", False),
70
+ settings.get("extract_markdown", False),
71
+ settings.get("filter_text", "")
72
+ )
73
+
74
+
75
+ class URLLinkExtractorUI:
76
+ """UI components for the URL and Link Extractor."""
77
+
78
+ def __init__(self, parent, settings, on_setting_change_callback=None, apply_tool_callback=None):
79
+ """
80
+ Initialize the URL and Link Extractor UI.
81
+
82
+ Args:
83
+ parent: Parent widget
84
+ settings: Dictionary containing tool settings
85
+ on_setting_change_callback: Callback function for setting changes
86
+ apply_tool_callback: Callback function for applying the tool
87
+ """
88
+ self.parent = parent
89
+ self.settings = settings
90
+ self.on_setting_change_callback = on_setting_change_callback
91
+ self.apply_tool_callback = apply_tool_callback
92
+
93
+ # Initialize UI variables
94
+ self.url_extract_href_var = tk.BooleanVar(value=settings.get("extract_href", False))
95
+ self.url_extract_https_var = tk.BooleanVar(value=settings.get("extract_https", False))
96
+ self.url_extract_any_protocol_var = tk.BooleanVar(value=settings.get("extract_any_protocol", False))
97
+ self.url_extract_markdown_var = tk.BooleanVar(value=settings.get("extract_markdown", False))
98
+ self.url_filter_var = tk.StringVar(value=settings.get("filter_text", ""))
99
+
100
+ self.create_widgets()
101
+
102
+ def create_widgets(self):
103
+ """Creates the UI widgets for the URL and Link Extractor."""
104
+ # Checkboxes for different extraction modes
105
+ ttk.Checkbutton(
106
+ self.parent,
107
+ text='href=""',
108
+ variable=self.url_extract_href_var,
109
+ command=self._on_setting_change
110
+ ).pack(side=tk.LEFT, padx=5)
111
+
112
+ ttk.Checkbutton(
113
+ self.parent,
114
+ text="http(s)://",
115
+ variable=self.url_extract_https_var,
116
+ command=self._on_setting_change
117
+ ).pack(side=tk.LEFT, padx=5)
118
+
119
+ ttk.Checkbutton(
120
+ self.parent,
121
+ text="any protocol ://",
122
+ variable=self.url_extract_any_protocol_var,
123
+ command=self._on_setting_change
124
+ ).pack(side=tk.LEFT, padx=5)
125
+
126
+ ttk.Checkbutton(
127
+ self.parent,
128
+ text="markdown []()",
129
+ variable=self.url_extract_markdown_var,
130
+ command=self._on_setting_change
131
+ ).pack(side=tk.LEFT, padx=5)
132
+
133
+ # Filter field
134
+ ttk.Label(self.parent, text="Filter:").pack(side=tk.LEFT, padx=(10, 2))
135
+ filter_entry = ttk.Entry(self.parent, textvariable=self.url_filter_var, width=15)
136
+ filter_entry.pack(side=tk.LEFT, padx=2)
137
+ self.url_filter_var.trace_add("write", self._on_filter_change)
138
+
139
+ # Extract button
140
+ if self.apply_tool_callback:
141
+ ttk.Button(
142
+ self.parent,
143
+ text="Extract",
144
+ command=self.apply_tool_callback
145
+ ).pack(side=tk.LEFT, padx=10)
146
+
147
+ def _on_setting_change(self):
148
+ """Handle setting changes."""
149
+ if self.on_setting_change_callback:
150
+ self.on_setting_change_callback()
151
+
152
+ def _on_filter_change(self, *args):
153
+ """Handle filter text changes."""
154
+ if self.on_setting_change_callback:
155
+ self.on_setting_change_callback()
156
+
157
+ def get_current_settings(self):
158
+ """Get the current settings from the UI."""
159
+ return {
160
+ "extract_href": self.url_extract_href_var.get(),
161
+ "extract_https": self.url_extract_https_var.get(),
162
+ "extract_any_protocol": self.url_extract_any_protocol_var.get(),
163
+ "extract_markdown": self.url_extract_markdown_var.get(),
164
+ "filter_text": self.url_filter_var.get()
165
+ }
166
+
167
+ def update_settings(self, settings):
168
+ """Update the UI with new settings."""
169
+ self.url_extract_href_var.set(settings.get("extract_href", False))
170
+ self.url_extract_https_var.set(settings.get("extract_https", False))
171
+ self.url_extract_any_protocol_var.set(settings.get("extract_any_protocol", False))
172
+ self.url_extract_markdown_var.set(settings.get("extract_markdown", False))
173
+ self.url_filter_var.set(settings.get("filter_text", ""))
174
+
175
+
176
+ class URLLinkExtractor:
177
+ """Main URL and Link Extractor class that combines processor and UI functionality."""
178
+
179
+ def __init__(self):
180
+ self.processor = URLLinkExtractorProcessor()
181
+ self.ui = None
182
+
183
+ def create_ui(self, parent, settings, on_setting_change_callback=None, apply_tool_callback=None):
184
+ """Create and return the UI component."""
185
+ self.ui = URLLinkExtractorUI(parent, settings, on_setting_change_callback, apply_tool_callback)
186
+ return self.ui
187
+
188
+ def process_text(self, input_text, settings):
189
+ """Process text using the current settings."""
190
+ return self.processor.process_text(input_text, settings)
191
+
192
+ def get_default_settings(self):
193
+ """Get default settings for the URL and Link Extractor."""
194
+ return {
195
+ "extract_href": False,
196
+ "extract_https": False,
197
+ "extract_any_protocol": False,
198
+ "extract_markdown": False,
199
+ "filter_text": ""
200
+ }
201
+
202
+
203
+ # Convenience functions for backward compatibility
204
+ def extract_urls(text, extract_href=False, extract_https=False, extract_any_protocol=False, extract_markdown=False, filter_text=""):
205
+ """Extract URLs with specified options."""
206
+ return URLLinkExtractorProcessor.extract_urls(
207
+ text, extract_href, extract_https, extract_any_protocol, extract_markdown, filter_text
208
+ )
209
+
210
+
211
+ def process_url_extraction(input_text, settings):
212
+ """Process URL extraction with the specified settings."""
213
+ return URLLinkExtractorProcessor.process_text(input_text, settings)
214
+
215
+
216
+ # BaseTool-compatible wrapper
217
+ try:
218
+ from tools.base_tool import BaseTool
219
+ from typing import Dict, Any
220
+ import tkinter as tk
221
+ from tkinter import ttk
222
+
223
+ class URLLinkExtractorV2(BaseTool):
224
+ """
225
+ BaseTool-compatible version of URLLinkExtractor.
226
+ """
227
+
228
+ TOOL_NAME = "URL and Link Extractor"
229
+ TOOL_DESCRIPTION = "Extract URLs and links from text"
230
+ TOOL_VERSION = "2.0.0"
231
+
232
+ def process_text(self, input_text: str, settings: Dict[str, Any]) -> str:
233
+ """Extract URLs from text."""
234
+ return URLLinkExtractorProcessor.extract_urls(
235
+ input_text,
236
+ settings.get("extract_href", False),
237
+ settings.get("extract_https", True),
238
+ settings.get("extract_any_protocol", False),
239
+ settings.get("extract_markdown", False),
240
+ settings.get("filter_text", "")
241
+ )
242
+
243
+ def get_default_settings(self) -> Dict[str, Any]:
244
+ return {
245
+ "extract_href": False,
246
+ "extract_https": True,
247
+ "extract_any_protocol": False,
248
+ "extract_markdown": False,
249
+ "filter_text": ""
250
+ }
251
+
252
+ def create_ui(self, parent: tk.Widget, settings: Dict[str, Any],
253
+ on_change=None, on_apply=None) -> tk.Widget:
254
+ """Create UI for URL Link Extractor."""
255
+ frame = ttk.Frame(parent)
256
+ ttk.Label(frame, text="Extract URLs and links").pack(side=tk.LEFT, padx=5)
257
+ if on_apply:
258
+ ttk.Button(frame, text="Extract", command=on_apply).pack(side=tk.LEFT, padx=5)
259
+ return frame
260
+
261
+ except ImportError:
262
262
  pass