pomera-ai-commander 1.1.1 → 1.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -21
- package/README.md +105 -680
- package/bin/pomera-ai-commander.js +62 -62
- package/core/__init__.py +65 -65
- package/core/app_context.py +482 -482
- package/core/async_text_processor.py +421 -421
- package/core/backup_manager.py +655 -655
- package/core/backup_recovery_manager.py +1199 -1033
- package/core/content_hash_cache.py +508 -508
- package/core/context_menu.py +313 -313
- package/core/data_directory.py +549 -0
- package/core/data_validator.py +1066 -1066
- package/core/database_connection_manager.py +744 -744
- package/core/database_curl_settings_manager.py +608 -608
- package/core/database_promera_ai_settings_manager.py +446 -446
- package/core/database_schema.py +411 -411
- package/core/database_schema_manager.py +395 -395
- package/core/database_settings_manager.py +1507 -1507
- package/core/database_settings_manager_interface.py +456 -456
- package/core/dialog_manager.py +734 -734
- package/core/diff_utils.py +239 -0
- package/core/efficient_line_numbers.py +540 -510
- package/core/error_handler.py +746 -746
- package/core/error_service.py +431 -431
- package/core/event_consolidator.py +511 -511
- package/core/mcp/__init__.py +43 -43
- package/core/mcp/find_replace_diff.py +334 -0
- package/core/mcp/protocol.py +288 -288
- package/core/mcp/schema.py +251 -251
- package/core/mcp/server_stdio.py +299 -299
- package/core/mcp/tool_registry.py +2699 -2345
- package/core/memento.py +275 -0
- package/core/memory_efficient_text_widget.py +711 -711
- package/core/migration_manager.py +914 -914
- package/core/migration_test_suite.py +1085 -1085
- package/core/migration_validator.py +1143 -1143
- package/core/optimized_find_replace.py +714 -714
- package/core/optimized_pattern_engine.py +424 -424
- package/core/optimized_search_highlighter.py +552 -552
- package/core/performance_monitor.py +674 -674
- package/core/persistence_manager.py +712 -712
- package/core/progressive_stats_calculator.py +632 -632
- package/core/regex_pattern_cache.py +529 -529
- package/core/regex_pattern_library.py +350 -350
- package/core/search_operation_manager.py +434 -434
- package/core/settings_defaults_registry.py +1087 -1087
- package/core/settings_integrity_validator.py +1111 -1111
- package/core/settings_serializer.py +557 -557
- package/core/settings_validator.py +1823 -1823
- package/core/smart_stats_calculator.py +709 -709
- package/core/statistics_update_manager.py +619 -619
- package/core/stats_config_manager.py +858 -858
- package/core/streaming_text_handler.py +723 -723
- package/core/task_scheduler.py +596 -596
- package/core/update_pattern_library.py +168 -168
- package/core/visibility_monitor.py +596 -596
- package/core/widget_cache.py +498 -498
- package/mcp.json +51 -61
- package/migrate_data.py +127 -0
- package/package.json +64 -57
- package/pomera.py +7883 -7482
- package/pomera_mcp_server.py +183 -144
- package/requirements.txt +33 -0
- package/scripts/Dockerfile.alpine +43 -0
- package/scripts/Dockerfile.gui-test +54 -0
- package/scripts/Dockerfile.linux +43 -0
- package/scripts/Dockerfile.test-linux +80 -0
- package/scripts/Dockerfile.ubuntu +39 -0
- package/scripts/README.md +53 -0
- package/scripts/build-all.bat +113 -0
- package/scripts/build-docker.bat +53 -0
- package/scripts/build-docker.sh +55 -0
- package/scripts/build-optimized.bat +101 -0
- package/scripts/build.sh +78 -0
- package/scripts/docker-compose.test.yml +27 -0
- package/scripts/docker-compose.yml +32 -0
- package/scripts/postinstall.js +62 -0
- package/scripts/requirements-minimal.txt +33 -0
- package/scripts/test-linux-simple.bat +28 -0
- package/scripts/validate-release-workflow.py +450 -0
- package/tools/__init__.py +4 -4
- package/tools/ai_tools.py +2891 -2891
- package/tools/ascii_art_generator.py +352 -352
- package/tools/base64_tools.py +183 -183
- package/tools/base_tool.py +511 -511
- package/tools/case_tool.py +308 -308
- package/tools/column_tools.py +395 -395
- package/tools/cron_tool.py +884 -884
- package/tools/curl_history.py +600 -600
- package/tools/curl_processor.py +1207 -1207
- package/tools/curl_settings.py +502 -502
- package/tools/curl_tool.py +5467 -5467
- package/tools/diff_viewer.py +1817 -1072
- package/tools/email_extraction_tool.py +248 -248
- package/tools/email_header_analyzer.py +425 -425
- package/tools/extraction_tools.py +250 -250
- package/tools/find_replace.py +2289 -1750
- package/tools/folder_file_reporter.py +1463 -1463
- package/tools/folder_file_reporter_adapter.py +480 -480
- package/tools/generator_tools.py +1216 -1216
- package/tools/hash_generator.py +255 -255
- package/tools/html_tool.py +656 -656
- package/tools/jsonxml_tool.py +729 -729
- package/tools/line_tools.py +419 -419
- package/tools/markdown_tools.py +561 -561
- package/tools/mcp_widget.py +1417 -1417
- package/tools/notes_widget.py +978 -973
- package/tools/number_base_converter.py +372 -372
- package/tools/regex_extractor.py +571 -571
- package/tools/slug_generator.py +310 -310
- package/tools/sorter_tools.py +458 -458
- package/tools/string_escape_tool.py +392 -392
- package/tools/text_statistics_tool.py +365 -365
- package/tools/text_wrapper.py +430 -430
- package/tools/timestamp_converter.py +421 -421
- package/tools/tool_loader.py +710 -710
- package/tools/translator_tools.py +522 -522
- package/tools/url_link_extractor.py +261 -261
- package/tools/url_parser.py +204 -204
- package/tools/whitespace_tools.py +355 -355
- package/tools/word_frequency_counter.py +146 -146
- package/core/__pycache__/__init__.cpython-313.pyc +0 -0
- package/core/__pycache__/app_context.cpython-313.pyc +0 -0
- package/core/__pycache__/async_text_processor.cpython-313.pyc +0 -0
- package/core/__pycache__/backup_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/backup_recovery_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/content_hash_cache.cpython-313.pyc +0 -0
- package/core/__pycache__/context_menu.cpython-313.pyc +0 -0
- package/core/__pycache__/data_validator.cpython-313.pyc +0 -0
- package/core/__pycache__/database_connection_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/database_curl_settings_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/database_promera_ai_settings_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/database_schema.cpython-313.pyc +0 -0
- package/core/__pycache__/database_schema_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/database_settings_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/database_settings_manager_interface.cpython-313.pyc +0 -0
- package/core/__pycache__/dialog_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/efficient_line_numbers.cpython-313.pyc +0 -0
- package/core/__pycache__/error_handler.cpython-313.pyc +0 -0
- package/core/__pycache__/error_service.cpython-313.pyc +0 -0
- package/core/__pycache__/event_consolidator.cpython-313.pyc +0 -0
- package/core/__pycache__/memory_efficient_text_widget.cpython-313.pyc +0 -0
- package/core/__pycache__/migration_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/migration_test_suite.cpython-313.pyc +0 -0
- package/core/__pycache__/migration_validator.cpython-313.pyc +0 -0
- package/core/__pycache__/optimized_find_replace.cpython-313.pyc +0 -0
- package/core/__pycache__/optimized_pattern_engine.cpython-313.pyc +0 -0
- package/core/__pycache__/optimized_search_highlighter.cpython-313.pyc +0 -0
- package/core/__pycache__/performance_monitor.cpython-313.pyc +0 -0
- package/core/__pycache__/persistence_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/progressive_stats_calculator.cpython-313.pyc +0 -0
- package/core/__pycache__/regex_pattern_cache.cpython-313.pyc +0 -0
- package/core/__pycache__/regex_pattern_library.cpython-313.pyc +0 -0
- package/core/__pycache__/search_operation_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/settings_defaults_registry.cpython-313.pyc +0 -0
- package/core/__pycache__/settings_integrity_validator.cpython-313.pyc +0 -0
- package/core/__pycache__/settings_serializer.cpython-313.pyc +0 -0
- package/core/__pycache__/settings_validator.cpython-313.pyc +0 -0
- package/core/__pycache__/smart_stats_calculator.cpython-313.pyc +0 -0
- package/core/__pycache__/statistics_update_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/stats_config_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/streaming_text_handler.cpython-313.pyc +0 -0
- package/core/__pycache__/task_scheduler.cpython-313.pyc +0 -0
- package/core/__pycache__/visibility_monitor.cpython-313.pyc +0 -0
- package/core/__pycache__/widget_cache.cpython-313.pyc +0 -0
- package/core/mcp/__pycache__/__init__.cpython-313.pyc +0 -0
- package/core/mcp/__pycache__/protocol.cpython-313.pyc +0 -0
- package/core/mcp/__pycache__/schema.cpython-313.pyc +0 -0
- package/core/mcp/__pycache__/server_stdio.cpython-313.pyc +0 -0
- package/core/mcp/__pycache__/tool_registry.cpython-313.pyc +0 -0
- package/tools/__pycache__/__init__.cpython-313.pyc +0 -0
- package/tools/__pycache__/ai_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/ascii_art_generator.cpython-313.pyc +0 -0
- package/tools/__pycache__/base64_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/base_tool.cpython-313.pyc +0 -0
- package/tools/__pycache__/case_tool.cpython-313.pyc +0 -0
- package/tools/__pycache__/column_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/cron_tool.cpython-313.pyc +0 -0
- package/tools/__pycache__/curl_history.cpython-313.pyc +0 -0
- package/tools/__pycache__/curl_processor.cpython-313.pyc +0 -0
- package/tools/__pycache__/curl_settings.cpython-313.pyc +0 -0
- package/tools/__pycache__/curl_tool.cpython-313.pyc +0 -0
- package/tools/__pycache__/diff_viewer.cpython-313.pyc +0 -0
- package/tools/__pycache__/email_extraction_tool.cpython-313.pyc +0 -0
- package/tools/__pycache__/email_header_analyzer.cpython-313.pyc +0 -0
- package/tools/__pycache__/extraction_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/find_replace.cpython-313.pyc +0 -0
- package/tools/__pycache__/folder_file_reporter.cpython-313.pyc +0 -0
- package/tools/__pycache__/folder_file_reporter_adapter.cpython-313.pyc +0 -0
- package/tools/__pycache__/generator_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/hash_generator.cpython-313.pyc +0 -0
- package/tools/__pycache__/html_tool.cpython-313.pyc +0 -0
- package/tools/__pycache__/huggingface_helper.cpython-313.pyc +0 -0
- package/tools/__pycache__/jsonxml_tool.cpython-313.pyc +0 -0
- package/tools/__pycache__/line_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/list_comparator.cpython-313.pyc +0 -0
- package/tools/__pycache__/markdown_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/mcp_widget.cpython-313.pyc +0 -0
- package/tools/__pycache__/notes_widget.cpython-313.pyc +0 -0
- package/tools/__pycache__/number_base_converter.cpython-313.pyc +0 -0
- package/tools/__pycache__/regex_extractor.cpython-313.pyc +0 -0
- package/tools/__pycache__/slug_generator.cpython-313.pyc +0 -0
- package/tools/__pycache__/sorter_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/string_escape_tool.cpython-313.pyc +0 -0
- package/tools/__pycache__/text_statistics_tool.cpython-313.pyc +0 -0
- package/tools/__pycache__/text_wrapper.cpython-313.pyc +0 -0
- package/tools/__pycache__/timestamp_converter.cpython-313.pyc +0 -0
- package/tools/__pycache__/tool_loader.cpython-313.pyc +0 -0
- package/tools/__pycache__/translator_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/url_link_extractor.cpython-313.pyc +0 -0
- package/tools/__pycache__/url_parser.cpython-313.pyc +0 -0
- package/tools/__pycache__/whitespace_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/word_frequency_counter.cpython-313.pyc +0 -0
|
@@ -1,262 +1,262 @@
|
|
|
1
|
-
"""
|
|
2
|
-
URL and Link Extractor Module - URL extraction utility
|
|
3
|
-
|
|
4
|
-
This module provides comprehensive URL and link extraction functionality with UI components
|
|
5
|
-
for the Promera AI Commander application.
|
|
6
|
-
"""
|
|
7
|
-
|
|
8
|
-
import tkinter as tk
|
|
9
|
-
from tkinter import ttk
|
|
10
|
-
import re
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
class URLLinkExtractorProcessor:
|
|
14
|
-
"""URL and link extractor processor with multiple extraction modes and filtering."""
|
|
15
|
-
|
|
16
|
-
@staticmethod
|
|
17
|
-
def extract_urls(text, extract_href=False, extract_https=False, extract_any_protocol=False, extract_markdown=False, filter_text=""):
|
|
18
|
-
"""Extracts URLs and links from text based on selected options."""
|
|
19
|
-
urls = set()
|
|
20
|
-
|
|
21
|
-
# Extract from HTML href attributes
|
|
22
|
-
if extract_href:
|
|
23
|
-
href_pattern = r'href=["\']([^"\']+)["\']'
|
|
24
|
-
urls.update(re.findall(href_pattern, text))
|
|
25
|
-
|
|
26
|
-
# Extract http(s):// URLs
|
|
27
|
-
if extract_https:
|
|
28
|
-
https_pattern = r'https?://[^\s<>"{}|\\^`\[\]]+'
|
|
29
|
-
urls.update(re.findall(https_pattern, text))
|
|
30
|
-
|
|
31
|
-
# Extract any protocol:// URLs
|
|
32
|
-
if extract_any_protocol:
|
|
33
|
-
protocol_pattern = r'\b[a-zA-Z][a-zA-Z0-9+.-]*://[^\s<>"{}|\\^`\[\]]+'
|
|
34
|
-
urls.update(re.findall(protocol_pattern, text))
|
|
35
|
-
|
|
36
|
-
# Extract markdown links [text](url)
|
|
37
|
-
if extract_markdown:
|
|
38
|
-
markdown_pattern = r'\[([^\]]+)\]\(([^)]+)\)'
|
|
39
|
-
markdown_urls = re.findall(markdown_pattern, text)
|
|
40
|
-
urls.update([url for _, url in markdown_urls])
|
|
41
|
-
|
|
42
|
-
# If no options selected, extract all
|
|
43
|
-
if not any([extract_href, extract_https, extract_any_protocol, extract_markdown]):
|
|
44
|
-
# Extract all types
|
|
45
|
-
href_pattern = r'href=["\']([^"\']+)["\']'
|
|
46
|
-
urls.update(re.findall(href_pattern, text))
|
|
47
|
-
|
|
48
|
-
protocol_pattern = r'\b[a-zA-Z][a-zA-Z0-9+.-]*://[^\s<>"{}|\\^`\[\]]+'
|
|
49
|
-
urls.update(re.findall(protocol_pattern, text))
|
|
50
|
-
|
|
51
|
-
markdown_pattern = r'\[([^\]]+)\]\(([^)]+)\)'
|
|
52
|
-
markdown_urls = re.findall(markdown_pattern, text)
|
|
53
|
-
urls.update([url for _, url in markdown_urls])
|
|
54
|
-
|
|
55
|
-
# Apply filter if provided
|
|
56
|
-
if filter_text.strip():
|
|
57
|
-
filter_lower = filter_text.lower()
|
|
58
|
-
urls = {url for url in urls if filter_lower in url.lower()}
|
|
59
|
-
|
|
60
|
-
return '\n'.join(sorted(urls)) if urls else "No URLs found."
|
|
61
|
-
|
|
62
|
-
@staticmethod
|
|
63
|
-
def process_text(input_text, settings):
|
|
64
|
-
"""Process text using the current settings."""
|
|
65
|
-
return URLLinkExtractorProcessor.extract_urls(
|
|
66
|
-
input_text,
|
|
67
|
-
settings.get("extract_href", False),
|
|
68
|
-
settings.get("extract_https", False),
|
|
69
|
-
settings.get("extract_any_protocol", False),
|
|
70
|
-
settings.get("extract_markdown", False),
|
|
71
|
-
settings.get("filter_text", "")
|
|
72
|
-
)
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
class URLLinkExtractorUI:
|
|
76
|
-
"""UI components for the URL and Link Extractor."""
|
|
77
|
-
|
|
78
|
-
def __init__(self, parent, settings, on_setting_change_callback=None, apply_tool_callback=None):
|
|
79
|
-
"""
|
|
80
|
-
Initialize the URL and Link Extractor UI.
|
|
81
|
-
|
|
82
|
-
Args:
|
|
83
|
-
parent: Parent widget
|
|
84
|
-
settings: Dictionary containing tool settings
|
|
85
|
-
on_setting_change_callback: Callback function for setting changes
|
|
86
|
-
apply_tool_callback: Callback function for applying the tool
|
|
87
|
-
"""
|
|
88
|
-
self.parent = parent
|
|
89
|
-
self.settings = settings
|
|
90
|
-
self.on_setting_change_callback = on_setting_change_callback
|
|
91
|
-
self.apply_tool_callback = apply_tool_callback
|
|
92
|
-
|
|
93
|
-
# Initialize UI variables
|
|
94
|
-
self.url_extract_href_var = tk.BooleanVar(value=settings.get("extract_href", False))
|
|
95
|
-
self.url_extract_https_var = tk.BooleanVar(value=settings.get("extract_https", False))
|
|
96
|
-
self.url_extract_any_protocol_var = tk.BooleanVar(value=settings.get("extract_any_protocol", False))
|
|
97
|
-
self.url_extract_markdown_var = tk.BooleanVar(value=settings.get("extract_markdown", False))
|
|
98
|
-
self.url_filter_var = tk.StringVar(value=settings.get("filter_text", ""))
|
|
99
|
-
|
|
100
|
-
self.create_widgets()
|
|
101
|
-
|
|
102
|
-
def create_widgets(self):
|
|
103
|
-
"""Creates the UI widgets for the URL and Link Extractor."""
|
|
104
|
-
# Checkboxes for different extraction modes
|
|
105
|
-
ttk.Checkbutton(
|
|
106
|
-
self.parent,
|
|
107
|
-
text='href=""',
|
|
108
|
-
variable=self.url_extract_href_var,
|
|
109
|
-
command=self._on_setting_change
|
|
110
|
-
).pack(side=tk.LEFT, padx=5)
|
|
111
|
-
|
|
112
|
-
ttk.Checkbutton(
|
|
113
|
-
self.parent,
|
|
114
|
-
text="http(s)://",
|
|
115
|
-
variable=self.url_extract_https_var,
|
|
116
|
-
command=self._on_setting_change
|
|
117
|
-
).pack(side=tk.LEFT, padx=5)
|
|
118
|
-
|
|
119
|
-
ttk.Checkbutton(
|
|
120
|
-
self.parent,
|
|
121
|
-
text="any protocol ://",
|
|
122
|
-
variable=self.url_extract_any_protocol_var,
|
|
123
|
-
command=self._on_setting_change
|
|
124
|
-
).pack(side=tk.LEFT, padx=5)
|
|
125
|
-
|
|
126
|
-
ttk.Checkbutton(
|
|
127
|
-
self.parent,
|
|
128
|
-
text="markdown []()",
|
|
129
|
-
variable=self.url_extract_markdown_var,
|
|
130
|
-
command=self._on_setting_change
|
|
131
|
-
).pack(side=tk.LEFT, padx=5)
|
|
132
|
-
|
|
133
|
-
# Filter field
|
|
134
|
-
ttk.Label(self.parent, text="Filter:").pack(side=tk.LEFT, padx=(10, 2))
|
|
135
|
-
filter_entry = ttk.Entry(self.parent, textvariable=self.url_filter_var, width=15)
|
|
136
|
-
filter_entry.pack(side=tk.LEFT, padx=2)
|
|
137
|
-
self.url_filter_var.trace_add("write", self._on_filter_change)
|
|
138
|
-
|
|
139
|
-
# Extract button
|
|
140
|
-
if self.apply_tool_callback:
|
|
141
|
-
ttk.Button(
|
|
142
|
-
self.parent,
|
|
143
|
-
text="Extract",
|
|
144
|
-
command=self.apply_tool_callback
|
|
145
|
-
).pack(side=tk.LEFT, padx=10)
|
|
146
|
-
|
|
147
|
-
def _on_setting_change(self):
|
|
148
|
-
"""Handle setting changes."""
|
|
149
|
-
if self.on_setting_change_callback:
|
|
150
|
-
self.on_setting_change_callback()
|
|
151
|
-
|
|
152
|
-
def _on_filter_change(self, *args):
|
|
153
|
-
"""Handle filter text changes."""
|
|
154
|
-
if self.on_setting_change_callback:
|
|
155
|
-
self.on_setting_change_callback()
|
|
156
|
-
|
|
157
|
-
def get_current_settings(self):
|
|
158
|
-
"""Get the current settings from the UI."""
|
|
159
|
-
return {
|
|
160
|
-
"extract_href": self.url_extract_href_var.get(),
|
|
161
|
-
"extract_https": self.url_extract_https_var.get(),
|
|
162
|
-
"extract_any_protocol": self.url_extract_any_protocol_var.get(),
|
|
163
|
-
"extract_markdown": self.url_extract_markdown_var.get(),
|
|
164
|
-
"filter_text": self.url_filter_var.get()
|
|
165
|
-
}
|
|
166
|
-
|
|
167
|
-
def update_settings(self, settings):
|
|
168
|
-
"""Update the UI with new settings."""
|
|
169
|
-
self.url_extract_href_var.set(settings.get("extract_href", False))
|
|
170
|
-
self.url_extract_https_var.set(settings.get("extract_https", False))
|
|
171
|
-
self.url_extract_any_protocol_var.set(settings.get("extract_any_protocol", False))
|
|
172
|
-
self.url_extract_markdown_var.set(settings.get("extract_markdown", False))
|
|
173
|
-
self.url_filter_var.set(settings.get("filter_text", ""))
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
class URLLinkExtractor:
|
|
177
|
-
"""Main URL and Link Extractor class that combines processor and UI functionality."""
|
|
178
|
-
|
|
179
|
-
def __init__(self):
|
|
180
|
-
self.processor = URLLinkExtractorProcessor()
|
|
181
|
-
self.ui = None
|
|
182
|
-
|
|
183
|
-
def create_ui(self, parent, settings, on_setting_change_callback=None, apply_tool_callback=None):
|
|
184
|
-
"""Create and return the UI component."""
|
|
185
|
-
self.ui = URLLinkExtractorUI(parent, settings, on_setting_change_callback, apply_tool_callback)
|
|
186
|
-
return self.ui
|
|
187
|
-
|
|
188
|
-
def process_text(self, input_text, settings):
|
|
189
|
-
"""Process text using the current settings."""
|
|
190
|
-
return self.processor.process_text(input_text, settings)
|
|
191
|
-
|
|
192
|
-
def get_default_settings(self):
|
|
193
|
-
"""Get default settings for the URL and Link Extractor."""
|
|
194
|
-
return {
|
|
195
|
-
"extract_href": False,
|
|
196
|
-
"extract_https": False,
|
|
197
|
-
"extract_any_protocol": False,
|
|
198
|
-
"extract_markdown": False,
|
|
199
|
-
"filter_text": ""
|
|
200
|
-
}
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
# Convenience functions for backward compatibility
|
|
204
|
-
def extract_urls(text, extract_href=False, extract_https=False, extract_any_protocol=False, extract_markdown=False, filter_text=""):
|
|
205
|
-
"""Extract URLs with specified options."""
|
|
206
|
-
return URLLinkExtractorProcessor.extract_urls(
|
|
207
|
-
text, extract_href, extract_https, extract_any_protocol, extract_markdown, filter_text
|
|
208
|
-
)
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
def process_url_extraction(input_text, settings):
|
|
212
|
-
"""Process URL extraction with the specified settings."""
|
|
213
|
-
return URLLinkExtractorProcessor.process_text(input_text, settings)
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
# BaseTool-compatible wrapper
|
|
217
|
-
try:
|
|
218
|
-
from tools.base_tool import BaseTool
|
|
219
|
-
from typing import Dict, Any
|
|
220
|
-
import tkinter as tk
|
|
221
|
-
from tkinter import ttk
|
|
222
|
-
|
|
223
|
-
class URLLinkExtractorV2(BaseTool):
|
|
224
|
-
"""
|
|
225
|
-
BaseTool-compatible version of URLLinkExtractor.
|
|
226
|
-
"""
|
|
227
|
-
|
|
228
|
-
TOOL_NAME = "URL and Link Extractor"
|
|
229
|
-
TOOL_DESCRIPTION = "Extract URLs and links from text"
|
|
230
|
-
TOOL_VERSION = "2.0.0"
|
|
231
|
-
|
|
232
|
-
def process_text(self, input_text: str, settings: Dict[str, Any]) -> str:
|
|
233
|
-
"""Extract URLs from text."""
|
|
234
|
-
return URLLinkExtractorProcessor.extract_urls(
|
|
235
|
-
input_text,
|
|
236
|
-
settings.get("extract_href", False),
|
|
237
|
-
settings.get("extract_https", True),
|
|
238
|
-
settings.get("extract_any_protocol", False),
|
|
239
|
-
settings.get("extract_markdown", False),
|
|
240
|
-
settings.get("filter_text", "")
|
|
241
|
-
)
|
|
242
|
-
|
|
243
|
-
def get_default_settings(self) -> Dict[str, Any]:
|
|
244
|
-
return {
|
|
245
|
-
"extract_href": False,
|
|
246
|
-
"extract_https": True,
|
|
247
|
-
"extract_any_protocol": False,
|
|
248
|
-
"extract_markdown": False,
|
|
249
|
-
"filter_text": ""
|
|
250
|
-
}
|
|
251
|
-
|
|
252
|
-
def create_ui(self, parent: tk.Widget, settings: Dict[str, Any],
|
|
253
|
-
on_change=None, on_apply=None) -> tk.Widget:
|
|
254
|
-
"""Create UI for URL Link Extractor."""
|
|
255
|
-
frame = ttk.Frame(parent)
|
|
256
|
-
ttk.Label(frame, text="Extract URLs and links").pack(side=tk.LEFT, padx=5)
|
|
257
|
-
if on_apply:
|
|
258
|
-
ttk.Button(frame, text="Extract", command=on_apply).pack(side=tk.LEFT, padx=5)
|
|
259
|
-
return frame
|
|
260
|
-
|
|
261
|
-
except ImportError:
|
|
1
|
+
"""
|
|
2
|
+
URL and Link Extractor Module - URL extraction utility
|
|
3
|
+
|
|
4
|
+
This module provides comprehensive URL and link extraction functionality with UI components
|
|
5
|
+
for the Promera AI Commander application.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import tkinter as tk
|
|
9
|
+
from tkinter import ttk
|
|
10
|
+
import re
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class URLLinkExtractorProcessor:
|
|
14
|
+
"""URL and link extractor processor with multiple extraction modes and filtering."""
|
|
15
|
+
|
|
16
|
+
@staticmethod
|
|
17
|
+
def extract_urls(text, extract_href=False, extract_https=False, extract_any_protocol=False, extract_markdown=False, filter_text=""):
|
|
18
|
+
"""Extracts URLs and links from text based on selected options."""
|
|
19
|
+
urls = set()
|
|
20
|
+
|
|
21
|
+
# Extract from HTML href attributes
|
|
22
|
+
if extract_href:
|
|
23
|
+
href_pattern = r'href=["\']([^"\']+)["\']'
|
|
24
|
+
urls.update(re.findall(href_pattern, text))
|
|
25
|
+
|
|
26
|
+
# Extract http(s):// URLs
|
|
27
|
+
if extract_https:
|
|
28
|
+
https_pattern = r'https?://[^\s<>"{}|\\^`\[\]]+'
|
|
29
|
+
urls.update(re.findall(https_pattern, text))
|
|
30
|
+
|
|
31
|
+
# Extract any protocol:// URLs
|
|
32
|
+
if extract_any_protocol:
|
|
33
|
+
protocol_pattern = r'\b[a-zA-Z][a-zA-Z0-9+.-]*://[^\s<>"{}|\\^`\[\]]+'
|
|
34
|
+
urls.update(re.findall(protocol_pattern, text))
|
|
35
|
+
|
|
36
|
+
# Extract markdown links [text](url)
|
|
37
|
+
if extract_markdown:
|
|
38
|
+
markdown_pattern = r'\[([^\]]+)\]\(([^)]+)\)'
|
|
39
|
+
markdown_urls = re.findall(markdown_pattern, text)
|
|
40
|
+
urls.update([url for _, url in markdown_urls])
|
|
41
|
+
|
|
42
|
+
# If no options selected, extract all
|
|
43
|
+
if not any([extract_href, extract_https, extract_any_protocol, extract_markdown]):
|
|
44
|
+
# Extract all types
|
|
45
|
+
href_pattern = r'href=["\']([^"\']+)["\']'
|
|
46
|
+
urls.update(re.findall(href_pattern, text))
|
|
47
|
+
|
|
48
|
+
protocol_pattern = r'\b[a-zA-Z][a-zA-Z0-9+.-]*://[^\s<>"{}|\\^`\[\]]+'
|
|
49
|
+
urls.update(re.findall(protocol_pattern, text))
|
|
50
|
+
|
|
51
|
+
markdown_pattern = r'\[([^\]]+)\]\(([^)]+)\)'
|
|
52
|
+
markdown_urls = re.findall(markdown_pattern, text)
|
|
53
|
+
urls.update([url for _, url in markdown_urls])
|
|
54
|
+
|
|
55
|
+
# Apply filter if provided
|
|
56
|
+
if filter_text.strip():
|
|
57
|
+
filter_lower = filter_text.lower()
|
|
58
|
+
urls = {url for url in urls if filter_lower in url.lower()}
|
|
59
|
+
|
|
60
|
+
return '\n'.join(sorted(urls)) if urls else "No URLs found."
|
|
61
|
+
|
|
62
|
+
@staticmethod
|
|
63
|
+
def process_text(input_text, settings):
|
|
64
|
+
"""Process text using the current settings."""
|
|
65
|
+
return URLLinkExtractorProcessor.extract_urls(
|
|
66
|
+
input_text,
|
|
67
|
+
settings.get("extract_href", False),
|
|
68
|
+
settings.get("extract_https", False),
|
|
69
|
+
settings.get("extract_any_protocol", False),
|
|
70
|
+
settings.get("extract_markdown", False),
|
|
71
|
+
settings.get("filter_text", "")
|
|
72
|
+
)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class URLLinkExtractorUI:
|
|
76
|
+
"""UI components for the URL and Link Extractor."""
|
|
77
|
+
|
|
78
|
+
def __init__(self, parent, settings, on_setting_change_callback=None, apply_tool_callback=None):
|
|
79
|
+
"""
|
|
80
|
+
Initialize the URL and Link Extractor UI.
|
|
81
|
+
|
|
82
|
+
Args:
|
|
83
|
+
parent: Parent widget
|
|
84
|
+
settings: Dictionary containing tool settings
|
|
85
|
+
on_setting_change_callback: Callback function for setting changes
|
|
86
|
+
apply_tool_callback: Callback function for applying the tool
|
|
87
|
+
"""
|
|
88
|
+
self.parent = parent
|
|
89
|
+
self.settings = settings
|
|
90
|
+
self.on_setting_change_callback = on_setting_change_callback
|
|
91
|
+
self.apply_tool_callback = apply_tool_callback
|
|
92
|
+
|
|
93
|
+
# Initialize UI variables
|
|
94
|
+
self.url_extract_href_var = tk.BooleanVar(value=settings.get("extract_href", False))
|
|
95
|
+
self.url_extract_https_var = tk.BooleanVar(value=settings.get("extract_https", False))
|
|
96
|
+
self.url_extract_any_protocol_var = tk.BooleanVar(value=settings.get("extract_any_protocol", False))
|
|
97
|
+
self.url_extract_markdown_var = tk.BooleanVar(value=settings.get("extract_markdown", False))
|
|
98
|
+
self.url_filter_var = tk.StringVar(value=settings.get("filter_text", ""))
|
|
99
|
+
|
|
100
|
+
self.create_widgets()
|
|
101
|
+
|
|
102
|
+
def create_widgets(self):
|
|
103
|
+
"""Creates the UI widgets for the URL and Link Extractor."""
|
|
104
|
+
# Checkboxes for different extraction modes
|
|
105
|
+
ttk.Checkbutton(
|
|
106
|
+
self.parent,
|
|
107
|
+
text='href=""',
|
|
108
|
+
variable=self.url_extract_href_var,
|
|
109
|
+
command=self._on_setting_change
|
|
110
|
+
).pack(side=tk.LEFT, padx=5)
|
|
111
|
+
|
|
112
|
+
ttk.Checkbutton(
|
|
113
|
+
self.parent,
|
|
114
|
+
text="http(s)://",
|
|
115
|
+
variable=self.url_extract_https_var,
|
|
116
|
+
command=self._on_setting_change
|
|
117
|
+
).pack(side=tk.LEFT, padx=5)
|
|
118
|
+
|
|
119
|
+
ttk.Checkbutton(
|
|
120
|
+
self.parent,
|
|
121
|
+
text="any protocol ://",
|
|
122
|
+
variable=self.url_extract_any_protocol_var,
|
|
123
|
+
command=self._on_setting_change
|
|
124
|
+
).pack(side=tk.LEFT, padx=5)
|
|
125
|
+
|
|
126
|
+
ttk.Checkbutton(
|
|
127
|
+
self.parent,
|
|
128
|
+
text="markdown []()",
|
|
129
|
+
variable=self.url_extract_markdown_var,
|
|
130
|
+
command=self._on_setting_change
|
|
131
|
+
).pack(side=tk.LEFT, padx=5)
|
|
132
|
+
|
|
133
|
+
# Filter field
|
|
134
|
+
ttk.Label(self.parent, text="Filter:").pack(side=tk.LEFT, padx=(10, 2))
|
|
135
|
+
filter_entry = ttk.Entry(self.parent, textvariable=self.url_filter_var, width=15)
|
|
136
|
+
filter_entry.pack(side=tk.LEFT, padx=2)
|
|
137
|
+
self.url_filter_var.trace_add("write", self._on_filter_change)
|
|
138
|
+
|
|
139
|
+
# Extract button
|
|
140
|
+
if self.apply_tool_callback:
|
|
141
|
+
ttk.Button(
|
|
142
|
+
self.parent,
|
|
143
|
+
text="Extract",
|
|
144
|
+
command=self.apply_tool_callback
|
|
145
|
+
).pack(side=tk.LEFT, padx=10)
|
|
146
|
+
|
|
147
|
+
def _on_setting_change(self):
|
|
148
|
+
"""Handle setting changes."""
|
|
149
|
+
if self.on_setting_change_callback:
|
|
150
|
+
self.on_setting_change_callback()
|
|
151
|
+
|
|
152
|
+
def _on_filter_change(self, *args):
|
|
153
|
+
"""Handle filter text changes."""
|
|
154
|
+
if self.on_setting_change_callback:
|
|
155
|
+
self.on_setting_change_callback()
|
|
156
|
+
|
|
157
|
+
def get_current_settings(self):
|
|
158
|
+
"""Get the current settings from the UI."""
|
|
159
|
+
return {
|
|
160
|
+
"extract_href": self.url_extract_href_var.get(),
|
|
161
|
+
"extract_https": self.url_extract_https_var.get(),
|
|
162
|
+
"extract_any_protocol": self.url_extract_any_protocol_var.get(),
|
|
163
|
+
"extract_markdown": self.url_extract_markdown_var.get(),
|
|
164
|
+
"filter_text": self.url_filter_var.get()
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
def update_settings(self, settings):
|
|
168
|
+
"""Update the UI with new settings."""
|
|
169
|
+
self.url_extract_href_var.set(settings.get("extract_href", False))
|
|
170
|
+
self.url_extract_https_var.set(settings.get("extract_https", False))
|
|
171
|
+
self.url_extract_any_protocol_var.set(settings.get("extract_any_protocol", False))
|
|
172
|
+
self.url_extract_markdown_var.set(settings.get("extract_markdown", False))
|
|
173
|
+
self.url_filter_var.set(settings.get("filter_text", ""))
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
class URLLinkExtractor:
|
|
177
|
+
"""Main URL and Link Extractor class that combines processor and UI functionality."""
|
|
178
|
+
|
|
179
|
+
def __init__(self):
|
|
180
|
+
self.processor = URLLinkExtractorProcessor()
|
|
181
|
+
self.ui = None
|
|
182
|
+
|
|
183
|
+
def create_ui(self, parent, settings, on_setting_change_callback=None, apply_tool_callback=None):
|
|
184
|
+
"""Create and return the UI component."""
|
|
185
|
+
self.ui = URLLinkExtractorUI(parent, settings, on_setting_change_callback, apply_tool_callback)
|
|
186
|
+
return self.ui
|
|
187
|
+
|
|
188
|
+
def process_text(self, input_text, settings):
|
|
189
|
+
"""Process text using the current settings."""
|
|
190
|
+
return self.processor.process_text(input_text, settings)
|
|
191
|
+
|
|
192
|
+
def get_default_settings(self):
|
|
193
|
+
"""Get default settings for the URL and Link Extractor."""
|
|
194
|
+
return {
|
|
195
|
+
"extract_href": False,
|
|
196
|
+
"extract_https": False,
|
|
197
|
+
"extract_any_protocol": False,
|
|
198
|
+
"extract_markdown": False,
|
|
199
|
+
"filter_text": ""
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
# Convenience functions for backward compatibility
|
|
204
|
+
def extract_urls(text, extract_href=False, extract_https=False, extract_any_protocol=False, extract_markdown=False, filter_text=""):
|
|
205
|
+
"""Extract URLs with specified options."""
|
|
206
|
+
return URLLinkExtractorProcessor.extract_urls(
|
|
207
|
+
text, extract_href, extract_https, extract_any_protocol, extract_markdown, filter_text
|
|
208
|
+
)
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def process_url_extraction(input_text, settings):
|
|
212
|
+
"""Process URL extraction with the specified settings."""
|
|
213
|
+
return URLLinkExtractorProcessor.process_text(input_text, settings)
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
# BaseTool-compatible wrapper
|
|
217
|
+
try:
|
|
218
|
+
from tools.base_tool import BaseTool
|
|
219
|
+
from typing import Dict, Any
|
|
220
|
+
import tkinter as tk
|
|
221
|
+
from tkinter import ttk
|
|
222
|
+
|
|
223
|
+
class URLLinkExtractorV2(BaseTool):
|
|
224
|
+
"""
|
|
225
|
+
BaseTool-compatible version of URLLinkExtractor.
|
|
226
|
+
"""
|
|
227
|
+
|
|
228
|
+
TOOL_NAME = "URL and Link Extractor"
|
|
229
|
+
TOOL_DESCRIPTION = "Extract URLs and links from text"
|
|
230
|
+
TOOL_VERSION = "2.0.0"
|
|
231
|
+
|
|
232
|
+
def process_text(self, input_text: str, settings: Dict[str, Any]) -> str:
|
|
233
|
+
"""Extract URLs from text."""
|
|
234
|
+
return URLLinkExtractorProcessor.extract_urls(
|
|
235
|
+
input_text,
|
|
236
|
+
settings.get("extract_href", False),
|
|
237
|
+
settings.get("extract_https", True),
|
|
238
|
+
settings.get("extract_any_protocol", False),
|
|
239
|
+
settings.get("extract_markdown", False),
|
|
240
|
+
settings.get("filter_text", "")
|
|
241
|
+
)
|
|
242
|
+
|
|
243
|
+
def get_default_settings(self) -> Dict[str, Any]:
|
|
244
|
+
return {
|
|
245
|
+
"extract_href": False,
|
|
246
|
+
"extract_https": True,
|
|
247
|
+
"extract_any_protocol": False,
|
|
248
|
+
"extract_markdown": False,
|
|
249
|
+
"filter_text": ""
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
def create_ui(self, parent: tk.Widget, settings: Dict[str, Any],
|
|
253
|
+
on_change=None, on_apply=None) -> tk.Widget:
|
|
254
|
+
"""Create UI for URL Link Extractor."""
|
|
255
|
+
frame = ttk.Frame(parent)
|
|
256
|
+
ttk.Label(frame, text="Extract URLs and links").pack(side=tk.LEFT, padx=5)
|
|
257
|
+
if on_apply:
|
|
258
|
+
ttk.Button(frame, text="Extract", command=on_apply).pack(side=tk.LEFT, padx=5)
|
|
259
|
+
return frame
|
|
260
|
+
|
|
261
|
+
except ImportError:
|
|
262
262
|
pass
|