pomera-ai-commander 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +680 -0
- package/bin/pomera-ai-commander.js +62 -0
- package/core/__init__.py +66 -0
- package/core/__pycache__/__init__.cpython-313.pyc +0 -0
- package/core/__pycache__/app_context.cpython-313.pyc +0 -0
- package/core/__pycache__/async_text_processor.cpython-313.pyc +0 -0
- package/core/__pycache__/backup_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/backup_recovery_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/content_hash_cache.cpython-313.pyc +0 -0
- package/core/__pycache__/context_menu.cpython-313.pyc +0 -0
- package/core/__pycache__/data_validator.cpython-313.pyc +0 -0
- package/core/__pycache__/database_connection_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/database_curl_settings_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/database_promera_ai_settings_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/database_schema.cpython-313.pyc +0 -0
- package/core/__pycache__/database_schema_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/database_settings_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/database_settings_manager_interface.cpython-313.pyc +0 -0
- package/core/__pycache__/dialog_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/efficient_line_numbers.cpython-313.pyc +0 -0
- package/core/__pycache__/error_handler.cpython-313.pyc +0 -0
- package/core/__pycache__/error_service.cpython-313.pyc +0 -0
- package/core/__pycache__/event_consolidator.cpython-313.pyc +0 -0
- package/core/__pycache__/memory_efficient_text_widget.cpython-313.pyc +0 -0
- package/core/__pycache__/migration_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/migration_test_suite.cpython-313.pyc +0 -0
- package/core/__pycache__/migration_validator.cpython-313.pyc +0 -0
- package/core/__pycache__/optimized_find_replace.cpython-313.pyc +0 -0
- package/core/__pycache__/optimized_pattern_engine.cpython-313.pyc +0 -0
- package/core/__pycache__/optimized_search_highlighter.cpython-313.pyc +0 -0
- package/core/__pycache__/performance_monitor.cpython-313.pyc +0 -0
- package/core/__pycache__/persistence_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/progressive_stats_calculator.cpython-313.pyc +0 -0
- package/core/__pycache__/regex_pattern_cache.cpython-313.pyc +0 -0
- package/core/__pycache__/regex_pattern_library.cpython-313.pyc +0 -0
- package/core/__pycache__/search_operation_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/settings_defaults_registry.cpython-313.pyc +0 -0
- package/core/__pycache__/settings_integrity_validator.cpython-313.pyc +0 -0
- package/core/__pycache__/settings_serializer.cpython-313.pyc +0 -0
- package/core/__pycache__/settings_validator.cpython-313.pyc +0 -0
- package/core/__pycache__/smart_stats_calculator.cpython-313.pyc +0 -0
- package/core/__pycache__/statistics_update_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/stats_config_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/streaming_text_handler.cpython-313.pyc +0 -0
- package/core/__pycache__/task_scheduler.cpython-313.pyc +0 -0
- package/core/__pycache__/visibility_monitor.cpython-313.pyc +0 -0
- package/core/__pycache__/widget_cache.cpython-313.pyc +0 -0
- package/core/app_context.py +482 -0
- package/core/async_text_processor.py +422 -0
- package/core/backup_manager.py +656 -0
- package/core/backup_recovery_manager.py +1034 -0
- package/core/content_hash_cache.py +509 -0
- package/core/context_menu.py +313 -0
- package/core/data_validator.py +1067 -0
- package/core/database_connection_manager.py +745 -0
- package/core/database_curl_settings_manager.py +609 -0
- package/core/database_promera_ai_settings_manager.py +447 -0
- package/core/database_schema.py +412 -0
- package/core/database_schema_manager.py +396 -0
- package/core/database_settings_manager.py +1508 -0
- package/core/database_settings_manager_interface.py +457 -0
- package/core/dialog_manager.py +735 -0
- package/core/efficient_line_numbers.py +511 -0
- package/core/error_handler.py +747 -0
- package/core/error_service.py +431 -0
- package/core/event_consolidator.py +512 -0
- package/core/mcp/__init__.py +43 -0
- package/core/mcp/__pycache__/__init__.cpython-313.pyc +0 -0
- package/core/mcp/__pycache__/protocol.cpython-313.pyc +0 -0
- package/core/mcp/__pycache__/schema.cpython-313.pyc +0 -0
- package/core/mcp/__pycache__/server_stdio.cpython-313.pyc +0 -0
- package/core/mcp/__pycache__/tool_registry.cpython-313.pyc +0 -0
- package/core/mcp/protocol.py +288 -0
- package/core/mcp/schema.py +251 -0
- package/core/mcp/server_stdio.py +299 -0
- package/core/mcp/tool_registry.py +2345 -0
- package/core/memory_efficient_text_widget.py +712 -0
- package/core/migration_manager.py +915 -0
- package/core/migration_test_suite.py +1086 -0
- package/core/migration_validator.py +1144 -0
- package/core/optimized_find_replace.py +715 -0
- package/core/optimized_pattern_engine.py +424 -0
- package/core/optimized_search_highlighter.py +553 -0
- package/core/performance_monitor.py +675 -0
- package/core/persistence_manager.py +713 -0
- package/core/progressive_stats_calculator.py +632 -0
- package/core/regex_pattern_cache.py +530 -0
- package/core/regex_pattern_library.py +351 -0
- package/core/search_operation_manager.py +435 -0
- package/core/settings_defaults_registry.py +1087 -0
- package/core/settings_integrity_validator.py +1112 -0
- package/core/settings_serializer.py +558 -0
- package/core/settings_validator.py +1824 -0
- package/core/smart_stats_calculator.py +710 -0
- package/core/statistics_update_manager.py +619 -0
- package/core/stats_config_manager.py +858 -0
- package/core/streaming_text_handler.py +723 -0
- package/core/task_scheduler.py +596 -0
- package/core/update_pattern_library.py +169 -0
- package/core/visibility_monitor.py +596 -0
- package/core/widget_cache.py +498 -0
- package/mcp.json +61 -0
- package/package.json +57 -0
- package/pomera.py +7483 -0
- package/pomera_mcp_server.py +144 -0
- package/tools/__init__.py +5 -0
- package/tools/__pycache__/__init__.cpython-313.pyc +0 -0
- package/tools/__pycache__/ai_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/ascii_art_generator.cpython-313.pyc +0 -0
- package/tools/__pycache__/base64_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/base_tool.cpython-313.pyc +0 -0
- package/tools/__pycache__/case_tool.cpython-313.pyc +0 -0
- package/tools/__pycache__/column_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/cron_tool.cpython-313.pyc +0 -0
- package/tools/__pycache__/curl_history.cpython-313.pyc +0 -0
- package/tools/__pycache__/curl_processor.cpython-313.pyc +0 -0
- package/tools/__pycache__/curl_settings.cpython-313.pyc +0 -0
- package/tools/__pycache__/curl_tool.cpython-313.pyc +0 -0
- package/tools/__pycache__/diff_viewer.cpython-313.pyc +0 -0
- package/tools/__pycache__/email_extraction_tool.cpython-313.pyc +0 -0
- package/tools/__pycache__/email_header_analyzer.cpython-313.pyc +0 -0
- package/tools/__pycache__/extraction_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/find_replace.cpython-313.pyc +0 -0
- package/tools/__pycache__/folder_file_reporter.cpython-313.pyc +0 -0
- package/tools/__pycache__/folder_file_reporter_adapter.cpython-313.pyc +0 -0
- package/tools/__pycache__/generator_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/hash_generator.cpython-313.pyc +0 -0
- package/tools/__pycache__/html_tool.cpython-313.pyc +0 -0
- package/tools/__pycache__/huggingface_helper.cpython-313.pyc +0 -0
- package/tools/__pycache__/jsonxml_tool.cpython-313.pyc +0 -0
- package/tools/__pycache__/line_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/list_comparator.cpython-313.pyc +0 -0
- package/tools/__pycache__/markdown_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/mcp_widget.cpython-313.pyc +0 -0
- package/tools/__pycache__/notes_widget.cpython-313.pyc +0 -0
- package/tools/__pycache__/number_base_converter.cpython-313.pyc +0 -0
- package/tools/__pycache__/regex_extractor.cpython-313.pyc +0 -0
- package/tools/__pycache__/slug_generator.cpython-313.pyc +0 -0
- package/tools/__pycache__/sorter_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/string_escape_tool.cpython-313.pyc +0 -0
- package/tools/__pycache__/text_statistics_tool.cpython-313.pyc +0 -0
- package/tools/__pycache__/text_wrapper.cpython-313.pyc +0 -0
- package/tools/__pycache__/timestamp_converter.cpython-313.pyc +0 -0
- package/tools/__pycache__/tool_loader.cpython-313.pyc +0 -0
- package/tools/__pycache__/translator_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/url_link_extractor.cpython-313.pyc +0 -0
- package/tools/__pycache__/url_parser.cpython-313.pyc +0 -0
- package/tools/__pycache__/whitespace_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/word_frequency_counter.cpython-313.pyc +0 -0
- package/tools/ai_tools.py +2892 -0
- package/tools/ascii_art_generator.py +353 -0
- package/tools/base64_tools.py +184 -0
- package/tools/base_tool.py +511 -0
- package/tools/case_tool.py +309 -0
- package/tools/column_tools.py +396 -0
- package/tools/cron_tool.py +885 -0
- package/tools/curl_history.py +601 -0
- package/tools/curl_processor.py +1208 -0
- package/tools/curl_settings.py +503 -0
- package/tools/curl_tool.py +5467 -0
- package/tools/diff_viewer.py +1072 -0
- package/tools/email_extraction_tool.py +249 -0
- package/tools/email_header_analyzer.py +426 -0
- package/tools/extraction_tools.py +250 -0
- package/tools/find_replace.py +1751 -0
- package/tools/folder_file_reporter.py +1463 -0
- package/tools/folder_file_reporter_adapter.py +480 -0
- package/tools/generator_tools.py +1217 -0
- package/tools/hash_generator.py +256 -0
- package/tools/html_tool.py +657 -0
- package/tools/huggingface_helper.py +449 -0
- package/tools/jsonxml_tool.py +730 -0
- package/tools/line_tools.py +419 -0
- package/tools/list_comparator.py +720 -0
- package/tools/markdown_tools.py +562 -0
- package/tools/mcp_widget.py +1417 -0
- package/tools/notes_widget.py +973 -0
- package/tools/number_base_converter.py +373 -0
- package/tools/regex_extractor.py +572 -0
- package/tools/slug_generator.py +311 -0
- package/tools/sorter_tools.py +459 -0
- package/tools/string_escape_tool.py +393 -0
- package/tools/text_statistics_tool.py +366 -0
- package/tools/text_wrapper.py +431 -0
- package/tools/timestamp_converter.py +422 -0
- package/tools/tool_loader.py +710 -0
- package/tools/translator_tools.py +523 -0
- package/tools/url_link_extractor.py +262 -0
- package/tools/url_parser.py +205 -0
- package/tools/whitespace_tools.py +356 -0
- package/tools/word_frequency_counter.py +147 -0
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
"""
|
|
2
|
+
URL and Link Extractor Module - URL extraction utility
|
|
3
|
+
|
|
4
|
+
This module provides comprehensive URL and link extraction functionality with UI components
|
|
5
|
+
for the Pomera AI Commander application.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import tkinter as tk
|
|
9
|
+
from tkinter import ttk
|
|
10
|
+
import re
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class URLLinkExtractorProcessor:
    """Stateless URL/link extractor with several extraction modes and a filter.

    All functionality is exposed through static methods; the class only
    serves as a namespace for the methods and their compiled patterns.
    """

    # href="..." or href='...'. The closing quote must be the same kind as the
    # opening one, so a value such as href="it's" is captured in full.
    _HREF_RE = re.compile(
        r'href=(?:"([^"]+)"'   # double-quoted value -> group 1
        r"|'([^']+)')"         # single-quoted value -> group 2
    )
    # http:// and https:// URLs, ending at whitespace or an excluded character.
    _HTTP_RE = re.compile(r'https?://[^\s<>"{}|\\^`\[\]]+')
    # Any "scheme://" URL (ftp://, file://, custom schemes, ...).
    _ANY_PROTOCOL_RE = re.compile(r'\b[a-zA-Z][a-zA-Z0-9+.-]*://[^\s<>"{}|\\^`\[\]]+')
    # Markdown inline link [text](target); only the target is captured.
    _MARKDOWN_RE = re.compile(r'\[[^\]]+\]\(([^)]+)\)')

    @staticmethod
    def extract_urls(text, extract_href=False, extract_https=False, extract_any_protocol=False, extract_markdown=False, filter_text=""):
        """Extract URLs and links from ``text``.

        Args:
            text: Text to scan.
            extract_href: Collect values of HTML ``href`` attributes.
            extract_https: Collect ``http://`` and ``https://`` URLs.
            extract_any_protocol: Collect any ``scheme://`` URL.
            extract_markdown: Collect targets of markdown ``[text](target)`` links.
            filter_text: Optional case-insensitive substring; only URLs
                containing it are kept. Surrounding whitespace is ignored and
                ``None`` is treated as "no filter".

        Returns:
            The unique matches, sorted and joined by newlines, or the string
            ``"No URLs found."`` when nothing matched.
        """
        patterns = URLLinkExtractorProcessor

        # With no mode selected, fall back to extracting every kind of link.
        # (http(s) URLs are a subset of the any-protocol matches.)
        if not (extract_href or extract_https or extract_any_protocol or extract_markdown):
            extract_href = extract_any_protocol = extract_markdown = True

        urls = set()

        if extract_href:
            # Exactly one of the two groups is non-empty for each match.
            urls.update(
                double_quoted or single_quoted
                for double_quoted, single_quoted in patterns._HREF_RE.findall(text)
            )

        if extract_https:
            urls.update(patterns._HTTP_RE.findall(text))

        if extract_any_protocol:
            urls.update(patterns._ANY_PROTOCOL_RE.findall(text))

        if extract_markdown:
            for target in patterns._MARKDOWN_RE.findall(text):
                # A target may carry an optional title: (url "Title").
                # Keep only the destination, i.e. the first token.
                pieces = target.split(None, 1)
                if pieces:
                    urls.add(pieces[0])

        # Use the *stripped* filter for matching; URLs never contain the
        # padding whitespace, so an unstripped filter would match nothing.
        needle = (filter_text or "").strip().lower()
        if needle:
            urls = {url for url in urls if needle in url.lower()}

        return '\n'.join(sorted(urls)) if urls else "No URLs found."

    @staticmethod
    def process_text(input_text, settings):
        """Run :meth:`extract_urls` with options read from a settings mapping.

        Args:
            input_text: Text to scan.
            settings: Mapping that may contain ``extract_href``,
                ``extract_https``, ``extract_any_protocol``,
                ``extract_markdown`` (each defaulting to ``False``) and
                ``filter_text`` (defaulting to ``""``).

        Returns:
            The result string of :meth:`extract_urls`.
        """
        return URLLinkExtractorProcessor.extract_urls(
            input_text,
            settings.get("extract_href", False),
            settings.get("extract_https", False),
            settings.get("extract_any_protocol", False),
            settings.get("extract_markdown", False),
            settings.get("filter_text", "")
        )
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class URLLinkExtractorUI:
    """Tk option bar for the URL and Link Extractor.

    Creates four mode checkboxes, a filter entry and, when an apply callback
    is supplied, an "Extract" button. Every widget is packed left-to-right
    directly into the parent widget.
    """

    def __init__(self, parent, settings, on_setting_change_callback=None, apply_tool_callback=None):
        """
        Store the collaborators, create the Tk variables and build the widgets.

        Args:
            parent: Parent widget that receives the controls
            settings: Dictionary containing tool settings (initial values)
            on_setting_change_callback: Zero-argument callable invoked when a
                checkbox is toggled or the filter text is written
            apply_tool_callback: Callable bound to the "Extract" button; the
                button is omitted when this is falsy
        """
        self.parent = parent
        self.settings = settings
        self.on_setting_change_callback = on_setting_change_callback
        self.apply_tool_callback = apply_tool_callback

        # Tk variables backing the controls, seeded from the settings mapping.
        read = settings.get
        self.url_extract_href_var = tk.BooleanVar(value=read("extract_href", False))
        self.url_extract_https_var = tk.BooleanVar(value=read("extract_https", False))
        self.url_extract_any_protocol_var = tk.BooleanVar(value=read("extract_any_protocol", False))
        self.url_extract_markdown_var = tk.BooleanVar(value=read("extract_markdown", False))
        self.url_filter_var = tk.StringVar(value=read("filter_text", ""))

        self.create_widgets()

    def create_widgets(self):
        """Build and pack the checkboxes, filter field and optional button."""
        # One checkbox per extraction mode, in display order.
        mode_toggles = (
            ('href=""', self.url_extract_href_var),
            ("http(s)://", self.url_extract_https_var),
            ("any protocol ://", self.url_extract_any_protocol_var),
            ("markdown []()", self.url_extract_markdown_var),
        )
        for caption, flag in mode_toggles:
            toggle = ttk.Checkbutton(
                self.parent,
                text=caption,
                variable=flag,
                command=self._on_setting_change
            )
            toggle.pack(side=tk.LEFT, padx=5)

        # Filter label + entry; every write to the variable reports a change.
        filter_label = ttk.Label(self.parent, text="Filter:")
        filter_label.pack(side=tk.LEFT, padx=(10, 2))
        filter_entry = ttk.Entry(self.parent, textvariable=self.url_filter_var, width=15)
        filter_entry.pack(side=tk.LEFT, padx=2)
        self.url_filter_var.trace_add("write", self._on_filter_change)

        # The button only exists when there is something to call.
        if self.apply_tool_callback:
            extract_button = ttk.Button(
                self.parent,
                text="Extract",
                command=self.apply_tool_callback
            )
            extract_button.pack(side=tk.LEFT, padx=10)

    def _on_setting_change(self):
        """Forward a checkbox toggle to the change callback, if one is set."""
        notify = self.on_setting_change_callback
        if notify:
            notify()

    def _on_filter_change(self, *args):
        """Forward a filter-text write to the change callback, if one is set.

        The positional arguments passed by the variable trace are ignored.
        """
        notify = self.on_setting_change_callback
        if notify:
            notify()

    def get_current_settings(self):
        """Return a settings dictionary reflecting the current control values."""
        current = {}
        current["extract_href"] = self.url_extract_href_var.get()
        current["extract_https"] = self.url_extract_https_var.get()
        current["extract_any_protocol"] = self.url_extract_any_protocol_var.get()
        current["extract_markdown"] = self.url_extract_markdown_var.get()
        current["filter_text"] = self.url_filter_var.get()
        return current

    def update_settings(self, settings):
        """Push values from ``settings`` into the controls.

        Missing boolean keys reset their checkbox to ``False``; a missing
        ``filter_text`` resets the filter to the empty string.
        """
        boolean_controls = (
            ("extract_href", self.url_extract_href_var),
            ("extract_https", self.url_extract_https_var),
            ("extract_any_protocol", self.url_extract_any_protocol_var),
            ("extract_markdown", self.url_extract_markdown_var),
        )
        for key, flag in boolean_controls:
            flag.set(settings.get(key, False))
        self.url_filter_var.set(settings.get("filter_text", ""))
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
class URLLinkExtractor:
    """Facade bundling the extraction processor with its optional UI."""

    def __init__(self):
        # The UI is created lazily by create_ui(); until then it is None.
        self.ui = None
        self.processor = URLLinkExtractorProcessor()

    def create_ui(self, parent, settings, on_setting_change_callback=None, apply_tool_callback=None):
        """Build the UI component, remember it on ``self.ui`` and return it."""
        component = URLLinkExtractorUI(
            parent,
            settings,
            on_setting_change_callback,
            apply_tool_callback,
        )
        self.ui = component
        return component

    def process_text(self, input_text, settings):
        """Delegate extraction to the processor and return its result."""
        processor = self.processor
        return processor.process_text(input_text, settings)

    def get_default_settings(self):
        """Return the default settings: every mode off and an empty filter."""
        defaults = dict.fromkeys(
            ("extract_href", "extract_https", "extract_any_protocol", "extract_markdown"),
            False,
        )
        defaults["filter_text"] = ""
        return defaults
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
# Convenience functions for backward compatibility
|
|
204
|
+
def extract_urls(text, extract_href=False, extract_https=False, extract_any_protocol=False, extract_markdown=False, filter_text=""):
    """Module-level shortcut for ``URLLinkExtractorProcessor.extract_urls``.

    All arguments are forwarded unchanged and the processor's result string
    is returned.
    """
    return URLLinkExtractorProcessor.extract_urls(
        text,
        extract_href=extract_href,
        extract_https=extract_https,
        extract_any_protocol=extract_any_protocol,
        extract_markdown=extract_markdown,
        filter_text=filter_text,
    )
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def process_url_extraction(input_text, settings):
    """Module-level shortcut for ``URLLinkExtractorProcessor.process_text``.

    Forwards the text and the settings mapping and returns the result string.
    """
    run = URLLinkExtractorProcessor.process_text
    return run(input_text, settings)
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
# BaseTool-compatible wrapper
|
|
217
|
+
try:
    from tools.base_tool import BaseTool
    from typing import Dict, Any
    import tkinter as tk
    from tkinter import ttk

    class URLLinkExtractorV2(BaseTool):
        """
        URLLinkExtractor exposed through the BaseTool interface.

        Defined only when ``tools.base_tool`` can be imported.
        """

        TOOL_NAME = "URL and Link Extractor"
        TOOL_DESCRIPTION = "Extract URLs and links from text"
        TOOL_VERSION = "2.0.0"

        def process_text(self, input_text: str, settings: Dict[str, Any]) -> str:
            """Extract URLs from ``input_text`` using options from ``settings``.

            Unlike the other modes, ``extract_https`` defaults to ``True``
            when the key is missing.
            """
            option = settings.get
            return URLLinkExtractorProcessor.extract_urls(
                input_text,
                extract_href=option("extract_href", False),
                extract_https=option("extract_https", True),
                extract_any_protocol=option("extract_any_protocol", False),
                extract_markdown=option("extract_markdown", False),
                filter_text=option("filter_text", ""),
            )

        def get_default_settings(self) -> Dict[str, Any]:
            """Return the defaults: only http(s) extraction on, empty filter."""
            defaults: Dict[str, Any] = {}
            defaults["extract_href"] = False
            defaults["extract_https"] = True
            defaults["extract_any_protocol"] = False
            defaults["extract_markdown"] = False
            defaults["filter_text"] = ""
            return defaults

        def create_ui(self, parent: tk.Widget, settings: Dict[str, Any],
                      on_change=None, on_apply=None) -> tk.Widget:
            """Build a frame with a caption and, if ``on_apply`` is given, a button.

            ``settings`` and ``on_change`` are accepted but not used here.
            """
            container = ttk.Frame(parent)
            caption = ttk.Label(container, text="Extract URLs and links")
            caption.pack(side=tk.LEFT, padx=5)
            if on_apply:
                run_button = ttk.Button(container, text="Extract", command=on_apply)
                run_button.pack(side=tk.LEFT, padx=5)
            return container

except ImportError:
    # BaseTool is unavailable; the V2 wrapper is simply not defined.
    pass
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
"""
|
|
2
|
+
URL Parser Module - URL parsing and analysis utility
|
|
3
|
+
|
|
4
|
+
This module provides comprehensive URL parsing functionality with UI components
|
|
5
|
+
for the Pomera AI Commander application.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import tkinter as tk
|
|
9
|
+
from tkinter import ttk
|
|
10
|
+
import urllib.parse
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class URLParserProcessor:
    """Stateless URL parser that renders a URL's components as text."""

    @staticmethod
    def _is_ip_literal(hostname):
        """Return True when ``hostname`` is an IPv4 or IPv6 address literal."""
        import ipaddress  # local import: the module does not import it globally

        try:
            ipaddress.ip_address(hostname)
        except ValueError:
            return False
        return True

    @staticmethod
    def parse_url(text, ascii_decode=True):
        """Parse a URL into a human-readable component listing.

        Args:
            text: The URL to parse. Surrounding whitespace is ignored.
            ascii_decode: When true, query parameters are decoded with
                ``urllib.parse.parse_qs`` and repeated keys are merged onto
                one line; when false, the raw ``key=value`` pairs are listed.

        Returns:
            A newline-joined listing of the components that are present
            (protocol, host, domain, subdomain, tld, path, query string,
            fragment), a prompt string when ``text`` is blank, or an
            ``"Error parsing URL: ..."`` message when parsing fails.
        """
        # Parse the stripped text: otherwise padding would end up in the
        # path, query or fragment.
        url = text.strip()
        if not url:
            return "Please enter a URL to parse."

        try:
            parsed_url = urllib.parse.urlparse(url)
            output = []

            # Protocol/Scheme
            if parsed_url.scheme:
                output.append(f"protocol: {parsed_url.scheme}")

            # Host and domain analysis
            if parsed_url.netloc:
                output.append(f"host: {parsed_url.netloc}")

                hostname = parsed_url.hostname
                # IP literals have no domain/subdomain/TLD; splitting
                # "192.168.1.1" on dots would report nonsense such as
                # "domain: 1.1".
                if hostname and not URLParserProcessor._is_ip_literal(hostname):
                    parts = hostname.split('.')
                    if len(parts) > 1:
                        # NOTE: naive split - multi-label public suffixes
                        # (e.g. "co.uk") are not recognised.
                        output.append(f"domain: {parts[-2]}.{parts[-1]}")

                        if len(parts) > 2:
                            output.append(f"subdomain: {'.'.join(parts[:-2])}")

                        output.append(f"tld: {parts[-1]}")

            # Path
            if parsed_url.path:
                output.append(f"Path: {parsed_url.path}")

            # Query string analysis
            if parsed_url.query:
                output.append("\nQuery String:")

                if ascii_decode:
                    query_params = urllib.parse.parse_qs(parsed_url.query, keep_blank_values=True)
                    output.extend(
                        f"{key}= {', '.join(values)}"
                        for key, values in query_params.items()
                    )
                else:
                    output.extend(
                        pair.replace('=', '= ', 1) if '=' in pair else pair
                        for pair in parsed_url.query.split('&')
                    )

            # Fragment/Hash
            if parsed_url.fragment:
                output.append(f"\nHash/Fragment: {parsed_url.fragment}")

            return '\n'.join(output)

        except ValueError as e:
            # urllib.parse signals malformed input (e.g. an invalid IPv6
            # netloc) with ValueError; report it instead of raising.
            return f"Error parsing URL: {e}"

    @staticmethod
    def process_text(input_text, settings):
        """Parse ``input_text`` using ``settings['ascii_decode']`` (default True)."""
        ascii_decode = settings.get("ascii_decode", True)
        return URLParserProcessor.parse_url(input_text, ascii_decode)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class URLParserUI:
    """Tk option bar for the URL Parser.

    Shows an "ASCII Decoding" checkbox and, when an apply callback is
    supplied, a "Parse" button; both are packed left-to-right into the
    parent widget.
    """

    def __init__(self, parent, settings, on_setting_change_callback=None, apply_tool_callback=None):
        """
        Store the collaborators, create the Tk variable and build the widgets.

        Args:
            parent: Parent widget that receives the controls
            settings: Dictionary containing tool settings (initial values)
            on_setting_change_callback: Zero-argument callable invoked when
                the checkbox is toggled
            apply_tool_callback: Callable bound to the "Parse" button; the
                button is omitted when this is falsy
        """
        self.parent = parent
        self.settings = settings
        self.on_setting_change_callback = on_setting_change_callback
        self.apply_tool_callback = apply_tool_callback

        # Checkbox state, seeded from the settings (on by default).
        initial_decode = settings.get("ascii_decode", True)
        self.url_parser_decode_var = tk.BooleanVar(value=initial_decode)

        self.create_widgets()

    def create_widgets(self):
        """Build and pack the checkbox and the optional "Parse" button."""
        decode_toggle = ttk.Checkbutton(
            self.parent,
            text="ASCII Decoding",
            variable=self.url_parser_decode_var,
            command=self._on_setting_change
        )
        decode_toggle.pack(side=tk.LEFT, padx=5)

        # Without an apply callback there is nothing for a button to do.
        if not self.apply_tool_callback:
            return

        parse_button = ttk.Button(
            self.parent,
            text="Parse",
            command=self.apply_tool_callback
        )
        parse_button.pack(side=tk.LEFT, padx=10)

    def _on_setting_change(self):
        """Forward a checkbox toggle to the change callback, if one is set."""
        notify = self.on_setting_change_callback
        if notify:
            notify()

    def get_current_settings(self):
        """Return a settings dictionary reflecting the checkbox state."""
        decode_enabled = self.url_parser_decode_var.get()
        return {"ascii_decode": decode_enabled}

    def update_settings(self, settings):
        """Set the checkbox from ``settings['ascii_decode']`` (default True)."""
        decode_enabled = settings.get("ascii_decode", True)
        self.url_parser_decode_var.set(decode_enabled)
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
class URLParser:
    """Facade bundling the URL parsing processor with its optional UI."""

    def __init__(self):
        # The UI is created lazily by create_ui(); until then it is None.
        self.ui = None
        self.processor = URLParserProcessor()

    def create_ui(self, parent, settings, on_setting_change_callback=None, apply_tool_callback=None):
        """Build the UI component, remember it on ``self.ui`` and return it."""
        component = URLParserUI(
            parent,
            settings,
            on_setting_change_callback,
            apply_tool_callback,
        )
        self.ui = component
        return component

    def process_text(self, input_text, settings):
        """Delegate parsing to the processor and return its result."""
        processor = self.processor
        return processor.process_text(input_text, settings)

    def get_default_settings(self):
        """Return the default settings: query decoding enabled."""
        defaults = {}
        defaults["ascii_decode"] = True
        return defaults
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
# Convenience functions for backward compatibility
|
|
159
|
+
def parse_url(text, ascii_decode=True):
    """Module-level shortcut for ``URLParserProcessor.parse_url``.

    Both arguments are forwarded unchanged and the processor's result string
    is returned.
    """
    return URLParserProcessor.parse_url(text, ascii_decode=ascii_decode)
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def process_url_parsing(input_text, settings):
    """Module-level shortcut for ``URLParserProcessor.process_text``.

    Forwards the text and the settings mapping and returns the result string.
    """
    run = URLParserProcessor.process_text
    return run(input_text, settings)
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
# BaseTool-compatible wrapper
|
|
170
|
+
try:
    from tools.base_tool import BaseTool
    from typing import Dict, Any
    import tkinter as tk
    from tkinter import ttk

    class URLParserV2(BaseTool):
        """
        URLParser exposed through the BaseTool interface.

        Defined only when ``tools.base_tool`` can be imported.
        """

        TOOL_NAME = "URL Parser"
        TOOL_DESCRIPTION = "Parse URL into components (scheme, host, path, query)"
        TOOL_VERSION = "2.0.0"

        def process_text(self, input_text: str, settings: Dict[str, Any]) -> str:
            """Parse ``input_text`` and return the component listing.

            ``settings['ascii_decode']`` (default ``True``) selects decoded
            versus raw query output.
            """
            decode_query = settings.get("ascii_decode", True)
            return URLParserProcessor.parse_url(input_text, decode_query)

        def get_default_settings(self) -> Dict[str, Any]:
            """Return the defaults: query decoding enabled."""
            defaults: Dict[str, Any] = {}
            defaults["ascii_decode"] = True
            return defaults

        def create_ui(self, parent: tk.Widget, settings: Dict[str, Any],
                      on_change=None, on_apply=None) -> tk.Widget:
            """Build a frame with a caption and, if ``on_apply`` is given, a button.

            ``settings`` and ``on_change`` are accepted but not used here.
            """
            container = ttk.Frame(parent)
            caption = ttk.Label(container, text="Parse URL components")
            caption.pack(side=tk.LEFT, padx=5)
            if on_apply:
                run_button = ttk.Button(container, text="Parse", command=on_apply)
                run_button.pack(side=tk.LEFT, padx=5)
            return container

except ImportError:
    # BaseTool is unavailable; the V2 wrapper is simply not defined.
    pass
|