pomera-ai-commander 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +680 -0
- package/bin/pomera-ai-commander.js +62 -0
- package/core/__init__.py +66 -0
- package/core/__pycache__/__init__.cpython-313.pyc +0 -0
- package/core/__pycache__/app_context.cpython-313.pyc +0 -0
- package/core/__pycache__/async_text_processor.cpython-313.pyc +0 -0
- package/core/__pycache__/backup_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/backup_recovery_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/content_hash_cache.cpython-313.pyc +0 -0
- package/core/__pycache__/context_menu.cpython-313.pyc +0 -0
- package/core/__pycache__/data_validator.cpython-313.pyc +0 -0
- package/core/__pycache__/database_connection_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/database_curl_settings_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/database_promera_ai_settings_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/database_schema.cpython-313.pyc +0 -0
- package/core/__pycache__/database_schema_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/database_settings_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/database_settings_manager_interface.cpython-313.pyc +0 -0
- package/core/__pycache__/dialog_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/efficient_line_numbers.cpython-313.pyc +0 -0
- package/core/__pycache__/error_handler.cpython-313.pyc +0 -0
- package/core/__pycache__/error_service.cpython-313.pyc +0 -0
- package/core/__pycache__/event_consolidator.cpython-313.pyc +0 -0
- package/core/__pycache__/memory_efficient_text_widget.cpython-313.pyc +0 -0
- package/core/__pycache__/migration_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/migration_test_suite.cpython-313.pyc +0 -0
- package/core/__pycache__/migration_validator.cpython-313.pyc +0 -0
- package/core/__pycache__/optimized_find_replace.cpython-313.pyc +0 -0
- package/core/__pycache__/optimized_pattern_engine.cpython-313.pyc +0 -0
- package/core/__pycache__/optimized_search_highlighter.cpython-313.pyc +0 -0
- package/core/__pycache__/performance_monitor.cpython-313.pyc +0 -0
- package/core/__pycache__/persistence_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/progressive_stats_calculator.cpython-313.pyc +0 -0
- package/core/__pycache__/regex_pattern_cache.cpython-313.pyc +0 -0
- package/core/__pycache__/regex_pattern_library.cpython-313.pyc +0 -0
- package/core/__pycache__/search_operation_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/settings_defaults_registry.cpython-313.pyc +0 -0
- package/core/__pycache__/settings_integrity_validator.cpython-313.pyc +0 -0
- package/core/__pycache__/settings_serializer.cpython-313.pyc +0 -0
- package/core/__pycache__/settings_validator.cpython-313.pyc +0 -0
- package/core/__pycache__/smart_stats_calculator.cpython-313.pyc +0 -0
- package/core/__pycache__/statistics_update_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/stats_config_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/streaming_text_handler.cpython-313.pyc +0 -0
- package/core/__pycache__/task_scheduler.cpython-313.pyc +0 -0
- package/core/__pycache__/visibility_monitor.cpython-313.pyc +0 -0
- package/core/__pycache__/widget_cache.cpython-313.pyc +0 -0
- package/core/app_context.py +482 -0
- package/core/async_text_processor.py +422 -0
- package/core/backup_manager.py +656 -0
- package/core/backup_recovery_manager.py +1034 -0
- package/core/content_hash_cache.py +509 -0
- package/core/context_menu.py +313 -0
- package/core/data_validator.py +1067 -0
- package/core/database_connection_manager.py +745 -0
- package/core/database_curl_settings_manager.py +609 -0
- package/core/database_promera_ai_settings_manager.py +447 -0
- package/core/database_schema.py +412 -0
- package/core/database_schema_manager.py +396 -0
- package/core/database_settings_manager.py +1508 -0
- package/core/database_settings_manager_interface.py +457 -0
- package/core/dialog_manager.py +735 -0
- package/core/efficient_line_numbers.py +511 -0
- package/core/error_handler.py +747 -0
- package/core/error_service.py +431 -0
- package/core/event_consolidator.py +512 -0
- package/core/mcp/__init__.py +43 -0
- package/core/mcp/__pycache__/__init__.cpython-313.pyc +0 -0
- package/core/mcp/__pycache__/protocol.cpython-313.pyc +0 -0
- package/core/mcp/__pycache__/schema.cpython-313.pyc +0 -0
- package/core/mcp/__pycache__/server_stdio.cpython-313.pyc +0 -0
- package/core/mcp/__pycache__/tool_registry.cpython-313.pyc +0 -0
- package/core/mcp/protocol.py +288 -0
- package/core/mcp/schema.py +251 -0
- package/core/mcp/server_stdio.py +299 -0
- package/core/mcp/tool_registry.py +2345 -0
- package/core/memory_efficient_text_widget.py +712 -0
- package/core/migration_manager.py +915 -0
- package/core/migration_test_suite.py +1086 -0
- package/core/migration_validator.py +1144 -0
- package/core/optimized_find_replace.py +715 -0
- package/core/optimized_pattern_engine.py +424 -0
- package/core/optimized_search_highlighter.py +553 -0
- package/core/performance_monitor.py +675 -0
- package/core/persistence_manager.py +713 -0
- package/core/progressive_stats_calculator.py +632 -0
- package/core/regex_pattern_cache.py +530 -0
- package/core/regex_pattern_library.py +351 -0
- package/core/search_operation_manager.py +435 -0
- package/core/settings_defaults_registry.py +1087 -0
- package/core/settings_integrity_validator.py +1112 -0
- package/core/settings_serializer.py +558 -0
- package/core/settings_validator.py +1824 -0
- package/core/smart_stats_calculator.py +710 -0
- package/core/statistics_update_manager.py +619 -0
- package/core/stats_config_manager.py +858 -0
- package/core/streaming_text_handler.py +723 -0
- package/core/task_scheduler.py +596 -0
- package/core/update_pattern_library.py +169 -0
- package/core/visibility_monitor.py +596 -0
- package/core/widget_cache.py +498 -0
- package/mcp.json +61 -0
- package/package.json +57 -0
- package/pomera.py +7483 -0
- package/pomera_mcp_server.py +144 -0
- package/tools/__init__.py +5 -0
- package/tools/__pycache__/__init__.cpython-313.pyc +0 -0
- package/tools/__pycache__/ai_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/ascii_art_generator.cpython-313.pyc +0 -0
- package/tools/__pycache__/base64_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/base_tool.cpython-313.pyc +0 -0
- package/tools/__pycache__/case_tool.cpython-313.pyc +0 -0
- package/tools/__pycache__/column_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/cron_tool.cpython-313.pyc +0 -0
- package/tools/__pycache__/curl_history.cpython-313.pyc +0 -0
- package/tools/__pycache__/curl_processor.cpython-313.pyc +0 -0
- package/tools/__pycache__/curl_settings.cpython-313.pyc +0 -0
- package/tools/__pycache__/curl_tool.cpython-313.pyc +0 -0
- package/tools/__pycache__/diff_viewer.cpython-313.pyc +0 -0
- package/tools/__pycache__/email_extraction_tool.cpython-313.pyc +0 -0
- package/tools/__pycache__/email_header_analyzer.cpython-313.pyc +0 -0
- package/tools/__pycache__/extraction_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/find_replace.cpython-313.pyc +0 -0
- package/tools/__pycache__/folder_file_reporter.cpython-313.pyc +0 -0
- package/tools/__pycache__/folder_file_reporter_adapter.cpython-313.pyc +0 -0
- package/tools/__pycache__/generator_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/hash_generator.cpython-313.pyc +0 -0
- package/tools/__pycache__/html_tool.cpython-313.pyc +0 -0
- package/tools/__pycache__/huggingface_helper.cpython-313.pyc +0 -0
- package/tools/__pycache__/jsonxml_tool.cpython-313.pyc +0 -0
- package/tools/__pycache__/line_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/list_comparator.cpython-313.pyc +0 -0
- package/tools/__pycache__/markdown_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/mcp_widget.cpython-313.pyc +0 -0
- package/tools/__pycache__/notes_widget.cpython-313.pyc +0 -0
- package/tools/__pycache__/number_base_converter.cpython-313.pyc +0 -0
- package/tools/__pycache__/regex_extractor.cpython-313.pyc +0 -0
- package/tools/__pycache__/slug_generator.cpython-313.pyc +0 -0
- package/tools/__pycache__/sorter_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/string_escape_tool.cpython-313.pyc +0 -0
- package/tools/__pycache__/text_statistics_tool.cpython-313.pyc +0 -0
- package/tools/__pycache__/text_wrapper.cpython-313.pyc +0 -0
- package/tools/__pycache__/timestamp_converter.cpython-313.pyc +0 -0
- package/tools/__pycache__/tool_loader.cpython-313.pyc +0 -0
- package/tools/__pycache__/translator_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/url_link_extractor.cpython-313.pyc +0 -0
- package/tools/__pycache__/url_parser.cpython-313.pyc +0 -0
- package/tools/__pycache__/whitespace_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/word_frequency_counter.cpython-313.pyc +0 -0
- package/tools/ai_tools.py +2892 -0
- package/tools/ascii_art_generator.py +353 -0
- package/tools/base64_tools.py +184 -0
- package/tools/base_tool.py +511 -0
- package/tools/case_tool.py +309 -0
- package/tools/column_tools.py +396 -0
- package/tools/cron_tool.py +885 -0
- package/tools/curl_history.py +601 -0
- package/tools/curl_processor.py +1208 -0
- package/tools/curl_settings.py +503 -0
- package/tools/curl_tool.py +5467 -0
- package/tools/diff_viewer.py +1072 -0
- package/tools/email_extraction_tool.py +249 -0
- package/tools/email_header_analyzer.py +426 -0
- package/tools/extraction_tools.py +250 -0
- package/tools/find_replace.py +1751 -0
- package/tools/folder_file_reporter.py +1463 -0
- package/tools/folder_file_reporter_adapter.py +480 -0
- package/tools/generator_tools.py +1217 -0
- package/tools/hash_generator.py +256 -0
- package/tools/html_tool.py +657 -0
- package/tools/huggingface_helper.py +449 -0
- package/tools/jsonxml_tool.py +730 -0
- package/tools/line_tools.py +419 -0
- package/tools/list_comparator.py +720 -0
- package/tools/markdown_tools.py +562 -0
- package/tools/mcp_widget.py +1417 -0
- package/tools/notes_widget.py +973 -0
- package/tools/number_base_converter.py +373 -0
- package/tools/regex_extractor.py +572 -0
- package/tools/slug_generator.py +311 -0
- package/tools/sorter_tools.py +459 -0
- package/tools/string_escape_tool.py +393 -0
- package/tools/text_statistics_tool.py +366 -0
- package/tools/text_wrapper.py +431 -0
- package/tools/timestamp_converter.py +422 -0
- package/tools/tool_loader.py +710 -0
- package/tools/translator_tools.py +523 -0
- package/tools/url_link_extractor.py +262 -0
- package/tools/url_parser.py +205 -0
- package/tools/whitespace_tools.py +356 -0
- package/tools/word_frequency_counter.py +147 -0
|
@@ -0,0 +1,351 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Regex Pattern Library Module
|
|
4
|
+
|
|
5
|
+
This module provides the 20 regex use cases extracted from the RegexUseCases.md document.
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
from regex_pattern_library import RegexPatternLibrary
|
|
9
|
+
|
|
10
|
+
library = RegexPatternLibrary()
|
|
11
|
+
patterns = library.get_all_patterns()
|
|
12
|
+
validation_patterns = library.get_patterns_by_category("validation")
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import json
|
|
16
|
+
import os
|
|
17
|
+
from typing import List, Dict, Optional
|
|
18
|
+
|
|
19
|
+
class RegexPatternLibrary:
    """
    A comprehensive library of regex patterns for common text processing tasks.
    Based on the 20 use cases from the RegexUseCases.md document.

    Each pattern is a dict with the keys ``find`` (regex source), ``replace``
    (substitution template), ``purpose`` (human-readable description),
    ``category`` ('validation', 'extraction' or 'cleaning'), ``example_input``
    and ``example_output``.
    """

    def __init__(self):
        # The list is built once per instance; the getters hand out this same list.
        self.patterns = self._get_default_patterns()

    def _get_default_patterns(self) -> List[Dict[str, str]]:
        """
        Returns the complete list of 20 regex patterns organized by category.
        """
        return [
            # Data Validation Patterns (8 patterns)
            {
                "find": r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$",
                "replace": "[EMAIL]",
                "purpose": "Email Address Validation - Validates standard email format",
                "category": "validation",
                "example_input": "user@example.com",
                "example_output": "[EMAIL]"
            },
            {
                "find": r"^(?=.*[a-z])(?=.*[A-Z])(?=.*\d)(?=.*[@$!%*?&])[A-Za-z\d@$!%*?&]{8,}$",
                "replace": "[STRONG_PASSWORD]",
                "purpose": "Password Strength - Min 8 chars, uppercase, lowercase, digit, special char",
                "category": "validation",
                "example_input": "MyPass123!",
                "example_output": "[STRONG_PASSWORD]"
            },
            {
                "find": r"^\(?(\d{3})\)?[-.\s]?(\d{3})[-.\s]?(\d{4})$",
                "replace": r"(\1) \2-\3",
                "purpose": "North American Phone Number - Validates and formats 10-digit phone numbers",
                "category": "validation",
                "example_input": "123-456-7890",
                "example_output": "(123) 456-7890"
            },
            {
                "find": r"^https?:\/\/(?:www\.)?[-a-zA-Z0-9@:%.\_\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)$",
                "replace": "[URL]",
                "purpose": "URL Structure Validation - Validates HTTP/HTTPS URLs",
                "category": "validation",
                "example_input": "https://www.example.com/path",
                "example_output": "[URL]"
            },
            {
                "find": r"^[a-zA-Z0-9]([._-](?![._-])|[a-zA-Z0-9]){3,18}[a-zA-Z0-9]$",
                "replace": "[USERNAME]",
                "purpose": "Username Format - 5-20 chars, alphanumeric start/end, no consecutive special chars",
                "category": "validation",
                "example_input": "user_name123",
                "example_output": "[USERNAME]"
            },
            {
                "find": r"^((25[0-5]|2[0-4][0-9]|1?[0-9][0-9]?)\.){3}(25[0-5]|2[0-4][0-9]|1?[0-9][0-9]?)$",
                "replace": "[IP_ADDRESS]",
                "purpose": "IPv4 Address Validation - Validates IP addresses (0-255 per octet)",
                "category": "validation",
                "example_input": "192.168.1.1",
                "example_output": "[IP_ADDRESS]"
            },
            {
                "find": r"^\d{4}-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])$",
                "replace": "[DATE]",
                "purpose": "YYYY-MM-DD Date Format - Validates ISO date format",
                "category": "validation",
                "example_input": "2024-12-25",
                "example_output": "[DATE]"
            },
            {
                "find": r"^(?:4[0-9]{12}(?:[0-9]{3})?|5[1-5][0-9]{14}|3[47][0-9]{13})$",
                "replace": "[CREDIT_CARD]",
                "purpose": "Credit Card Number - Identifies Visa, Mastercard, American Express formats",
                "category": "validation",
                "example_input": "4111111111111111",
                "example_output": "[CREDIT_CARD]"
            },

            # Information Extraction Patterns (7 patterns)
            {
                "find": r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}",
                "replace": "[EMAIL]",
                "purpose": "Extract All Email Addresses - Finds emails anywhere in text",
                "category": "extraction",
                "example_input": "Contact us at support@example.com for help",
                "example_output": "Contact us at [EMAIL] for help"
            },
            {
                "find": r"https?:\/\/[^\s/$.?#].[^\s]*",
                "replace": "[URL]",
                "purpose": "Extract All URLs - Finds HTTP/HTTPS URLs in text",
                "category": "extraction",
                "example_input": "Visit https://example.com for more info",
                "example_output": "Visit [URL] for more info"
            },
            {
                # (?<!\S) means "at start of text or right after whitespace".
                # The former (?<=\s|^) is a variable-width look-behind, which
                # Python's re module refuses to compile.
                "find": r"(?<!\S)#(\w+)",
                "replace": "#[HASHTAG]",
                "purpose": "Extract Hashtags - Finds social media hashtags",
                "category": "extraction",
                "example_input": "Love this #python tutorial!",
                "example_output": "Love this #[HASHTAG] tutorial!"
            },
            {
                # Same fixed-width look-behind as the hashtag pattern above.
                "find": r"(?<!\S)@(\w{1,15})\b",
                "replace": "@[MENTION]",
                "purpose": "Extract @Mentions - Finds social media mentions (1-15 chars)",
                "category": "extraction",
                "example_input": "Thanks @john for the help!",
                "example_output": "Thanks @[MENTION] for the help!"
            },
            {
                "find": r"^(?P<ip>[\d.]+) (?P<identd>\S+) (?P<user>\S+) \[(?P<timestamp>.*?)\] \"(?P<request>.*?)\" (?P<status_code>\d{3}) (?P<size>\d+|-).*$",
                "replace": "[LOG_ENTRY]",
                "purpose": "Log File Parsing - Parses Apache/Nginx log entries with named groups",
                "category": "extraction",
                "example_input": '127.0.0.1 - - [10/Oct/2023:13:55:36 +0000] "GET /index.html HTTP/1.1" 200 42',
                "example_output": "[LOG_ENTRY]"
            },
            {
                "find": r'(?:^|,)("(?:[^"]|"")*"|[^,]*)',
                "replace": "[CSV_FIELD]",
                "purpose": "Simple CSV Parsing - Handles quoted fields with commas",
                "category": "extraction",
                "example_input": 'field1,"field,2",field3',
                "example_output": "[CSV_FIELD][CSV_FIELD][CSV_FIELD]"
            },
            {
                "find": r"<h1.*?>(.*?)<\/h1>",
                "replace": r"\1",
                "purpose": "HTML Tag Content - Extracts content from H1 tags",
                "category": "extraction",
                "example_input": '<h1 class="title">Welcome</h1>',
                "example_output": "Welcome"
            },

            # Text Cleaning Patterns (5 patterns)
            {
                "find": r"<[^<]+?>",
                "replace": "",
                "purpose": "Strip HTML Tags - Removes all HTML tags from text",
                "category": "cleaning",
                "example_input": "<p>This is <b>bold</b> text.</p>",
                "example_output": "This is bold text."
            },
            {
                "find": r"\b(\w+)\s+\1\b",
                "replace": r"\1",
                "purpose": "Remove Duplicate Words - Removes consecutive duplicate words",
                "category": "cleaning",
                "example_input": "This is is a test",
                "example_output": "This is a test"
            },
            {
                "find": r"^\s+|\s+$",
                "replace": "",
                "purpose": "Trim Whitespace - Removes leading and trailing whitespace",
                "category": "cleaning",
                "example_input": " text with spaces ",
                "example_output": "text with spaces"
            },
            {
                "find": r"^\(?(\d{3})\)?[-.\s]?(\d{3})[-.\s]?(\d{4})$",
                "replace": r"\1-\2-\3",
                "purpose": "Normalize Phone Numbers - Converts to XXX-XXX-XXXX format",
                "category": "cleaning",
                "example_input": "(123) 456.7890",
                "example_output": "123-456-7890"
            },
            {
                "find": r"\b(\d{4}[- ]?){3}(\d{4})\b",
                "replace": r"XXXX-XXXX-XXXX-\2",
                "purpose": "Mask Sensitive Data - Masks credit card numbers, shows last 4 digits",
                "category": "cleaning",
                "example_input": "4111-1111-1111-1111",
                "example_output": "XXXX-XXXX-XXXX-1111"
            }
        ]

    def get_all_patterns(self) -> List[Dict[str, str]]:
        """Returns all patterns in the library (the instance's own list, not a copy)."""
        return self.patterns

    def get_patterns_by_category(self, category: str) -> List[Dict[str, str]]:
        """
        Returns patterns filtered by category.

        Args:
            category: One of 'validation', 'extraction', 'cleaning'

        Returns:
            A new list with every pattern whose 'category' equals ``category``
            exactly; empty when nothing matches.
        """
        return [p for p in self.patterns if p.get('category') == category]

    def get_pattern_by_purpose(self, purpose_keyword: str) -> List[Dict[str, str]]:
        """
        Returns patterns that match a purpose keyword.

        Args:
            purpose_keyword: Keyword to search for in pattern purposes
                (case-insensitive substring match)
        """
        # Lower-case the keyword once instead of once per pattern.
        needle = purpose_keyword.lower()
        return [p for p in self.patterns if needle in p.get('purpose', '').lower()]

    def get_validation_patterns(self) -> List[Dict[str, str]]:
        """Returns all validation patterns."""
        return self.get_patterns_by_category('validation')

    def get_extraction_patterns(self) -> List[Dict[str, str]]:
        """Returns all extraction patterns."""
        return self.get_patterns_by_category('extraction')

    def get_cleaning_patterns(self) -> List[Dict[str, str]]:
        """Returns all cleaning patterns."""
        return self.get_patterns_by_category('cleaning')

    def update_settings_file(self, settings_file: str = "settings.json") -> bool:
        """
        Updates the settings.json file with the pattern library.
        Only updates if the pattern library is empty or has fewer than 10 patterns.

        A missing file is created; an existing file keeps all of its other
        keys and only has "pattern_library" added or replaced. Progress and
        errors are reported with print().

        Args:
            settings_file: Path to the settings.json file

        Returns:
            bool: True if updated, False if no update was needed or if any
            error occurred (errors are printed, never raised)
        """
        try:
            # Check if settings.json exists
            if not os.path.exists(settings_file):
                print(f"{settings_file} not found. Creating new file with pattern library.")
                settings = {"pattern_library": self._convert_to_settings_format()}
            else:
                # Load existing settings
                with open(settings_file, 'r', encoding='utf-8') as f:
                    settings = json.load(f)

                # Check if pattern_library exists and if it needs updating
                if "pattern_library" not in settings:
                    print("No pattern_library found. Adding complete pattern library.")
                    settings["pattern_library"] = self._convert_to_settings_format()
                else:
                    current_patterns = settings["pattern_library"]
                    if len(current_patterns) < 10:
                        print(f"Pattern library has only {len(current_patterns)} patterns. Updating with complete library.")
                        settings["pattern_library"] = self._convert_to_settings_format()
                    else:
                        print(f"Pattern library already has {len(current_patterns)} patterns. No update needed.")
                        return False

            # Save updated settings
            with open(settings_file, 'w', encoding='utf-8') as f:
                json.dump(settings, f, indent=4, ensure_ascii=False)

            print(f"Successfully updated {settings_file} with {len(settings['pattern_library'])} regex patterns.")
            return True

        except Exception as e:
            # Deliberate best-effort boundary: report the problem and signal
            # "not updated" instead of propagating to the caller.
            print(f"Error updating settings file: {e}")
            return False

    def _convert_to_settings_format(self) -> List[Dict[str, str]]:
        """
        Converts the internal pattern format to the settings.json format.

        Only 'find', 'replace' and 'purpose' are kept; category and example
        fields are dropped.
        """
        return [
            {
                "find": pattern["find"],
                "replace": pattern["replace"],
                "purpose": pattern["purpose"]
            }
            for pattern in self.patterns
        ]

    def get_pattern_categories(self) -> List[str]:
        """Returns a sorted list of all available categories."""
        return sorted({p['category'] for p in self.patterns if 'category' in p})

    def search_patterns(self, query: str) -> List[Dict[str, str]]:
        """
        Searches patterns by query string in purpose or find pattern.

        The match is a case-insensitive substring test against each pattern's
        'purpose' and 'find' values.

        Args:
            query: Search query

        Returns:
            List of matching patterns
        """
        query = query.lower()
        return [
            pattern
            for pattern in self.patterns
            if query in pattern.get('purpose', '').lower()
            or query in pattern.get('find', '').lower()
        ]
|
|
318
|
+
|
|
319
|
+
# Convenience functions for direct use
|
|
320
|
+
def get_all_regex_patterns() -> List[Dict[str, str]]:
    """Build a fresh RegexPatternLibrary and return its full list of 20 patterns."""
    return RegexPatternLibrary().get_all_patterns()
|
|
324
|
+
|
|
325
|
+
def update_pattern_library_in_settings(settings_file: str = "settings.json") -> bool:
    """
    Write the pattern library into a settings file when it needs it.

    Thin wrapper that creates a RegexPatternLibrary and delegates to its
    update_settings_file method.

    Args:
        settings_file: Path to settings file

    Returns:
        bool: True if updated, False if no update needed
    """
    return RegexPatternLibrary().update_settings_file(settings_file)
|
|
337
|
+
|
|
338
|
+
if __name__ == "__main__":
    # Demo: summarise the library, list the validation patterns, then sync
    # settings.json in the current working directory.
    demo = RegexPatternLibrary()

    print("=== Regex Pattern Library Demo ===")
    total = len(demo.get_all_patterns())
    print(f"Total patterns: {total}")
    category_names = ', '.join(demo.get_pattern_categories())
    print(f"Categories: {category_names}")

    print("\n=== Validation Patterns ===")
    for entry in demo.get_validation_patterns():
        print(f"- {entry['purpose']}")

    print("\n=== Updating settings.json ===")
    demo.update_settings_file()
|