pomera-ai-commander 1.1.1 → 1.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -21
- package/README.md +105 -680
- package/bin/pomera-ai-commander.js +62 -62
- package/core/__init__.py +65 -65
- package/core/app_context.py +482 -482
- package/core/async_text_processor.py +421 -421
- package/core/backup_manager.py +655 -655
- package/core/backup_recovery_manager.py +1199 -1033
- package/core/content_hash_cache.py +508 -508
- package/core/context_menu.py +313 -313
- package/core/data_directory.py +549 -0
- package/core/data_validator.py +1066 -1066
- package/core/database_connection_manager.py +744 -744
- package/core/database_curl_settings_manager.py +608 -608
- package/core/database_promera_ai_settings_manager.py +446 -446
- package/core/database_schema.py +411 -411
- package/core/database_schema_manager.py +395 -395
- package/core/database_settings_manager.py +1507 -1507
- package/core/database_settings_manager_interface.py +456 -456
- package/core/dialog_manager.py +734 -734
- package/core/diff_utils.py +239 -0
- package/core/efficient_line_numbers.py +540 -510
- package/core/error_handler.py +746 -746
- package/core/error_service.py +431 -431
- package/core/event_consolidator.py +511 -511
- package/core/mcp/__init__.py +43 -43
- package/core/mcp/find_replace_diff.py +334 -0
- package/core/mcp/protocol.py +288 -288
- package/core/mcp/schema.py +251 -251
- package/core/mcp/server_stdio.py +299 -299
- package/core/mcp/tool_registry.py +2699 -2345
- package/core/memento.py +275 -0
- package/core/memory_efficient_text_widget.py +711 -711
- package/core/migration_manager.py +914 -914
- package/core/migration_test_suite.py +1085 -1085
- package/core/migration_validator.py +1143 -1143
- package/core/optimized_find_replace.py +714 -714
- package/core/optimized_pattern_engine.py +424 -424
- package/core/optimized_search_highlighter.py +552 -552
- package/core/performance_monitor.py +674 -674
- package/core/persistence_manager.py +712 -712
- package/core/progressive_stats_calculator.py +632 -632
- package/core/regex_pattern_cache.py +529 -529
- package/core/regex_pattern_library.py +350 -350
- package/core/search_operation_manager.py +434 -434
- package/core/settings_defaults_registry.py +1087 -1087
- package/core/settings_integrity_validator.py +1111 -1111
- package/core/settings_serializer.py +557 -557
- package/core/settings_validator.py +1823 -1823
- package/core/smart_stats_calculator.py +709 -709
- package/core/statistics_update_manager.py +619 -619
- package/core/stats_config_manager.py +858 -858
- package/core/streaming_text_handler.py +723 -723
- package/core/task_scheduler.py +596 -596
- package/core/update_pattern_library.py +168 -168
- package/core/visibility_monitor.py +596 -596
- package/core/widget_cache.py +498 -498
- package/mcp.json +51 -61
- package/migrate_data.py +127 -0
- package/package.json +64 -57
- package/pomera.py +7883 -7482
- package/pomera_mcp_server.py +183 -144
- package/requirements.txt +33 -0
- package/scripts/Dockerfile.alpine +43 -0
- package/scripts/Dockerfile.gui-test +54 -0
- package/scripts/Dockerfile.linux +43 -0
- package/scripts/Dockerfile.test-linux +80 -0
- package/scripts/Dockerfile.ubuntu +39 -0
- package/scripts/README.md +53 -0
- package/scripts/build-all.bat +113 -0
- package/scripts/build-docker.bat +53 -0
- package/scripts/build-docker.sh +55 -0
- package/scripts/build-optimized.bat +101 -0
- package/scripts/build.sh +78 -0
- package/scripts/docker-compose.test.yml +27 -0
- package/scripts/docker-compose.yml +32 -0
- package/scripts/postinstall.js +62 -0
- package/scripts/requirements-minimal.txt +33 -0
- package/scripts/test-linux-simple.bat +28 -0
- package/scripts/validate-release-workflow.py +450 -0
- package/tools/__init__.py +4 -4
- package/tools/ai_tools.py +2891 -2891
- package/tools/ascii_art_generator.py +352 -352
- package/tools/base64_tools.py +183 -183
- package/tools/base_tool.py +511 -511
- package/tools/case_tool.py +308 -308
- package/tools/column_tools.py +395 -395
- package/tools/cron_tool.py +884 -884
- package/tools/curl_history.py +600 -600
- package/tools/curl_processor.py +1207 -1207
- package/tools/curl_settings.py +502 -502
- package/tools/curl_tool.py +5467 -5467
- package/tools/diff_viewer.py +1817 -1072
- package/tools/email_extraction_tool.py +248 -248
- package/tools/email_header_analyzer.py +425 -425
- package/tools/extraction_tools.py +250 -250
- package/tools/find_replace.py +2289 -1750
- package/tools/folder_file_reporter.py +1463 -1463
- package/tools/folder_file_reporter_adapter.py +480 -480
- package/tools/generator_tools.py +1216 -1216
- package/tools/hash_generator.py +255 -255
- package/tools/html_tool.py +656 -656
- package/tools/jsonxml_tool.py +729 -729
- package/tools/line_tools.py +419 -419
- package/tools/markdown_tools.py +561 -561
- package/tools/mcp_widget.py +1417 -1417
- package/tools/notes_widget.py +978 -973
- package/tools/number_base_converter.py +372 -372
- package/tools/regex_extractor.py +571 -571
- package/tools/slug_generator.py +310 -310
- package/tools/sorter_tools.py +458 -458
- package/tools/string_escape_tool.py +392 -392
- package/tools/text_statistics_tool.py +365 -365
- package/tools/text_wrapper.py +430 -430
- package/tools/timestamp_converter.py +421 -421
- package/tools/tool_loader.py +710 -710
- package/tools/translator_tools.py +522 -522
- package/tools/url_link_extractor.py +261 -261
- package/tools/url_parser.py +204 -204
- package/tools/whitespace_tools.py +355 -355
- package/tools/word_frequency_counter.py +146 -146
- package/core/__pycache__/__init__.cpython-313.pyc +0 -0
- package/core/__pycache__/app_context.cpython-313.pyc +0 -0
- package/core/__pycache__/async_text_processor.cpython-313.pyc +0 -0
- package/core/__pycache__/backup_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/backup_recovery_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/content_hash_cache.cpython-313.pyc +0 -0
- package/core/__pycache__/context_menu.cpython-313.pyc +0 -0
- package/core/__pycache__/data_validator.cpython-313.pyc +0 -0
- package/core/__pycache__/database_connection_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/database_curl_settings_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/database_promera_ai_settings_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/database_schema.cpython-313.pyc +0 -0
- package/core/__pycache__/database_schema_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/database_settings_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/database_settings_manager_interface.cpython-313.pyc +0 -0
- package/core/__pycache__/dialog_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/efficient_line_numbers.cpython-313.pyc +0 -0
- package/core/__pycache__/error_handler.cpython-313.pyc +0 -0
- package/core/__pycache__/error_service.cpython-313.pyc +0 -0
- package/core/__pycache__/event_consolidator.cpython-313.pyc +0 -0
- package/core/__pycache__/memory_efficient_text_widget.cpython-313.pyc +0 -0
- package/core/__pycache__/migration_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/migration_test_suite.cpython-313.pyc +0 -0
- package/core/__pycache__/migration_validator.cpython-313.pyc +0 -0
- package/core/__pycache__/optimized_find_replace.cpython-313.pyc +0 -0
- package/core/__pycache__/optimized_pattern_engine.cpython-313.pyc +0 -0
- package/core/__pycache__/optimized_search_highlighter.cpython-313.pyc +0 -0
- package/core/__pycache__/performance_monitor.cpython-313.pyc +0 -0
- package/core/__pycache__/persistence_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/progressive_stats_calculator.cpython-313.pyc +0 -0
- package/core/__pycache__/regex_pattern_cache.cpython-313.pyc +0 -0
- package/core/__pycache__/regex_pattern_library.cpython-313.pyc +0 -0
- package/core/__pycache__/search_operation_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/settings_defaults_registry.cpython-313.pyc +0 -0
- package/core/__pycache__/settings_integrity_validator.cpython-313.pyc +0 -0
- package/core/__pycache__/settings_serializer.cpython-313.pyc +0 -0
- package/core/__pycache__/settings_validator.cpython-313.pyc +0 -0
- package/core/__pycache__/smart_stats_calculator.cpython-313.pyc +0 -0
- package/core/__pycache__/statistics_update_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/stats_config_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/streaming_text_handler.cpython-313.pyc +0 -0
- package/core/__pycache__/task_scheduler.cpython-313.pyc +0 -0
- package/core/__pycache__/visibility_monitor.cpython-313.pyc +0 -0
- package/core/__pycache__/widget_cache.cpython-313.pyc +0 -0
- package/core/mcp/__pycache__/__init__.cpython-313.pyc +0 -0
- package/core/mcp/__pycache__/protocol.cpython-313.pyc +0 -0
- package/core/mcp/__pycache__/schema.cpython-313.pyc +0 -0
- package/core/mcp/__pycache__/server_stdio.cpython-313.pyc +0 -0
- package/core/mcp/__pycache__/tool_registry.cpython-313.pyc +0 -0
- package/tools/__pycache__/__init__.cpython-313.pyc +0 -0
- package/tools/__pycache__/ai_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/ascii_art_generator.cpython-313.pyc +0 -0
- package/tools/__pycache__/base64_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/base_tool.cpython-313.pyc +0 -0
- package/tools/__pycache__/case_tool.cpython-313.pyc +0 -0
- package/tools/__pycache__/column_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/cron_tool.cpython-313.pyc +0 -0
- package/tools/__pycache__/curl_history.cpython-313.pyc +0 -0
- package/tools/__pycache__/curl_processor.cpython-313.pyc +0 -0
- package/tools/__pycache__/curl_settings.cpython-313.pyc +0 -0
- package/tools/__pycache__/curl_tool.cpython-313.pyc +0 -0
- package/tools/__pycache__/diff_viewer.cpython-313.pyc +0 -0
- package/tools/__pycache__/email_extraction_tool.cpython-313.pyc +0 -0
- package/tools/__pycache__/email_header_analyzer.cpython-313.pyc +0 -0
- package/tools/__pycache__/extraction_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/find_replace.cpython-313.pyc +0 -0
- package/tools/__pycache__/folder_file_reporter.cpython-313.pyc +0 -0
- package/tools/__pycache__/folder_file_reporter_adapter.cpython-313.pyc +0 -0
- package/tools/__pycache__/generator_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/hash_generator.cpython-313.pyc +0 -0
- package/tools/__pycache__/html_tool.cpython-313.pyc +0 -0
- package/tools/__pycache__/huggingface_helper.cpython-313.pyc +0 -0
- package/tools/__pycache__/jsonxml_tool.cpython-313.pyc +0 -0
- package/tools/__pycache__/line_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/list_comparator.cpython-313.pyc +0 -0
- package/tools/__pycache__/markdown_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/mcp_widget.cpython-313.pyc +0 -0
- package/tools/__pycache__/notes_widget.cpython-313.pyc +0 -0
- package/tools/__pycache__/number_base_converter.cpython-313.pyc +0 -0
- package/tools/__pycache__/regex_extractor.cpython-313.pyc +0 -0
- package/tools/__pycache__/slug_generator.cpython-313.pyc +0 -0
- package/tools/__pycache__/sorter_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/string_escape_tool.cpython-313.pyc +0 -0
- package/tools/__pycache__/text_statistics_tool.cpython-313.pyc +0 -0
- package/tools/__pycache__/text_wrapper.cpython-313.pyc +0 -0
- package/tools/__pycache__/timestamp_converter.cpython-313.pyc +0 -0
- package/tools/__pycache__/tool_loader.cpython-313.pyc +0 -0
- package/tools/__pycache__/translator_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/url_link_extractor.cpython-313.pyc +0 -0
- package/tools/__pycache__/url_parser.cpython-313.pyc +0 -0
- package/tools/__pycache__/whitespace_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/word_frequency_counter.cpython-313.pyc +0 -0
|
@@ -1,169 +1,169 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
"""
|
|
3
|
-
Script to update the pattern library in settings.json with the 20 regex use cases
|
|
4
|
-
"""
|
|
5
|
-
|
|
6
|
-
import json
|
|
7
|
-
import os
|
|
8
|
-
|
|
9
|
-
def get_default_pattern_library():
    """
    Return the 20 default regex use cases extracted from RegexUseCases.md.

    Each entry is a dict with three string keys:
        "find"    - the regular expression to search for
        "replace" - the replacement text (may contain group backreferences)
        "purpose" - a short human-readable description of the use case

    A new list of new dicts is built on every call, so callers are free to
    mutate the result. Every "find" value compiles with Python's ``re`` module.
    """
    return [
        # Data Validation Patterns
        {
            "find": r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$",
            "replace": "[EMAIL]",
            "purpose": "Email Address Validation - Validates standard email format"
        },
        {
            "find": r"^(?=.*[a-z])(?=.*[A-Z])(?=.*\d)(?=.*[@$!%*?&])[A-Za-z\d@$!%*?&]{8,}$",
            "replace": "[STRONG_PASSWORD]",
            "purpose": "Password Strength - Min 8 chars, uppercase, lowercase, digit, special char"
        },
        {
            "find": r"^\(?(\d{3})\)?[-.\s]?(\d{3})[-.\s]?(\d{4})$",
            "replace": r"(\1) \2-\3",
            "purpose": "North American Phone Number - Validates and formats 10-digit phone numbers"
        },
        {
            "find": r"^https?:\/\/(?:www\.)?[-a-zA-Z0-9@:%.\_\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)$",
            "replace": "[URL]",
            "purpose": "URL Structure Validation - Validates HTTP/HTTPS URLs"
        },
        {
            "find": r"^[a-zA-Z0-9]([._-](?![._-])|[a-zA-Z0-9]){3,18}[a-zA-Z0-9]$",
            "replace": "[USERNAME]",
            "purpose": "Username Format - 5-20 chars, alphanumeric start/end, no consecutive special chars"
        },
        {
            "find": r"^((25[0-5]|2[0-4][0-9]|1?[0-9][0-9]?)\.){3}(25[0-5]|2[0-4][0-9]|1?[0-9][0-9]?)$",
            "replace": "[IP_ADDRESS]",
            "purpose": "IPv4 Address Validation - Validates IP addresses (0-255 per octet)"
        },
        {
            "find": r"^\d{4}-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])$",
            "replace": "[DATE]",
            "purpose": "YYYY-MM-DD Date Format - Validates ISO date format"
        },
        {
            "find": r"^(?:4[0-9]{12}(?:[0-9]{3})?|5[1-5][0-9]{14}|3[47][0-9]{13})$",
            "replace": "[CREDIT_CARD]",
            "purpose": "Credit Card Number - Identifies Visa, Mastercard, American Express formats"
        },

        # Information Extraction Patterns
        {
            "find": r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}",
            "replace": "[EMAIL]",
            "purpose": "Extract All Email Addresses - Finds emails anywhere in text"
        },
        {
            "find": r"https?:\/\/[^\s/$.?#].[^\s]*",
            "replace": "[URL]",
            "purpose": "Extract All URLs - Finds HTTP/HTTPS URLs in text"
        },
        {
            # (?<!\S) means "at the start of the text or right after
            # whitespace". The earlier (?<=\s|^) spelling cannot be compiled
            # by Python's re module, which requires every look-behind
            # alternative to have the same fixed width.
            "find": r"(?<!\S)#(\w+)",
            "replace": "#[HASHTAG]",
            "purpose": "Extract Hashtags - Finds social media hashtags"
        },
        {
            # Same fixed-width look-behind rewrite as the hashtag pattern.
            "find": r"(?<!\S)@(\w{1,15})\b",
            "replace": "@[MENTION]",
            "purpose": "Extract @Mentions - Finds social media mentions (1-15 chars)"
        },
        {
            "find": r"^(?P<ip>[\d.]+) (?P<identd>\S+) (?P<user>\S+) \[(?P<timestamp>.*?)\] \"(?P<request>.*?)\" (?P<status_code>\d{3}) (?P<size>\d+|-).*$",
            "replace": "[LOG_ENTRY]",
            "purpose": "Log File Parsing - Parses Apache/Nginx log entries with named groups"
        },
        {
            "find": r"(?:^|,)(\"(?:[^\"]|\"\")*\"|[^,]*)",
            "replace": "[CSV_FIELD]",
            "purpose": "Simple CSV Parsing - Handles quoted fields with commas"
        },
        {
            "find": r"<h1.*?>(.*?)<\/h1>",
            "replace": r"\1",
            "purpose": "HTML Tag Content - Extracts content from H1 tags"
        },

        # Text Cleaning Patterns
        {
            "find": r"<[^<]+?>",
            "replace": "",
            "purpose": "Strip HTML Tags - Removes all HTML tags from text"
        },
        {
            "find": r"\b(\w+)\s+\1\b",
            "replace": r"\1",
            "purpose": "Remove Duplicate Words - Removes consecutive duplicate words"
        },
        {
            "find": r"^\s+|\s+$",
            "replace": "",
            "purpose": "Trim Whitespace - Removes leading and trailing whitespace"
        },
        {
            "find": r"^\(?(\d{3})\)?[-.\s]?(\d{3})[-.\s]?(\d{4})$",
            "replace": r"\1-\2-\3",
            "purpose": "Normalize Phone Numbers - Converts to XXX-XXX-XXXX format"
        },
        {
            "find": r"\b(\d{4}[- ]?){3}(\d{4})\b",
            "replace": r"XXXX-XXXX-XXXX-\2",
            "purpose": "Mask Sensitive Data - Masks credit card numbers, shows last 4 digits"
        }
    ]
|
|
120
|
-
|
|
121
|
-
def update_settings_pattern_library():
    """
    Ensure settings.json in the current working directory holds the complete
    pattern library.

    The file is written when any of the following is true:
      * settings.json does not exist,
      * settings.json is not valid JSON or its top-level value is not an object,
      * the settings have no "pattern_library" key,
      * "pattern_library" holds fewer than 10 entries.

    When the existing library already has 10 or more entries the file is left
    untouched. When the existing file is a valid JSON object, all of its other
    keys are written back unchanged; otherwise the new file contains only the
    "pattern_library" key.

    Progress and errors are reported with print(); the function returns None
    and does not raise on write failures.
    """
    settings_file = "settings.json"
    min_patterns = 10  # fewer entries than this counts as "only basic patterns"

    settings = None  # None means "start from scratch"
    try:
        with open(settings_file, 'r', encoding='utf-8') as f:
            settings = json.load(f)
    except FileNotFoundError:
        print("settings.json not found. Creating new file with pattern library.")
    except json.JSONDecodeError as e:
        print(f"Error reading settings.json: {e}")
        print("Creating new settings with pattern library.")
    else:
        # Valid JSON that is not an object (e.g. a list) cannot hold the
        # "pattern_library" key, so treat it like an unreadable file.
        if not isinstance(settings, dict):
            print("Error reading settings.json: top-level JSON value is not an object")
            print("Creating new settings with pattern library.")
            settings = None

    if settings is None:
        # Fresh settings must always reach the save step below. Routing them
        # through the size check would find the 20 defaults, report
        # "No update needed" and return without ever creating the file.
        settings = {"pattern_library": get_default_pattern_library()}
    elif "pattern_library" not in settings:
        print("No pattern_library found. Adding complete pattern library.")
        settings["pattern_library"] = get_default_pattern_library()
    else:
        current_patterns = settings["pattern_library"]
        try:
            pattern_count = len(current_patterns)
        except TypeError:
            # e.g. null or a number stored under the key: nothing usable.
            pattern_count = 0

        if pattern_count >= min_patterns:
            print(f"Pattern library already has {pattern_count} patterns. No update needed.")
            return

        print(f"Pattern library has only {pattern_count} patterns. Updating with complete library.")
        settings["pattern_library"] = get_default_pattern_library()

    # Save updated settings. This is a best-effort script step, so failures
    # are reported rather than raised.
    try:
        with open(settings_file, 'w', encoding='utf-8') as f:
            json.dump(settings, f, indent=4, ensure_ascii=False)
    except Exception as e:
        print(f"Error writing to settings.json: {e}")
    else:
        print(f"Successfully updated {settings_file} with {len(settings['pattern_library'])} regex patterns.")
|
|
167
|
-
|
|
168
|
-
if __name__ == "__main__":
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Script to update the pattern library in settings.json with the 20 regex use cases
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import json
|
|
7
|
+
import os
|
|
8
|
+
|
|
9
|
+
def get_default_pattern_library():
    """
    Return the 20 default regex use cases extracted from RegexUseCases.md.

    Each entry is a dict with three string keys:
        "find"    - the regular expression to search for
        "replace" - the replacement text (may contain group backreferences)
        "purpose" - a short human-readable description of the use case

    A new list of new dicts is built on every call, so callers are free to
    mutate the result. Every "find" value compiles with Python's ``re`` module.
    """
    return [
        # Data Validation Patterns
        {
            "find": r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$",
            "replace": "[EMAIL]",
            "purpose": "Email Address Validation - Validates standard email format"
        },
        {
            "find": r"^(?=.*[a-z])(?=.*[A-Z])(?=.*\d)(?=.*[@$!%*?&])[A-Za-z\d@$!%*?&]{8,}$",
            "replace": "[STRONG_PASSWORD]",
            "purpose": "Password Strength - Min 8 chars, uppercase, lowercase, digit, special char"
        },
        {
            "find": r"^\(?(\d{3})\)?[-.\s]?(\d{3})[-.\s]?(\d{4})$",
            "replace": r"(\1) \2-\3",
            "purpose": "North American Phone Number - Validates and formats 10-digit phone numbers"
        },
        {
            "find": r"^https?:\/\/(?:www\.)?[-a-zA-Z0-9@:%.\_\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)$",
            "replace": "[URL]",
            "purpose": "URL Structure Validation - Validates HTTP/HTTPS URLs"
        },
        {
            "find": r"^[a-zA-Z0-9]([._-](?![._-])|[a-zA-Z0-9]){3,18}[a-zA-Z0-9]$",
            "replace": "[USERNAME]",
            "purpose": "Username Format - 5-20 chars, alphanumeric start/end, no consecutive special chars"
        },
        {
            "find": r"^((25[0-5]|2[0-4][0-9]|1?[0-9][0-9]?)\.){3}(25[0-5]|2[0-4][0-9]|1?[0-9][0-9]?)$",
            "replace": "[IP_ADDRESS]",
            "purpose": "IPv4 Address Validation - Validates IP addresses (0-255 per octet)"
        },
        {
            "find": r"^\d{4}-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])$",
            "replace": "[DATE]",
            "purpose": "YYYY-MM-DD Date Format - Validates ISO date format"
        },
        {
            "find": r"^(?:4[0-9]{12}(?:[0-9]{3})?|5[1-5][0-9]{14}|3[47][0-9]{13})$",
            "replace": "[CREDIT_CARD]",
            "purpose": "Credit Card Number - Identifies Visa, Mastercard, American Express formats"
        },

        # Information Extraction Patterns
        {
            "find": r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}",
            "replace": "[EMAIL]",
            "purpose": "Extract All Email Addresses - Finds emails anywhere in text"
        },
        {
            "find": r"https?:\/\/[^\s/$.?#].[^\s]*",
            "replace": "[URL]",
            "purpose": "Extract All URLs - Finds HTTP/HTTPS URLs in text"
        },
        {
            # (?<!\S) means "at the start of the text or right after
            # whitespace". The earlier (?<=\s|^) spelling cannot be compiled
            # by Python's re module, which requires every look-behind
            # alternative to have the same fixed width.
            "find": r"(?<!\S)#(\w+)",
            "replace": "#[HASHTAG]",
            "purpose": "Extract Hashtags - Finds social media hashtags"
        },
        {
            # Same fixed-width look-behind rewrite as the hashtag pattern.
            "find": r"(?<!\S)@(\w{1,15})\b",
            "replace": "@[MENTION]",
            "purpose": "Extract @Mentions - Finds social media mentions (1-15 chars)"
        },
        {
            "find": r"^(?P<ip>[\d.]+) (?P<identd>\S+) (?P<user>\S+) \[(?P<timestamp>.*?)\] \"(?P<request>.*?)\" (?P<status_code>\d{3}) (?P<size>\d+|-).*$",
            "replace": "[LOG_ENTRY]",
            "purpose": "Log File Parsing - Parses Apache/Nginx log entries with named groups"
        },
        {
            "find": r"(?:^|,)(\"(?:[^\"]|\"\")*\"|[^,]*)",
            "replace": "[CSV_FIELD]",
            "purpose": "Simple CSV Parsing - Handles quoted fields with commas"
        },
        {
            "find": r"<h1.*?>(.*?)<\/h1>",
            "replace": r"\1",
            "purpose": "HTML Tag Content - Extracts content from H1 tags"
        },

        # Text Cleaning Patterns
        {
            "find": r"<[^<]+?>",
            "replace": "",
            "purpose": "Strip HTML Tags - Removes all HTML tags from text"
        },
        {
            "find": r"\b(\w+)\s+\1\b",
            "replace": r"\1",
            "purpose": "Remove Duplicate Words - Removes consecutive duplicate words"
        },
        {
            "find": r"^\s+|\s+$",
            "replace": "",
            "purpose": "Trim Whitespace - Removes leading and trailing whitespace"
        },
        {
            "find": r"^\(?(\d{3})\)?[-.\s]?(\d{3})[-.\s]?(\d{4})$",
            "replace": r"\1-\2-\3",
            "purpose": "Normalize Phone Numbers - Converts to XXX-XXX-XXXX format"
        },
        {
            "find": r"\b(\d{4}[- ]?){3}(\d{4})\b",
            "replace": r"XXXX-XXXX-XXXX-\2",
            "purpose": "Mask Sensitive Data - Masks credit card numbers, shows last 4 digits"
        }
    ]
|
|
120
|
+
|
|
121
|
+
def update_settings_pattern_library():
    """
    Ensure settings.json in the current working directory holds the complete
    pattern library.

    The file is written when any of the following is true:
      * settings.json does not exist,
      * settings.json is not valid JSON or its top-level value is not an object,
      * the settings have no "pattern_library" key,
      * "pattern_library" holds fewer than 10 entries.

    When the existing library already has 10 or more entries the file is left
    untouched. When the existing file is a valid JSON object, all of its other
    keys are written back unchanged; otherwise the new file contains only the
    "pattern_library" key.

    Progress and errors are reported with print(); the function returns None
    and does not raise on write failures.
    """
    settings_file = "settings.json"
    min_patterns = 10  # fewer entries than this counts as "only basic patterns"

    settings = None  # None means "start from scratch"
    try:
        with open(settings_file, 'r', encoding='utf-8') as f:
            settings = json.load(f)
    except FileNotFoundError:
        print("settings.json not found. Creating new file with pattern library.")
    except json.JSONDecodeError as e:
        print(f"Error reading settings.json: {e}")
        print("Creating new settings with pattern library.")
    else:
        # Valid JSON that is not an object (e.g. a list) cannot hold the
        # "pattern_library" key, so treat it like an unreadable file.
        if not isinstance(settings, dict):
            print("Error reading settings.json: top-level JSON value is not an object")
            print("Creating new settings with pattern library.")
            settings = None

    if settings is None:
        # Fresh settings must always reach the save step below. Routing them
        # through the size check would find the 20 defaults, report
        # "No update needed" and return without ever creating the file.
        settings = {"pattern_library": get_default_pattern_library()}
    elif "pattern_library" not in settings:
        print("No pattern_library found. Adding complete pattern library.")
        settings["pattern_library"] = get_default_pattern_library()
    else:
        current_patterns = settings["pattern_library"]
        try:
            pattern_count = len(current_patterns)
        except TypeError:
            # e.g. null or a number stored under the key: nothing usable.
            pattern_count = 0

        if pattern_count >= min_patterns:
            print(f"Pattern library already has {pattern_count} patterns. No update needed.")
            return

        print(f"Pattern library has only {pattern_count} patterns. Updating with complete library.")
        settings["pattern_library"] = get_default_pattern_library()

    # Save updated settings. This is a best-effort script step, so failures
    # are reported rather than raised.
    try:
        with open(settings_file, 'w', encoding='utf-8') as f:
            json.dump(settings, f, indent=4, ensure_ascii=False)
    except Exception as e:
        print(f"Error writing to settings.json: {e}")
    else:
        print(f"Successfully updated {settings_file} with {len(settings['pattern_library'])} regex patterns.")
|
|
167
|
+
|
|
168
|
+
# Script entry point: refresh settings.json only when this file is executed
# directly, never as a side effect of importing the module.
if __name__ == "__main__":
    update_settings_pattern_library()
|