pomera-ai-commander 1.1.1 → 1.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (213) hide show
  1. package/LICENSE +21 -21
  2. package/README.md +105 -680
  3. package/bin/pomera-ai-commander.js +62 -62
  4. package/core/__init__.py +65 -65
  5. package/core/app_context.py +482 -482
  6. package/core/async_text_processor.py +421 -421
  7. package/core/backup_manager.py +655 -655
  8. package/core/backup_recovery_manager.py +1199 -1033
  9. package/core/content_hash_cache.py +508 -508
  10. package/core/context_menu.py +313 -313
  11. package/core/data_directory.py +549 -0
  12. package/core/data_validator.py +1066 -1066
  13. package/core/database_connection_manager.py +744 -744
  14. package/core/database_curl_settings_manager.py +608 -608
  15. package/core/database_promera_ai_settings_manager.py +446 -446
  16. package/core/database_schema.py +411 -411
  17. package/core/database_schema_manager.py +395 -395
  18. package/core/database_settings_manager.py +1507 -1507
  19. package/core/database_settings_manager_interface.py +456 -456
  20. package/core/dialog_manager.py +734 -734
  21. package/core/diff_utils.py +239 -0
  22. package/core/efficient_line_numbers.py +540 -510
  23. package/core/error_handler.py +746 -746
  24. package/core/error_service.py +431 -431
  25. package/core/event_consolidator.py +511 -511
  26. package/core/mcp/__init__.py +43 -43
  27. package/core/mcp/find_replace_diff.py +334 -0
  28. package/core/mcp/protocol.py +288 -288
  29. package/core/mcp/schema.py +251 -251
  30. package/core/mcp/server_stdio.py +299 -299
  31. package/core/mcp/tool_registry.py +2699 -2345
  32. package/core/memento.py +275 -0
  33. package/core/memory_efficient_text_widget.py +711 -711
  34. package/core/migration_manager.py +914 -914
  35. package/core/migration_test_suite.py +1085 -1085
  36. package/core/migration_validator.py +1143 -1143
  37. package/core/optimized_find_replace.py +714 -714
  38. package/core/optimized_pattern_engine.py +424 -424
  39. package/core/optimized_search_highlighter.py +552 -552
  40. package/core/performance_monitor.py +674 -674
  41. package/core/persistence_manager.py +712 -712
  42. package/core/progressive_stats_calculator.py +632 -632
  43. package/core/regex_pattern_cache.py +529 -529
  44. package/core/regex_pattern_library.py +350 -350
  45. package/core/search_operation_manager.py +434 -434
  46. package/core/settings_defaults_registry.py +1087 -1087
  47. package/core/settings_integrity_validator.py +1111 -1111
  48. package/core/settings_serializer.py +557 -557
  49. package/core/settings_validator.py +1823 -1823
  50. package/core/smart_stats_calculator.py +709 -709
  51. package/core/statistics_update_manager.py +619 -619
  52. package/core/stats_config_manager.py +858 -858
  53. package/core/streaming_text_handler.py +723 -723
  54. package/core/task_scheduler.py +596 -596
  55. package/core/update_pattern_library.py +168 -168
  56. package/core/visibility_monitor.py +596 -596
  57. package/core/widget_cache.py +498 -498
  58. package/mcp.json +51 -61
  59. package/migrate_data.py +127 -0
  60. package/package.json +64 -57
  61. package/pomera.py +7883 -7482
  62. package/pomera_mcp_server.py +183 -144
  63. package/requirements.txt +33 -0
  64. package/scripts/Dockerfile.alpine +43 -0
  65. package/scripts/Dockerfile.gui-test +54 -0
  66. package/scripts/Dockerfile.linux +43 -0
  67. package/scripts/Dockerfile.test-linux +80 -0
  68. package/scripts/Dockerfile.ubuntu +39 -0
  69. package/scripts/README.md +53 -0
  70. package/scripts/build-all.bat +113 -0
  71. package/scripts/build-docker.bat +53 -0
  72. package/scripts/build-docker.sh +55 -0
  73. package/scripts/build-optimized.bat +101 -0
  74. package/scripts/build.sh +78 -0
  75. package/scripts/docker-compose.test.yml +27 -0
  76. package/scripts/docker-compose.yml +32 -0
  77. package/scripts/postinstall.js +62 -0
  78. package/scripts/requirements-minimal.txt +33 -0
  79. package/scripts/test-linux-simple.bat +28 -0
  80. package/scripts/validate-release-workflow.py +450 -0
  81. package/tools/__init__.py +4 -4
  82. package/tools/ai_tools.py +2891 -2891
  83. package/tools/ascii_art_generator.py +352 -352
  84. package/tools/base64_tools.py +183 -183
  85. package/tools/base_tool.py +511 -511
  86. package/tools/case_tool.py +308 -308
  87. package/tools/column_tools.py +395 -395
  88. package/tools/cron_tool.py +884 -884
  89. package/tools/curl_history.py +600 -600
  90. package/tools/curl_processor.py +1207 -1207
  91. package/tools/curl_settings.py +502 -502
  92. package/tools/curl_tool.py +5467 -5467
  93. package/tools/diff_viewer.py +1817 -1072
  94. package/tools/email_extraction_tool.py +248 -248
  95. package/tools/email_header_analyzer.py +425 -425
  96. package/tools/extraction_tools.py +250 -250
  97. package/tools/find_replace.py +2289 -1750
  98. package/tools/folder_file_reporter.py +1463 -1463
  99. package/tools/folder_file_reporter_adapter.py +480 -480
  100. package/tools/generator_tools.py +1216 -1216
  101. package/tools/hash_generator.py +255 -255
  102. package/tools/html_tool.py +656 -656
  103. package/tools/jsonxml_tool.py +729 -729
  104. package/tools/line_tools.py +419 -419
  105. package/tools/markdown_tools.py +561 -561
  106. package/tools/mcp_widget.py +1417 -1417
  107. package/tools/notes_widget.py +978 -973
  108. package/tools/number_base_converter.py +372 -372
  109. package/tools/regex_extractor.py +571 -571
  110. package/tools/slug_generator.py +310 -310
  111. package/tools/sorter_tools.py +458 -458
  112. package/tools/string_escape_tool.py +392 -392
  113. package/tools/text_statistics_tool.py +365 -365
  114. package/tools/text_wrapper.py +430 -430
  115. package/tools/timestamp_converter.py +421 -421
  116. package/tools/tool_loader.py +710 -710
  117. package/tools/translator_tools.py +522 -522
  118. package/tools/url_link_extractor.py +261 -261
  119. package/tools/url_parser.py +204 -204
  120. package/tools/whitespace_tools.py +355 -355
  121. package/tools/word_frequency_counter.py +146 -146
  122. package/core/__pycache__/__init__.cpython-313.pyc +0 -0
  123. package/core/__pycache__/app_context.cpython-313.pyc +0 -0
  124. package/core/__pycache__/async_text_processor.cpython-313.pyc +0 -0
  125. package/core/__pycache__/backup_manager.cpython-313.pyc +0 -0
  126. package/core/__pycache__/backup_recovery_manager.cpython-313.pyc +0 -0
  127. package/core/__pycache__/content_hash_cache.cpython-313.pyc +0 -0
  128. package/core/__pycache__/context_menu.cpython-313.pyc +0 -0
  129. package/core/__pycache__/data_validator.cpython-313.pyc +0 -0
  130. package/core/__pycache__/database_connection_manager.cpython-313.pyc +0 -0
  131. package/core/__pycache__/database_curl_settings_manager.cpython-313.pyc +0 -0
  132. package/core/__pycache__/database_promera_ai_settings_manager.cpython-313.pyc +0 -0
  133. package/core/__pycache__/database_schema.cpython-313.pyc +0 -0
  134. package/core/__pycache__/database_schema_manager.cpython-313.pyc +0 -0
  135. package/core/__pycache__/database_settings_manager.cpython-313.pyc +0 -0
  136. package/core/__pycache__/database_settings_manager_interface.cpython-313.pyc +0 -0
  137. package/core/__pycache__/dialog_manager.cpython-313.pyc +0 -0
  138. package/core/__pycache__/efficient_line_numbers.cpython-313.pyc +0 -0
  139. package/core/__pycache__/error_handler.cpython-313.pyc +0 -0
  140. package/core/__pycache__/error_service.cpython-313.pyc +0 -0
  141. package/core/__pycache__/event_consolidator.cpython-313.pyc +0 -0
  142. package/core/__pycache__/memory_efficient_text_widget.cpython-313.pyc +0 -0
  143. package/core/__pycache__/migration_manager.cpython-313.pyc +0 -0
  144. package/core/__pycache__/migration_test_suite.cpython-313.pyc +0 -0
  145. package/core/__pycache__/migration_validator.cpython-313.pyc +0 -0
  146. package/core/__pycache__/optimized_find_replace.cpython-313.pyc +0 -0
  147. package/core/__pycache__/optimized_pattern_engine.cpython-313.pyc +0 -0
  148. package/core/__pycache__/optimized_search_highlighter.cpython-313.pyc +0 -0
  149. package/core/__pycache__/performance_monitor.cpython-313.pyc +0 -0
  150. package/core/__pycache__/persistence_manager.cpython-313.pyc +0 -0
  151. package/core/__pycache__/progressive_stats_calculator.cpython-313.pyc +0 -0
  152. package/core/__pycache__/regex_pattern_cache.cpython-313.pyc +0 -0
  153. package/core/__pycache__/regex_pattern_library.cpython-313.pyc +0 -0
  154. package/core/__pycache__/search_operation_manager.cpython-313.pyc +0 -0
  155. package/core/__pycache__/settings_defaults_registry.cpython-313.pyc +0 -0
  156. package/core/__pycache__/settings_integrity_validator.cpython-313.pyc +0 -0
  157. package/core/__pycache__/settings_serializer.cpython-313.pyc +0 -0
  158. package/core/__pycache__/settings_validator.cpython-313.pyc +0 -0
  159. package/core/__pycache__/smart_stats_calculator.cpython-313.pyc +0 -0
  160. package/core/__pycache__/statistics_update_manager.cpython-313.pyc +0 -0
  161. package/core/__pycache__/stats_config_manager.cpython-313.pyc +0 -0
  162. package/core/__pycache__/streaming_text_handler.cpython-313.pyc +0 -0
  163. package/core/__pycache__/task_scheduler.cpython-313.pyc +0 -0
  164. package/core/__pycache__/visibility_monitor.cpython-313.pyc +0 -0
  165. package/core/__pycache__/widget_cache.cpython-313.pyc +0 -0
  166. package/core/mcp/__pycache__/__init__.cpython-313.pyc +0 -0
  167. package/core/mcp/__pycache__/protocol.cpython-313.pyc +0 -0
  168. package/core/mcp/__pycache__/schema.cpython-313.pyc +0 -0
  169. package/core/mcp/__pycache__/server_stdio.cpython-313.pyc +0 -0
  170. package/core/mcp/__pycache__/tool_registry.cpython-313.pyc +0 -0
  171. package/tools/__pycache__/__init__.cpython-313.pyc +0 -0
  172. package/tools/__pycache__/ai_tools.cpython-313.pyc +0 -0
  173. package/tools/__pycache__/ascii_art_generator.cpython-313.pyc +0 -0
  174. package/tools/__pycache__/base64_tools.cpython-313.pyc +0 -0
  175. package/tools/__pycache__/base_tool.cpython-313.pyc +0 -0
  176. package/tools/__pycache__/case_tool.cpython-313.pyc +0 -0
  177. package/tools/__pycache__/column_tools.cpython-313.pyc +0 -0
  178. package/tools/__pycache__/cron_tool.cpython-313.pyc +0 -0
  179. package/tools/__pycache__/curl_history.cpython-313.pyc +0 -0
  180. package/tools/__pycache__/curl_processor.cpython-313.pyc +0 -0
  181. package/tools/__pycache__/curl_settings.cpython-313.pyc +0 -0
  182. package/tools/__pycache__/curl_tool.cpython-313.pyc +0 -0
  183. package/tools/__pycache__/diff_viewer.cpython-313.pyc +0 -0
  184. package/tools/__pycache__/email_extraction_tool.cpython-313.pyc +0 -0
  185. package/tools/__pycache__/email_header_analyzer.cpython-313.pyc +0 -0
  186. package/tools/__pycache__/extraction_tools.cpython-313.pyc +0 -0
  187. package/tools/__pycache__/find_replace.cpython-313.pyc +0 -0
  188. package/tools/__pycache__/folder_file_reporter.cpython-313.pyc +0 -0
  189. package/tools/__pycache__/folder_file_reporter_adapter.cpython-313.pyc +0 -0
  190. package/tools/__pycache__/generator_tools.cpython-313.pyc +0 -0
  191. package/tools/__pycache__/hash_generator.cpython-313.pyc +0 -0
  192. package/tools/__pycache__/html_tool.cpython-313.pyc +0 -0
  193. package/tools/__pycache__/huggingface_helper.cpython-313.pyc +0 -0
  194. package/tools/__pycache__/jsonxml_tool.cpython-313.pyc +0 -0
  195. package/tools/__pycache__/line_tools.cpython-313.pyc +0 -0
  196. package/tools/__pycache__/list_comparator.cpython-313.pyc +0 -0
  197. package/tools/__pycache__/markdown_tools.cpython-313.pyc +0 -0
  198. package/tools/__pycache__/mcp_widget.cpython-313.pyc +0 -0
  199. package/tools/__pycache__/notes_widget.cpython-313.pyc +0 -0
  200. package/tools/__pycache__/number_base_converter.cpython-313.pyc +0 -0
  201. package/tools/__pycache__/regex_extractor.cpython-313.pyc +0 -0
  202. package/tools/__pycache__/slug_generator.cpython-313.pyc +0 -0
  203. package/tools/__pycache__/sorter_tools.cpython-313.pyc +0 -0
  204. package/tools/__pycache__/string_escape_tool.cpython-313.pyc +0 -0
  205. package/tools/__pycache__/text_statistics_tool.cpython-313.pyc +0 -0
  206. package/tools/__pycache__/text_wrapper.cpython-313.pyc +0 -0
  207. package/tools/__pycache__/timestamp_converter.cpython-313.pyc +0 -0
  208. package/tools/__pycache__/tool_loader.cpython-313.pyc +0 -0
  209. package/tools/__pycache__/translator_tools.cpython-313.pyc +0 -0
  210. package/tools/__pycache__/url_link_extractor.cpython-313.pyc +0 -0
  211. package/tools/__pycache__/url_parser.cpython-313.pyc +0 -0
  212. package/tools/__pycache__/whitespace_tools.cpython-313.pyc +0 -0
  213. package/tools/__pycache__/word_frequency_counter.cpython-313.pyc +0 -0
@@ -1,351 +1,351 @@
1
- #!/usr/bin/env python3
2
- """
3
- Regex Pattern Library Module
4
-
5
- This module provides the 20 regex use cases extracted
6
-
7
- Usage:
8
- from regex_pattern_library import RegexPatternLibrary
9
-
10
- library = RegexPatternLibrary()
11
- patterns = library.get_all_patterns()
12
- validation_patterns = library.get_patterns_by_category("validation")
13
- """
14
-
15
- import json
16
- import os
17
- from typing import List, Dict, Optional
18
-
19
- class RegexPatternLibrary:
20
- """
21
- A comprehensive library of regex patterns for common text processing tasks.
22
- Based on the 20 use cases from the RegexUseCases.md document.
23
- """
24
-
25
- def __init__(self):
26
- self.patterns = self._get_default_patterns()
27
-
28
- def _get_default_patterns(self) -> List[Dict[str, str]]:
29
- """
30
- Returns the complete list of 20 regex patterns organized by category.
31
- """
32
- return [
33
- # Data Validation Patterns (8 patterns)
34
- {
35
- "find": r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$",
36
- "replace": "[EMAIL]",
37
- "purpose": "Email Address Validation - Validates standard email format",
38
- "category": "validation",
39
- "example_input": "user@example.com",
40
- "example_output": "[EMAIL]"
41
- },
42
- {
43
- "find": r"^(?=.*[a-z])(?=.*[A-Z])(?=.*\d)(?=.*[@$!%*?&])[A-Za-z\d@$!%*?&]{8,}$",
44
- "replace": "[STRONG_PASSWORD]",
45
- "purpose": "Password Strength - Min 8 chars, uppercase, lowercase, digit, special char",
46
- "category": "validation",
47
- "example_input": "MyPass123!",
48
- "example_output": "[STRONG_PASSWORD]"
49
- },
50
- {
51
- "find": r"^\(?(\d{3})\)?[-.\s]?(\d{3})[-.\s]?(\d{4})$",
52
- "replace": r"(\1) \2-\3",
53
- "purpose": "North American Phone Number - Validates and formats 10-digit phone numbers",
54
- "category": "validation",
55
- "example_input": "123-456-7890",
56
- "example_output": "(123) 456-7890"
57
- },
58
- {
59
- "find": r"^https?:\/\/(?:www\.)?[-a-zA-Z0-9@:%.\_\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)$",
60
- "replace": "[URL]",
61
- "purpose": "URL Structure Validation - Validates HTTP/HTTPS URLs",
62
- "category": "validation",
63
- "example_input": "https://www.example.com/path",
64
- "example_output": "[URL]"
65
- },
66
- {
67
- "find": r"^[a-zA-Z0-9]([._-](?![._-])|[a-zA-Z0-9]){3,18}[a-zA-Z0-9]$",
68
- "replace": "[USERNAME]",
69
- "purpose": "Username Format - 5-20 chars, alphanumeric start/end, no consecutive special chars",
70
- "category": "validation",
71
- "example_input": "user_name123",
72
- "example_output": "[USERNAME]"
73
- },
74
- {
75
- "find": r"^((25[0-5]|2[0-4][0-9]|1?[0-9][0-9]?)\.){3}(25[0-5]|2[0-4][0-9]|1?[0-9][0-9]?)$",
76
- "replace": "[IP_ADDRESS]",
77
- "purpose": "IPv4 Address Validation - Validates IP addresses (0-255 per octet)",
78
- "category": "validation",
79
- "example_input": "192.168.1.1",
80
- "example_output": "[IP_ADDRESS]"
81
- },
82
- {
83
- "find": r"^\d{4}-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])$",
84
- "replace": "[DATE]",
85
- "purpose": "YYYY-MM-DD Date Format - Validates ISO date format",
86
- "category": "validation",
87
- "example_input": "2024-12-25",
88
- "example_output": "[DATE]"
89
- },
90
- {
91
- "find": r"^(?:4[0-9]{12}(?:[0-9]{3})?|5[1-5][0-9]{14}|3[47][0-9]{13})$",
92
- "replace": "[CREDIT_CARD]",
93
- "purpose": "Credit Card Number - Identifies Visa, Mastercard, American Express formats",
94
- "category": "validation",
95
- "example_input": "4111111111111111",
96
- "example_output": "[CREDIT_CARD]"
97
- },
98
-
99
- # Information Extraction Patterns (7 patterns)
100
- {
101
- "find": r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}",
102
- "replace": "[EMAIL]",
103
- "purpose": "Extract All Email Addresses - Finds emails anywhere in text",
104
- "category": "extraction",
105
- "example_input": "Contact us at support@example.com for help",
106
- "example_output": "Contact us at [EMAIL] for help"
107
- },
108
- {
109
- "find": r"https?:\/\/[^\s/$.?#].[^\s]*",
110
- "replace": "[URL]",
111
- "purpose": "Extract All URLs - Finds HTTP/HTTPS URLs in text",
112
- "category": "extraction",
113
- "example_input": "Visit https://example.com for more info",
114
- "example_output": "Visit [URL] for more info"
115
- },
116
- {
117
- "find": r"(?<=\s|^)#(\w+)",
118
- "replace": "#[HASHTAG]",
119
- "purpose": "Extract Hashtags - Finds social media hashtags",
120
- "category": "extraction",
121
- "example_input": "Love this #python tutorial!",
122
- "example_output": "Love this #[HASHTAG] tutorial!"
123
- },
124
- {
125
- "find": r"(?<=\s|^)@(\w{1,15})\b",
126
- "replace": "@[MENTION]",
127
- "purpose": "Extract @Mentions - Finds social media mentions (1-15 chars)",
128
- "category": "extraction",
129
- "example_input": "Thanks @john for the help!",
130
- "example_output": "Thanks @[MENTION] for the help!"
131
- },
132
- {
133
- "find": r"^(?P<ip>[\d.]+) (?P<identd>\S+) (?P<user>\S+) \[(?P<timestamp>.*?)\] \"(?P<request>.*?)\" (?P<status_code>\d{3}) (?P<size>\d+|-).*$",
134
- "replace": "[LOG_ENTRY]",
135
- "purpose": "Log File Parsing - Parses Apache/Nginx log entries with named groups",
136
- "category": "extraction",
137
- "example_input": '127.0.0.1 - - [10/Oct/2023:13:55:36 +0000] "GET /index.html HTTP/1.1" 200 42',
138
- "example_output": "[LOG_ENTRY]"
139
- },
140
- {
141
- "find": r'(?:^|,)("(?:[^"]|"")*"|[^,]*)',
142
- "replace": "[CSV_FIELD]",
143
- "purpose": "Simple CSV Parsing - Handles quoted fields with commas",
144
- "category": "extraction",
145
- "example_input": 'field1,"field,2",field3',
146
- "example_output": "[CSV_FIELD][CSV_FIELD][CSV_FIELD]"
147
- },
148
- {
149
- "find": r"<h1.*?>(.*?)<\/h1>",
150
- "replace": r"\1",
151
- "purpose": "HTML Tag Content - Extracts content from H1 tags",
152
- "category": "extraction",
153
- "example_input": '<h1 class="title">Welcome</h1>',
154
- "example_output": "Welcome"
155
- },
156
-
157
- # Text Cleaning Patterns (5 patterns)
158
- {
159
- "find": r"<[^<]+?>",
160
- "replace": "",
161
- "purpose": "Strip HTML Tags - Removes all HTML tags from text",
162
- "category": "cleaning",
163
- "example_input": "<p>This is <b>bold</b> text.</p>",
164
- "example_output": "This is bold text."
165
- },
166
- {
167
- "find": r"\b(\w+)\s+\1\b",
168
- "replace": r"\1",
169
- "purpose": "Remove Duplicate Words - Removes consecutive duplicate words",
170
- "category": "cleaning",
171
- "example_input": "This is is a test",
172
- "example_output": "This is a test"
173
- },
174
- {
175
- "find": r"^\s+|\s+$",
176
- "replace": "",
177
- "purpose": "Trim Whitespace - Removes leading and trailing whitespace",
178
- "category": "cleaning",
179
- "example_input": " text with spaces ",
180
- "example_output": "text with spaces"
181
- },
182
- {
183
- "find": r"^\(?(\d{3})\)?[-.\s]?(\d{3})[-.\s]?(\d{4})$",
184
- "replace": r"\1-\2-\3",
185
- "purpose": "Normalize Phone Numbers - Converts to XXX-XXX-XXXX format",
186
- "category": "cleaning",
187
- "example_input": "(123) 456.7890",
188
- "example_output": "123-456-7890"
189
- },
190
- {
191
- "find": r"\b(\d{4}[- ]?){3}(\d{4})\b",
192
- "replace": r"XXXX-XXXX-XXXX-\2",
193
- "purpose": "Mask Sensitive Data - Masks credit card numbers, shows last 4 digits",
194
- "category": "cleaning",
195
- "example_input": "4111-1111-1111-1111",
196
- "example_output": "XXXX-XXXX-XXXX-1111"
197
- }
198
- ]
199
-
200
- def get_all_patterns(self) -> List[Dict[str, str]]:
201
- """Returns all patterns in the library."""
202
- return self.patterns
203
-
204
- def get_patterns_by_category(self, category: str) -> List[Dict[str, str]]:
205
- """
206
- Returns patterns filtered by category.
207
-
208
- Args:
209
- category: One of 'validation', 'extraction', 'cleaning'
210
- """
211
- return [p for p in self.patterns if p.get('category') == category]
212
-
213
- def get_pattern_by_purpose(self, purpose_keyword: str) -> List[Dict[str, str]]:
214
- """
215
- Returns patterns that match a purpose keyword.
216
-
217
- Args:
218
- purpose_keyword: Keyword to search for in pattern purposes
219
- """
220
- return [p for p in self.patterns if purpose_keyword.lower() in p.get('purpose', '').lower()]
221
-
222
- def get_validation_patterns(self) -> List[Dict[str, str]]:
223
- """Returns all validation patterns."""
224
- return self.get_patterns_by_category('validation')
225
-
226
- def get_extraction_patterns(self) -> List[Dict[str, str]]:
227
- """Returns all extraction patterns."""
228
- return self.get_patterns_by_category('extraction')
229
-
230
- def get_cleaning_patterns(self) -> List[Dict[str, str]]:
231
- """Returns all cleaning patterns."""
232
- return self.get_patterns_by_category('cleaning')
233
-
234
- def update_settings_file(self, settings_file: str = "settings.json") -> bool:
235
- """
236
- Updates the settings.json file with the pattern library.
237
- Only updates if the pattern library is empty or has fewer than 10 patterns.
238
-
239
- Args:
240
- settings_file: Path to the settings.json file
241
-
242
- Returns:
243
- bool: True if updated, False if no update was needed
244
- """
245
- try:
246
- # Check if settings.json exists
247
- if not os.path.exists(settings_file):
248
- print(f"{settings_file} not found. Creating new file with pattern library.")
249
- settings = {"pattern_library": self._convert_to_settings_format()}
250
- else:
251
- # Load existing settings
252
- with open(settings_file, 'r', encoding='utf-8') as f:
253
- settings = json.load(f)
254
-
255
- # Check if pattern_library exists and if it needs updating
256
- if "pattern_library" not in settings:
257
- print("No pattern_library found. Adding complete pattern library.")
258
- settings["pattern_library"] = self._convert_to_settings_format()
259
- else:
260
- current_patterns = settings["pattern_library"]
261
- if len(current_patterns) < 10:
262
- print(f"Pattern library has only {len(current_patterns)} patterns. Updating with complete library.")
263
- settings["pattern_library"] = self._convert_to_settings_format()
264
- else:
265
- print(f"Pattern library already has {len(current_patterns)} patterns. No update needed.")
266
- return False
267
-
268
- # Save updated settings
269
- with open(settings_file, 'w', encoding='utf-8') as f:
270
- json.dump(settings, f, indent=4, ensure_ascii=False)
271
- print(f"Successfully updated {settings_file} with {len(settings['pattern_library'])} regex patterns.")
272
- return True
273
-
274
- except Exception as e:
275
- print(f"Error updating settings file: {e}")
276
- return False
277
-
278
- def _convert_to_settings_format(self) -> List[Dict[str, str]]:
279
- """
280
- Converts the internal pattern format to the settings.json format.
281
- """
282
- return [
283
- {
284
- "find": pattern["find"],
285
- "replace": pattern["replace"],
286
- "purpose": pattern["purpose"]
287
- }
288
- for pattern in self.patterns
289
- ]
290
-
291
- def get_pattern_categories(self) -> List[str]:
292
- """Returns a list of all available categories."""
293
- categories = set()
294
- for pattern in self.patterns:
295
- if 'category' in pattern:
296
- categories.add(pattern['category'])
297
- return sorted(list(categories))
298
-
299
- def search_patterns(self, query: str) -> List[Dict[str, str]]:
300
- """
301
- Searches patterns by query string in purpose or find pattern.
302
-
303
- Args:
304
- query: Search query
305
-
306
- Returns:
307
- List of matching patterns
308
- """
309
- query = query.lower()
310
- results = []
311
-
312
- for pattern in self.patterns:
313
- if (query in pattern.get('purpose', '').lower() or
314
- query in pattern.get('find', '').lower()):
315
- results.append(pattern)
316
-
317
- return results
318
-
319
- # Convenience functions for direct use
320
- def get_all_regex_patterns() -> List[Dict[str, str]]:
321
- """Returns all 20 regex patterns from the library."""
322
- library = RegexPatternLibrary()
323
- return library.get_all_patterns()
324
-
325
- def update_pattern_library_in_settings(settings_file: str = "settings.json") -> bool:
326
- """
327
- Updates the pattern library in settings.json if needed.
328
-
329
- Args:
330
- settings_file: Path to settings file
331
-
332
- Returns:
333
- bool: True if updated, False if no update needed
334
- """
335
- library = RegexPatternLibrary()
336
- return library.update_settings_file(settings_file)
337
-
338
- if __name__ == "__main__":
339
- # Demo usage
340
- library = RegexPatternLibrary()
341
-
342
- print("=== Regex Pattern Library Demo ===")
343
- print(f"Total patterns: {len(library.get_all_patterns())}")
344
- print(f"Categories: {', '.join(library.get_pattern_categories())}")
345
-
346
- print("\n=== Validation Patterns ===")
347
- for pattern in library.get_validation_patterns():
348
- print(f"- {pattern['purpose']}")
349
-
350
- print("\n=== Updating settings.json ===")
1
+ #!/usr/bin/env python3
2
+ """
3
+ Regex Pattern Library Module
4
+
5
+ This module provides the 20 regex use cases extracted
6
+
7
+ Usage:
8
+ from regex_pattern_library import RegexPatternLibrary
9
+
10
+ library = RegexPatternLibrary()
11
+ patterns = library.get_all_patterns()
12
+ validation_patterns = library.get_patterns_by_category("validation")
13
+ """
14
+
15
+ import json
16
+ import os
17
+ from typing import List, Dict, Optional
18
+
19
+ class RegexPatternLibrary:
20
+ """
21
+ A comprehensive library of regex patterns for common text processing tasks.
22
+ Based on the 20 use cases from the RegexUseCases.md document.
23
+ """
24
+
25
+ def __init__(self):
26
+ self.patterns = self._get_default_patterns()
27
+
28
+ def _get_default_patterns(self) -> List[Dict[str, str]]:
29
+ """
30
+ Returns the complete list of 20 regex patterns organized by category.
31
+ """
32
+ return [
33
+ # Data Validation Patterns (8 patterns)
34
+ {
35
+ "find": r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$",
36
+ "replace": "[EMAIL]",
37
+ "purpose": "Email Address Validation - Validates standard email format",
38
+ "category": "validation",
39
+ "example_input": "user@example.com",
40
+ "example_output": "[EMAIL]"
41
+ },
42
+ {
43
+ "find": r"^(?=.*[a-z])(?=.*[A-Z])(?=.*\d)(?=.*[@$!%*?&])[A-Za-z\d@$!%*?&]{8,}$",
44
+ "replace": "[STRONG_PASSWORD]",
45
+ "purpose": "Password Strength - Min 8 chars, uppercase, lowercase, digit, special char",
46
+ "category": "validation",
47
+ "example_input": "MyPass123!",
48
+ "example_output": "[STRONG_PASSWORD]"
49
+ },
50
+ {
51
+ "find": r"^\(?(\d{3})\)?[-.\s]?(\d{3})[-.\s]?(\d{4})$",
52
+ "replace": r"(\1) \2-\3",
53
+ "purpose": "North American Phone Number - Validates and formats 10-digit phone numbers",
54
+ "category": "validation",
55
+ "example_input": "123-456-7890",
56
+ "example_output": "(123) 456-7890"
57
+ },
58
+ {
59
+ "find": r"^https?:\/\/(?:www\.)?[-a-zA-Z0-9@:%.\_\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)$",
60
+ "replace": "[URL]",
61
+ "purpose": "URL Structure Validation - Validates HTTP/HTTPS URLs",
62
+ "category": "validation",
63
+ "example_input": "https://www.example.com/path",
64
+ "example_output": "[URL]"
65
+ },
66
+ {
67
+ "find": r"^[a-zA-Z0-9]([._-](?![._-])|[a-zA-Z0-9]){3,18}[a-zA-Z0-9]$",
68
+ "replace": "[USERNAME]",
69
+ "purpose": "Username Format - 5-20 chars, alphanumeric start/end, no consecutive special chars",
70
+ "category": "validation",
71
+ "example_input": "user_name123",
72
+ "example_output": "[USERNAME]"
73
+ },
74
+ {
75
+ "find": r"^((25[0-5]|2[0-4][0-9]|1?[0-9][0-9]?)\.){3}(25[0-5]|2[0-4][0-9]|1?[0-9][0-9]?)$",
76
+ "replace": "[IP_ADDRESS]",
77
+ "purpose": "IPv4 Address Validation - Validates IP addresses (0-255 per octet)",
78
+ "category": "validation",
79
+ "example_input": "192.168.1.1",
80
+ "example_output": "[IP_ADDRESS]"
81
+ },
82
+ {
83
+ "find": r"^\d{4}-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])$",
84
+ "replace": "[DATE]",
85
+ "purpose": "YYYY-MM-DD Date Format - Validates ISO date format",
86
+ "category": "validation",
87
+ "example_input": "2024-12-25",
88
+ "example_output": "[DATE]"
89
+ },
90
+ {
91
+ "find": r"^(?:4[0-9]{12}(?:[0-9]{3})?|5[1-5][0-9]{14}|3[47][0-9]{13})$",
92
+ "replace": "[CREDIT_CARD]",
93
+ "purpose": "Credit Card Number - Identifies Visa, Mastercard, American Express formats",
94
+ "category": "validation",
95
+ "example_input": "4111111111111111",
96
+ "example_output": "[CREDIT_CARD]"
97
+ },
98
+
99
+ # Information Extraction Patterns (7 patterns)
100
+ {
101
+ "find": r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}",
102
+ "replace": "[EMAIL]",
103
+ "purpose": "Extract All Email Addresses - Finds emails anywhere in text",
104
+ "category": "extraction",
105
+ "example_input": "Contact us at support@example.com for help",
106
+ "example_output": "Contact us at [EMAIL] for help"
107
+ },
108
+ {
109
+ "find": r"https?:\/\/[^\s/$.?#].[^\s]*",
110
+ "replace": "[URL]",
111
+ "purpose": "Extract All URLs - Finds HTTP/HTTPS URLs in text",
112
+ "category": "extraction",
113
+ "example_input": "Visit https://example.com for more info",
114
+ "example_output": "Visit [URL] for more info"
115
+ },
116
+ {
117
+ "find": r"(?<=\s|^)#(\w+)",
118
+ "replace": "#[HASHTAG]",
119
+ "purpose": "Extract Hashtags - Finds social media hashtags",
120
+ "category": "extraction",
121
+ "example_input": "Love this #python tutorial!",
122
+ "example_output": "Love this #[HASHTAG] tutorial!"
123
+ },
124
+ {
125
+ "find": r"(?<=\s|^)@(\w{1,15})\b",
126
+ "replace": "@[MENTION]",
127
+ "purpose": "Extract @Mentions - Finds social media mentions (1-15 chars)",
128
+ "category": "extraction",
129
+ "example_input": "Thanks @john for the help!",
130
+ "example_output": "Thanks @[MENTION] for the help!"
131
+ },
132
+ {
133
+ "find": r"^(?P<ip>[\d.]+) (?P<identd>\S+) (?P<user>\S+) \[(?P<timestamp>.*?)\] \"(?P<request>.*?)\" (?P<status_code>\d{3}) (?P<size>\d+|-).*$",
134
+ "replace": "[LOG_ENTRY]",
135
+ "purpose": "Log File Parsing - Parses Apache/Nginx log entries with named groups",
136
+ "category": "extraction",
137
+ "example_input": '127.0.0.1 - - [10/Oct/2023:13:55:36 +0000] "GET /index.html HTTP/1.1" 200 42',
138
+ "example_output": "[LOG_ENTRY]"
139
+ },
140
+ {
141
+ "find": r'(?:^|,)("(?:[^"]|"")*"|[^,]*)',
142
+ "replace": "[CSV_FIELD]",
143
+ "purpose": "Simple CSV Parsing - Handles quoted fields with commas",
144
+ "category": "extraction",
145
+ "example_input": 'field1,"field,2",field3',
146
+ "example_output": "[CSV_FIELD][CSV_FIELD][CSV_FIELD]"
147
+ },
148
+ {
149
+ "find": r"<h1.*?>(.*?)<\/h1>",
150
+ "replace": r"\1",
151
+ "purpose": "HTML Tag Content - Extracts content from H1 tags",
152
+ "category": "extraction",
153
+ "example_input": '<h1 class="title">Welcome</h1>',
154
+ "example_output": "Welcome"
155
+ },
156
+
157
+ # Text Cleaning Patterns (5 patterns)
158
+ {
159
+ "find": r"<[^<]+?>",
160
+ "replace": "",
161
+ "purpose": "Strip HTML Tags - Removes all HTML tags from text",
162
+ "category": "cleaning",
163
+ "example_input": "<p>This is <b>bold</b> text.</p>",
164
+ "example_output": "This is bold text."
165
+ },
166
+ {
167
+ "find": r"\b(\w+)\s+\1\b",
168
+ "replace": r"\1",
169
+ "purpose": "Remove Duplicate Words - Removes consecutive duplicate words",
170
+ "category": "cleaning",
171
+ "example_input": "This is is a test",
172
+ "example_output": "This is a test"
173
+ },
174
+ {
175
+ "find": r"^\s+|\s+$",
176
+ "replace": "",
177
+ "purpose": "Trim Whitespace - Removes leading and trailing whitespace",
178
+ "category": "cleaning",
179
+ "example_input": " text with spaces ",
180
+ "example_output": "text with spaces"
181
+ },
182
+ {
183
+ "find": r"^\(?(\d{3})\)?[-.\s]?(\d{3})[-.\s]?(\d{4})$",
184
+ "replace": r"\1-\2-\3",
185
+ "purpose": "Normalize Phone Numbers - Converts to XXX-XXX-XXXX format",
186
+ "category": "cleaning",
187
+ "example_input": "(123) 456.7890",
188
+ "example_output": "123-456-7890"
189
+ },
190
+ {
191
+ "find": r"\b(\d{4}[- ]?){3}(\d{4})\b",
192
+ "replace": r"XXXX-XXXX-XXXX-\2",
193
+ "purpose": "Mask Sensitive Data - Masks credit card numbers, shows last 4 digits",
194
+ "category": "cleaning",
195
+ "example_input": "4111-1111-1111-1111",
196
+ "example_output": "XXXX-XXXX-XXXX-1111"
197
+ }
198
+ ]
199
+
200
def get_all_patterns(self) -> List[Dict[str, str]]:
    """Return the library's full pattern list.

    The list stored on ``self.patterns`` is handed back as-is (no copy),
    so callers receive the very same list object the library holds.
    """
    every_pattern = self.patterns
    return every_pattern
203
+
204
def get_patterns_by_category(self, category: str) -> List[Dict[str, str]]:
    """Collect the patterns whose ``category`` entry equals *category*.

    Args:
        category: One of 'validation', 'extraction', 'cleaning'

    Returns:
        A new list of the matching pattern dicts, in library order.
        Patterns that have no ``category`` key never match a string
        category, because ``dict.get`` yields ``None`` for them.
    """
    selected = []
    for entry in self.patterns:
        if entry.get('category') == category:
            selected.append(entry)
    return selected
212
+
213
def get_pattern_by_purpose(self, purpose_keyword: str) -> List[Dict[str, str]]:
    """Find patterns whose purpose text contains a keyword.

    The comparison is case-insensitive: both the keyword and each
    pattern's ``purpose`` (empty string when absent) are lower-cased
    before the substring test.

    Args:
        purpose_keyword: Keyword to search for in pattern purposes

    Returns:
        A new list of the matching pattern dicts, in library order.
    """
    hits = []
    for entry in self.patterns:
        needle = purpose_keyword.lower()
        haystack = entry.get('purpose', '').lower()
        if needle in haystack:
            hits.append(entry)
    return hits
221
+
222
def get_validation_patterns(self) -> List[Dict[str, str]]:
    """Shortcut for ``get_patterns_by_category('validation')``."""
    wanted_category = 'validation'
    return self.get_patterns_by_category(wanted_category)
225
+
226
def get_extraction_patterns(self) -> List[Dict[str, str]]:
    """Shortcut for ``get_patterns_by_category('extraction')``."""
    wanted_category = 'extraction'
    return self.get_patterns_by_category(wanted_category)
229
+
230
def get_cleaning_patterns(self) -> List[Dict[str, str]]:
    """Shortcut for ``get_patterns_by_category('cleaning')``."""
    wanted_category = 'cleaning'
    return self.get_patterns_by_category(wanted_category)
233
+
234
def update_settings_file(self, settings_file: str = "settings.json") -> bool:
    """
    Updates the settings.json file with the pattern library.
    Only updates if the pattern library is missing, null, empty, or has
    fewer than 10 patterns. All other keys already present in the file are
    written back unchanged.

    Args:
        settings_file: Path to the settings.json file

    Returns:
        bool: True if the file was written, False if no update was needed
        or if any error occurred (the error is printed, never raised).
    """
    try:
        if not os.path.exists(settings_file):
            print(f"{settings_file} not found. Creating new file with pattern library.")
            settings = {"pattern_library": self._convert_to_settings_format()}
        else:
            # Load existing settings
            with open(settings_file, 'r', encoding='utf-8') as f:
                settings = json.load(f)

            if "pattern_library" not in settings:
                print("No pattern_library found. Adding complete pattern library.")
                settings["pattern_library"] = self._convert_to_settings_format()
            else:
                current_patterns = settings["pattern_library"]
                # A JSON ``null`` decodes to None; len(None) would raise
                # TypeError and the library would never be repopulated.
                # Treat it as an empty library instead.
                if current_patterns is None:
                    current_patterns = []

                if len(current_patterns) >= 10:
                    print(f"Pattern library already has {len(current_patterns)} patterns. No update needed.")
                    return False

                print(f"Pattern library has only {len(current_patterns)} patterns. Updating with complete library.")
                settings["pattern_library"] = self._convert_to_settings_format()

        # Save updated settings
        with open(settings_file, 'w', encoding='utf-8') as f:
            json.dump(settings, f, indent=4, ensure_ascii=False)
        print(f"Successfully updated {settings_file} with {len(settings['pattern_library'])} regex patterns.")
        return True

    except Exception as e:
        # Deliberate best-effort boundary: report the problem and signal
        # "not updated" rather than propagating to the caller.
        print(f"Error updating settings file: {e}")
        return False
277
+
278
def _convert_to_settings_format(self) -> List[Dict[str, str]]:
    """
    Project each internal pattern onto the settings.json shape.

    Only the ``find``, ``replace`` and ``purpose`` entries are kept, in
    that order; any other entry on the pattern is dropped. A pattern that
    lacks one of the three keys raises ``KeyError``.
    """
    exported_keys = ("find", "replace", "purpose")
    converted = []
    for pattern in self.patterns:
        converted.append({key: pattern[key] for key in exported_keys})
    return converted
290
+
291
def get_pattern_categories(self) -> List[str]:
    """Return the distinct category names in sorted order.

    Patterns that have no ``category`` key are ignored.
    """
    distinct = {entry['category'] for entry in self.patterns if 'category' in entry}
    return sorted(distinct)
298
+
299
def search_patterns(self, query: str) -> List[Dict[str, str]]:
    """
    Case-insensitive substring search over pattern purposes and regexes.

    A pattern matches when the lower-cased query occurs in its lower-cased
    ``purpose`` or, failing that, in its lower-cased ``find`` expression.
    A missing field is treated as an empty string.

    Args:
        query: Search query

    Returns:
        List of matching patterns, in library order
    """
    needle = query.lower()
    searched_fields = ('purpose', 'find')
    return [
        entry
        for entry in self.patterns
        if any(needle in entry.get(field, '').lower() for field in searched_fields)
    ]
318
+
319
+ # Convenience functions for direct use
320
def get_all_regex_patterns() -> List[Dict[str, str]]:
    """Return every pattern held by a freshly built RegexPatternLibrary."""
    return RegexPatternLibrary().get_all_patterns()
324
+
325
def update_pattern_library_in_settings(settings_file: str = "settings.json") -> bool:
    """
    Module-level shortcut that builds a RegexPatternLibrary and asks it
    to refresh the pattern library stored in the settings file.

    Args:
        settings_file: Path to settings file

    Returns:
        bool: whatever ``RegexPatternLibrary.update_settings_file`` returns
        for this path (True if updated, False if no update needed)
    """
    return RegexPatternLibrary().update_settings_file(settings_file)
337
+
338
if __name__ == "__main__":
    # Demo: summarise the library on stdout, list the validation patterns,
    # then run the settings update against the default settings.json path.
    library = RegexPatternLibrary()

    print("=== Regex Pattern Library Demo ===")
    pattern_total = len(library.get_all_patterns())
    print(f"Total patterns: {pattern_total}")
    category_names = ', '.join(library.get_pattern_categories())
    print(f"Categories: {category_names}")

    print("\n=== Validation Patterns ===")
    for pattern in library.get_validation_patterns():
        purpose = pattern['purpose']
        print(f"- {purpose}")

    print("\n=== Updating settings.json ===")
    library.update_settings_file()