pomera-ai-commander 1.1.1 → 1.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (213)
  1. package/LICENSE +21 -21
  2. package/README.md +105 -680
  3. package/bin/pomera-ai-commander.js +62 -62
  4. package/core/__init__.py +65 -65
  5. package/core/app_context.py +482 -482
  6. package/core/async_text_processor.py +421 -421
  7. package/core/backup_manager.py +655 -655
  8. package/core/backup_recovery_manager.py +1199 -1033
  9. package/core/content_hash_cache.py +508 -508
  10. package/core/context_menu.py +313 -313
  11. package/core/data_directory.py +549 -0
  12. package/core/data_validator.py +1066 -1066
  13. package/core/database_connection_manager.py +744 -744
  14. package/core/database_curl_settings_manager.py +608 -608
  15. package/core/database_promera_ai_settings_manager.py +446 -446
  16. package/core/database_schema.py +411 -411
  17. package/core/database_schema_manager.py +395 -395
  18. package/core/database_settings_manager.py +1507 -1507
  19. package/core/database_settings_manager_interface.py +456 -456
  20. package/core/dialog_manager.py +734 -734
  21. package/core/diff_utils.py +239 -0
  22. package/core/efficient_line_numbers.py +540 -510
  23. package/core/error_handler.py +746 -746
  24. package/core/error_service.py +431 -431
  25. package/core/event_consolidator.py +511 -511
  26. package/core/mcp/__init__.py +43 -43
  27. package/core/mcp/find_replace_diff.py +334 -0
  28. package/core/mcp/protocol.py +288 -288
  29. package/core/mcp/schema.py +251 -251
  30. package/core/mcp/server_stdio.py +299 -299
  31. package/core/mcp/tool_registry.py +2699 -2345
  32. package/core/memento.py +275 -0
  33. package/core/memory_efficient_text_widget.py +711 -711
  34. package/core/migration_manager.py +914 -914
  35. package/core/migration_test_suite.py +1085 -1085
  36. package/core/migration_validator.py +1143 -1143
  37. package/core/optimized_find_replace.py +714 -714
  38. package/core/optimized_pattern_engine.py +424 -424
  39. package/core/optimized_search_highlighter.py +552 -552
  40. package/core/performance_monitor.py +674 -674
  41. package/core/persistence_manager.py +712 -712
  42. package/core/progressive_stats_calculator.py +632 -632
  43. package/core/regex_pattern_cache.py +529 -529
  44. package/core/regex_pattern_library.py +350 -350
  45. package/core/search_operation_manager.py +434 -434
  46. package/core/settings_defaults_registry.py +1087 -1087
  47. package/core/settings_integrity_validator.py +1111 -1111
  48. package/core/settings_serializer.py +557 -557
  49. package/core/settings_validator.py +1823 -1823
  50. package/core/smart_stats_calculator.py +709 -709
  51. package/core/statistics_update_manager.py +619 -619
  52. package/core/stats_config_manager.py +858 -858
  53. package/core/streaming_text_handler.py +723 -723
  54. package/core/task_scheduler.py +596 -596
  55. package/core/update_pattern_library.py +168 -168
  56. package/core/visibility_monitor.py +596 -596
  57. package/core/widget_cache.py +498 -498
  58. package/mcp.json +51 -61
  59. package/migrate_data.py +127 -0
  60. package/package.json +64 -57
  61. package/pomera.py +7883 -7482
  62. package/pomera_mcp_server.py +183 -144
  63. package/requirements.txt +33 -0
  64. package/scripts/Dockerfile.alpine +43 -0
  65. package/scripts/Dockerfile.gui-test +54 -0
  66. package/scripts/Dockerfile.linux +43 -0
  67. package/scripts/Dockerfile.test-linux +80 -0
  68. package/scripts/Dockerfile.ubuntu +39 -0
  69. package/scripts/README.md +53 -0
  70. package/scripts/build-all.bat +113 -0
  71. package/scripts/build-docker.bat +53 -0
  72. package/scripts/build-docker.sh +55 -0
  73. package/scripts/build-optimized.bat +101 -0
  74. package/scripts/build.sh +78 -0
  75. package/scripts/docker-compose.test.yml +27 -0
  76. package/scripts/docker-compose.yml +32 -0
  77. package/scripts/postinstall.js +62 -0
  78. package/scripts/requirements-minimal.txt +33 -0
  79. package/scripts/test-linux-simple.bat +28 -0
  80. package/scripts/validate-release-workflow.py +450 -0
  81. package/tools/__init__.py +4 -4
  82. package/tools/ai_tools.py +2891 -2891
  83. package/tools/ascii_art_generator.py +352 -352
  84. package/tools/base64_tools.py +183 -183
  85. package/tools/base_tool.py +511 -511
  86. package/tools/case_tool.py +308 -308
  87. package/tools/column_tools.py +395 -395
  88. package/tools/cron_tool.py +884 -884
  89. package/tools/curl_history.py +600 -600
  90. package/tools/curl_processor.py +1207 -1207
  91. package/tools/curl_settings.py +502 -502
  92. package/tools/curl_tool.py +5467 -5467
  93. package/tools/diff_viewer.py +1817 -1072
  94. package/tools/email_extraction_tool.py +248 -248
  95. package/tools/email_header_analyzer.py +425 -425
  96. package/tools/extraction_tools.py +250 -250
  97. package/tools/find_replace.py +2289 -1750
  98. package/tools/folder_file_reporter.py +1463 -1463
  99. package/tools/folder_file_reporter_adapter.py +480 -480
  100. package/tools/generator_tools.py +1216 -1216
  101. package/tools/hash_generator.py +255 -255
  102. package/tools/html_tool.py +656 -656
  103. package/tools/jsonxml_tool.py +729 -729
  104. package/tools/line_tools.py +419 -419
  105. package/tools/markdown_tools.py +561 -561
  106. package/tools/mcp_widget.py +1417 -1417
  107. package/tools/notes_widget.py +978 -973
  108. package/tools/number_base_converter.py +372 -372
  109. package/tools/regex_extractor.py +571 -571
  110. package/tools/slug_generator.py +310 -310
  111. package/tools/sorter_tools.py +458 -458
  112. package/tools/string_escape_tool.py +392 -392
  113. package/tools/text_statistics_tool.py +365 -365
  114. package/tools/text_wrapper.py +430 -430
  115. package/tools/timestamp_converter.py +421 -421
  116. package/tools/tool_loader.py +710 -710
  117. package/tools/translator_tools.py +522 -522
  118. package/tools/url_link_extractor.py +261 -261
  119. package/tools/url_parser.py +204 -204
  120. package/tools/whitespace_tools.py +355 -355
  121. package/tools/word_frequency_counter.py +146 -146
  122. package/core/__pycache__/__init__.cpython-313.pyc +0 -0
  123. package/core/__pycache__/app_context.cpython-313.pyc +0 -0
  124. package/core/__pycache__/async_text_processor.cpython-313.pyc +0 -0
  125. package/core/__pycache__/backup_manager.cpython-313.pyc +0 -0
  126. package/core/__pycache__/backup_recovery_manager.cpython-313.pyc +0 -0
  127. package/core/__pycache__/content_hash_cache.cpython-313.pyc +0 -0
  128. package/core/__pycache__/context_menu.cpython-313.pyc +0 -0
  129. package/core/__pycache__/data_validator.cpython-313.pyc +0 -0
  130. package/core/__pycache__/database_connection_manager.cpython-313.pyc +0 -0
  131. package/core/__pycache__/database_curl_settings_manager.cpython-313.pyc +0 -0
  132. package/core/__pycache__/database_promera_ai_settings_manager.cpython-313.pyc +0 -0
  133. package/core/__pycache__/database_schema.cpython-313.pyc +0 -0
  134. package/core/__pycache__/database_schema_manager.cpython-313.pyc +0 -0
  135. package/core/__pycache__/database_settings_manager.cpython-313.pyc +0 -0
  136. package/core/__pycache__/database_settings_manager_interface.cpython-313.pyc +0 -0
  137. package/core/__pycache__/dialog_manager.cpython-313.pyc +0 -0
  138. package/core/__pycache__/efficient_line_numbers.cpython-313.pyc +0 -0
  139. package/core/__pycache__/error_handler.cpython-313.pyc +0 -0
  140. package/core/__pycache__/error_service.cpython-313.pyc +0 -0
  141. package/core/__pycache__/event_consolidator.cpython-313.pyc +0 -0
  142. package/core/__pycache__/memory_efficient_text_widget.cpython-313.pyc +0 -0
  143. package/core/__pycache__/migration_manager.cpython-313.pyc +0 -0
  144. package/core/__pycache__/migration_test_suite.cpython-313.pyc +0 -0
  145. package/core/__pycache__/migration_validator.cpython-313.pyc +0 -0
  146. package/core/__pycache__/optimized_find_replace.cpython-313.pyc +0 -0
  147. package/core/__pycache__/optimized_pattern_engine.cpython-313.pyc +0 -0
  148. package/core/__pycache__/optimized_search_highlighter.cpython-313.pyc +0 -0
  149. package/core/__pycache__/performance_monitor.cpython-313.pyc +0 -0
  150. package/core/__pycache__/persistence_manager.cpython-313.pyc +0 -0
  151. package/core/__pycache__/progressive_stats_calculator.cpython-313.pyc +0 -0
  152. package/core/__pycache__/regex_pattern_cache.cpython-313.pyc +0 -0
  153. package/core/__pycache__/regex_pattern_library.cpython-313.pyc +0 -0
  154. package/core/__pycache__/search_operation_manager.cpython-313.pyc +0 -0
  155. package/core/__pycache__/settings_defaults_registry.cpython-313.pyc +0 -0
  156. package/core/__pycache__/settings_integrity_validator.cpython-313.pyc +0 -0
  157. package/core/__pycache__/settings_serializer.cpython-313.pyc +0 -0
  158. package/core/__pycache__/settings_validator.cpython-313.pyc +0 -0
  159. package/core/__pycache__/smart_stats_calculator.cpython-313.pyc +0 -0
  160. package/core/__pycache__/statistics_update_manager.cpython-313.pyc +0 -0
  161. package/core/__pycache__/stats_config_manager.cpython-313.pyc +0 -0
  162. package/core/__pycache__/streaming_text_handler.cpython-313.pyc +0 -0
  163. package/core/__pycache__/task_scheduler.cpython-313.pyc +0 -0
  164. package/core/__pycache__/visibility_monitor.cpython-313.pyc +0 -0
  165. package/core/__pycache__/widget_cache.cpython-313.pyc +0 -0
  166. package/core/mcp/__pycache__/__init__.cpython-313.pyc +0 -0
  167. package/core/mcp/__pycache__/protocol.cpython-313.pyc +0 -0
  168. package/core/mcp/__pycache__/schema.cpython-313.pyc +0 -0
  169. package/core/mcp/__pycache__/server_stdio.cpython-313.pyc +0 -0
  170. package/core/mcp/__pycache__/tool_registry.cpython-313.pyc +0 -0
  171. package/tools/__pycache__/__init__.cpython-313.pyc +0 -0
  172. package/tools/__pycache__/ai_tools.cpython-313.pyc +0 -0
  173. package/tools/__pycache__/ascii_art_generator.cpython-313.pyc +0 -0
  174. package/tools/__pycache__/base64_tools.cpython-313.pyc +0 -0
  175. package/tools/__pycache__/base_tool.cpython-313.pyc +0 -0
  176. package/tools/__pycache__/case_tool.cpython-313.pyc +0 -0
  177. package/tools/__pycache__/column_tools.cpython-313.pyc +0 -0
  178. package/tools/__pycache__/cron_tool.cpython-313.pyc +0 -0
  179. package/tools/__pycache__/curl_history.cpython-313.pyc +0 -0
  180. package/tools/__pycache__/curl_processor.cpython-313.pyc +0 -0
  181. package/tools/__pycache__/curl_settings.cpython-313.pyc +0 -0
  182. package/tools/__pycache__/curl_tool.cpython-313.pyc +0 -0
  183. package/tools/__pycache__/diff_viewer.cpython-313.pyc +0 -0
  184. package/tools/__pycache__/email_extraction_tool.cpython-313.pyc +0 -0
  185. package/tools/__pycache__/email_header_analyzer.cpython-313.pyc +0 -0
  186. package/tools/__pycache__/extraction_tools.cpython-313.pyc +0 -0
  187. package/tools/__pycache__/find_replace.cpython-313.pyc +0 -0
  188. package/tools/__pycache__/folder_file_reporter.cpython-313.pyc +0 -0
  189. package/tools/__pycache__/folder_file_reporter_adapter.cpython-313.pyc +0 -0
  190. package/tools/__pycache__/generator_tools.cpython-313.pyc +0 -0
  191. package/tools/__pycache__/hash_generator.cpython-313.pyc +0 -0
  192. package/tools/__pycache__/html_tool.cpython-313.pyc +0 -0
  193. package/tools/__pycache__/huggingface_helper.cpython-313.pyc +0 -0
  194. package/tools/__pycache__/jsonxml_tool.cpython-313.pyc +0 -0
  195. package/tools/__pycache__/line_tools.cpython-313.pyc +0 -0
  196. package/tools/__pycache__/list_comparator.cpython-313.pyc +0 -0
  197. package/tools/__pycache__/markdown_tools.cpython-313.pyc +0 -0
  198. package/tools/__pycache__/mcp_widget.cpython-313.pyc +0 -0
  199. package/tools/__pycache__/notes_widget.cpython-313.pyc +0 -0
  200. package/tools/__pycache__/number_base_converter.cpython-313.pyc +0 -0
  201. package/tools/__pycache__/regex_extractor.cpython-313.pyc +0 -0
  202. package/tools/__pycache__/slug_generator.cpython-313.pyc +0 -0
  203. package/tools/__pycache__/sorter_tools.cpython-313.pyc +0 -0
  204. package/tools/__pycache__/string_escape_tool.cpython-313.pyc +0 -0
  205. package/tools/__pycache__/text_statistics_tool.cpython-313.pyc +0 -0
  206. package/tools/__pycache__/text_wrapper.cpython-313.pyc +0 -0
  207. package/tools/__pycache__/timestamp_converter.cpython-313.pyc +0 -0
  208. package/tools/__pycache__/tool_loader.cpython-313.pyc +0 -0
  209. package/tools/__pycache__/translator_tools.cpython-313.pyc +0 -0
  210. package/tools/__pycache__/url_link_extractor.cpython-313.pyc +0 -0
  211. package/tools/__pycache__/url_parser.cpython-313.pyc +0 -0
  212. package/tools/__pycache__/whitespace_tools.cpython-313.pyc +0 -0
  213. package/tools/__pycache__/word_frequency_counter.cpython-313.pyc +0 -0
@@ -1,169 +1,169 @@
1
- #!/usr/bin/env python3
2
- """
3
- Script to update the pattern library in settings.json with the 20 regex use cases
4
- """
5
-
6
- import json
7
- import os
8
-
9
- def get_default_pattern_library():
10
- """
11
- Returns the 20 regex use cases extracted from RegexUseCases.md
12
- """
13
- return [
14
- # Data Validation Patterns
15
- {
16
- "find": r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$",
17
- "replace": "[EMAIL]",
18
- "purpose": "Email Address Validation - Validates standard email format"
19
- },
20
- {
21
- "find": r"^(?=.*[a-z])(?=.*[A-Z])(?=.*\d)(?=.*[@$!%*?&])[A-Za-z\d@$!%*?&]{8,}$",
22
- "replace": "[STRONG_PASSWORD]",
23
- "purpose": "Password Strength - Min 8 chars, uppercase, lowercase, digit, special char"
24
- },
25
- {
26
- "find": r"^\(?(\d{3})\)?[-.\s]?(\d{3})[-.\s]?(\d{4})$",
27
- "replace": r"(\1) \2-\3",
28
- "purpose": "North American Phone Number - Validates and formats 10-digit phone numbers"
29
- },
30
- {
31
- "find": r"^https?:\/\/(?:www\.)?[-a-zA-Z0-9@:%.\_\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)$",
32
- "replace": "[URL]",
33
- "purpose": "URL Structure Validation - Validates HTTP/HTTPS URLs"
34
- },
35
- {
36
- "find": r"^[a-zA-Z0-9]([._-](?![._-])|[a-zA-Z0-9]){3,18}[a-zA-Z0-9]$",
37
- "replace": "[USERNAME]",
38
- "purpose": "Username Format - 5-20 chars, alphanumeric start/end, no consecutive special chars"
39
- },
40
- {
41
- "find": r"^((25[0-5]|2[0-4][0-9]|1?[0-9][0-9]?)\.){3}(25[0-5]|2[0-4][0-9]|1?[0-9][0-9]?)$",
42
- "replace": "[IP_ADDRESS]",
43
- "purpose": "IPv4 Address Validation - Validates IP addresses (0-255 per octet)"
44
- },
45
- {
46
- "find": r"^\d{4}-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])$",
47
- "replace": "[DATE]",
48
- "purpose": "YYYY-MM-DD Date Format - Validates ISO date format"
49
- },
50
- {
51
- "find": r"^(?:4[0-9]{12}(?:[0-9]{3})?|5[1-5][0-9]{14}|3[47][0-9]{13})$",
52
- "replace": "[CREDIT_CARD]",
53
- "purpose": "Credit Card Number - Identifies Visa, Mastercard, American Express formats"
54
- },
55
-
56
- # Information Extraction Patterns
57
- {
58
- "find": r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}",
59
- "replace": "[EMAIL]",
60
- "purpose": "Extract All Email Addresses - Finds emails anywhere in text"
61
- },
62
- {
63
- "find": r"https?:\/\/[^\s/$.?#].[^\s]*",
64
- "replace": "[URL]",
65
- "purpose": "Extract All URLs - Finds HTTP/HTTPS URLs in text"
66
- },
67
- {
68
- "find": r"(?<=\s|^)#(\w+)",
69
- "replace": "#[HASHTAG]",
70
- "purpose": "Extract Hashtags - Finds social media hashtags"
71
- },
72
- {
73
- "find": r"(?<=\s|^)@(\w{1,15})\b",
74
- "replace": "@[MENTION]",
75
- "purpose": "Extract @Mentions - Finds social media mentions (1-15 chars)"
76
- },
77
- {
78
- "find": r"^(?P<ip>[\d.]+) (?P<identd>\S+) (?P<user>\S+) \[(?P<timestamp>.*?)\] \"(?P<request>.*?)\" (?P<status_code>\d{3}) (?P<size>\d+|-).*$",
79
- "replace": "[LOG_ENTRY]",
80
- "purpose": "Log File Parsing - Parses Apache/Nginx log entries with named groups"
81
- },
82
- {
83
- "find": r"(?:^|,)(\"(?:[^\"]|\"\")*\"|[^,]*)",
84
- "replace": "[CSV_FIELD]",
85
- "purpose": "Simple CSV Parsing - Handles quoted fields with commas"
86
- },
87
- {
88
- "find": r"<h1.*?>(.*?)<\/h1>",
89
- "replace": r"\1",
90
- "purpose": "HTML Tag Content - Extracts content from H1 tags"
91
- },
92
-
93
- # Text Cleaning Patterns
94
- {
95
- "find": r"<[^<]+?>",
96
- "replace": "",
97
- "purpose": "Strip HTML Tags - Removes all HTML tags from text"
98
- },
99
- {
100
- "find": r"\b(\w+)\s+\1\b",
101
- "replace": r"\1",
102
- "purpose": "Remove Duplicate Words - Removes consecutive duplicate words"
103
- },
104
- {
105
- "find": r"^\s+|\s+$",
106
- "replace": "",
107
- "purpose": "Trim Whitespace - Removes leading and trailing whitespace"
108
- },
109
- {
110
- "find": r"^\(?(\d{3})\)?[-.\s]?(\d{3})[-.\s]?(\d{4})$",
111
- "replace": r"\1-\2-\3",
112
- "purpose": "Normalize Phone Numbers - Converts to XXX-XXX-XXXX format"
113
- },
114
- {
115
- "find": r"\b(\d{4}[- ]?){3}(\d{4})\b",
116
- "replace": r"XXXX-XXXX-XXXX-\2",
117
- "purpose": "Mask Sensitive Data - Masks credit card numbers, shows last 4 digits"
118
- }
119
- ]
120
-
121
- def update_settings_pattern_library():
122
- """
123
- Updates the settings.json file with the complete pattern library
124
- if it's empty or if the file is being created for the first time.
125
- """
126
- settings_file = "settings.json"
127
-
128
- # Check if settings.json exists
129
- if not os.path.exists(settings_file):
130
- print("settings.json not found. Creating new file with pattern library.")
131
- settings = {
132
- "pattern_library": get_default_pattern_library()
133
- }
134
- else:
135
- # Load existing settings
136
- try:
137
- with open(settings_file, 'r', encoding='utf-8') as f:
138
- settings = json.load(f)
139
- except (json.JSONDecodeError, FileNotFoundError) as e:
140
- print(f"Error reading settings.json: {e}")
141
- print("Creating new settings with pattern library.")
142
- settings = {
143
- "pattern_library": get_default_pattern_library()
144
- }
145
-
146
- # Check if pattern_library exists and if it's empty or has only basic patterns
147
- if "pattern_library" not in settings:
148
- print("No pattern_library found. Adding complete pattern library.")
149
- settings["pattern_library"] = get_default_pattern_library()
150
- else:
151
- current_patterns = settings["pattern_library"]
152
- # Check if it's empty or has only basic patterns (less than 10 patterns)
153
- if len(current_patterns) < 10:
154
- print(f"Pattern library has only {len(current_patterns)} patterns. Updating with complete library.")
155
- settings["pattern_library"] = get_default_pattern_library()
156
- else:
157
- print(f"Pattern library already has {len(current_patterns)} patterns. No update needed.")
158
- return
159
-
160
- # Save updated settings
161
- try:
162
- with open(settings_file, 'w', encoding='utf-8') as f:
163
- json.dump(settings, f, indent=4, ensure_ascii=False)
164
- print(f"Successfully updated {settings_file} with {len(settings['pattern_library'])} regex patterns.")
165
- except Exception as e:
166
- print(f"Error writing to settings.json: {e}")
167
-
168
- if __name__ == "__main__":
1
+ #!/usr/bin/env python3
2
+ """
3
+ Script to update the pattern library in settings.json with the 20 regex use cases
4
+ """
5
+
6
+ import json
7
+ import os
8
+
9
+ def get_default_pattern_library():
10
+ """
11
+ Returns the 20 regex use cases extracted from RegexUseCases.md
12
+ """
13
+ return [
14
+ # Data Validation Patterns
15
+ {
16
+ "find": r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$",
17
+ "replace": "[EMAIL]",
18
+ "purpose": "Email Address Validation - Validates standard email format"
19
+ },
20
+ {
21
+ "find": r"^(?=.*[a-z])(?=.*[A-Z])(?=.*\d)(?=.*[@$!%*?&])[A-Za-z\d@$!%*?&]{8,}$",
22
+ "replace": "[STRONG_PASSWORD]",
23
+ "purpose": "Password Strength - Min 8 chars, uppercase, lowercase, digit, special char"
24
+ },
25
+ {
26
+ "find": r"^\(?(\d{3})\)?[-.\s]?(\d{3})[-.\s]?(\d{4})$",
27
+ "replace": r"(\1) \2-\3",
28
+ "purpose": "North American Phone Number - Validates and formats 10-digit phone numbers"
29
+ },
30
+ {
31
+ "find": r"^https?:\/\/(?:www\.)?[-a-zA-Z0-9@:%.\_\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)$",
32
+ "replace": "[URL]",
33
+ "purpose": "URL Structure Validation - Validates HTTP/HTTPS URLs"
34
+ },
35
+ {
36
+ "find": r"^[a-zA-Z0-9]([._-](?![._-])|[a-zA-Z0-9]){3,18}[a-zA-Z0-9]$",
37
+ "replace": "[USERNAME]",
38
+ "purpose": "Username Format - 5-20 chars, alphanumeric start/end, no consecutive special chars"
39
+ },
40
+ {
41
+ "find": r"^((25[0-5]|2[0-4][0-9]|1?[0-9][0-9]?)\.){3}(25[0-5]|2[0-4][0-9]|1?[0-9][0-9]?)$",
42
+ "replace": "[IP_ADDRESS]",
43
+ "purpose": "IPv4 Address Validation - Validates IP addresses (0-255 per octet)"
44
+ },
45
+ {
46
+ "find": r"^\d{4}-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])$",
47
+ "replace": "[DATE]",
48
+ "purpose": "YYYY-MM-DD Date Format - Validates ISO date format"
49
+ },
50
+ {
51
+ "find": r"^(?:4[0-9]{12}(?:[0-9]{3})?|5[1-5][0-9]{14}|3[47][0-9]{13})$",
52
+ "replace": "[CREDIT_CARD]",
53
+ "purpose": "Credit Card Number - Identifies Visa, Mastercard, American Express formats"
54
+ },
55
+
56
+ # Information Extraction Patterns
57
+ {
58
+ "find": r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}",
59
+ "replace": "[EMAIL]",
60
+ "purpose": "Extract All Email Addresses - Finds emails anywhere in text"
61
+ },
62
+ {
63
+ "find": r"https?:\/\/[^\s/$.?#].[^\s]*",
64
+ "replace": "[URL]",
65
+ "purpose": "Extract All URLs - Finds HTTP/HTTPS URLs in text"
66
+ },
67
+ {
68
+ "find": r"(?<=\s|^)#(\w+)",
69
+ "replace": "#[HASHTAG]",
70
+ "purpose": "Extract Hashtags - Finds social media hashtags"
71
+ },
72
+ {
73
+ "find": r"(?<=\s|^)@(\w{1,15})\b",
74
+ "replace": "@[MENTION]",
75
+ "purpose": "Extract @Mentions - Finds social media mentions (1-15 chars)"
76
+ },
77
+ {
78
+ "find": r"^(?P<ip>[\d.]+) (?P<identd>\S+) (?P<user>\S+) \[(?P<timestamp>.*?)\] \"(?P<request>.*?)\" (?P<status_code>\d{3}) (?P<size>\d+|-).*$",
79
+ "replace": "[LOG_ENTRY]",
80
+ "purpose": "Log File Parsing - Parses Apache/Nginx log entries with named groups"
81
+ },
82
+ {
83
+ "find": r"(?:^|,)(\"(?:[^\"]|\"\")*\"|[^,]*)",
84
+ "replace": "[CSV_FIELD]",
85
+ "purpose": "Simple CSV Parsing - Handles quoted fields with commas"
86
+ },
87
+ {
88
+ "find": r"<h1.*?>(.*?)<\/h1>",
89
+ "replace": r"\1",
90
+ "purpose": "HTML Tag Content - Extracts content from H1 tags"
91
+ },
92
+
93
+ # Text Cleaning Patterns
94
+ {
95
+ "find": r"<[^<]+?>",
96
+ "replace": "",
97
+ "purpose": "Strip HTML Tags - Removes all HTML tags from text"
98
+ },
99
+ {
100
+ "find": r"\b(\w+)\s+\1\b",
101
+ "replace": r"\1",
102
+ "purpose": "Remove Duplicate Words - Removes consecutive duplicate words"
103
+ },
104
+ {
105
+ "find": r"^\s+|\s+$",
106
+ "replace": "",
107
+ "purpose": "Trim Whitespace - Removes leading and trailing whitespace"
108
+ },
109
+ {
110
+ "find": r"^\(?(\d{3})\)?[-.\s]?(\d{3})[-.\s]?(\d{4})$",
111
+ "replace": r"\1-\2-\3",
112
+ "purpose": "Normalize Phone Numbers - Converts to XXX-XXX-XXXX format"
113
+ },
114
+ {
115
+ "find": r"\b(\d{4}[- ]?){3}(\d{4})\b",
116
+ "replace": r"XXXX-XXXX-XXXX-\2",
117
+ "purpose": "Mask Sensitive Data - Masks credit card numbers, shows last 4 digits"
118
+ }
119
+ ]
120
+
121
+ def update_settings_pattern_library():
122
+ """
123
+ Updates the settings.json file with the complete pattern library
124
+ if it's empty or if the file is being created for the first time.
125
+ """
126
+ settings_file = "settings.json"
127
+
128
+ # Check if settings.json exists
129
+ if not os.path.exists(settings_file):
130
+ print("settings.json not found. Creating new file with pattern library.")
131
+ settings = {
132
+ "pattern_library": get_default_pattern_library()
133
+ }
134
+ else:
135
+ # Load existing settings
136
+ try:
137
+ with open(settings_file, 'r', encoding='utf-8') as f:
138
+ settings = json.load(f)
139
+ except (json.JSONDecodeError, FileNotFoundError) as e:
140
+ print(f"Error reading settings.json: {e}")
141
+ print("Creating new settings with pattern library.")
142
+ settings = {
143
+ "pattern_library": get_default_pattern_library()
144
+ }
145
+
146
+ # Check if pattern_library exists and if it's empty or has only basic patterns
147
+ if "pattern_library" not in settings:
148
+ print("No pattern_library found. Adding complete pattern library.")
149
+ settings["pattern_library"] = get_default_pattern_library()
150
+ else:
151
+ current_patterns = settings["pattern_library"]
152
+ # Check if it's empty or has only basic patterns (less than 10 patterns)
153
+ if len(current_patterns) < 10:
154
+ print(f"Pattern library has only {len(current_patterns)} patterns. Updating with complete library.")
155
+ settings["pattern_library"] = get_default_pattern_library()
156
+ else:
157
+ print(f"Pattern library already has {len(current_patterns)} patterns. No update needed.")
158
+ return
159
+
160
+ # Save updated settings
161
+ try:
162
+ with open(settings_file, 'w', encoding='utf-8') as f:
163
+ json.dump(settings, f, indent=4, ensure_ascii=False)
164
+ print(f"Successfully updated {settings_file} with {len(settings['pattern_library'])} regex patterns.")
165
+ except Exception as e:
166
+ print(f"Error writing to settings.json: {e}")
167
+
168
+ if __name__ == "__main__":
169
169
  update_settings_pattern_library()