pomera-ai-commander 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +680 -0
  3. package/bin/pomera-ai-commander.js +62 -0
  4. package/core/__init__.py +66 -0
  5. package/core/__pycache__/__init__.cpython-313.pyc +0 -0
  6. package/core/__pycache__/app_context.cpython-313.pyc +0 -0
  7. package/core/__pycache__/async_text_processor.cpython-313.pyc +0 -0
  8. package/core/__pycache__/backup_manager.cpython-313.pyc +0 -0
  9. package/core/__pycache__/backup_recovery_manager.cpython-313.pyc +0 -0
  10. package/core/__pycache__/content_hash_cache.cpython-313.pyc +0 -0
  11. package/core/__pycache__/context_menu.cpython-313.pyc +0 -0
  12. package/core/__pycache__/data_validator.cpython-313.pyc +0 -0
  13. package/core/__pycache__/database_connection_manager.cpython-313.pyc +0 -0
  14. package/core/__pycache__/database_curl_settings_manager.cpython-313.pyc +0 -0
  15. package/core/__pycache__/database_promera_ai_settings_manager.cpython-313.pyc +0 -0
  16. package/core/__pycache__/database_schema.cpython-313.pyc +0 -0
  17. package/core/__pycache__/database_schema_manager.cpython-313.pyc +0 -0
  18. package/core/__pycache__/database_settings_manager.cpython-313.pyc +0 -0
  19. package/core/__pycache__/database_settings_manager_interface.cpython-313.pyc +0 -0
  20. package/core/__pycache__/dialog_manager.cpython-313.pyc +0 -0
  21. package/core/__pycache__/efficient_line_numbers.cpython-313.pyc +0 -0
  22. package/core/__pycache__/error_handler.cpython-313.pyc +0 -0
  23. package/core/__pycache__/error_service.cpython-313.pyc +0 -0
  24. package/core/__pycache__/event_consolidator.cpython-313.pyc +0 -0
  25. package/core/__pycache__/memory_efficient_text_widget.cpython-313.pyc +0 -0
  26. package/core/__pycache__/migration_manager.cpython-313.pyc +0 -0
  27. package/core/__pycache__/migration_test_suite.cpython-313.pyc +0 -0
  28. package/core/__pycache__/migration_validator.cpython-313.pyc +0 -0
  29. package/core/__pycache__/optimized_find_replace.cpython-313.pyc +0 -0
  30. package/core/__pycache__/optimized_pattern_engine.cpython-313.pyc +0 -0
  31. package/core/__pycache__/optimized_search_highlighter.cpython-313.pyc +0 -0
  32. package/core/__pycache__/performance_monitor.cpython-313.pyc +0 -0
  33. package/core/__pycache__/persistence_manager.cpython-313.pyc +0 -0
  34. package/core/__pycache__/progressive_stats_calculator.cpython-313.pyc +0 -0
  35. package/core/__pycache__/regex_pattern_cache.cpython-313.pyc +0 -0
  36. package/core/__pycache__/regex_pattern_library.cpython-313.pyc +0 -0
  37. package/core/__pycache__/search_operation_manager.cpython-313.pyc +0 -0
  38. package/core/__pycache__/settings_defaults_registry.cpython-313.pyc +0 -0
  39. package/core/__pycache__/settings_integrity_validator.cpython-313.pyc +0 -0
  40. package/core/__pycache__/settings_serializer.cpython-313.pyc +0 -0
  41. package/core/__pycache__/settings_validator.cpython-313.pyc +0 -0
  42. package/core/__pycache__/smart_stats_calculator.cpython-313.pyc +0 -0
  43. package/core/__pycache__/statistics_update_manager.cpython-313.pyc +0 -0
  44. package/core/__pycache__/stats_config_manager.cpython-313.pyc +0 -0
  45. package/core/__pycache__/streaming_text_handler.cpython-313.pyc +0 -0
  46. package/core/__pycache__/task_scheduler.cpython-313.pyc +0 -0
  47. package/core/__pycache__/visibility_monitor.cpython-313.pyc +0 -0
  48. package/core/__pycache__/widget_cache.cpython-313.pyc +0 -0
  49. package/core/app_context.py +482 -0
  50. package/core/async_text_processor.py +422 -0
  51. package/core/backup_manager.py +656 -0
  52. package/core/backup_recovery_manager.py +1034 -0
  53. package/core/content_hash_cache.py +509 -0
  54. package/core/context_menu.py +313 -0
  55. package/core/data_validator.py +1067 -0
  56. package/core/database_connection_manager.py +745 -0
  57. package/core/database_curl_settings_manager.py +609 -0
  58. package/core/database_promera_ai_settings_manager.py +447 -0
  59. package/core/database_schema.py +412 -0
  60. package/core/database_schema_manager.py +396 -0
  61. package/core/database_settings_manager.py +1508 -0
  62. package/core/database_settings_manager_interface.py +457 -0
  63. package/core/dialog_manager.py +735 -0
  64. package/core/efficient_line_numbers.py +511 -0
  65. package/core/error_handler.py +747 -0
  66. package/core/error_service.py +431 -0
  67. package/core/event_consolidator.py +512 -0
  68. package/core/mcp/__init__.py +43 -0
  69. package/core/mcp/__pycache__/__init__.cpython-313.pyc +0 -0
  70. package/core/mcp/__pycache__/protocol.cpython-313.pyc +0 -0
  71. package/core/mcp/__pycache__/schema.cpython-313.pyc +0 -0
  72. package/core/mcp/__pycache__/server_stdio.cpython-313.pyc +0 -0
  73. package/core/mcp/__pycache__/tool_registry.cpython-313.pyc +0 -0
  74. package/core/mcp/protocol.py +288 -0
  75. package/core/mcp/schema.py +251 -0
  76. package/core/mcp/server_stdio.py +299 -0
  77. package/core/mcp/tool_registry.py +2345 -0
  78. package/core/memory_efficient_text_widget.py +712 -0
  79. package/core/migration_manager.py +915 -0
  80. package/core/migration_test_suite.py +1086 -0
  81. package/core/migration_validator.py +1144 -0
  82. package/core/optimized_find_replace.py +715 -0
  83. package/core/optimized_pattern_engine.py +424 -0
  84. package/core/optimized_search_highlighter.py +553 -0
  85. package/core/performance_monitor.py +675 -0
  86. package/core/persistence_manager.py +713 -0
  87. package/core/progressive_stats_calculator.py +632 -0
  88. package/core/regex_pattern_cache.py +530 -0
  89. package/core/regex_pattern_library.py +351 -0
  90. package/core/search_operation_manager.py +435 -0
  91. package/core/settings_defaults_registry.py +1087 -0
  92. package/core/settings_integrity_validator.py +1112 -0
  93. package/core/settings_serializer.py +558 -0
  94. package/core/settings_validator.py +1824 -0
  95. package/core/smart_stats_calculator.py +710 -0
  96. package/core/statistics_update_manager.py +619 -0
  97. package/core/stats_config_manager.py +858 -0
  98. package/core/streaming_text_handler.py +723 -0
  99. package/core/task_scheduler.py +596 -0
  100. package/core/update_pattern_library.py +169 -0
  101. package/core/visibility_monitor.py +596 -0
  102. package/core/widget_cache.py +498 -0
  103. package/mcp.json +61 -0
  104. package/package.json +57 -0
  105. package/pomera.py +7483 -0
  106. package/pomera_mcp_server.py +144 -0
  107. package/tools/__init__.py +5 -0
  108. package/tools/__pycache__/__init__.cpython-313.pyc +0 -0
  109. package/tools/__pycache__/ai_tools.cpython-313.pyc +0 -0
  110. package/tools/__pycache__/ascii_art_generator.cpython-313.pyc +0 -0
  111. package/tools/__pycache__/base64_tools.cpython-313.pyc +0 -0
  112. package/tools/__pycache__/base_tool.cpython-313.pyc +0 -0
  113. package/tools/__pycache__/case_tool.cpython-313.pyc +0 -0
  114. package/tools/__pycache__/column_tools.cpython-313.pyc +0 -0
  115. package/tools/__pycache__/cron_tool.cpython-313.pyc +0 -0
  116. package/tools/__pycache__/curl_history.cpython-313.pyc +0 -0
  117. package/tools/__pycache__/curl_processor.cpython-313.pyc +0 -0
  118. package/tools/__pycache__/curl_settings.cpython-313.pyc +0 -0
  119. package/tools/__pycache__/curl_tool.cpython-313.pyc +0 -0
  120. package/tools/__pycache__/diff_viewer.cpython-313.pyc +0 -0
  121. package/tools/__pycache__/email_extraction_tool.cpython-313.pyc +0 -0
  122. package/tools/__pycache__/email_header_analyzer.cpython-313.pyc +0 -0
  123. package/tools/__pycache__/extraction_tools.cpython-313.pyc +0 -0
  124. package/tools/__pycache__/find_replace.cpython-313.pyc +0 -0
  125. package/tools/__pycache__/folder_file_reporter.cpython-313.pyc +0 -0
  126. package/tools/__pycache__/folder_file_reporter_adapter.cpython-313.pyc +0 -0
  127. package/tools/__pycache__/generator_tools.cpython-313.pyc +0 -0
  128. package/tools/__pycache__/hash_generator.cpython-313.pyc +0 -0
  129. package/tools/__pycache__/html_tool.cpython-313.pyc +0 -0
  130. package/tools/__pycache__/huggingface_helper.cpython-313.pyc +0 -0
  131. package/tools/__pycache__/jsonxml_tool.cpython-313.pyc +0 -0
  132. package/tools/__pycache__/line_tools.cpython-313.pyc +0 -0
  133. package/tools/__pycache__/list_comparator.cpython-313.pyc +0 -0
  134. package/tools/__pycache__/markdown_tools.cpython-313.pyc +0 -0
  135. package/tools/__pycache__/mcp_widget.cpython-313.pyc +0 -0
  136. package/tools/__pycache__/notes_widget.cpython-313.pyc +0 -0
  137. package/tools/__pycache__/number_base_converter.cpython-313.pyc +0 -0
  138. package/tools/__pycache__/regex_extractor.cpython-313.pyc +0 -0
  139. package/tools/__pycache__/slug_generator.cpython-313.pyc +0 -0
  140. package/tools/__pycache__/sorter_tools.cpython-313.pyc +0 -0
  141. package/tools/__pycache__/string_escape_tool.cpython-313.pyc +0 -0
  142. package/tools/__pycache__/text_statistics_tool.cpython-313.pyc +0 -0
  143. package/tools/__pycache__/text_wrapper.cpython-313.pyc +0 -0
  144. package/tools/__pycache__/timestamp_converter.cpython-313.pyc +0 -0
  145. package/tools/__pycache__/tool_loader.cpython-313.pyc +0 -0
  146. package/tools/__pycache__/translator_tools.cpython-313.pyc +0 -0
  147. package/tools/__pycache__/url_link_extractor.cpython-313.pyc +0 -0
  148. package/tools/__pycache__/url_parser.cpython-313.pyc +0 -0
  149. package/tools/__pycache__/whitespace_tools.cpython-313.pyc +0 -0
  150. package/tools/__pycache__/word_frequency_counter.cpython-313.pyc +0 -0
  151. package/tools/ai_tools.py +2892 -0
  152. package/tools/ascii_art_generator.py +353 -0
  153. package/tools/base64_tools.py +184 -0
  154. package/tools/base_tool.py +511 -0
  155. package/tools/case_tool.py +309 -0
  156. package/tools/column_tools.py +396 -0
  157. package/tools/cron_tool.py +885 -0
  158. package/tools/curl_history.py +601 -0
  159. package/tools/curl_processor.py +1208 -0
  160. package/tools/curl_settings.py +503 -0
  161. package/tools/curl_tool.py +5467 -0
  162. package/tools/diff_viewer.py +1072 -0
  163. package/tools/email_extraction_tool.py +249 -0
  164. package/tools/email_header_analyzer.py +426 -0
  165. package/tools/extraction_tools.py +250 -0
  166. package/tools/find_replace.py +1751 -0
  167. package/tools/folder_file_reporter.py +1463 -0
  168. package/tools/folder_file_reporter_adapter.py +480 -0
  169. package/tools/generator_tools.py +1217 -0
  170. package/tools/hash_generator.py +256 -0
  171. package/tools/html_tool.py +657 -0
  172. package/tools/huggingface_helper.py +449 -0
  173. package/tools/jsonxml_tool.py +730 -0
  174. package/tools/line_tools.py +419 -0
  175. package/tools/list_comparator.py +720 -0
  176. package/tools/markdown_tools.py +562 -0
  177. package/tools/mcp_widget.py +1417 -0
  178. package/tools/notes_widget.py +973 -0
  179. package/tools/number_base_converter.py +373 -0
  180. package/tools/regex_extractor.py +572 -0
  181. package/tools/slug_generator.py +311 -0
  182. package/tools/sorter_tools.py +459 -0
  183. package/tools/string_escape_tool.py +393 -0
  184. package/tools/text_statistics_tool.py +366 -0
  185. package/tools/text_wrapper.py +431 -0
  186. package/tools/timestamp_converter.py +422 -0
  187. package/tools/tool_loader.py +710 -0
  188. package/tools/translator_tools.py +523 -0
  189. package/tools/url_link_extractor.py +262 -0
  190. package/tools/url_parser.py +205 -0
  191. package/tools/whitespace_tools.py +356 -0
  192. package/tools/word_frequency_counter.py +147 -0
@@ -0,0 +1,424 @@
1
+ """
2
+ Optimized Pattern Engine for fast text analysis with minimal regex usage.
3
+ Provides specialized algorithms optimized for different text sizes with Unicode awareness.
4
+ """
5
+
6
+ import re
7
+ import unicodedata
8
+ from typing import Dict, Optional, Tuple, List
9
+ from dataclasses import dataclass
10
+ from functools import lru_cache
11
+
12
+
13
@dataclass
class TextStructure:
    """
    Result record for a text structure analysis pass.

    Every field defaults to zero (or ``"fast"`` for the method label), so a
    freshly constructed instance describes empty text.
    """

    # --- Structural counts -------------------------------------------------
    # NOTE: OptimizedPatternEngine.analyze_text_structure stores the UTF-8
    # byte length in char_count, not the number of code points.
    char_count: int = 0
    word_count: int = 0
    sentence_count: int = 0
    line_count: int = 0
    paragraph_count: int = 0
    whitespace_count: int = 0
    punctuation_count: int = 0

    # --- Performance metadata ----------------------------------------------
    # Label of the size bucket the analysis selected: fast, standard, regex
    processing_method: str = "fast"
    # Elapsed analysis time in milliseconds
    processing_time_ms: float = 0.0
27
+
28
+
29
class OptimizedPatternEngine:
    """
    High-performance pattern engine that minimizes regex usage.

    Uses string-based counting methods where possible and compiled regex
    patterns otherwise. Dynamic patterns requested through
    ``get_compiled_pattern`` are kept in a bounded, per-instance LRU cache.
    """

    # Sentence ending punctuation
    SENTENCE_ENDINGS = frozenset('.!?')

    # Common whitespace characters
    WHITESPACE_CHARS = frozenset(' \t\n\r\f\v')

    # Word boundary characters (optimized set)
    WORD_BOUNDARIES = frozenset(' \t\n\r\f\v.,;:!?()[]{}"\'-—–')

    def __init__(self):
        """Initialize the engine: precompile fixed patterns and set up the cache."""
        # Compiled regex patterns (built once per engine)
        self._word_pattern = re.compile(r'\b\w+\b', re.UNICODE)
        self._sentence_pattern = re.compile(r'[.!?]+(?:\s|$)', re.UNICODE)
        self._paragraph_pattern = re.compile(r'\n\s*\n', re.UNICODE)
        self._whitespace_pattern = re.compile(r'\s+', re.UNICODE)

        # Complex sentence pattern for edge cases
        self._complex_sentence_pattern = re.compile(
            r'(?<=[.!?])\s+(?=[A-Z])|(?<=[.!?])$',
            re.UNICODE
        )

        # Per-instance cache for dynamic patterns, keyed by (pattern, flags).
        # A plain dict keeps insertion order, which is used as recency order:
        # the first key is always the least recently used entry.
        self._pattern_cache: Dict[Tuple[str, int], re.Pattern] = {}
        self._cache_max_size = 50

        # Hit/miss statistics reported by get_cache_info()
        self._cache_hits = 0
        self._cache_misses = 0

    def count_words_fast(self, text: str) -> int:
        """
        Fast word counting.

        Text shorter than 500 characters is counted by whitespace splitting;
        longer text is counted with the ``\\b\\w+\\b`` regex.

        NOTE(review): the two strategies tokenize differently (for example
        "don't" is one whitespace token but two regex matches), so the count
        for the same wording can differ on either side of the 500-character
        threshold. Kept as-is because callers may rely on current results.

        Args:
            text: Text to analyze

        Returns:
            Word count
        """
        if not text:
            return 0

        # For small text, use simple split
        if len(text) < 500:
            return self._count_words_simple(text)

        # For all other sizes, use the precompiled word regex
        return len(self._word_pattern.findall(text))

    def _count_words_simple(self, text: str) -> int:
        """Count whitespace-separated tokens (used for small text)."""
        # str.split() without arguments never yields empty strings, so no
        # additional filtering is needed.
        return len(text.split())

    def _count_words_optimized(self, text: str) -> int:
        """
        Count words with a single character scan.

        A word starts at an alphanumeric character or underscore and ends at
        the next whitespace character or member of ``WORD_BOUNDARIES``. Any
        other character leaves the current in-word state unchanged.
        """
        word_count = 0
        in_word = False

        for char in text:
            if char in self.WORD_BOUNDARIES or char.isspace():
                if in_word:
                    word_count += 1
                    in_word = False
            elif char.isalnum() or char == '_':
                in_word = True

        # Count last word if text ends with a word character
        if in_word:
            word_count += 1

        return word_count

    def count_sentences_fast(self, text: str) -> int:
        """
        Fast sentence counting.

        Counts runs of '.', '!' or '?' that are followed by whitespace or the
        end of the text.

        Args:
            text: Text to analyze

        Returns:
            Sentence count
        """
        if not text:
            return 0

        # Use the precompiled regex for all sizes
        return len(self._sentence_pattern.findall(text))

    def _count_sentences_simple(self, text: str) -> int:
        """
        Simple sentence counting using character scanning.

        A sentence boundary is a sentence-ending punctuation character that
        is the last character of the text or is followed by whitespace.
        Non-blank text always counts as at least one sentence.
        """
        last_index = len(text) - 1
        sentence_count = 0

        for index, char in enumerate(text):
            if char not in self.SENTENCE_ENDINGS:
                continue
            # In a run such as "!!!" only the final mark satisfies this
            # condition, so the run is counted once.
            if index == last_index or text[index + 1].isspace():
                sentence_count += 1

        return max(sentence_count, 1 if text.strip() else 0)

    def count_lines_fast(self, text: str) -> int:
        """
        Fast line counting using string method.

        Args:
            text: Text to analyze

        Returns:
            Line count
        """
        if not text:
            return 0

        # Simple and fast: count newlines and add 1
        line_count = text.count('\n') + 1

        # A trailing newline terminates the last line instead of starting
        # another one
        if text.endswith('\n'):
            line_count -= 1

        return max(line_count, 1 if text.strip() else 0)

    def count_paragraphs_fast(self, text: str) -> int:
        """
        Fast paragraph counting.

        Paragraphs are runs of non-blank lines separated by blank lines.

        Args:
            text: Text to analyze

        Returns:
            Paragraph count (0 for empty, None or whitespace-only text)
        """
        # Check falsiness first so None is handled like in the other counters
        if not text or not text.strip():
            return 0

        # For small text, use simple method
        if len(text) < 5000:
            return self._count_paragraphs_simple(text)

        # For larger text, use regex
        paragraphs = self._paragraph_pattern.split(text)
        return len([p for p in paragraphs if p.strip()])

    def _count_paragraphs_simple(self, text: str) -> int:
        """Simple paragraph counting by looking for blank lines."""
        paragraph_count = 0
        in_paragraph = False

        for line in text.split('\n'):
            if line.strip():
                # First non-blank line after a blank one opens a paragraph
                if not in_paragraph:
                    paragraph_count += 1
                    in_paragraph = True
            else:
                in_paragraph = False

        return paragraph_count

    def count_characters_unicode_aware(self, text: str) -> Tuple[int, int]:
        """
        Count characters with Unicode awareness.

        Args:
            text: Text to analyze

        Returns:
            Tuple of (character_count, byte_count), where character_count is
            the number of code points and byte_count is the UTF-8 length
        """
        if not text:
            return (0, 0)

        # Character count (Unicode code points)
        char_count = len(text)

        # Byte count (UTF-8 encoding)
        byte_count = len(text.encode('utf-8'))

        return (char_count, byte_count)

    def analyze_text_structure(self, text: str) -> "TextStructure":
        """
        Comprehensive text structure analysis using optimized methods.

        Args:
            text: Text to analyze

        Returns:
            TextStructure with detailed analysis. For empty text a
            default-initialized TextStructure is returned.
        """
        import time

        # perf_counter is monotonic, so the measured duration can never be
        # negative or distorted by system clock adjustments.
        start_time = time.perf_counter()

        structure = TextStructure()

        if not text:
            return structure

        # Label the size bucket of the input
        text_size = len(text)
        if text_size < 1000:
            structure.processing_method = "fast"
        elif text_size < 50000:
            structure.processing_method = "standard"
        else:
            structure.processing_method = "regex"

        # Character counts
        _, byte_count = self.count_characters_unicode_aware(text)
        # Use byte count for consistency
        # NOTE(review): char_count therefore holds the UTF-8 byte length, not
        # the number of code points - confirm this is what consumers expect.
        structure.char_count = byte_count

        # Line count (always fast)
        structure.line_count = self.count_lines_fast(text)

        # Word count (strategy chosen by count_words_fast)
        structure.word_count = self.count_words_fast(text)

        # Sentence count
        structure.sentence_count = self.count_sentences_fast(text)

        # Paragraph count (strategy chosen by count_paragraphs_fast)
        structure.paragraph_count = self.count_paragraphs_fast(text)

        # Whitespace count
        structure.whitespace_count = sum(1 for c in text if c.isspace())

        # Punctuation count: every Unicode category starting with 'P'
        structure.punctuation_count = sum(
            1 for c in text
            if unicodedata.category(c).startswith('P')
        )

        # Record processing time
        structure.processing_time_ms = (time.perf_counter() - start_time) * 1000

        return structure

    def get_compiled_pattern(self, pattern: str, flags: int = 0) -> re.Pattern:
        """
        Get a compiled regex pattern with caching.

        The cache is a per-instance LRU bounded by ``_cache_max_size``. It is
        deliberately not implemented with ``functools.lru_cache``: decorating
        a method stores ``self`` in a class-level cache, which keeps every
        engine instance alive for the lifetime of the process.

        Args:
            pattern: Regex pattern string
            flags: Regex flags

        Returns:
            Compiled regex pattern

        Raises:
            re.error: If the pattern is not a valid regular expression
        """
        cache = self._pattern_cache
        # A tuple key keeps pattern and flags distinct without string
        # formatting (which could make different inputs look identical).
        cache_key = (pattern, flags)

        compiled = cache.pop(cache_key, None)
        if compiled is None:
            # Compile before touching the cache so an invalid pattern leaves
            # the cache and the statistics unchanged.
            compiled = re.compile(pattern, flags)
            self._cache_misses += 1

            # Evict the least recently used entry (first key in the dict)
            if cache and len(cache) >= self._cache_max_size:
                del cache[next(iter(cache))]
        else:
            self._cache_hits += 1

        # (Re)insert at the end to mark the entry as most recently used
        cache[cache_key] = compiled
        return compiled

    def find_all_optimized(self, pattern: str, text: str, flags: int = 0) -> List[str]:
        """
        Find all matches using cached compiled pattern.

        Args:
            pattern: Regex pattern
            text: Text to search
            flags: Regex flags

        Returns:
            List of matches
        """
        compiled_pattern = self.get_compiled_pattern(pattern, flags)
        return compiled_pattern.findall(text)

    def count_pattern_optimized(self, pattern: str, text: str, flags: int = 0) -> int:
        """
        Count pattern matches using cached compiled pattern.

        Args:
            pattern: Regex pattern
            text: Text to search
            flags: Regex flags

        Returns:
            Match count
        """
        matches = self.find_all_optimized(pattern, text, flags)
        return len(matches)

    def is_unicode_text(self, text: str) -> bool:
        """
        Check if text contains non-ASCII Unicode characters.

        Args:
            text: Text to check

        Returns:
            True if text contains Unicode characters beyond ASCII
        """
        if not text:
            return False

        # Fast check: if the text encodes as ASCII there is nothing beyond it
        try:
            text.encode('ascii')
            return False
        except UnicodeEncodeError:
            return True

    def normalize_unicode(self, text: str, form: str = 'NFC') -> str:
        """
        Normalize Unicode text for consistent processing.

        Args:
            text: Text to normalize
            form: Normalization form (NFC, NFD, NFKC, NFKD)

        Returns:
            Normalized text
        """
        return unicodedata.normalize(form, text)

    def clear_pattern_cache(self):
        """Clear the pattern cache and reset its hit/miss statistics."""
        self._pattern_cache.clear()
        self._cache_hits = 0
        self._cache_misses = 0

    def get_cache_info(self) -> Dict[str, int]:
        """
        Get pattern cache information.

        The ``lru_cache_*`` keys are retained for backward compatibility and
        describe the same per-instance cache as the ``pattern_cache_*`` keys.

        Returns:
            Dictionary with cache statistics
        """
        cache_size = len(self._pattern_cache)

        return {
            'pattern_cache_size': cache_size,
            'pattern_cache_max_size': self._cache_max_size,
            'lru_cache_hits': self._cache_hits,
            'lru_cache_misses': self._cache_misses,
            'lru_cache_size': cache_size,
            'lru_cache_max_size': self._cache_max_size
        }
408
+
409
+
410
# Module-wide engine, created on first request by get_pattern_engine()
_global_pattern_engine: Optional[OptimizedPatternEngine] = None


def get_pattern_engine() -> OptimizedPatternEngine:
    """
    Return the shared OptimizedPatternEngine, creating it on first use.

    Returns:
        Global OptimizedPatternEngine instance
    """
    global _global_pattern_engine

    engine = _global_pattern_engine
    if engine is not None:
        return engine

    # First call: build the engine and remember it for later callers
    engine = OptimizedPatternEngine()
    _global_pattern_engine = engine
    return engine