pomera-ai-commander 0.1.0 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (191) hide show
  1. package/LICENSE +21 -21
  2. package/README.md +105 -680
  3. package/bin/pomera-ai-commander.js +62 -62
  4. package/core/__init__.py +65 -65
  5. package/core/app_context.py +482 -482
  6. package/core/async_text_processor.py +421 -421
  7. package/core/backup_manager.py +655 -655
  8. package/core/backup_recovery_manager.py +1033 -1033
  9. package/core/content_hash_cache.py +508 -508
  10. package/core/context_menu.py +313 -313
  11. package/core/data_validator.py +1066 -1066
  12. package/core/database_connection_manager.py +744 -744
  13. package/core/database_curl_settings_manager.py +608 -608
  14. package/core/database_promera_ai_settings_manager.py +446 -446
  15. package/core/database_schema.py +411 -411
  16. package/core/database_schema_manager.py +395 -395
  17. package/core/database_settings_manager.py +1507 -1507
  18. package/core/database_settings_manager_interface.py +456 -456
  19. package/core/dialog_manager.py +734 -734
  20. package/core/efficient_line_numbers.py +510 -510
  21. package/core/error_handler.py +746 -746
  22. package/core/error_service.py +431 -431
  23. package/core/event_consolidator.py +511 -511
  24. package/core/mcp/__init__.py +43 -43
  25. package/core/mcp/protocol.py +288 -288
  26. package/core/mcp/schema.py +251 -251
  27. package/core/mcp/server_stdio.py +299 -299
  28. package/core/mcp/tool_registry.py +2372 -2345
  29. package/core/memory_efficient_text_widget.py +711 -711
  30. package/core/migration_manager.py +914 -914
  31. package/core/migration_test_suite.py +1085 -1085
  32. package/core/migration_validator.py +1143 -1143
  33. package/core/optimized_find_replace.py +714 -714
  34. package/core/optimized_pattern_engine.py +424 -424
  35. package/core/optimized_search_highlighter.py +552 -552
  36. package/core/performance_monitor.py +674 -674
  37. package/core/persistence_manager.py +712 -712
  38. package/core/progressive_stats_calculator.py +632 -632
  39. package/core/regex_pattern_cache.py +529 -529
  40. package/core/regex_pattern_library.py +350 -350
  41. package/core/search_operation_manager.py +434 -434
  42. package/core/settings_defaults_registry.py +1087 -1087
  43. package/core/settings_integrity_validator.py +1111 -1111
  44. package/core/settings_serializer.py +557 -557
  45. package/core/settings_validator.py +1823 -1823
  46. package/core/smart_stats_calculator.py +709 -709
  47. package/core/statistics_update_manager.py +619 -619
  48. package/core/stats_config_manager.py +858 -858
  49. package/core/streaming_text_handler.py +723 -723
  50. package/core/task_scheduler.py +596 -596
  51. package/core/update_pattern_library.py +168 -168
  52. package/core/visibility_monitor.py +596 -596
  53. package/core/widget_cache.py +498 -498
  54. package/mcp.json +51 -61
  55. package/package.json +61 -57
  56. package/pomera.py +7482 -7482
  57. package/pomera_mcp_server.py +183 -144
  58. package/requirements.txt +32 -0
  59. package/tools/__init__.py +4 -4
  60. package/tools/ai_tools.py +2891 -2891
  61. package/tools/ascii_art_generator.py +352 -352
  62. package/tools/base64_tools.py +183 -183
  63. package/tools/base_tool.py +511 -511
  64. package/tools/case_tool.py +308 -308
  65. package/tools/column_tools.py +395 -395
  66. package/tools/cron_tool.py +884 -884
  67. package/tools/curl_history.py +600 -600
  68. package/tools/curl_processor.py +1207 -1207
  69. package/tools/curl_settings.py +502 -502
  70. package/tools/curl_tool.py +5467 -5467
  71. package/tools/diff_viewer.py +1071 -1071
  72. package/tools/email_extraction_tool.py +248 -248
  73. package/tools/email_header_analyzer.py +425 -425
  74. package/tools/extraction_tools.py +250 -250
  75. package/tools/find_replace.py +1750 -1750
  76. package/tools/folder_file_reporter.py +1463 -1463
  77. package/tools/folder_file_reporter_adapter.py +480 -480
  78. package/tools/generator_tools.py +1216 -1216
  79. package/tools/hash_generator.py +255 -255
  80. package/tools/html_tool.py +656 -656
  81. package/tools/jsonxml_tool.py +729 -729
  82. package/tools/line_tools.py +419 -419
  83. package/tools/markdown_tools.py +561 -561
  84. package/tools/mcp_widget.py +1417 -1417
  85. package/tools/notes_widget.py +973 -973
  86. package/tools/number_base_converter.py +372 -372
  87. package/tools/regex_extractor.py +571 -571
  88. package/tools/slug_generator.py +310 -310
  89. package/tools/sorter_tools.py +458 -458
  90. package/tools/string_escape_tool.py +392 -392
  91. package/tools/text_statistics_tool.py +365 -365
  92. package/tools/text_wrapper.py +430 -430
  93. package/tools/timestamp_converter.py +421 -421
  94. package/tools/tool_loader.py +710 -710
  95. package/tools/translator_tools.py +522 -522
  96. package/tools/url_link_extractor.py +261 -261
  97. package/tools/url_parser.py +204 -204
  98. package/tools/whitespace_tools.py +355 -355
  99. package/tools/word_frequency_counter.py +146 -146
  100. package/core/__pycache__/__init__.cpython-313.pyc +0 -0
  101. package/core/__pycache__/app_context.cpython-313.pyc +0 -0
  102. package/core/__pycache__/async_text_processor.cpython-313.pyc +0 -0
  103. package/core/__pycache__/backup_manager.cpython-313.pyc +0 -0
  104. package/core/__pycache__/backup_recovery_manager.cpython-313.pyc +0 -0
  105. package/core/__pycache__/content_hash_cache.cpython-313.pyc +0 -0
  106. package/core/__pycache__/context_menu.cpython-313.pyc +0 -0
  107. package/core/__pycache__/data_validator.cpython-313.pyc +0 -0
  108. package/core/__pycache__/database_connection_manager.cpython-313.pyc +0 -0
  109. package/core/__pycache__/database_curl_settings_manager.cpython-313.pyc +0 -0
  110. package/core/__pycache__/database_promera_ai_settings_manager.cpython-313.pyc +0 -0
  111. package/core/__pycache__/database_schema.cpython-313.pyc +0 -0
  112. package/core/__pycache__/database_schema_manager.cpython-313.pyc +0 -0
  113. package/core/__pycache__/database_settings_manager.cpython-313.pyc +0 -0
  114. package/core/__pycache__/database_settings_manager_interface.cpython-313.pyc +0 -0
  115. package/core/__pycache__/dialog_manager.cpython-313.pyc +0 -0
  116. package/core/__pycache__/efficient_line_numbers.cpython-313.pyc +0 -0
  117. package/core/__pycache__/error_handler.cpython-313.pyc +0 -0
  118. package/core/__pycache__/error_service.cpython-313.pyc +0 -0
  119. package/core/__pycache__/event_consolidator.cpython-313.pyc +0 -0
  120. package/core/__pycache__/memory_efficient_text_widget.cpython-313.pyc +0 -0
  121. package/core/__pycache__/migration_manager.cpython-313.pyc +0 -0
  122. package/core/__pycache__/migration_test_suite.cpython-313.pyc +0 -0
  123. package/core/__pycache__/migration_validator.cpython-313.pyc +0 -0
  124. package/core/__pycache__/optimized_find_replace.cpython-313.pyc +0 -0
  125. package/core/__pycache__/optimized_pattern_engine.cpython-313.pyc +0 -0
  126. package/core/__pycache__/optimized_search_highlighter.cpython-313.pyc +0 -0
  127. package/core/__pycache__/performance_monitor.cpython-313.pyc +0 -0
  128. package/core/__pycache__/persistence_manager.cpython-313.pyc +0 -0
  129. package/core/__pycache__/progressive_stats_calculator.cpython-313.pyc +0 -0
  130. package/core/__pycache__/regex_pattern_cache.cpython-313.pyc +0 -0
  131. package/core/__pycache__/regex_pattern_library.cpython-313.pyc +0 -0
  132. package/core/__pycache__/search_operation_manager.cpython-313.pyc +0 -0
  133. package/core/__pycache__/settings_defaults_registry.cpython-313.pyc +0 -0
  134. package/core/__pycache__/settings_integrity_validator.cpython-313.pyc +0 -0
  135. package/core/__pycache__/settings_serializer.cpython-313.pyc +0 -0
  136. package/core/__pycache__/settings_validator.cpython-313.pyc +0 -0
  137. package/core/__pycache__/smart_stats_calculator.cpython-313.pyc +0 -0
  138. package/core/__pycache__/statistics_update_manager.cpython-313.pyc +0 -0
  139. package/core/__pycache__/stats_config_manager.cpython-313.pyc +0 -0
  140. package/core/__pycache__/streaming_text_handler.cpython-313.pyc +0 -0
  141. package/core/__pycache__/task_scheduler.cpython-313.pyc +0 -0
  142. package/core/__pycache__/visibility_monitor.cpython-313.pyc +0 -0
  143. package/core/__pycache__/widget_cache.cpython-313.pyc +0 -0
  144. package/core/mcp/__pycache__/__init__.cpython-313.pyc +0 -0
  145. package/core/mcp/__pycache__/protocol.cpython-313.pyc +0 -0
  146. package/core/mcp/__pycache__/schema.cpython-313.pyc +0 -0
  147. package/core/mcp/__pycache__/server_stdio.cpython-313.pyc +0 -0
  148. package/core/mcp/__pycache__/tool_registry.cpython-313.pyc +0 -0
  149. package/tools/__pycache__/__init__.cpython-313.pyc +0 -0
  150. package/tools/__pycache__/ai_tools.cpython-313.pyc +0 -0
  151. package/tools/__pycache__/ascii_art_generator.cpython-313.pyc +0 -0
  152. package/tools/__pycache__/base64_tools.cpython-313.pyc +0 -0
  153. package/tools/__pycache__/base_tool.cpython-313.pyc +0 -0
  154. package/tools/__pycache__/case_tool.cpython-313.pyc +0 -0
  155. package/tools/__pycache__/column_tools.cpython-313.pyc +0 -0
  156. package/tools/__pycache__/cron_tool.cpython-313.pyc +0 -0
  157. package/tools/__pycache__/curl_history.cpython-313.pyc +0 -0
  158. package/tools/__pycache__/curl_processor.cpython-313.pyc +0 -0
  159. package/tools/__pycache__/curl_settings.cpython-313.pyc +0 -0
  160. package/tools/__pycache__/curl_tool.cpython-313.pyc +0 -0
  161. package/tools/__pycache__/diff_viewer.cpython-313.pyc +0 -0
  162. package/tools/__pycache__/email_extraction_tool.cpython-313.pyc +0 -0
  163. package/tools/__pycache__/email_header_analyzer.cpython-313.pyc +0 -0
  164. package/tools/__pycache__/extraction_tools.cpython-313.pyc +0 -0
  165. package/tools/__pycache__/find_replace.cpython-313.pyc +0 -0
  166. package/tools/__pycache__/folder_file_reporter.cpython-313.pyc +0 -0
  167. package/tools/__pycache__/folder_file_reporter_adapter.cpython-313.pyc +0 -0
  168. package/tools/__pycache__/generator_tools.cpython-313.pyc +0 -0
  169. package/tools/__pycache__/hash_generator.cpython-313.pyc +0 -0
  170. package/tools/__pycache__/html_tool.cpython-313.pyc +0 -0
  171. package/tools/__pycache__/huggingface_helper.cpython-313.pyc +0 -0
  172. package/tools/__pycache__/jsonxml_tool.cpython-313.pyc +0 -0
  173. package/tools/__pycache__/line_tools.cpython-313.pyc +0 -0
  174. package/tools/__pycache__/list_comparator.cpython-313.pyc +0 -0
  175. package/tools/__pycache__/markdown_tools.cpython-313.pyc +0 -0
  176. package/tools/__pycache__/mcp_widget.cpython-313.pyc +0 -0
  177. package/tools/__pycache__/notes_widget.cpython-313.pyc +0 -0
  178. package/tools/__pycache__/number_base_converter.cpython-313.pyc +0 -0
  179. package/tools/__pycache__/regex_extractor.cpython-313.pyc +0 -0
  180. package/tools/__pycache__/slug_generator.cpython-313.pyc +0 -0
  181. package/tools/__pycache__/sorter_tools.cpython-313.pyc +0 -0
  182. package/tools/__pycache__/string_escape_tool.cpython-313.pyc +0 -0
  183. package/tools/__pycache__/text_statistics_tool.cpython-313.pyc +0 -0
  184. package/tools/__pycache__/text_wrapper.cpython-313.pyc +0 -0
  185. package/tools/__pycache__/timestamp_converter.cpython-313.pyc +0 -0
  186. package/tools/__pycache__/tool_loader.cpython-313.pyc +0 -0
  187. package/tools/__pycache__/translator_tools.cpython-313.pyc +0 -0
  188. package/tools/__pycache__/url_link_extractor.cpython-313.pyc +0 -0
  189. package/tools/__pycache__/url_parser.cpython-313.pyc +0 -0
  190. package/tools/__pycache__/whitespace_tools.cpython-313.pyc +0 -0
  191. package/tools/__pycache__/word_frequency_counter.cpython-313.pyc +0 -0
@@ -1,710 +1,710 @@
1
- """
2
- Smart statistics calculator with intelligent caching for Promera AI Commander.
3
- Provides efficient text statistics calculation with incremental updates and caching.
4
- """
5
-
6
- import re
7
- import time
8
- import hashlib
9
- import threading
10
- import sys
11
- from typing import Dict, List, Optional, Any, Tuple
12
- from dataclasses import dataclass, field
13
- from collections import defaultdict
14
- import weakref
15
-
16
- # Import optimized pattern engine
17
- from core.optimized_pattern_engine import get_pattern_engine, OptimizedPatternEngine
18
-
19
@dataclass
class ChangeInfo:
    """Describes a single text edit so statistics can be updated incrementally."""
    start_pos: int
    end_pos: int
    inserted_text: str
    deleted_length: int
    change_type: str  # insert, delete, replace
    timestamp: float = field(default_factory=time.time)

    def is_minor_change(self) -> bool:
        """Return True when the edit is small enough for an incremental update."""
        # Edits touching at most 10 characters on either side count as minor.
        checks = {
            "insert": len(self.inserted_text) <= 10,
            "delete": self.deleted_length <= 10,
            "replace": len(self.inserted_text) <= 10 and self.deleted_length <= 10,
        }
        return checks.get(self.change_type, False)

    def affects_statistics(self) -> bool:
        """Return True when the edit may move word/sentence/line boundaries."""
        # Only insertions can be ruled out cheaply; deletes/replaces are
        # always treated as significant.
        if self.change_type != "insert":
            return True
        boundary_chars = (' ', '\n', '.', '!', '?')
        return any(ch in self.inserted_text for ch in boundary_chars)
46
-
47
@dataclass
class TextStats:
    """Comprehensive text statistics plus metadata about how they were produced.

    Basic counts (chars/words/sentences/lines/paragraphs/tokens) are filled in
    by the calculator; the metadata fields (cache_hit, processing_method,
    calculation_time_ms, timestamp) record provenance and freshness.
    """
    char_count: int = 0
    word_count: int = 0
    sentence_count: int = 0
    line_count: int = 0
    paragraph_count: int = 0
    token_count: int = 0

    # Advanced statistics
    unique_words: int = 0
    average_word_length: float = 0.0
    average_sentence_length: float = 0.0
    reading_time_minutes: float = 0.0

    # Metadata
    content_hash: str = ""
    calculation_time_ms: float = 0.0
    timestamp: float = field(default_factory=time.time)
    cache_hit: bool = False
    incremental_update: bool = False
    processing_method: str = "full"  # full, incremental, cached
    memory_usage_bytes: int = 0

    def is_stale(self, max_age_seconds: int = 300) -> bool:
        """Check if statistics are stale (older than max_age_seconds)."""
        return (time.time() - self.timestamp) > max_age_seconds

    def to_status_string(self) -> str:
        """Convert to status bar string format."""
        formatted_bytes = self._format_bytes(self.char_count)
        return f"Bytes: {formatted_bytes} | Word: {self.word_count} | Sentence: {self.sentence_count} | Line: {self.line_count} | Tokens: {self.token_count}"

    def _format_bytes(self, byte_count) -> str:
        """Format byte count with K/M suffixes for readability.

        Returns e.g. "999", "1.5K", "2.5M"; a trailing ".0" is removed
        ("1.0K" -> "1K").
        """
        if byte_count >= 1000000:
            value = byte_count / 1000000
            suffix = "M"
        elif byte_count >= 1000:
            value = byte_count / 1000
            # Rounding to 1 decimal can push e.g. 999.96K up to 1000.0K;
            # promote to the M suffix in that case.
            if round(value, 1) >= 1000:
                value = value / 1000
                suffix = "M"
            else:
                suffix = "K"
        else:
            return str(byte_count)

        # BUG FIX: the previous code appended the suffix first and then called
        # rstrip('0').rstrip('.'), which never fired because the string ended
        # in 'K'/'M'. Strip the numeric part alone, then attach the suffix.
        number = f"{value:.1f}".rstrip('0').rstrip('.')
        return number + suffix

    def to_detailed_dict(self) -> Dict[str, Any]:
        """Convert to detailed dictionary for analysis."""
        return {
            'basic': {
                'characters': self.char_count,
                'words': self.word_count,
                'sentences': self.sentence_count,
                'lines': self.line_count,
                'paragraphs': self.paragraph_count,
                'tokens': self.token_count
            },
            'advanced': {
                'unique_words': self.unique_words,
                'average_word_length': self.average_word_length,
                'average_sentence_length': self.average_sentence_length,
                'reading_time_minutes': self.reading_time_minutes
            },
            'metadata': {
                'content_hash': self.content_hash,
                'calculation_time_ms': self.calculation_time_ms,
                'timestamp': self.timestamp
            }
        }
122
-
123
@dataclass
class CacheEntry:
    """Cache entry with metadata for intelligent cache management."""
    stats: TextStats
    access_count: int = 0
    last_access: float = field(default_factory=time.time)
    size_estimate: int = 0
    widget_id: Optional[str] = None

    @property
    def age_seconds(self) -> float:
        """Seconds elapsed since this entry was last touched."""
        return time.time() - self.last_access

    @property
    def access_frequency(self) -> float:
        """Accesses per hour, clamping the age to avoid division by zero."""
        hours = max(self.age_seconds / 3600, 0.01)
        return self.access_count / hours

    @property
    def memory_usage(self) -> int:
        """Rough footprint: cached content size + stats object + fixed overhead."""
        overhead = 200
        return self.size_estimate + sys.getsizeof(self.stats) + overhead
148
-
149
class SmartStatsCalculator:
    """
    Intelligent text statistics calculator with caching and incremental updates.
    """

    # Memory limits
    # Hard ceiling for the stats cache; _cache_stats triggers eviction before
    # an insertion would exceed it.
    MAX_CACHE_MEMORY_BYTES = 50 * 1024 * 1024  # 50MB
    # Soft watermark: memory-pressure eviction frees entries until usage is
    # back at or below this level.
    CLEANUP_THRESHOLD_BYTES = 45 * 1024 * 1024  # Start cleanup at 45MB
157
-
158
    def __init__(self, cache_size_limit: int = 1000, enable_advanced_stats: bool = True):
        """Initialize the calculator.

        Args:
            cache_size_limit: Maximum number of cached stat entries (by count).
            enable_advanced_stats: Whether to compute unique-word / averages /
                reading-time statistics in addition to the basic counts.
        """
        self.cache_size_limit = cache_size_limit
        self.enable_advanced_stats = enable_advanced_stats

        # Cache storage: content-hash -> CacheEntry, guarded by a re-entrant
        # lock (methods holding the lock call other locking methods).
        self.stats_cache: Dict[str, CacheEntry] = {}
        self.cache_lock = threading.RLock()

        # Memory tracking
        self.current_memory_usage = 0
        self.last_cleanup_time = time.time()
        self.cleanup_interval = 300  # 5 minutes

        # Widget tracking for tool switching cleanup:
        # widget_id -> list of content hashes cached on its behalf.
        self.widget_cache_map: Dict[str, List[str]] = defaultdict(list)

        # Use optimized pattern engine for fast text analysis
        self.pattern_engine = get_pattern_engine()

        # Fallback regex patterns (for advanced features only)
        self.word_pattern = re.compile(r'\b\w+\b')
        self.sentence_pattern = re.compile(r'[.!?]+')
        self.paragraph_pattern = re.compile(r'\n\s*\n')

        # Word frequency cache for advanced stats
        self.word_frequency_cache: Dict[str, Dict[str, int]] = {}

        # Periodic cleanup thread (daemon; lives for the process lifetime).
        self._start_periodic_cleanup()
187
-
188
    def calculate_stats(self, text: str, widget_id: Optional[str] = None) -> TextStats:
        """
        Calculate text statistics with intelligent caching.

        Args:
            text: Text content to analyze
            widget_id: Optional widget identifier for cache optimization

        Returns:
            TextStats object with comprehensive statistics
        """
        # Whitespace-only input short-circuits to an all-zero TextStats.
        if not text.strip():
            return TextStats()

        # Generate content hash for caching
        content_hash = self._generate_content_hash(text)

        # Check cache first
        with self.cache_lock:
            if content_hash in self.stats_cache:
                entry = self.stats_cache[content_hash]
                entry.access_count += 1
                entry.last_access = time.time()

                # Update stats metadata
                # NOTE(review): this mutates the shared cached TextStats object
                # in place (cache_hit / processing_method), so any caller still
                # holding a previous reference sees the flags change — confirm
                # this aliasing is intentional.
                stats = entry.stats
                stats.cache_hit = True
                stats.processing_method = "cached"

                return stats

        # Cache miss - calculate stats
        start_time = time.time()
        stats = self._calculate_stats_impl(text, content_hash)
        calculation_time = (time.time() - start_time) * 1000

        stats.calculation_time_ms = calculation_time
        stats.cache_hit = False
        stats.processing_method = "full"

        # Cache the result
        self._cache_stats(content_hash, stats, len(text), widget_id)

        # Check if periodic cleanup is needed
        self._check_periodic_cleanup()

        return stats
235
-
236
    def calculate_stats_incremental(self,
                                    text: str,
                                    previous_stats: Optional[TextStats] = None,
                                    change_info: Optional[ChangeInfo] = None,
                                    widget_id: Optional[str] = None) -> TextStats:
        """
        Calculate statistics incrementally when possible.

        Falls back to a full `calculate_stats` pass whenever the change is
        missing, non-minor, statistics-neutral, or the incremental path fails.

        Args:
            text: Current text content
            previous_stats: Previous statistics for comparison
            change_info: Information about what changed (position, length, etc.)
            widget_id: Optional widget identifier

        Returns:
            Updated TextStats object
        """
        # If no change info or previous stats, do full calculation
        if not change_info or not previous_stats:
            return self.calculate_stats(text, widget_id)

        # Check if incremental update is beneficial
        if not change_info.is_minor_change() or not change_info.affects_statistics():
            # For non-minor changes or changes that don't affect stats, do full calculation
            return self.calculate_stats(text, widget_id)

        # Attempt incremental update for minor changes
        start_time = time.time()

        try:
            stats = self._calculate_incremental_stats(text, previous_stats, change_info)
            calculation_time = (time.time() - start_time) * 1000

            stats.calculation_time_ms = calculation_time
            stats.incremental_update = True
            stats.processing_method = "incremental"
            stats.cache_hit = False

            # Cache the result (hash is recomputed here because the incremental
            # path never hashed the new content).
            content_hash = self._generate_content_hash(text)
            stats.content_hash = content_hash
            self._cache_stats(content_hash, stats, len(text), widget_id)

            return stats

        except Exception:
            # Deliberate broad catch: any incremental failure degrades to the
            # slower-but-safe full calculation.
            return self.calculate_stats(text, widget_id)
284
-
285
    def _calculate_incremental_stats(self, text: str, previous_stats: TextStats, change_info: ChangeInfo) -> TextStats:
        """
        Calculate statistics incrementally based on change information.

        Only insertions are truly incremental; deletions fall back to a full
        recount. Insert deltas are approximations (see inline notes).

        Args:
            text: Current text content
            previous_stats: Previous statistics
            change_info: Information about the change

        Returns:
            Updated TextStats object
        """
        stats = TextStats()

        # Start with previous stats
        stats.char_count = previous_stats.char_count
        stats.word_count = previous_stats.word_count
        stats.sentence_count = previous_stats.sentence_count
        stats.line_count = previous_stats.line_count
        stats.paragraph_count = previous_stats.paragraph_count

        # Adjust based on change type
        if change_info.change_type == "insert":
            # Update character count
            stats.char_count += len(change_info.inserted_text)

            # Update line count
            new_lines = change_info.inserted_text.count('\n')
            stats.line_count += new_lines

            # Update word count (approximate: text glued onto an existing word
            # would still be counted as a new word here)
            new_words = len(change_info.inserted_text.split())
            stats.word_count += new_words

            # Update sentence count (approximate: counts terminator characters,
            # so "..." adds three)
            new_sentences = sum(change_info.inserted_text.count(c) for c in ['.', '!', '?'])
            stats.sentence_count += new_sentences

        elif change_info.change_type == "delete":
            # For deletions, we need to recalculate (can't reliably decrement).
            # The empty content_hash is filled in by the caller afterwards.
            return self._calculate_stats_impl(text, "")

        # Recalculate token count (~4 chars per token heuristic, minimum 1)
        stats.token_count = max(1, round(stats.char_count / 4))

        # Advanced stats require full recalculation
        if self.enable_advanced_stats:
            # For incremental updates, skip advanced stats or recalculate
            pass

        return stats
337
-
338
    def _calculate_stats_impl(self, text: str, content_hash: str) -> TextStats:
        """Internal implementation of statistics calculation using optimized pattern engine.

        Basic counts are delegated to the pattern engine; advanced stats
        (unique words, averages, reading time) are computed here via regex
        when `enable_advanced_stats` is on.
        """
        stats = TextStats(content_hash=content_hash)

        # Use optimized pattern engine for fast counting
        text_structure = self.pattern_engine.analyze_text_structure(text)

        # Basic statistics from optimized engine
        stats.char_count = text_structure.char_count
        stats.line_count = text_structure.line_count
        stats.word_count = text_structure.word_count
        stats.sentence_count = text_structure.sentence_count
        stats.paragraph_count = text_structure.paragraph_count

        # Token count (rough estimate: 1 token ≈ 4 characters)
        stats.token_count = max(1, round(len(text) / 4))

        # Advanced statistics (if enabled)
        if self.enable_advanced_stats and stats.word_count > 0:
            # Use regex for word extraction (needed for advanced stats)
            words = self.word_pattern.findall(text)

            if words:
                # Case-insensitive uniqueness
                stats.unique_words = len(set(word.lower() for word in words))
                stats.average_word_length = sum(len(word) for word in words) / len(words)

                if stats.sentence_count > 0:
                    stats.average_sentence_length = stats.word_count / stats.sentence_count

                # Reading time estimate (average 200 words per minute)
                stats.reading_time_minutes = stats.word_count / 200.0

        return stats
371
-
372
- def _generate_content_hash(self, text: str) -> str:
373
- """Generate a hash for content caching."""
374
- # Use a combination of length and content hash for efficiency
375
- content_sample = text[:100] + text[-100:] if len(text) > 200 else text
376
- hash_input = f"{len(text)}_{content_sample}"
377
- return hashlib.md5(hash_input.encode('utf-8')).hexdigest()[:16]
378
-
379
    def _cache_stats(self, content_hash: str, stats: TextStats, content_size: int, widget_id: Optional[str] = None):
        """Cache statistics with intelligent cache management.

        Applies memory-pressure eviction and count-based eviction before
        inserting, then records the entry's estimated footprint.
        """
        with self.cache_lock:
            # Create cache entry
            entry = CacheEntry(
                stats=stats,
                access_count=1,
                size_estimate=content_size,
                widget_id=widget_id
            )

            # Track widget association
            if widget_id:
                self.widget_cache_map[widget_id].append(content_hash)

            # Check memory usage before adding
            entry_memory = entry.memory_usage
            if self.current_memory_usage + entry_memory > self.MAX_CACHE_MEMORY_BYTES:
                self._evict_by_memory_pressure()

            # Check if cache is full by count
            if len(self.stats_cache) >= self.cache_size_limit:
                self._evict_cache_entries()

            # Store in cache
            # NOTE(review): if content_hash already exists, the old entry is
            # overwritten without subtracting its memory_usage, so
            # current_memory_usage can drift upward over time — confirm.
            self.stats_cache[content_hash] = entry
            self.current_memory_usage += entry_memory

            # Update stats memory usage
            stats.memory_usage_bytes = entry_memory
409
-
410
    def _evict_cache_entries(self):
        """Evict cache entries using intelligent LRU + frequency algorithm.

        Scores every entry (higher = keep) from recency, access frequency and
        a size penalty, then drops the lowest-scoring 25% (at least one).
        Caller is expected to hold `cache_lock`.
        """
        if not self.stats_cache:
            return

        # Calculate eviction scores (lower score = more likely to evict)
        entries_with_scores = []
        current_time = time.time()

        for hash_key, entry in self.stats_cache.items():
            # Score based on recency, frequency, and size
            recency_score = 1.0 / max(entry.age_seconds, 1.0)
            frequency_score = entry.access_frequency
            size_penalty = entry.size_estimate / 10000.0  # Penalize large entries

            # Weights: 0.4 recency + 0.5 frequency - 0.1 size penalty
            score = (recency_score * 0.4 + frequency_score * 0.5) - (size_penalty * 0.1)
            entries_with_scores.append((score, hash_key))

        # Sort by score (lowest first) and evict bottom 25%
        entries_with_scores.sort()
        evict_count = max(1, len(entries_with_scores) // 4)

        for _, hash_key in entries_with_scores[:evict_count]:
            entry = self.stats_cache.pop(hash_key, None)
            if entry:
                self.current_memory_usage -= entry.memory_usage
                # Remove from widget map (hash may already be gone from the list)
                if entry.widget_id and entry.widget_id in self.widget_cache_map:
                    try:
                        self.widget_cache_map[entry.widget_id].remove(hash_key)
                    except ValueError:
                        pass
442
-
443
    def _evict_by_memory_pressure(self):
        """Evict cache entries when memory usage exceeds limits.

        Frees entries — oldest / least-accessed / largest first — until usage
        drops to CLEANUP_THRESHOLD_BYTES. Caller is expected to hold
        `cache_lock`.
        """
        if not self.stats_cache:
            return

        # Calculate how much memory we need to free
        target_memory = self.CLEANUP_THRESHOLD_BYTES
        memory_to_free = self.current_memory_usage - target_memory

        if memory_to_free <= 0:
            return

        # Sort entries by eviction priority (oldest, least accessed, largest)
        entries_with_priority = []

        for hash_key, entry in self.stats_cache.items():
            # Priority score (higher = more likely to evict)
            age_score = entry.age_seconds
            access_score = 1.0 / max(entry.access_count, 1)
            size_score = entry.memory_usage / 1024.0  # KB

            priority = (age_score * 0.4) + (access_score * 0.3) + (size_score * 0.3)
            entries_with_priority.append((priority, hash_key, entry.memory_usage))

        # Sort by priority (highest first) and evict until we free enough memory
        entries_with_priority.sort(reverse=True)

        freed_memory = 0
        for priority, hash_key, entry_memory in entries_with_priority:
            if freed_memory >= memory_to_free:
                break

            entry = self.stats_cache.pop(hash_key, None)
            if entry:
                self.current_memory_usage -= entry.memory_usage
                freed_memory += entry.memory_usage

                # Remove from widget map
                if entry.widget_id and entry.widget_id in self.widget_cache_map:
                    try:
                        self.widget_cache_map[entry.widget_id].remove(hash_key)
                    except ValueError:
                        pass
486
-
487
- def get_cached_stats(self, content_hash: str) -> Optional[TextStats]:
488
- """Get cached statistics by content hash."""
489
- with self.cache_lock:
490
- if content_hash in self.stats_cache:
491
- entry = self.stats_cache[content_hash]
492
- entry.access_count += 1
493
- entry.last_access = time.time()
494
- return entry.stats
495
- return None
496
-
497
- def clear_cache(self):
498
- """Clear all cached statistics."""
499
- with self.cache_lock:
500
- self.stats_cache.clear()
501
- self.current_memory_usage = 0
502
- self.widget_cache_map.clear()
503
-
504
- def clear_widget_cache(self, widget_id: str):
505
- """Clear cache entries associated with a specific widget (for tool switching)."""
506
- with self.cache_lock:
507
- if widget_id not in self.widget_cache_map:
508
- return
509
-
510
- # Get all cache keys for this widget
511
- cache_keys = self.widget_cache_map[widget_id].copy()
512
-
513
- # Remove each entry
514
- for cache_key in cache_keys:
515
- entry = self.stats_cache.pop(cache_key, None)
516
- if entry:
517
- self.current_memory_usage -= entry.memory_usage
518
-
519
- # Clear widget mapping
520
- self.widget_cache_map.pop(widget_id, None)
521
-
522
- def _check_periodic_cleanup(self):
523
- """Check if periodic cleanup is needed and perform it."""
524
- current_time = time.time()
525
-
526
- # Check if cleanup interval has passed
527
- if current_time - self.last_cleanup_time < self.cleanup_interval:
528
- return
529
-
530
- # Perform cleanup
531
- self._perform_periodic_cleanup()
532
- self.last_cleanup_time = current_time
533
-
534
    def _perform_periodic_cleanup(self):
        """Perform periodic memory cleanup.

        Removes entries idle for more than 10 minutes, then applies
        memory-pressure eviction if usage is still above the soft watermark.
        Safe to call from the background thread (takes `cache_lock`).
        """
        with self.cache_lock:
            # Remove stale entries (older than 10 minutes)
            stale_keys = []

            for hash_key, entry in self.stats_cache.items():
                if entry.age_seconds > 600:  # 10 minutes
                    stale_keys.append(hash_key)

            # Remove stale entries (collected first so we don't mutate the
            # dict while iterating it)
            for hash_key in stale_keys:
                entry = self.stats_cache.pop(hash_key, None)
                if entry:
                    self.current_memory_usage -= entry.memory_usage

                    # Remove from widget map
                    if entry.widget_id and entry.widget_id in self.widget_cache_map:
                        try:
                            self.widget_cache_map[entry.widget_id].remove(hash_key)
                        except ValueError:
                            pass

            # Check memory usage and evict if needed
            if self.current_memory_usage > self.CLEANUP_THRESHOLD_BYTES:
                self._evict_by_memory_pressure()
560
-
561
- def _start_periodic_cleanup(self):
562
- """Start background thread for periodic cleanup."""
563
- def cleanup_worker():
564
- while True:
565
- time.sleep(self.cleanup_interval)
566
- try:
567
- self._perform_periodic_cleanup()
568
- except Exception:
569
- pass # Silently handle errors in background thread
570
-
571
- # Start daemon thread
572
- cleanup_thread = threading.Thread(target=cleanup_worker, daemon=True)
573
- cleanup_thread.start()
574
-
575
- def get_cache_stats(self) -> Dict[str, Any]:
576
- """Get cache statistics."""
577
- with self.cache_lock:
578
- cache_size = len(self.stats_cache)
579
- memory_usage_mb = self.current_memory_usage / (1024 * 1024)
580
- memory_limit_mb = self.MAX_CACHE_MEMORY_BYTES / (1024 * 1024)
581
-
582
- # Calculate hit rate if we have access data
583
- total_accesses = sum(entry.access_count for entry in self.stats_cache.values())
584
-
585
- return {
586
- 'cache_size': cache_size,
587
- 'cache_size_limit': self.cache_size_limit,
588
- 'memory_usage_mb': round(memory_usage_mb, 2),
589
- 'memory_limit_mb': round(memory_limit_mb, 2),
590
- 'memory_usage_percent': round((self.current_memory_usage / self.MAX_CACHE_MEMORY_BYTES) * 100, 2),
591
- 'total_accesses': total_accesses,
592
- 'widget_count': len(self.widget_cache_map)
593
- }
594
-
595
- def optimize_cache_size(self, target_cache_size: int = 1000):
596
- """Optimize cache size based on usage patterns."""
597
- stats = self.get_cache_stats()
598
-
599
- if stats['cache_size'] < target_cache_size and self.cache_size_limit < 2000:
600
- # Increase cache size if current size is below target
601
- self.cache_size_limit = min(2000, int(self.cache_size_limit * 1.2))
602
- elif stats['cache_size'] > target_cache_size and self.cache_size_limit > 100:
603
- # Decrease cache size if current size is above target
604
- self.cache_size_limit = max(100, int(self.cache_size_limit * 0.9))
605
-
606
- def precompute_stats(self, texts: List[str], widget_ids: Optional[List[str]] = None):
607
- """Precompute statistics for a list of texts (background processing)."""
608
- for i, text in enumerate(texts):
609
- widget_id = widget_ids[i] if widget_ids and i < len(widget_ids) else None
610
- self.calculate_stats(text, widget_id)
611
-
612
- def get_word_frequency(self, text: str, top_n: int = 10) -> List[Tuple[str, int]]:
613
- """Get word frequency analysis with caching."""
614
- content_hash = self._generate_content_hash(text)
615
-
616
- if content_hash in self.word_frequency_cache:
617
- word_freq = self.word_frequency_cache[content_hash]
618
- else:
619
- words = self.word_pattern.findall(text.lower())
620
- word_freq = defaultdict(int)
621
- for word in words:
622
- word_freq[word] += 1
623
-
624
- # Cache the result (limit cache size)
625
- if len(self.word_frequency_cache) >= 100:
626
- # Remove oldest entry
627
- oldest_key = next(iter(self.word_frequency_cache))
628
- self.word_frequency_cache.pop(oldest_key)
629
-
630
- self.word_frequency_cache[content_hash] = dict(word_freq)
631
-
632
- # Return top N words
633
- return sorted(word_freq.items(), key=lambda x: x[1], reverse=True)[:top_n]
634
-
635
- class CachedStatsManager:
636
- """
637
- Manager for multiple SmartStatsCalculator instances with global optimization.
638
- """
639
-
640
- def __init__(self):
641
- self.calculators: Dict[str, SmartStatsCalculator] = {}
642
- self.active_tool: Optional[str] = None
643
-
644
- def get_calculator(self, calculator_id: str, **kwargs) -> SmartStatsCalculator:
645
- """Get or create a stats calculator."""
646
- if calculator_id not in self.calculators:
647
- self.calculators[calculator_id] = SmartStatsCalculator(**kwargs)
648
- return self.calculators[calculator_id]
649
-
650
- def on_tool_switch(self, old_tool: Optional[str], new_tool: str):
651
- """Handle tool switching - cleanup caches for old tool."""
652
- if old_tool and old_tool in self.calculators:
653
- # Clear cache for widgets associated with old tool
654
- calc = self.calculators[old_tool]
655
- # Clear all widget caches for this tool
656
- widget_ids = list(calc.widget_cache_map.keys())
657
- for widget_id in widget_ids:
658
- calc.clear_widget_cache(widget_id)
659
-
660
- self.active_tool = new_tool
661
-
662
- def get_global_stats(self) -> Dict[str, Any]:
663
- """Get aggregated statistics across all calculators."""
664
- total_cache_size = 0
665
- total_memory_mb = 0.0
666
-
667
- for calc in self.calculators.values():
668
- stats = calc.get_cache_stats()
669
- total_cache_size += stats['cache_size']
670
- total_memory_mb += stats['memory_usage_mb']
671
-
672
- return {
673
- 'calculators_count': len(self.calculators),
674
- 'total_cache_size': total_cache_size,
675
- 'total_memory_mb': round(total_memory_mb, 2),
676
- 'active_tool': self.active_tool
677
- }
678
-
679
- def optimize_all_caches(self):
680
- """Optimize all calculator caches."""
681
- for calc in self.calculators.values():
682
- calc.optimize_cache_size()
683
-
684
- def clear_all_caches(self):
685
- """Clear all caches."""
686
- for calc in self.calculators.values():
687
- calc.clear_cache()
688
-
689
- def perform_global_cleanup(self):
690
- """Perform cleanup across all calculators."""
691
- for calc in self.calculators.values():
692
- calc._perform_periodic_cleanup()
693
-
694
- # Global instances
695
- _global_stats_calculator = None
696
- _global_stats_manager = None
697
-
698
- def get_smart_stats_calculator() -> SmartStatsCalculator:
699
- """Get the global smart stats calculator instance."""
700
- global _global_stats_calculator
701
- if _global_stats_calculator is None:
702
- _global_stats_calculator = SmartStatsCalculator()
703
- return _global_stats_calculator
704
-
705
- def get_stats_manager() -> CachedStatsManager:
706
- """Get the global stats manager instance."""
707
- global _global_stats_manager
708
- if _global_stats_manager is None:
709
- _global_stats_manager = CachedStatsManager()
1
+ """
2
+ Smart statistics calculator with intelligent caching for Promera AI Commander.
3
+ Provides efficient text statistics calculation with incremental updates and caching.
4
+ """
5
+
6
+ import re
7
+ import time
8
+ import hashlib
9
+ import threading
10
+ import sys
11
+ from typing import Dict, List, Optional, Any, Tuple
12
+ from dataclasses import dataclass, field
13
+ from collections import defaultdict
14
+ import weakref
15
+
16
+ # Import optimized pattern engine
17
+ from core.optimized_pattern_engine import get_pattern_engine, OptimizedPatternEngine
18
+
19
@dataclass
class ChangeInfo:
    """Describes a single text edit, used to drive incremental stat updates."""
    start_pos: int
    end_pos: int
    inserted_text: str
    deleted_length: int
    change_type: str  # one of: "insert", "delete", "replace"
    timestamp: float = field(default_factory=time.time)

    def is_minor_change(self) -> bool:
        """Return True when the edit is small enough for an incremental update.

        "Small" means at most 10 characters inserted and/or deleted.
        """
        kind = self.change_type
        if kind == "insert":
            return len(self.inserted_text) <= 10
        if kind == "delete":
            return self.deleted_length <= 10
        if kind == "replace":
            return len(self.inserted_text) <= 10 and self.deleted_length <= 10
        # Unknown change types are never treated as minor.
        return False

    def affects_statistics(self) -> bool:
        """Return True when the edit may move word/sentence boundaries.

        Only insertions can be ruled out (no boundary characters added);
        deletes and replaces always count as affecting statistics.
        """
        if self.change_type != "insert":
            return True
        boundary_chars = (' ', '\n', '.', '!', '?')
        return any(ch in self.inserted_text for ch in boundary_chars)
46
+
47
@dataclass
class TextStats:
    """Comprehensive text statistics plus calculation metadata."""
    # Basic counters
    char_count: int = 0
    word_count: int = 0
    sentence_count: int = 0
    line_count: int = 0
    paragraph_count: int = 0
    token_count: int = 0

    # Advanced statistics
    unique_words: int = 0
    average_word_length: float = 0.0
    average_sentence_length: float = 0.0
    reading_time_minutes: float = 0.0

    # Metadata about how and when these stats were produced
    content_hash: str = ""
    calculation_time_ms: float = 0.0
    timestamp: float = field(default_factory=time.time)
    cache_hit: bool = False
    incremental_update: bool = False
    processing_method: str = "full"  # full, incremental, cached
    memory_usage_bytes: int = 0

    def is_stale(self, max_age_seconds: int = 300) -> bool:
        """Check if statistics are stale (older than max_age_seconds)."""
        return (time.time() - self.timestamp) > max_age_seconds

    def to_status_string(self) -> str:
        """Convert to status bar string format."""
        formatted_bytes = self._format_bytes(self.char_count)
        return f"Bytes: {formatted_bytes} | Word: {self.word_count} | Sentence: {self.sentence_count} | Line: {self.line_count} | Tokens: {self.token_count}"

    def _format_bytes(self, byte_count):
        """Format byte count with K/M suffixes for readability.

        BUGFIX: the previous version appended the 'K'/'M' suffix *before*
        calling ``rstrip('0').rstrip('.')``; since the string then ended in
        a letter, the strip never matched and 1000 rendered as "1.0K"
        instead of the intended "1K". The suffix is now appended after the
        numeric part is trimmed.
        """
        if byte_count >= 1000000:
            value = byte_count / 1000000
            suffix = "M"
        elif byte_count >= 1000:
            value = byte_count / 1000
            # Promote to M when rounding would display ">= 1000.0K".
            if round(value, 1) >= 1000:
                value = value / 1000
                suffix = "M"
            else:
                suffix = "K"
        else:
            return str(byte_count)

        # Remove trailing zeros and decimal point if not needed: "1.0" -> "1".
        return f"{value:.1f}".rstrip('0').rstrip('.') + suffix

    def to_detailed_dict(self) -> Dict[str, Any]:
        """Convert to detailed dictionary for analysis."""
        return {
            'basic': {
                'characters': self.char_count,
                'words': self.word_count,
                'sentences': self.sentence_count,
                'lines': self.line_count,
                'paragraphs': self.paragraph_count,
                'tokens': self.token_count
            },
            'advanced': {
                'unique_words': self.unique_words,
                'average_word_length': self.average_word_length,
                'average_sentence_length': self.average_sentence_length,
                'reading_time_minutes': self.reading_time_minutes
            },
            'metadata': {
                'content_hash': self.content_hash,
                'calculation_time_ms': self.calculation_time_ms,
                'timestamp': self.timestamp
            }
        }
122
+
123
@dataclass
class CacheEntry:
    """A cached TextStats value plus usage metadata for eviction decisions."""
    stats: TextStats
    access_count: int = 0
    last_access: float = field(default_factory=time.time)
    size_estimate: int = 0
    widget_id: Optional[str] = None

    @property
    def age_seconds(self) -> float:
        """Seconds elapsed since this entry was last accessed."""
        return time.time() - self.last_access

    @property
    def access_frequency(self) -> float:
        """Accesses per hour; the age is floored to avoid division by zero."""
        hours = max(self.age_seconds / 3600, 0.01)
        return self.access_count / hours

    @property
    def memory_usage(self) -> int:
        """Rough byte estimate: content size + stats object + fixed overhead."""
        return self.size_estimate + sys.getsizeof(self.stats) + 200
148
+
149
+ class SmartStatsCalculator:
150
+ """
151
+ Intelligent text statistics calculator with caching and incremental updates.
152
+ """
153
+
154
+ # Memory limits
155
+ MAX_CACHE_MEMORY_BYTES = 50 * 1024 * 1024 # 50MB
156
+ CLEANUP_THRESHOLD_BYTES = 45 * 1024 * 1024 # Start cleanup at 45MB
157
+
158
+ def __init__(self, cache_size_limit: int = 1000, enable_advanced_stats: bool = True):
159
+ self.cache_size_limit = cache_size_limit
160
+ self.enable_advanced_stats = enable_advanced_stats
161
+
162
+ # Cache storage
163
+ self.stats_cache: Dict[str, CacheEntry] = {}
164
+ self.cache_lock = threading.RLock()
165
+
166
+ # Memory tracking
167
+ self.current_memory_usage = 0
168
+ self.last_cleanup_time = time.time()
169
+ self.cleanup_interval = 300 # 5 minutes
170
+
171
+ # Widget tracking for tool switching cleanup
172
+ self.widget_cache_map: Dict[str, List[str]] = defaultdict(list)
173
+
174
+ # Use optimized pattern engine for fast text analysis
175
+ self.pattern_engine = get_pattern_engine()
176
+
177
+ # Fallback regex patterns (for advanced features only)
178
+ self.word_pattern = re.compile(r'\b\w+\b')
179
+ self.sentence_pattern = re.compile(r'[.!?]+')
180
+ self.paragraph_pattern = re.compile(r'\n\s*\n')
181
+
182
+ # Word frequency cache for advanced stats
183
+ self.word_frequency_cache: Dict[str, Dict[str, int]] = {}
184
+
185
+ # Periodic cleanup thread
186
+ self._start_periodic_cleanup()
187
+
188
+ def calculate_stats(self, text: str, widget_id: Optional[str] = None) -> TextStats:
189
+ """
190
+ Calculate text statistics with intelligent caching.
191
+
192
+ Args:
193
+ text: Text content to analyze
194
+ widget_id: Optional widget identifier for cache optimization
195
+
196
+ Returns:
197
+ TextStats object with comprehensive statistics
198
+ """
199
+ if not text.strip():
200
+ return TextStats()
201
+
202
+ # Generate content hash for caching
203
+ content_hash = self._generate_content_hash(text)
204
+
205
+ # Check cache first
206
+ with self.cache_lock:
207
+ if content_hash in self.stats_cache:
208
+ entry = self.stats_cache[content_hash]
209
+ entry.access_count += 1
210
+ entry.last_access = time.time()
211
+
212
+ # Update stats metadata
213
+ stats = entry.stats
214
+ stats.cache_hit = True
215
+ stats.processing_method = "cached"
216
+
217
+ return stats
218
+
219
+ # Cache miss - calculate stats
220
+ start_time = time.time()
221
+ stats = self._calculate_stats_impl(text, content_hash)
222
+ calculation_time = (time.time() - start_time) * 1000
223
+
224
+ stats.calculation_time_ms = calculation_time
225
+ stats.cache_hit = False
226
+ stats.processing_method = "full"
227
+
228
+ # Cache the result
229
+ self._cache_stats(content_hash, stats, len(text), widget_id)
230
+
231
+ # Check if periodic cleanup is needed
232
+ self._check_periodic_cleanup()
233
+
234
+ return stats
235
+
236
+ def calculate_stats_incremental(self,
237
+ text: str,
238
+ previous_stats: Optional[TextStats] = None,
239
+ change_info: Optional[ChangeInfo] = None,
240
+ widget_id: Optional[str] = None) -> TextStats:
241
+ """
242
+ Calculate statistics incrementally when possible.
243
+
244
+ Args:
245
+ text: Current text content
246
+ previous_stats: Previous statistics for comparison
247
+ change_info: Information about what changed (position, length, etc.)
248
+ widget_id: Optional widget identifier
249
+
250
+ Returns:
251
+ Updated TextStats object
252
+ """
253
+ # If no change info or previous stats, do full calculation
254
+ if not change_info or not previous_stats:
255
+ return self.calculate_stats(text, widget_id)
256
+
257
+ # Check if incremental update is beneficial
258
+ if not change_info.is_minor_change() or not change_info.affects_statistics():
259
+ # For non-minor changes or changes that don't affect stats, do full calculation
260
+ return self.calculate_stats(text, widget_id)
261
+
262
+ # Attempt incremental update for minor changes
263
+ start_time = time.time()
264
+
265
+ try:
266
+ stats = self._calculate_incremental_stats(text, previous_stats, change_info)
267
+ calculation_time = (time.time() - start_time) * 1000
268
+
269
+ stats.calculation_time_ms = calculation_time
270
+ stats.incremental_update = True
271
+ stats.processing_method = "incremental"
272
+ stats.cache_hit = False
273
+
274
+ # Cache the result
275
+ content_hash = self._generate_content_hash(text)
276
+ stats.content_hash = content_hash
277
+ self._cache_stats(content_hash, stats, len(text), widget_id)
278
+
279
+ return stats
280
+
281
+ except Exception:
282
+ # Fall back to full calculation on error
283
+ return self.calculate_stats(text, widget_id)
284
+
285
+ def _calculate_incremental_stats(self, text: str, previous_stats: TextStats, change_info: ChangeInfo) -> TextStats:
286
+ """
287
+ Calculate statistics incrementally based on change information.
288
+
289
+ Args:
290
+ text: Current text content
291
+ previous_stats: Previous statistics
292
+ change_info: Information about the change
293
+
294
+ Returns:
295
+ Updated TextStats object
296
+ """
297
+ stats = TextStats()
298
+
299
+ # Start with previous stats
300
+ stats.char_count = previous_stats.char_count
301
+ stats.word_count = previous_stats.word_count
302
+ stats.sentence_count = previous_stats.sentence_count
303
+ stats.line_count = previous_stats.line_count
304
+ stats.paragraph_count = previous_stats.paragraph_count
305
+
306
+ # Adjust based on change type
307
+ if change_info.change_type == "insert":
308
+ # Update character count
309
+ stats.char_count += len(change_info.inserted_text)
310
+
311
+ # Update line count
312
+ new_lines = change_info.inserted_text.count('\n')
313
+ stats.line_count += new_lines
314
+
315
+ # Update word count (approximate)
316
+ new_words = len(change_info.inserted_text.split())
317
+ stats.word_count += new_words
318
+
319
+ # Update sentence count (approximate)
320
+ new_sentences = sum(change_info.inserted_text.count(c) for c in ['.', '!', '?'])
321
+ stats.sentence_count += new_sentences
322
+
323
+ elif change_info.change_type == "delete":
324
+ # For deletions, we need to recalculate (can't reliably decrement)
325
+ # Fall back to full calculation
326
+ return self._calculate_stats_impl(text, "")
327
+
328
+ # Recalculate token count
329
+ stats.token_count = max(1, round(stats.char_count / 4))
330
+
331
+ # Advanced stats require full recalculation
332
+ if self.enable_advanced_stats:
333
+ # For incremental updates, skip advanced stats or recalculate
334
+ pass
335
+
336
+ return stats
337
+
338
+ def _calculate_stats_impl(self, text: str, content_hash: str) -> TextStats:
339
+ """Internal implementation of statistics calculation using optimized pattern engine."""
340
+ stats = TextStats(content_hash=content_hash)
341
+
342
+ # Use optimized pattern engine for fast counting
343
+ text_structure = self.pattern_engine.analyze_text_structure(text)
344
+
345
+ # Basic statistics from optimized engine
346
+ stats.char_count = text_structure.char_count
347
+ stats.line_count = text_structure.line_count
348
+ stats.word_count = text_structure.word_count
349
+ stats.sentence_count = text_structure.sentence_count
350
+ stats.paragraph_count = text_structure.paragraph_count
351
+
352
+ # Token count (rough estimate: 1 token ≈ 4 characters)
353
+ stats.token_count = max(1, round(len(text) / 4))
354
+
355
+ # Advanced statistics (if enabled)
356
+ if self.enable_advanced_stats and stats.word_count > 0:
357
+ # Use regex for word extraction (needed for advanced stats)
358
+ words = self.word_pattern.findall(text)
359
+
360
+ if words:
361
+ stats.unique_words = len(set(word.lower() for word in words))
362
+ stats.average_word_length = sum(len(word) for word in words) / len(words)
363
+
364
+ if stats.sentence_count > 0:
365
+ stats.average_sentence_length = stats.word_count / stats.sentence_count
366
+
367
+ # Reading time estimate (average 200 words per minute)
368
+ stats.reading_time_minutes = stats.word_count / 200.0
369
+
370
+ return stats
371
+
372
+ def _generate_content_hash(self, text: str) -> str:
373
+ """Generate a hash for content caching."""
374
+ # Use a combination of length and content hash for efficiency
375
+ content_sample = text[:100] + text[-100:] if len(text) > 200 else text
376
+ hash_input = f"{len(text)}_{content_sample}"
377
+ return hashlib.md5(hash_input.encode('utf-8')).hexdigest()[:16]
378
+
379
+ def _cache_stats(self, content_hash: str, stats: TextStats, content_size: int, widget_id: Optional[str] = None):
380
+ """Cache statistics with intelligent cache management."""
381
+ with self.cache_lock:
382
+ # Create cache entry
383
+ entry = CacheEntry(
384
+ stats=stats,
385
+ access_count=1,
386
+ size_estimate=content_size,
387
+ widget_id=widget_id
388
+ )
389
+
390
+ # Track widget association
391
+ if widget_id:
392
+ self.widget_cache_map[widget_id].append(content_hash)
393
+
394
+ # Check memory usage before adding
395
+ entry_memory = entry.memory_usage
396
+ if self.current_memory_usage + entry_memory > self.MAX_CACHE_MEMORY_BYTES:
397
+ self._evict_by_memory_pressure()
398
+
399
+ # Check if cache is full by count
400
+ if len(self.stats_cache) >= self.cache_size_limit:
401
+ self._evict_cache_entries()
402
+
403
+ # Store in cache
404
+ self.stats_cache[content_hash] = entry
405
+ self.current_memory_usage += entry_memory
406
+
407
+ # Update stats memory usage
408
+ stats.memory_usage_bytes = entry_memory
409
+
410
+ def _evict_cache_entries(self):
411
+ """Evict cache entries using intelligent LRU + frequency algorithm."""
412
+ if not self.stats_cache:
413
+ return
414
+
415
+ # Calculate eviction scores (lower score = more likely to evict)
416
+ entries_with_scores = []
417
+ current_time = time.time()
418
+
419
+ for hash_key, entry in self.stats_cache.items():
420
+ # Score based on recency, frequency, and size
421
+ recency_score = 1.0 / max(entry.age_seconds, 1.0)
422
+ frequency_score = entry.access_frequency
423
+ size_penalty = entry.size_estimate / 10000.0 # Penalize large entries
424
+
425
+ score = (recency_score * 0.4 + frequency_score * 0.5) - (size_penalty * 0.1)
426
+ entries_with_scores.append((score, hash_key))
427
+
428
+ # Sort by score (lowest first) and evict bottom 25%
429
+ entries_with_scores.sort()
430
+ evict_count = max(1, len(entries_with_scores) // 4)
431
+
432
+ for _, hash_key in entries_with_scores[:evict_count]:
433
+ entry = self.stats_cache.pop(hash_key, None)
434
+ if entry:
435
+ self.current_memory_usage -= entry.memory_usage
436
+ # Remove from widget map
437
+ if entry.widget_id and entry.widget_id in self.widget_cache_map:
438
+ try:
439
+ self.widget_cache_map[entry.widget_id].remove(hash_key)
440
+ except ValueError:
441
+ pass
442
+
443
+ def _evict_by_memory_pressure(self):
444
+ """Evict cache entries when memory usage exceeds limits."""
445
+ if not self.stats_cache:
446
+ return
447
+
448
+ # Calculate how much memory we need to free
449
+ target_memory = self.CLEANUP_THRESHOLD_BYTES
450
+ memory_to_free = self.current_memory_usage - target_memory
451
+
452
+ if memory_to_free <= 0:
453
+ return
454
+
455
+ # Sort entries by eviction priority (oldest, least accessed, largest)
456
+ entries_with_priority = []
457
+
458
+ for hash_key, entry in self.stats_cache.items():
459
+ # Priority score (higher = more likely to evict)
460
+ age_score = entry.age_seconds
461
+ access_score = 1.0 / max(entry.access_count, 1)
462
+ size_score = entry.memory_usage / 1024.0 # KB
463
+
464
+ priority = (age_score * 0.4) + (access_score * 0.3) + (size_score * 0.3)
465
+ entries_with_priority.append((priority, hash_key, entry.memory_usage))
466
+
467
+ # Sort by priority (highest first) and evict until we free enough memory
468
+ entries_with_priority.sort(reverse=True)
469
+
470
+ freed_memory = 0
471
+ for priority, hash_key, entry_memory in entries_with_priority:
472
+ if freed_memory >= memory_to_free:
473
+ break
474
+
475
+ entry = self.stats_cache.pop(hash_key, None)
476
+ if entry:
477
+ self.current_memory_usage -= entry.memory_usage
478
+ freed_memory += entry.memory_usage
479
+
480
+ # Remove from widget map
481
+ if entry.widget_id and entry.widget_id in self.widget_cache_map:
482
+ try:
483
+ self.widget_cache_map[entry.widget_id].remove(hash_key)
484
+ except ValueError:
485
+ pass
486
+
487
+ def get_cached_stats(self, content_hash: str) -> Optional[TextStats]:
488
+ """Get cached statistics by content hash."""
489
+ with self.cache_lock:
490
+ if content_hash in self.stats_cache:
491
+ entry = self.stats_cache[content_hash]
492
+ entry.access_count += 1
493
+ entry.last_access = time.time()
494
+ return entry.stats
495
+ return None
496
+
497
+ def clear_cache(self):
498
+ """Clear all cached statistics."""
499
+ with self.cache_lock:
500
+ self.stats_cache.clear()
501
+ self.current_memory_usage = 0
502
+ self.widget_cache_map.clear()
503
+
504
+ def clear_widget_cache(self, widget_id: str):
505
+ """Clear cache entries associated with a specific widget (for tool switching)."""
506
+ with self.cache_lock:
507
+ if widget_id not in self.widget_cache_map:
508
+ return
509
+
510
+ # Get all cache keys for this widget
511
+ cache_keys = self.widget_cache_map[widget_id].copy()
512
+
513
+ # Remove each entry
514
+ for cache_key in cache_keys:
515
+ entry = self.stats_cache.pop(cache_key, None)
516
+ if entry:
517
+ self.current_memory_usage -= entry.memory_usage
518
+
519
+ # Clear widget mapping
520
+ self.widget_cache_map.pop(widget_id, None)
521
+
522
+ def _check_periodic_cleanup(self):
523
+ """Check if periodic cleanup is needed and perform it."""
524
+ current_time = time.time()
525
+
526
+ # Check if cleanup interval has passed
527
+ if current_time - self.last_cleanup_time < self.cleanup_interval:
528
+ return
529
+
530
+ # Perform cleanup
531
+ self._perform_periodic_cleanup()
532
+ self.last_cleanup_time = current_time
533
+
534
+ def _perform_periodic_cleanup(self):
535
+ """Perform periodic memory cleanup."""
536
+ with self.cache_lock:
537
+ # Remove stale entries (older than 10 minutes)
538
+ stale_keys = []
539
+
540
+ for hash_key, entry in self.stats_cache.items():
541
+ if entry.age_seconds > 600: # 10 minutes
542
+ stale_keys.append(hash_key)
543
+
544
+ # Remove stale entries
545
+ for hash_key in stale_keys:
546
+ entry = self.stats_cache.pop(hash_key, None)
547
+ if entry:
548
+ self.current_memory_usage -= entry.memory_usage
549
+
550
+ # Remove from widget map
551
+ if entry.widget_id and entry.widget_id in self.widget_cache_map:
552
+ try:
553
+ self.widget_cache_map[entry.widget_id].remove(hash_key)
554
+ except ValueError:
555
+ pass
556
+
557
+ # Check memory usage and evict if needed
558
+ if self.current_memory_usage > self.CLEANUP_THRESHOLD_BYTES:
559
+ self._evict_by_memory_pressure()
560
+
561
+ def _start_periodic_cleanup(self):
562
+ """Start background thread for periodic cleanup."""
563
+ def cleanup_worker():
564
+ while True:
565
+ time.sleep(self.cleanup_interval)
566
+ try:
567
+ self._perform_periodic_cleanup()
568
+ except Exception:
569
+ pass # Silently handle errors in background thread
570
+
571
+ # Start daemon thread
572
+ cleanup_thread = threading.Thread(target=cleanup_worker, daemon=True)
573
+ cleanup_thread.start()
574
+
575
+ def get_cache_stats(self) -> Dict[str, Any]:
576
+ """Get cache statistics."""
577
+ with self.cache_lock:
578
+ cache_size = len(self.stats_cache)
579
+ memory_usage_mb = self.current_memory_usage / (1024 * 1024)
580
+ memory_limit_mb = self.MAX_CACHE_MEMORY_BYTES / (1024 * 1024)
581
+
582
+ # Calculate hit rate if we have access data
583
+ total_accesses = sum(entry.access_count for entry in self.stats_cache.values())
584
+
585
+ return {
586
+ 'cache_size': cache_size,
587
+ 'cache_size_limit': self.cache_size_limit,
588
+ 'memory_usage_mb': round(memory_usage_mb, 2),
589
+ 'memory_limit_mb': round(memory_limit_mb, 2),
590
+ 'memory_usage_percent': round((self.current_memory_usage / self.MAX_CACHE_MEMORY_BYTES) * 100, 2),
591
+ 'total_accesses': total_accesses,
592
+ 'widget_count': len(self.widget_cache_map)
593
+ }
594
+
595
+ def optimize_cache_size(self, target_cache_size: int = 1000):
596
+ """Optimize cache size based on usage patterns."""
597
+ stats = self.get_cache_stats()
598
+
599
+ if stats['cache_size'] < target_cache_size and self.cache_size_limit < 2000:
600
+ # Increase cache size if current size is below target
601
+ self.cache_size_limit = min(2000, int(self.cache_size_limit * 1.2))
602
+ elif stats['cache_size'] > target_cache_size and self.cache_size_limit > 100:
603
+ # Decrease cache size if current size is above target
604
+ self.cache_size_limit = max(100, int(self.cache_size_limit * 0.9))
605
+
606
+ def precompute_stats(self, texts: List[str], widget_ids: Optional[List[str]] = None):
607
+ """Precompute statistics for a list of texts (background processing)."""
608
+ for i, text in enumerate(texts):
609
+ widget_id = widget_ids[i] if widget_ids and i < len(widget_ids) else None
610
+ self.calculate_stats(text, widget_id)
611
+
612
+ def get_word_frequency(self, text: str, top_n: int = 10) -> List[Tuple[str, int]]:
613
+ """Get word frequency analysis with caching."""
614
+ content_hash = self._generate_content_hash(text)
615
+
616
+ if content_hash in self.word_frequency_cache:
617
+ word_freq = self.word_frequency_cache[content_hash]
618
+ else:
619
+ words = self.word_pattern.findall(text.lower())
620
+ word_freq = defaultdict(int)
621
+ for word in words:
622
+ word_freq[word] += 1
623
+
624
+ # Cache the result (limit cache size)
625
+ if len(self.word_frequency_cache) >= 100:
626
+ # Remove oldest entry
627
+ oldest_key = next(iter(self.word_frequency_cache))
628
+ self.word_frequency_cache.pop(oldest_key)
629
+
630
+ self.word_frequency_cache[content_hash] = dict(word_freq)
631
+
632
+ # Return top N words
633
+ return sorted(word_freq.items(), key=lambda x: x[1], reverse=True)[:top_n]
634
+
635
class CachedStatsManager:
    """Coordinates several SmartStatsCalculator instances and their caches."""

    def __init__(self):
        # calculator id -> calculator instance
        self.calculators: Dict[str, SmartStatsCalculator] = {}
        self.active_tool: Optional[str] = None

    def get_calculator(self, calculator_id: str, **kwargs) -> SmartStatsCalculator:
        """Return the calculator for *calculator_id*, creating it on first use."""
        calc = self.calculators.get(calculator_id)
        if calc is None:
            calc = SmartStatsCalculator(**kwargs)
            self.calculators[calculator_id] = calc
        return calc

    def on_tool_switch(self, old_tool: Optional[str], new_tool: str):
        """Drop widget caches of the tool being left, then record the new tool."""
        previous = self.calculators.get(old_tool) if old_tool else None
        if previous is not None:
            # Snapshot the keys: clear_widget_cache mutates the map.
            for widget_id in list(previous.widget_cache_map.keys()):
                previous.clear_widget_cache(widget_id)

        self.active_tool = new_tool

    def get_global_stats(self) -> Dict[str, Any]:
        """Aggregate cache size and memory usage across all calculators."""
        snapshots = [calc.get_cache_stats() for calc in self.calculators.values()]
        return {
            'calculators_count': len(self.calculators),
            'total_cache_size': sum(s['cache_size'] for s in snapshots),
            'total_memory_mb': round(sum(s['memory_usage_mb'] for s in snapshots), 2),
            'active_tool': self.active_tool
        }

    def optimize_all_caches(self):
        """Run size optimization on every calculator."""
        for calc in self.calculators.values():
            calc.optimize_cache_size()

    def clear_all_caches(self):
        """Clear every calculator's cache."""
        for calc in self.calculators.values():
            calc.clear_cache()

    def perform_global_cleanup(self):
        """Trigger the periodic-cleanup pass on every calculator."""
        for calc in self.calculators.values():
            calc._perform_periodic_cleanup()
693
+
694
+ # Global instances
695
+ _global_stats_calculator = None
696
+ _global_stats_manager = None
697
+
698
def get_smart_stats_calculator() -> SmartStatsCalculator:
    """Return the lazily created process-wide SmartStatsCalculator."""
    global _global_stats_calculator
    # First call constructs the singleton; later calls reuse it.
    if _global_stats_calculator is None:
        _global_stats_calculator = SmartStatsCalculator()
    return _global_stats_calculator
704
+
705
def get_stats_manager() -> CachedStatsManager:
    """Return the lazily created process-wide CachedStatsManager."""
    global _global_stats_manager
    # First call constructs the singleton; later calls reuse it.
    if _global_stats_manager is None:
        _global_stats_manager = CachedStatsManager()
    return _global_stats_manager