pomera-ai-commander 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192)
  1. package/LICENSE +21 -0
  2. package/README.md +680 -0
  3. package/bin/pomera-ai-commander.js +62 -0
  4. package/core/__init__.py +66 -0
  5. package/core/__pycache__/__init__.cpython-313.pyc +0 -0
  6. package/core/__pycache__/app_context.cpython-313.pyc +0 -0
  7. package/core/__pycache__/async_text_processor.cpython-313.pyc +0 -0
  8. package/core/__pycache__/backup_manager.cpython-313.pyc +0 -0
  9. package/core/__pycache__/backup_recovery_manager.cpython-313.pyc +0 -0
  10. package/core/__pycache__/content_hash_cache.cpython-313.pyc +0 -0
  11. package/core/__pycache__/context_menu.cpython-313.pyc +0 -0
  12. package/core/__pycache__/data_validator.cpython-313.pyc +0 -0
  13. package/core/__pycache__/database_connection_manager.cpython-313.pyc +0 -0
  14. package/core/__pycache__/database_curl_settings_manager.cpython-313.pyc +0 -0
  15. package/core/__pycache__/database_promera_ai_settings_manager.cpython-313.pyc +0 -0
  16. package/core/__pycache__/database_schema.cpython-313.pyc +0 -0
  17. package/core/__pycache__/database_schema_manager.cpython-313.pyc +0 -0
  18. package/core/__pycache__/database_settings_manager.cpython-313.pyc +0 -0
  19. package/core/__pycache__/database_settings_manager_interface.cpython-313.pyc +0 -0
  20. package/core/__pycache__/dialog_manager.cpython-313.pyc +0 -0
  21. package/core/__pycache__/efficient_line_numbers.cpython-313.pyc +0 -0
  22. package/core/__pycache__/error_handler.cpython-313.pyc +0 -0
  23. package/core/__pycache__/error_service.cpython-313.pyc +0 -0
  24. package/core/__pycache__/event_consolidator.cpython-313.pyc +0 -0
  25. package/core/__pycache__/memory_efficient_text_widget.cpython-313.pyc +0 -0
  26. package/core/__pycache__/migration_manager.cpython-313.pyc +0 -0
  27. package/core/__pycache__/migration_test_suite.cpython-313.pyc +0 -0
  28. package/core/__pycache__/migration_validator.cpython-313.pyc +0 -0
  29. package/core/__pycache__/optimized_find_replace.cpython-313.pyc +0 -0
  30. package/core/__pycache__/optimized_pattern_engine.cpython-313.pyc +0 -0
  31. package/core/__pycache__/optimized_search_highlighter.cpython-313.pyc +0 -0
  32. package/core/__pycache__/performance_monitor.cpython-313.pyc +0 -0
  33. package/core/__pycache__/persistence_manager.cpython-313.pyc +0 -0
  34. package/core/__pycache__/progressive_stats_calculator.cpython-313.pyc +0 -0
  35. package/core/__pycache__/regex_pattern_cache.cpython-313.pyc +0 -0
  36. package/core/__pycache__/regex_pattern_library.cpython-313.pyc +0 -0
  37. package/core/__pycache__/search_operation_manager.cpython-313.pyc +0 -0
  38. package/core/__pycache__/settings_defaults_registry.cpython-313.pyc +0 -0
  39. package/core/__pycache__/settings_integrity_validator.cpython-313.pyc +0 -0
  40. package/core/__pycache__/settings_serializer.cpython-313.pyc +0 -0
  41. package/core/__pycache__/settings_validator.cpython-313.pyc +0 -0
  42. package/core/__pycache__/smart_stats_calculator.cpython-313.pyc +0 -0
  43. package/core/__pycache__/statistics_update_manager.cpython-313.pyc +0 -0
  44. package/core/__pycache__/stats_config_manager.cpython-313.pyc +0 -0
  45. package/core/__pycache__/streaming_text_handler.cpython-313.pyc +0 -0
  46. package/core/__pycache__/task_scheduler.cpython-313.pyc +0 -0
  47. package/core/__pycache__/visibility_monitor.cpython-313.pyc +0 -0
  48. package/core/__pycache__/widget_cache.cpython-313.pyc +0 -0
  49. package/core/app_context.py +482 -0
  50. package/core/async_text_processor.py +422 -0
  51. package/core/backup_manager.py +656 -0
  52. package/core/backup_recovery_manager.py +1034 -0
  53. package/core/content_hash_cache.py +509 -0
  54. package/core/context_menu.py +313 -0
  55. package/core/data_validator.py +1067 -0
  56. package/core/database_connection_manager.py +745 -0
  57. package/core/database_curl_settings_manager.py +609 -0
  58. package/core/database_promera_ai_settings_manager.py +447 -0
  59. package/core/database_schema.py +412 -0
  60. package/core/database_schema_manager.py +396 -0
  61. package/core/database_settings_manager.py +1508 -0
  62. package/core/database_settings_manager_interface.py +457 -0
  63. package/core/dialog_manager.py +735 -0
  64. package/core/efficient_line_numbers.py +511 -0
  65. package/core/error_handler.py +747 -0
  66. package/core/error_service.py +431 -0
  67. package/core/event_consolidator.py +512 -0
  68. package/core/mcp/__init__.py +43 -0
  69. package/core/mcp/__pycache__/__init__.cpython-313.pyc +0 -0
  70. package/core/mcp/__pycache__/protocol.cpython-313.pyc +0 -0
  71. package/core/mcp/__pycache__/schema.cpython-313.pyc +0 -0
  72. package/core/mcp/__pycache__/server_stdio.cpython-313.pyc +0 -0
  73. package/core/mcp/__pycache__/tool_registry.cpython-313.pyc +0 -0
  74. package/core/mcp/protocol.py +288 -0
  75. package/core/mcp/schema.py +251 -0
  76. package/core/mcp/server_stdio.py +299 -0
  77. package/core/mcp/tool_registry.py +2345 -0
  78. package/core/memory_efficient_text_widget.py +712 -0
  79. package/core/migration_manager.py +915 -0
  80. package/core/migration_test_suite.py +1086 -0
  81. package/core/migration_validator.py +1144 -0
  82. package/core/optimized_find_replace.py +715 -0
  83. package/core/optimized_pattern_engine.py +424 -0
  84. package/core/optimized_search_highlighter.py +553 -0
  85. package/core/performance_monitor.py +675 -0
  86. package/core/persistence_manager.py +713 -0
  87. package/core/progressive_stats_calculator.py +632 -0
  88. package/core/regex_pattern_cache.py +530 -0
  89. package/core/regex_pattern_library.py +351 -0
  90. package/core/search_operation_manager.py +435 -0
  91. package/core/settings_defaults_registry.py +1087 -0
  92. package/core/settings_integrity_validator.py +1112 -0
  93. package/core/settings_serializer.py +558 -0
  94. package/core/settings_validator.py +1824 -0
  95. package/core/smart_stats_calculator.py +710 -0
  96. package/core/statistics_update_manager.py +619 -0
  97. package/core/stats_config_manager.py +858 -0
  98. package/core/streaming_text_handler.py +723 -0
  99. package/core/task_scheduler.py +596 -0
  100. package/core/update_pattern_library.py +169 -0
  101. package/core/visibility_monitor.py +596 -0
  102. package/core/widget_cache.py +498 -0
  103. package/mcp.json +61 -0
  104. package/package.json +57 -0
  105. package/pomera.py +7483 -0
  106. package/pomera_mcp_server.py +144 -0
  107. package/tools/__init__.py +5 -0
  108. package/tools/__pycache__/__init__.cpython-313.pyc +0 -0
  109. package/tools/__pycache__/ai_tools.cpython-313.pyc +0 -0
  110. package/tools/__pycache__/ascii_art_generator.cpython-313.pyc +0 -0
  111. package/tools/__pycache__/base64_tools.cpython-313.pyc +0 -0
  112. package/tools/__pycache__/base_tool.cpython-313.pyc +0 -0
  113. package/tools/__pycache__/case_tool.cpython-313.pyc +0 -0
  114. package/tools/__pycache__/column_tools.cpython-313.pyc +0 -0
  115. package/tools/__pycache__/cron_tool.cpython-313.pyc +0 -0
  116. package/tools/__pycache__/curl_history.cpython-313.pyc +0 -0
  117. package/tools/__pycache__/curl_processor.cpython-313.pyc +0 -0
  118. package/tools/__pycache__/curl_settings.cpython-313.pyc +0 -0
  119. package/tools/__pycache__/curl_tool.cpython-313.pyc +0 -0
  120. package/tools/__pycache__/diff_viewer.cpython-313.pyc +0 -0
  121. package/tools/__pycache__/email_extraction_tool.cpython-313.pyc +0 -0
  122. package/tools/__pycache__/email_header_analyzer.cpython-313.pyc +0 -0
  123. package/tools/__pycache__/extraction_tools.cpython-313.pyc +0 -0
  124. package/tools/__pycache__/find_replace.cpython-313.pyc +0 -0
  125. package/tools/__pycache__/folder_file_reporter.cpython-313.pyc +0 -0
  126. package/tools/__pycache__/folder_file_reporter_adapter.cpython-313.pyc +0 -0
  127. package/tools/__pycache__/generator_tools.cpython-313.pyc +0 -0
  128. package/tools/__pycache__/hash_generator.cpython-313.pyc +0 -0
  129. package/tools/__pycache__/html_tool.cpython-313.pyc +0 -0
  130. package/tools/__pycache__/huggingface_helper.cpython-313.pyc +0 -0
  131. package/tools/__pycache__/jsonxml_tool.cpython-313.pyc +0 -0
  132. package/tools/__pycache__/line_tools.cpython-313.pyc +0 -0
  133. package/tools/__pycache__/list_comparator.cpython-313.pyc +0 -0
  134. package/tools/__pycache__/markdown_tools.cpython-313.pyc +0 -0
  135. package/tools/__pycache__/mcp_widget.cpython-313.pyc +0 -0
  136. package/tools/__pycache__/notes_widget.cpython-313.pyc +0 -0
  137. package/tools/__pycache__/number_base_converter.cpython-313.pyc +0 -0
  138. package/tools/__pycache__/regex_extractor.cpython-313.pyc +0 -0
  139. package/tools/__pycache__/slug_generator.cpython-313.pyc +0 -0
  140. package/tools/__pycache__/sorter_tools.cpython-313.pyc +0 -0
  141. package/tools/__pycache__/string_escape_tool.cpython-313.pyc +0 -0
  142. package/tools/__pycache__/text_statistics_tool.cpython-313.pyc +0 -0
  143. package/tools/__pycache__/text_wrapper.cpython-313.pyc +0 -0
  144. package/tools/__pycache__/timestamp_converter.cpython-313.pyc +0 -0
  145. package/tools/__pycache__/tool_loader.cpython-313.pyc +0 -0
  146. package/tools/__pycache__/translator_tools.cpython-313.pyc +0 -0
  147. package/tools/__pycache__/url_link_extractor.cpython-313.pyc +0 -0
  148. package/tools/__pycache__/url_parser.cpython-313.pyc +0 -0
  149. package/tools/__pycache__/whitespace_tools.cpython-313.pyc +0 -0
  150. package/tools/__pycache__/word_frequency_counter.cpython-313.pyc +0 -0
  151. package/tools/ai_tools.py +2892 -0
  152. package/tools/ascii_art_generator.py +353 -0
  153. package/tools/base64_tools.py +184 -0
  154. package/tools/base_tool.py +511 -0
  155. package/tools/case_tool.py +309 -0
  156. package/tools/column_tools.py +396 -0
  157. package/tools/cron_tool.py +885 -0
  158. package/tools/curl_history.py +601 -0
  159. package/tools/curl_processor.py +1208 -0
  160. package/tools/curl_settings.py +503 -0
  161. package/tools/curl_tool.py +5467 -0
  162. package/tools/diff_viewer.py +1072 -0
  163. package/tools/email_extraction_tool.py +249 -0
  164. package/tools/email_header_analyzer.py +426 -0
  165. package/tools/extraction_tools.py +250 -0
  166. package/tools/find_replace.py +1751 -0
  167. package/tools/folder_file_reporter.py +1463 -0
  168. package/tools/folder_file_reporter_adapter.py +480 -0
  169. package/tools/generator_tools.py +1217 -0
  170. package/tools/hash_generator.py +256 -0
  171. package/tools/html_tool.py +657 -0
  172. package/tools/huggingface_helper.py +449 -0
  173. package/tools/jsonxml_tool.py +730 -0
  174. package/tools/line_tools.py +419 -0
  175. package/tools/list_comparator.py +720 -0
  176. package/tools/markdown_tools.py +562 -0
  177. package/tools/mcp_widget.py +1417 -0
  178. package/tools/notes_widget.py +973 -0
  179. package/tools/number_base_converter.py +373 -0
  180. package/tools/regex_extractor.py +572 -0
  181. package/tools/slug_generator.py +311 -0
  182. package/tools/sorter_tools.py +459 -0
  183. package/tools/string_escape_tool.py +393 -0
  184. package/tools/text_statistics_tool.py +366 -0
  185. package/tools/text_wrapper.py +431 -0
  186. package/tools/timestamp_converter.py +422 -0
  187. package/tools/tool_loader.py +710 -0
  188. package/tools/translator_tools.py +523 -0
  189. package/tools/url_link_extractor.py +262 -0
  190. package/tools/url_parser.py +205 -0
  191. package/tools/whitespace_tools.py +356 -0
  192. package/tools/word_frequency_counter.py +147 -0
@@ -0,0 +1,509 @@
1
+ """
2
+ Content hash-based caching system for processed results in Promera AI Commander.
3
+ Provides intelligent caching of text processing results using content hashing.
4
+ """
5
+
6
+ import hashlib
7
+ import time
8
+ import threading
9
+ import pickle
10
+ import zlib
11
+ from typing import Dict, List, Optional, Any, Tuple, Union
12
+ from dataclasses import dataclass, field
13
+ from collections import OrderedDict
14
+ import weakref
15
+ import os
16
+
17
@dataclass
class ProcessedResult:
    """Container for one cached text-processing result plus bookkeeping metadata."""

    content: str                    # processed output (may be zlib-compressed bytes when compression is enabled)
    tool_name: str                  # name of the tool that produced this result
    tool_settings: Dict[str, Any]   # tool configuration used for processing
    processing_time_ms: float       # time the original processing took
    content_hash: str               # fingerprint of the input content
    result_hash: str                # fingerprint of the processed output
    timestamp: float = field(default_factory=time.time)    # creation time
    access_count: int = 0                                  # number of cache hits served
    last_access: float = field(default_factory=time.time)  # most recent hit time

    @property
    def age_seconds(self) -> float:
        """Seconds elapsed since this entry was created."""
        return time.time() - self.timestamp

    @property
    def size_estimate(self) -> int:
        """Rough in-memory footprint in bytes: content + settings repr + fixed overhead."""
        return len(self.content) + len(str(self.tool_settings)) + 200  # Overhead
40
@dataclass
class CacheMetrics:
    """Running counters describing cache effectiveness."""

    hits: int = 0
    misses: int = 0
    evictions: int = 0
    total_processing_time_saved_ms: float = 0.0
    cache_size_bytes: int = 0

    @property
    def hit_rate(self) -> float:
        """Hit rate as a percentage of all lookups (0.0 before any lookup)."""
        lookups = self.hits + self.misses
        return 100.0 * self.hits / max(lookups, 1)

    @property
    def average_time_saved_ms(self) -> float:
        """Mean processing time saved per cache hit."""
        return self.total_processing_time_saved_ms / max(self.hits, 1)
60
class ContentHashCache:
    """
    Content hash-based cache for processed text results.

    Entries are keyed by a digest of (tool name, input content, tool settings).
    Entries expire according to per-tool TTLs and are evicted under entry-count
    or memory pressure using a value score that weighs recency, access
    frequency, processing time saved, tool priority, and entry size.

    Thread-safe: all public methods acquire an internal RLock.
    """

    def __init__(self,
                 max_cache_size_mb: int = 50,
                 max_entries: int = 1000,
                 enable_compression: bool = True,
                 enable_persistence: bool = False):
        """
        Args:
            max_cache_size_mb: Soft cap on estimated total cache memory.
            max_entries: Maximum number of cached entries.
            enable_compression: Store results zlib-compressed to save memory.
            enable_persistence: Save/load the cache to a pickle file on disk.
        """
        self.max_cache_size_bytes = max_cache_size_mb * 1024 * 1024
        self.max_entries = max_entries
        self.enable_compression = enable_compression
        self.enable_persistence = enable_persistence

        # Cache storage; OrderedDict supplies LRU ordering via move_to_end().
        self.cache: OrderedDict[str, ProcessedResult] = OrderedDict()
        self.cache_lock = threading.RLock()

        # Performance counters.
        self.metrics = CacheMetrics()

        # Per-tool cache policy: eviction priority and time-to-live (hours).
        self.tool_cache_settings = {
            'Case Tool': {'priority': 'high', 'ttl_hours': 24},
            'Find & Replace Text': {'priority': 'medium', 'ttl_hours': 12},
            'URL and Link Extractor': {'priority': 'high', 'ttl_hours': 48},
            'Word Frequency Counter': {'priority': 'medium', 'ttl_hours': 24},
            'Alphabetical Sorter': {'priority': 'high', 'ttl_hours': 48},
            'Number Sorter': {'priority': 'high', 'ttl_hours': 48},
            'Base64 Encoder/Decoder': {'priority': 'low', 'ttl_hours': 6},
            'Binary Code Translator': {'priority': 'low', 'ttl_hours': 6},
            'Morse Code Translator': {'priority': 'low', 'ttl_hours': 6}
        }

        # Optional on-disk persistence.
        if self.enable_persistence:
            self.cache_file = "content_cache.pkl"
            self._load_cache_from_disk()

    def get_cached_result(self,
                          content: str,
                          tool_name: str,
                          tool_settings: Dict[str, Any]) -> Optional[str]:
        """
        Get cached result for processed content.

        Args:
            content: Original text content
            tool_name: Name of the processing tool
            tool_settings: Tool configuration settings

        Returns:
            Cached processed result (always a str) or None if not found/expired.
        """
        cache_key = self._generate_cache_key(content, tool_name, tool_settings)

        with self.cache_lock:
            if cache_key in self.cache:
                result = self.cache[cache_key]

                # Check if result is still valid (TTL)
                if self._is_result_valid(result, tool_name):
                    # Update access statistics
                    result.access_count += 1
                    result.last_access = time.time()

                    # Move to end (LRU)
                    self.cache.move_to_end(cache_key)

                    # Update metrics
                    self.metrics.hits += 1
                    self.metrics.total_processing_time_saved_ms += result.processing_time_ms

                    # BUG FIX: when compression is enabled the stored content is
                    # zlib-compressed bytes; decompress so callers always get the
                    # str promised by the signature.
                    if isinstance(result.content, bytes):
                        return self._decompress_content(result.content)
                    return result.content
                else:
                    # Result expired, remove from cache.
                    # BUG FIX: keep size/eviction accounting consistent with
                    # _remove_expired_entries (previously cache_size_bytes was
                    # never decremented here, inflating the memory estimate).
                    self.cache.pop(cache_key)
                    self.metrics.cache_size_bytes -= result.size_estimate
                    self.metrics.evictions += 1

            # Cache miss
            self.metrics.misses += 1
            return None

    def cache_result(self,
                     original_content: str,
                     processed_content: str,
                     tool_name: str,
                     tool_settings: Dict[str, Any],
                     processing_time_ms: float):
        """
        Cache a processed result.

        Args:
            original_content: Original text content
            processed_content: Processed result
            tool_name: Name of the processing tool
            tool_settings: Tool configuration settings
            processing_time_ms: Time taken to process
        """
        # Don't cache if result is same as input (no processing benefit)
        if original_content == processed_content:
            return

        # Don't cache very large results (memory efficiency)
        if len(processed_content) > 1024 * 1024:  # 1MB limit
            return

        # Check if tool should be cached at all.
        tool_config = self.tool_cache_settings.get(tool_name, {'priority': 'medium'})
        if tool_config.get('priority') == 'none':
            return

        cache_key = self._generate_cache_key(original_content, tool_name, tool_settings)

        # Create result object (content is stored compressed when enabled).
        result = ProcessedResult(
            content=self._compress_content(processed_content) if self.enable_compression else processed_content,
            tool_name=tool_name,
            tool_settings=tool_settings.copy(),
            processing_time_ms=processing_time_ms,
            content_hash=self._generate_content_hash(original_content),
            result_hash=self._generate_content_hash(processed_content)
        )

        with self.cache_lock:
            # Make room before inserting (TTL, entry-count, memory limits).
            self._enforce_cache_limits()

            # Add to cache
            self.cache[cache_key] = result

            # Update metrics
            self.metrics.cache_size_bytes += result.size_estimate

            # Persist to disk if enabled
            if self.enable_persistence:
                self._save_cache_to_disk()

    def _generate_cache_key(self,
                            content: str,
                            tool_name: str,
                            tool_settings: Dict[str, Any]) -> str:
        """Generate a unique cache key for the content and processing parameters."""
        # Stable hash from content and settings (sorted for key-order independence).
        content_hash = self._generate_content_hash(content)
        settings_str = str(sorted(tool_settings.items()))
        key_data = f"{tool_name}_{content_hash}_{settings_str}"
        return hashlib.sha256(key_data.encode('utf-8')).hexdigest()[:32]

    def _generate_content_hash(self, content: str) -> str:
        """Generate a short fingerprint for content.

        MD5 is used purely as a fast non-cryptographic fingerprint here, not
        for security.
        """
        return hashlib.md5(content.encode('utf-8')).hexdigest()[:16]

    def _compress_content(self, content: str) -> bytes:
        """Compress content for storage efficiency."""
        return zlib.compress(content.encode('utf-8'))

    def _decompress_content(self, compressed_content: bytes) -> str:
        """Decompress content for retrieval."""
        return zlib.decompress(compressed_content).decode('utf-8')

    def _is_result_valid(self, result: ProcessedResult, tool_name: str) -> bool:
        """Check if a cached result is still valid based on the tool's TTL."""
        tool_config = self.tool_cache_settings.get(tool_name, {'ttl_hours': 24})
        ttl_seconds = tool_config.get('ttl_hours', 24) * 3600

        return result.age_seconds < ttl_seconds

    def _enforce_cache_limits(self):
        """Enforce cache size and entry limits. Caller must hold cache_lock."""
        # Remove expired entries first
        self._remove_expired_entries()

        # Check entry count limit (>= so the upcoming insert fits under max_entries).
        while len(self.cache) >= self.max_entries:
            self._evict_least_valuable_entry()

        # Check memory size limit
        while self.metrics.cache_size_bytes > self.max_cache_size_bytes:
            self._evict_least_valuable_entry()

    def _remove_expired_entries(self):
        """Remove expired cache entries. Caller must hold cache_lock."""
        expired_keys = [
            cache_key
            for cache_key, result in self.cache.items()
            if not self._is_result_valid(result, result.tool_name)
        ]

        for key in expired_keys:
            result = self.cache.pop(key)
            self.metrics.cache_size_bytes -= result.size_estimate
            self.metrics.evictions += 1

    def _evict_least_valuable_entry(self):
        """Evict the least valuable cache entry using a scoring algorithm."""
        if not self.cache:
            return

        # Calculate value scores for all entries
        entries_with_scores = []

        for cache_key, result in self.cache.items():
            # Score based on access frequency, recency, processing time saved,
            # and tool priority.
            tool_config = self.tool_cache_settings.get(result.tool_name, {'priority': 'medium'})

            # Priority multiplier
            priority_multiplier = {'high': 3.0, 'medium': 2.0, 'low': 1.0, 'none': 0.1}.get(
                tool_config.get('priority', 'medium'), 2.0
            )

            # Recency score (more recent = higher score)
            recency_score = 1.0 / max(result.age_seconds / 3600, 0.1)  # Hours

            # Access frequency score
            frequency_score = result.access_count / max(result.age_seconds / 3600, 0.1)

            # Processing time saved score
            time_saved_score = result.processing_time_ms / 100.0  # Normalize to reasonable range

            # Size penalty (larger entries are less valuable)
            size_penalty = result.size_estimate / (1024 * 1024)  # MB

            # Combined score
            score = (
                (recency_score * 0.3 + frequency_score * 0.4 + time_saved_score * 0.2) *
                priority_multiplier - size_penalty * 0.1
            )

            entries_with_scores.append((score, cache_key))

        # Sort by score (lowest first) and evict the least valuable
        entries_with_scores.sort()
        if entries_with_scores:
            _, evict_key = entries_with_scores[0]
            result = self.cache.pop(evict_key)
            self.metrics.cache_size_bytes -= result.size_estimate
            self.metrics.evictions += 1

    def get_cache_stats(self) -> Dict[str, Any]:
        """Get comprehensive cache statistics (metrics, capacity, per-tool counts)."""
        with self.cache_lock:
            total_entries = len(self.cache)

            # Tool distribution and total cached processing time.
            tool_distribution = {}
            total_processing_time = 0.0

            for result in self.cache.values():
                tool_name = result.tool_name
                tool_distribution[tool_name] = tool_distribution.get(tool_name, 0) + 1
                total_processing_time += result.processing_time_ms

            return {
                'metrics': {
                    'hit_rate_percent': self.metrics.hit_rate,
                    'hits': self.metrics.hits,
                    'misses': self.metrics.misses,
                    'evictions': self.metrics.evictions,
                    'total_time_saved_ms': self.metrics.total_processing_time_saved_ms,
                    'average_time_saved_ms': self.metrics.average_time_saved_ms
                },
                'cache_info': {
                    'total_entries': total_entries,
                    'cache_size_mb': self.metrics.cache_size_bytes / (1024 * 1024),
                    'max_cache_size_mb': self.max_cache_size_bytes / (1024 * 1024),
                    'max_entries': self.max_entries,
                    'compression_enabled': self.enable_compression,
                    'persistence_enabled': self.enable_persistence
                },
                'tool_distribution': tool_distribution,
                'total_cached_processing_time_ms': total_processing_time
            }

    def get_tool_stats(self, tool_name: str) -> Dict[str, Any]:
        """Get statistics for a specific tool."""
        with self.cache_lock:
            tool_entries = [r for r in self.cache.values() if r.tool_name == tool_name]

            if not tool_entries:
                return {'tool_name': tool_name, 'cached_entries': 0}

            total_access_count = sum(r.access_count for r in tool_entries)
            total_processing_time = sum(r.processing_time_ms for r in tool_entries)
            average_age = sum(r.age_seconds for r in tool_entries) / len(tool_entries)

            return {
                'tool_name': tool_name,
                'cached_entries': len(tool_entries),
                'total_access_count': total_access_count,
                'total_processing_time_ms': total_processing_time,
                'average_age_seconds': average_age,
                'cache_settings': self.tool_cache_settings.get(tool_name, {})
            }

    def clear_cache(self, tool_name: Optional[str] = None):
        """Clear cache entries, optionally for a specific tool."""
        with self.cache_lock:
            if tool_name:
                # Clear entries for specific tool
                keys_to_remove = [k for k, v in self.cache.items() if v.tool_name == tool_name]
                for key in keys_to_remove:
                    result = self.cache.pop(key)
                    self.metrics.cache_size_bytes -= result.size_estimate
            else:
                # Clear all entries
                self.cache.clear()
                self.metrics.cache_size_bytes = 0

            if self.enable_persistence:
                self._save_cache_to_disk()

    def optimize_cache(self):
        """Optimize cache by removing expired entries and adjusting settings."""
        with self.cache_lock:
            # Remove expired entries
            self._remove_expired_entries()

            # Analyze cache usage patterns
            stats = self.get_cache_stats()

            # Placeholder tuning hooks: neither branch currently changes anything.
            if stats['metrics']['hit_rate_percent'] < 50 and len(self.cache) < self.max_entries // 2:
                # Low hit rate with plenty of space - might need different caching strategy
                pass
            elif stats['metrics']['hit_rate_percent'] > 90 and self.metrics.cache_size_bytes > self.max_cache_size_bytes * 0.8:
                # High hit rate but near capacity - consider increasing cache size
                pass

    def _save_cache_to_disk(self):
        """Save cache to disk for persistence."""
        if not self.enable_persistence:
            return

        try:
            with open(self.cache_file, 'wb') as f:
                # Save only essential data to reduce file size
                cache_data = {
                    'cache': dict(self.cache),
                    'metrics': self.metrics,
                    'timestamp': time.time()
                }
                pickle.dump(cache_data, f)
        except Exception as e:
            print(f"Error saving cache to disk: {e}")

    def _load_cache_from_disk(self):
        """Load cache from disk if available.

        SECURITY NOTE: pickle.load executes arbitrary code if the cache file
        has been tampered with. Only enable persistence when the cache file
        path is trusted and not writable by other parties.
        """
        if not self.enable_persistence or not os.path.exists(self.cache_file):
            return

        try:
            with open(self.cache_file, 'rb') as f:
                cache_data = pickle.load(f)

            # Check if cache is not too old (24 hours)
            if time.time() - cache_data.get('timestamp', 0) < 24 * 3600:
                self.cache = OrderedDict(cache_data.get('cache', {}))
                self.metrics = cache_data.get('metrics', CacheMetrics())

                # Recalculate cache size
                self.metrics.cache_size_bytes = sum(r.size_estimate for r in self.cache.values())
        except Exception as e:
            print(f"Error loading cache from disk: {e}")
428
class ProcessingResultCache:
    """High-level wrapper that routes tool processing through a ContentHashCache."""

    def __init__(self, content_cache: ContentHashCache):
        self.content_cache = content_cache
        # Local counters, kept separate from the underlying cache's own metrics.
        self.processing_stats = {
            'cache_enabled_operations': 0,
            'cache_bypassed_operations': 0,
            'total_time_saved_ms': 0.0
        }

    def process_with_cache(self,
                           content: str,
                           tool_name: str,
                           tool_settings: Dict[str, Any],
                           processor_func) -> Tuple[str, bool]:
        """
        Process content, serving from the cache when possible.

        Args:
            content: Content to process
            tool_name: Name of the processing tool
            tool_settings: Tool settings
            processor_func: Function to call on a cache miss

        Returns:
            Tuple of (processed_result, was_cached)
        """
        hit = self.content_cache.get_cached_result(content, tool_name, tool_settings)

        if hit is not None:
            # Cache hit: the cache may hand back compressed bytes; normalize to str.
            if self.content_cache.enable_compression and isinstance(hit, bytes):
                hit = self.content_cache._decompress_content(hit)
            self.processing_stats['cache_enabled_operations'] += 1
            return hit, True

        # Cache miss: run the processor and time it for the cache's bookkeeping.
        started = time.time()
        produced = processor_func(content)
        elapsed_ms = (time.time() - started) * 1000

        self.content_cache.cache_result(
            content, produced, tool_name, tool_settings, elapsed_ms
        )

        self.processing_stats['cache_enabled_operations'] += 1
        return produced, False

    def get_processing_stats(self) -> Dict[str, Any]:
        """Return the local counters merged with the underlying cache statistics."""
        return {
            **self.processing_stats,
            'cache_stats': self.content_cache.get_cache_stats()
        }
491
# Lazily-created module-level singletons shared by all callers.
_global_content_cache = None
_global_processing_cache = None

def get_content_hash_cache() -> ContentHashCache:
    """Return the process-wide ContentHashCache, creating it on first use."""
    global _global_content_cache
    if _global_content_cache is None:
        _global_content_cache = ContentHashCache()
    return _global_content_cache

def get_processing_result_cache() -> ProcessingResultCache:
    """Return the process-wide ProcessingResultCache, creating it (and its
    backing content cache) on first use."""
    global _global_processing_cache
    if _global_processing_cache is None:
        _global_processing_cache = ProcessingResultCache(get_content_hash_cache())
    return _global_processing_cache