pomera-ai-commander 1.1.1 → 1.2.2
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -21
- package/README.md +105 -680
- package/bin/pomera-ai-commander.js +62 -62
- package/core/__init__.py +65 -65
- package/core/app_context.py +482 -482
- package/core/async_text_processor.py +421 -421
- package/core/backup_manager.py +655 -655
- package/core/backup_recovery_manager.py +1199 -1033
- package/core/content_hash_cache.py +508 -508
- package/core/context_menu.py +313 -313
- package/core/data_directory.py +549 -0
- package/core/data_validator.py +1066 -1066
- package/core/database_connection_manager.py +744 -744
- package/core/database_curl_settings_manager.py +608 -608
- package/core/database_promera_ai_settings_manager.py +446 -446
- package/core/database_schema.py +411 -411
- package/core/database_schema_manager.py +395 -395
- package/core/database_settings_manager.py +1507 -1507
- package/core/database_settings_manager_interface.py +456 -456
- package/core/dialog_manager.py +734 -734
- package/core/diff_utils.py +239 -0
- package/core/efficient_line_numbers.py +540 -510
- package/core/error_handler.py +746 -746
- package/core/error_service.py +431 -431
- package/core/event_consolidator.py +511 -511
- package/core/mcp/__init__.py +43 -43
- package/core/mcp/find_replace_diff.py +334 -0
- package/core/mcp/protocol.py +288 -288
- package/core/mcp/schema.py +251 -251
- package/core/mcp/server_stdio.py +299 -299
- package/core/mcp/tool_registry.py +2699 -2345
- package/core/memento.py +275 -0
- package/core/memory_efficient_text_widget.py +711 -711
- package/core/migration_manager.py +914 -914
- package/core/migration_test_suite.py +1085 -1085
- package/core/migration_validator.py +1143 -1143
- package/core/optimized_find_replace.py +714 -714
- package/core/optimized_pattern_engine.py +424 -424
- package/core/optimized_search_highlighter.py +552 -552
- package/core/performance_monitor.py +674 -674
- package/core/persistence_manager.py +712 -712
- package/core/progressive_stats_calculator.py +632 -632
- package/core/regex_pattern_cache.py +529 -529
- package/core/regex_pattern_library.py +350 -350
- package/core/search_operation_manager.py +434 -434
- package/core/settings_defaults_registry.py +1087 -1087
- package/core/settings_integrity_validator.py +1111 -1111
- package/core/settings_serializer.py +557 -557
- package/core/settings_validator.py +1823 -1823
- package/core/smart_stats_calculator.py +709 -709
- package/core/statistics_update_manager.py +619 -619
- package/core/stats_config_manager.py +858 -858
- package/core/streaming_text_handler.py +723 -723
- package/core/task_scheduler.py +596 -596
- package/core/update_pattern_library.py +168 -168
- package/core/visibility_monitor.py +596 -596
- package/core/widget_cache.py +498 -498
- package/mcp.json +51 -61
- package/migrate_data.py +127 -0
- package/package.json +64 -57
- package/pomera.py +7883 -7482
- package/pomera_mcp_server.py +183 -144
- package/requirements.txt +33 -0
- package/scripts/Dockerfile.alpine +43 -0
- package/scripts/Dockerfile.gui-test +54 -0
- package/scripts/Dockerfile.linux +43 -0
- package/scripts/Dockerfile.test-linux +80 -0
- package/scripts/Dockerfile.ubuntu +39 -0
- package/scripts/README.md +53 -0
- package/scripts/build-all.bat +113 -0
- package/scripts/build-docker.bat +53 -0
- package/scripts/build-docker.sh +55 -0
- package/scripts/build-optimized.bat +101 -0
- package/scripts/build.sh +78 -0
- package/scripts/docker-compose.test.yml +27 -0
- package/scripts/docker-compose.yml +32 -0
- package/scripts/postinstall.js +62 -0
- package/scripts/requirements-minimal.txt +33 -0
- package/scripts/test-linux-simple.bat +28 -0
- package/scripts/validate-release-workflow.py +450 -0
- package/tools/__init__.py +4 -4
- package/tools/ai_tools.py +2891 -2891
- package/tools/ascii_art_generator.py +352 -352
- package/tools/base64_tools.py +183 -183
- package/tools/base_tool.py +511 -511
- package/tools/case_tool.py +308 -308
- package/tools/column_tools.py +395 -395
- package/tools/cron_tool.py +884 -884
- package/tools/curl_history.py +600 -600
- package/tools/curl_processor.py +1207 -1207
- package/tools/curl_settings.py +502 -502
- package/tools/curl_tool.py +5467 -5467
- package/tools/diff_viewer.py +1817 -1072
- package/tools/email_extraction_tool.py +248 -248
- package/tools/email_header_analyzer.py +425 -425
- package/tools/extraction_tools.py +250 -250
- package/tools/find_replace.py +2289 -1750
- package/tools/folder_file_reporter.py +1463 -1463
- package/tools/folder_file_reporter_adapter.py +480 -480
- package/tools/generator_tools.py +1216 -1216
- package/tools/hash_generator.py +255 -255
- package/tools/html_tool.py +656 -656
- package/tools/jsonxml_tool.py +729 -729
- package/tools/line_tools.py +419 -419
- package/tools/markdown_tools.py +561 -561
- package/tools/mcp_widget.py +1417 -1417
- package/tools/notes_widget.py +978 -973
- package/tools/number_base_converter.py +372 -372
- package/tools/regex_extractor.py +571 -571
- package/tools/slug_generator.py +310 -310
- package/tools/sorter_tools.py +458 -458
- package/tools/string_escape_tool.py +392 -392
- package/tools/text_statistics_tool.py +365 -365
- package/tools/text_wrapper.py +430 -430
- package/tools/timestamp_converter.py +421 -421
- package/tools/tool_loader.py +710 -710
- package/tools/translator_tools.py +522 -522
- package/tools/url_link_extractor.py +261 -261
- package/tools/url_parser.py +204 -204
- package/tools/whitespace_tools.py +355 -355
- package/tools/word_frequency_counter.py +146 -146
- package/core/__pycache__/__init__.cpython-313.pyc +0 -0
- package/core/__pycache__/app_context.cpython-313.pyc +0 -0
- package/core/__pycache__/async_text_processor.cpython-313.pyc +0 -0
- package/core/__pycache__/backup_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/backup_recovery_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/content_hash_cache.cpython-313.pyc +0 -0
- package/core/__pycache__/context_menu.cpython-313.pyc +0 -0
- package/core/__pycache__/data_validator.cpython-313.pyc +0 -0
- package/core/__pycache__/database_connection_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/database_curl_settings_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/database_promera_ai_settings_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/database_schema.cpython-313.pyc +0 -0
- package/core/__pycache__/database_schema_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/database_settings_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/database_settings_manager_interface.cpython-313.pyc +0 -0
- package/core/__pycache__/dialog_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/efficient_line_numbers.cpython-313.pyc +0 -0
- package/core/__pycache__/error_handler.cpython-313.pyc +0 -0
- package/core/__pycache__/error_service.cpython-313.pyc +0 -0
- package/core/__pycache__/event_consolidator.cpython-313.pyc +0 -0
- package/core/__pycache__/memory_efficient_text_widget.cpython-313.pyc +0 -0
- package/core/__pycache__/migration_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/migration_test_suite.cpython-313.pyc +0 -0
- package/core/__pycache__/migration_validator.cpython-313.pyc +0 -0
- package/core/__pycache__/optimized_find_replace.cpython-313.pyc +0 -0
- package/core/__pycache__/optimized_pattern_engine.cpython-313.pyc +0 -0
- package/core/__pycache__/optimized_search_highlighter.cpython-313.pyc +0 -0
- package/core/__pycache__/performance_monitor.cpython-313.pyc +0 -0
- package/core/__pycache__/persistence_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/progressive_stats_calculator.cpython-313.pyc +0 -0
- package/core/__pycache__/regex_pattern_cache.cpython-313.pyc +0 -0
- package/core/__pycache__/regex_pattern_library.cpython-313.pyc +0 -0
- package/core/__pycache__/search_operation_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/settings_defaults_registry.cpython-313.pyc +0 -0
- package/core/__pycache__/settings_integrity_validator.cpython-313.pyc +0 -0
- package/core/__pycache__/settings_serializer.cpython-313.pyc +0 -0
- package/core/__pycache__/settings_validator.cpython-313.pyc +0 -0
- package/core/__pycache__/smart_stats_calculator.cpython-313.pyc +0 -0
- package/core/__pycache__/statistics_update_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/stats_config_manager.cpython-313.pyc +0 -0
- package/core/__pycache__/streaming_text_handler.cpython-313.pyc +0 -0
- package/core/__pycache__/task_scheduler.cpython-313.pyc +0 -0
- package/core/__pycache__/visibility_monitor.cpython-313.pyc +0 -0
- package/core/__pycache__/widget_cache.cpython-313.pyc +0 -0
- package/core/mcp/__pycache__/__init__.cpython-313.pyc +0 -0
- package/core/mcp/__pycache__/protocol.cpython-313.pyc +0 -0
- package/core/mcp/__pycache__/schema.cpython-313.pyc +0 -0
- package/core/mcp/__pycache__/server_stdio.cpython-313.pyc +0 -0
- package/core/mcp/__pycache__/tool_registry.cpython-313.pyc +0 -0
- package/tools/__pycache__/__init__.cpython-313.pyc +0 -0
- package/tools/__pycache__/ai_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/ascii_art_generator.cpython-313.pyc +0 -0
- package/tools/__pycache__/base64_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/base_tool.cpython-313.pyc +0 -0
- package/tools/__pycache__/case_tool.cpython-313.pyc +0 -0
- package/tools/__pycache__/column_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/cron_tool.cpython-313.pyc +0 -0
- package/tools/__pycache__/curl_history.cpython-313.pyc +0 -0
- package/tools/__pycache__/curl_processor.cpython-313.pyc +0 -0
- package/tools/__pycache__/curl_settings.cpython-313.pyc +0 -0
- package/tools/__pycache__/curl_tool.cpython-313.pyc +0 -0
- package/tools/__pycache__/diff_viewer.cpython-313.pyc +0 -0
- package/tools/__pycache__/email_extraction_tool.cpython-313.pyc +0 -0
- package/tools/__pycache__/email_header_analyzer.cpython-313.pyc +0 -0
- package/tools/__pycache__/extraction_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/find_replace.cpython-313.pyc +0 -0
- package/tools/__pycache__/folder_file_reporter.cpython-313.pyc +0 -0
- package/tools/__pycache__/folder_file_reporter_adapter.cpython-313.pyc +0 -0
- package/tools/__pycache__/generator_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/hash_generator.cpython-313.pyc +0 -0
- package/tools/__pycache__/html_tool.cpython-313.pyc +0 -0
- package/tools/__pycache__/huggingface_helper.cpython-313.pyc +0 -0
- package/tools/__pycache__/jsonxml_tool.cpython-313.pyc +0 -0
- package/tools/__pycache__/line_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/list_comparator.cpython-313.pyc +0 -0
- package/tools/__pycache__/markdown_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/mcp_widget.cpython-313.pyc +0 -0
- package/tools/__pycache__/notes_widget.cpython-313.pyc +0 -0
- package/tools/__pycache__/number_base_converter.cpython-313.pyc +0 -0
- package/tools/__pycache__/regex_extractor.cpython-313.pyc +0 -0
- package/tools/__pycache__/slug_generator.cpython-313.pyc +0 -0
- package/tools/__pycache__/sorter_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/string_escape_tool.cpython-313.pyc +0 -0
- package/tools/__pycache__/text_statistics_tool.cpython-313.pyc +0 -0
- package/tools/__pycache__/text_wrapper.cpython-313.pyc +0 -0
- package/tools/__pycache__/timestamp_converter.cpython-313.pyc +0 -0
- package/tools/__pycache__/tool_loader.cpython-313.pyc +0 -0
- package/tools/__pycache__/translator_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/url_link_extractor.cpython-313.pyc +0 -0
- package/tools/__pycache__/url_parser.cpython-313.pyc +0 -0
- package/tools/__pycache__/whitespace_tools.cpython-313.pyc +0 -0
- package/tools/__pycache__/word_frequency_counter.cpython-313.pyc +0 -0
package/core/content_hash_cache.py

@@ -1,509 +1,509 @@

The hunk removes and re-adds lines 1-508 with byte-identical text; line 509 is the only unchanged context line. This pattern is consistent with a whitespace- or line-ending-only change, so the file content is shown once:

"""
Content hash-based caching system for processed results in Promera AI Commander.
Provides intelligent caching of text processing results using content hashing.
"""

import hashlib
import time
import threading
import pickle
import zlib
from typing import Dict, List, Optional, Any, Tuple, Union
from dataclasses import dataclass, field
from collections import OrderedDict
import weakref
import os

@dataclass
class ProcessedResult:
    """Container for processed text results with metadata."""
    content: str
    tool_name: str
    tool_settings: Dict[str, Any]
    processing_time_ms: float
    content_hash: str
    result_hash: str
    timestamp: float = field(default_factory=time.time)
    access_count: int = 0
    last_access: float = field(default_factory=time.time)

    @property
    def age_seconds(self) -> float:
        """Age of the result in seconds."""
        return time.time() - self.timestamp

    @property
    def size_estimate(self) -> int:
        """Estimated memory size of the result."""
        return len(self.content) + len(str(self.tool_settings)) + 200  # Overhead

@dataclass
class CacheMetrics:
    """Cache performance metrics."""
    hits: int = 0
    misses: int = 0
    evictions: int = 0
    total_processing_time_saved_ms: float = 0.0
    cache_size_bytes: int = 0

    @property
    def hit_rate(self) -> float:
        """Cache hit rate as percentage."""
        total = self.hits + self.misses
        return (self.hits / max(total, 1)) * 100

    @property
    def average_time_saved_ms(self) -> float:
        """Average processing time saved per hit."""
        return self.total_processing_time_saved_ms / max(self.hits, 1)

class ContentHashCache:
    """
    Intelligent content hash-based cache for processed text results.
    """

    def __init__(self,
                 max_cache_size_mb: int = 50,
                 max_entries: int = 1000,
                 enable_compression: bool = True,
                 enable_persistence: bool = False):
        self.max_cache_size_bytes = max_cache_size_mb * 1024 * 1024
        self.max_entries = max_entries
        self.enable_compression = enable_compression
        self.enable_persistence = enable_persistence

        # Cache storage
        self.cache: OrderedDict[str, ProcessedResult] = OrderedDict()
        self.cache_lock = threading.RLock()

        # Metrics
        self.metrics = CacheMetrics()

        # Tool-specific cache settings
        self.tool_cache_settings = {
            'Case Tool': {'priority': 'high', 'ttl_hours': 24},
            'Find & Replace Text': {'priority': 'medium', 'ttl_hours': 12},
            'URL and Link Extractor': {'priority': 'high', 'ttl_hours': 48},
            'Word Frequency Counter': {'priority': 'medium', 'ttl_hours': 24},
            'Alphabetical Sorter': {'priority': 'high', 'ttl_hours': 48},
            'Number Sorter': {'priority': 'high', 'ttl_hours': 48},
            'Base64 Encoder/Decoder': {'priority': 'low', 'ttl_hours': 6},
            'Binary Code Translator': {'priority': 'low', 'ttl_hours': 6},
            'Morse Code Translator': {'priority': 'low', 'ttl_hours': 6}
        }

        # Persistence settings
        if self.enable_persistence:
            self.cache_file = "content_cache.pkl"
            self._load_cache_from_disk()

    def get_cached_result(self,
                          content: str,
                          tool_name: str,
                          tool_settings: Dict[str, Any]) -> Optional[str]:
        """
        Get cached result for processed content.

        Args:
            content: Original text content
            tool_name: Name of the processing tool
            tool_settings: Tool configuration settings

        Returns:
            Cached processed result or None if not found
        """
        cache_key = self._generate_cache_key(content, tool_name, tool_settings)

        with self.cache_lock:
            if cache_key in self.cache:
                result = self.cache[cache_key]

                # Check if result is still valid (TTL)
                if self._is_result_valid(result, tool_name):
                    # Update access statistics
                    result.access_count += 1
                    result.last_access = time.time()

                    # Move to end (LRU)
                    self.cache.move_to_end(cache_key)

                    # Update metrics
                    self.metrics.hits += 1
                    self.metrics.total_processing_time_saved_ms += result.processing_time_ms

                    return result.content
                else:
                    # Result expired, remove from cache
                    self.cache.pop(cache_key)

            # Cache miss
            self.metrics.misses += 1
            return None

    def cache_result(self,
                     original_content: str,
                     processed_content: str,
                     tool_name: str,
                     tool_settings: Dict[str, Any],
                     processing_time_ms: float):
        """
        Cache a processed result.

        Args:
            original_content: Original text content
            processed_content: Processed result
            tool_name: Name of the processing tool
            tool_settings: Tool configuration settings
            processing_time_ms: Time taken to process
        """
        # Don't cache if result is same as input (no processing benefit)
        if original_content == processed_content:
            return

        # Don't cache very large results (memory efficiency)
        if len(processed_content) > 1024 * 1024:  # 1MB limit
            return

        # Check if tool should be cached
        tool_config = self.tool_cache_settings.get(tool_name, {'priority': 'medium'})
        if tool_config.get('priority') == 'none':
            return

        cache_key = self._generate_cache_key(original_content, tool_name, tool_settings)

        # Create result object
        result = ProcessedResult(
            content=self._compress_content(processed_content) if self.enable_compression else processed_content,
            tool_name=tool_name,
            tool_settings=tool_settings.copy(),
            processing_time_ms=processing_time_ms,
            content_hash=self._generate_content_hash(original_content),
            result_hash=self._generate_content_hash(processed_content)
        )

        with self.cache_lock:
            # Check cache size limits
            self._enforce_cache_limits()

            # Add to cache
            self.cache[cache_key] = result

            # Update metrics
            self.metrics.cache_size_bytes += result.size_estimate

            # Persist to disk if enabled
            if self.enable_persistence:
                self._save_cache_to_disk()

    def _generate_cache_key(self,
                            content: str,
                            tool_name: str,
                            tool_settings: Dict[str, Any]) -> str:
        """Generate a unique cache key for the content and processing parameters."""
        # Create a stable hash from content and settings
        content_hash = self._generate_content_hash(content)
        settings_str = str(sorted(tool_settings.items()))
        key_data = f"{tool_name}_{content_hash}_{settings_str}"
        return hashlib.sha256(key_data.encode('utf-8')).hexdigest()[:32]

    def _generate_content_hash(self, content: str) -> str:
        """Generate a hash for content."""
        return hashlib.md5(content.encode('utf-8')).hexdigest()[:16]

    def _compress_content(self, content: str) -> bytes:
        """Compress content for storage efficiency."""
        return zlib.compress(content.encode('utf-8'))

    def _decompress_content(self, compressed_content: bytes) -> str:
        """Decompress content for retrieval."""
        return zlib.decompress(compressed_content).decode('utf-8')

    def _is_result_valid(self, result: ProcessedResult, tool_name: str) -> bool:
        """Check if a cached result is still valid based on TTL."""
        tool_config = self.tool_cache_settings.get(tool_name, {'ttl_hours': 24})
        ttl_seconds = tool_config.get('ttl_hours', 24) * 3600

        return result.age_seconds < ttl_seconds

    def _enforce_cache_limits(self):
        """Enforce cache size and entry limits."""
        # Remove expired entries first
        self._remove_expired_entries()

        # Check entry count limit
        while len(self.cache) >= self.max_entries:
            self._evict_least_valuable_entry()

        # Check memory size limit
        while self.metrics.cache_size_bytes > self.max_cache_size_bytes:
            self._evict_least_valuable_entry()

    def _remove_expired_entries(self):
        """Remove expired cache entries."""
        current_time = time.time()
        expired_keys = []

        for cache_key, result in self.cache.items():
            if not self._is_result_valid(result, result.tool_name):
                expired_keys.append(cache_key)

        for key in expired_keys:
            result = self.cache.pop(key)
            self.metrics.cache_size_bytes -= result.size_estimate
            self.metrics.evictions += 1

    def _evict_least_valuable_entry(self):
        """Evict the least valuable cache entry using a scoring algorithm."""
        if not self.cache:
            return

        # Calculate value scores for all entries
        entries_with_scores = []
        current_time = time.time()

        for cache_key, result in self.cache.items():
            # Score based on access frequency, recency, processing time saved, and tool priority
            tool_config = self.tool_cache_settings.get(result.tool_name, {'priority': 'medium'})

            # Priority multiplier
            priority_multiplier = {'high': 3.0, 'medium': 2.0, 'low': 1.0, 'none': 0.1}.get(
                tool_config.get('priority', 'medium'), 2.0
            )

            # Recency score (more recent = higher score)
            recency_score = 1.0 / max(result.age_seconds / 3600, 0.1)  # Hours

            # Access frequency score
            frequency_score = result.access_count / max(result.age_seconds / 3600, 0.1)

            # Processing time saved score
            time_saved_score = result.processing_time_ms / 100.0  # Normalize to reasonable range

            # Size penalty (larger entries are less valuable)
            size_penalty = result.size_estimate / (1024 * 1024)  # MB

            # Combined score
            score = (
                (recency_score * 0.3 + frequency_score * 0.4 + time_saved_score * 0.2) *
                priority_multiplier - size_penalty * 0.1
            )

            entries_with_scores.append((score, cache_key))

        # Sort by score (lowest first) and evict the least valuable
        entries_with_scores.sort()
        if entries_with_scores:
            _, evict_key = entries_with_scores[0]
            result = self.cache.pop(evict_key)
            self.metrics.cache_size_bytes -= result.size_estimate
            self.metrics.evictions += 1

    def get_cache_stats(self) -> Dict[str, Any]:
        """Get comprehensive cache statistics."""
        with self.cache_lock:
            # Calculate additional statistics
            total_entries = len(self.cache)

            # Tool distribution
            tool_distribution = {}
            total_processing_time = 0.0

            for result in self.cache.values():
                tool_name = result.tool_name
                tool_distribution[tool_name] = tool_distribution.get(tool_name, 0) + 1
                total_processing_time += result.processing_time_ms

            return {
                'metrics': {
                    'hit_rate_percent': self.metrics.hit_rate,
                    'hits': self.metrics.hits,
                    'misses': self.metrics.misses,
                    'evictions': self.metrics.evictions,
                    'total_time_saved_ms': self.metrics.total_processing_time_saved_ms,
                    'average_time_saved_ms': self.metrics.average_time_saved_ms
                },
                'cache_info': {
                    'total_entries': total_entries,
                    'cache_size_mb': self.metrics.cache_size_bytes / (1024 * 1024),
                    'max_cache_size_mb': self.max_cache_size_bytes / (1024 * 1024),
                    'max_entries': self.max_entries,
                    'compression_enabled': self.enable_compression,
                    'persistence_enabled': self.enable_persistence
                },
                'tool_distribution': tool_distribution,
                'total_cached_processing_time_ms': total_processing_time
            }

    def get_tool_stats(self, tool_name: str) -> Dict[str, Any]:
        """Get statistics for a specific tool."""
        with self.cache_lock:
            tool_entries = [r for r in self.cache.values() if r.tool_name == tool_name]

            if not tool_entries:
                return {'tool_name': tool_name, 'cached_entries': 0}

            total_access_count = sum(r.access_count for r in tool_entries)
            total_processing_time = sum(r.processing_time_ms for r in tool_entries)
            average_age = sum(r.age_seconds for r in tool_entries) / len(tool_entries)

            return {
                'tool_name': tool_name,
                'cached_entries': len(tool_entries),
                'total_access_count': total_access_count,
                'total_processing_time_ms': total_processing_time,
                'average_age_seconds': average_age,
                'cache_settings': self.tool_cache_settings.get(tool_name, {})
            }

    def clear_cache(self, tool_name: Optional[str] = None):
        """Clear cache entries, optionally for a specific tool."""
        with self.cache_lock:
            if tool_name:
                # Clear entries for specific tool
                keys_to_remove = [k for k, v in self.cache.items() if v.tool_name == tool_name]
                for key in keys_to_remove:
                    result = self.cache.pop(key)
                    self.metrics.cache_size_bytes -= result.size_estimate
            else:
                # Clear all entries
                self.cache.clear()
                self.metrics.cache_size_bytes = 0

            if self.enable_persistence:
                self._save_cache_to_disk()

    def optimize_cache(self):
        """Optimize cache by removing expired entries and adjusting settings."""
        with self.cache_lock:
            # Remove expired entries
            self._remove_expired_entries()

            # Analyze cache usage patterns
            stats = self.get_cache_stats()

            # Adjust cache size based on hit rate
            if stats['metrics']['hit_rate_percent'] < 50 and len(self.cache) < self.max_entries // 2:
                # Low hit rate with plenty of space - might need different caching strategy
                pass
            elif stats['metrics']['hit_rate_percent'] > 90 and self.metrics.cache_size_bytes > self.max_cache_size_bytes * 0.8:
                # High hit rate but near capacity - consider increasing cache size
                pass

    def _save_cache_to_disk(self):
        """Save cache to disk for persistence."""
        if not self.enable_persistence:
            return

        try:
            with open(self.cache_file, 'wb') as f:
                # Save only essential data to reduce file size
                cache_data = {
                    'cache': dict(self.cache),
                    'metrics': self.metrics,
                    'timestamp': time.time()
                }
                pickle.dump(cache_data, f)
        except Exception as e:
            print(f"Error saving cache to disk: {e}")

    def _load_cache_from_disk(self):
        """Load cache from disk if available."""
        if not self.enable_persistence or not os.path.exists(self.cache_file):
            return

        try:
            with open(self.cache_file, 'rb') as f:
                cache_data = pickle.load(f)

            # Check if cache is not too old (24 hours)
            if time.time() - cache_data.get('timestamp', 0) < 24 * 3600:
                self.cache = OrderedDict(cache_data.get('cache', {}))
                self.metrics = cache_data.get('metrics', CacheMetrics())

                # Recalculate cache size
                self.metrics.cache_size_bytes = sum(r.size_estimate for r in self.cache.values())
        except Exception as e:
            print(f"Error loading cache from disk: {e}")

class ProcessingResultCache:
    """
    High-level interface for caching text processing results.
    """

    def __init__(self, content_cache: ContentHashCache):
        self.content_cache = content_cache
        self.processing_stats = {
            'cache_enabled_operations': 0,
            'cache_bypassed_operations': 0,
            'total_time_saved_ms': 0.0
        }

    def process_with_cache(self,
                           content: str,
                           tool_name: str,
                           tool_settings: Dict[str, Any],
                           processor_func) -> Tuple[str, bool]:
        """
        Process content with caching.

        Args:
            content: Content to process
            tool_name: Name of the processing tool
            tool_settings: Tool settings
            processor_func: Function to call if cache miss

        Returns:
            Tuple of (processed_result, was_cached)
        """
        # Check cache first
        cached_result = self.content_cache.get_cached_result(content, tool_name, tool_settings)

        if cached_result is not None:
            # Cache hit
            if self.content_cache.enable_compression and isinstance(cached_result, bytes):
                cached_result = self.content_cache._decompress_content(cached_result)

            self.processing_stats['cache_enabled_operations'] += 1
            return cached_result, True

        # Cache miss - process content
        start_time = time.time()
        processed_result = processor_func(content)
        processing_time_ms = (time.time() - start_time) * 1000

        # Cache the result
        self.content_cache.cache_result(
            content, processed_result, tool_name, tool_settings, processing_time_ms
        )

        self.processing_stats['cache_enabled_operations'] += 1
        return processed_result, False

    def get_processing_stats(self) -> Dict[str, Any]:
        """Get processing statistics."""
        cache_stats = self.content_cache.get_cache_stats()

        return {
            **self.processing_stats,
            'cache_stats': cache_stats
        }

# Global instances
_global_content_cache = None
_global_processing_cache = None

def get_content_hash_cache() -> ContentHashCache:
    """Get the global content hash cache instance."""
    global _global_content_cache
    if _global_content_cache is None:
        _global_content_cache = ContentHashCache()
    return _global_content_cache

def get_processing_result_cache() -> ProcessingResultCache:
    """Get the global processing result cache instance."""
    global _global_processing_cache, _global_content_cache
    if _global_processing_cache is None:
        if _global_content_cache is None:
            _global_content_cache = ContentHashCache()
        _global_processing_cache = ProcessingResultCache(_global_content_cache)
    return _global_processing_cache
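
For orientation, a minimal sketch of how the module above is driven through its public entry points. The import path is assumed from the package layout in the file list, and the settings dict and to_upper processor are hypothetical stand-ins; any str -> str callable works as processor_func:

from core.content_hash_cache import get_processing_result_cache  # path assumed from the file list

def to_upper(text: str) -> str:
    """Hypothetical processor: stands in for a real tool's processing function."""
    return text.upper()

cache = get_processing_result_cache()

# First call misses: to_upper runs, and the timed result is stored under a key
# derived from the tool name, an MD5 digest of the content, and the sorted settings.
result, was_cached = cache.process_with_cache(
    "hello world", "Case Tool", {"mode": "upper"}, to_upper)
assert (result, was_cached) == ("HELLO WORLD", False)

# The same content, tool, and settings hit the cache; the stored (zlib-compressed)
# result is decompressed and returned without calling to_upper again.
result, was_cached = cache.process_with_cache(
    "hello world", "Case Tool", {"mode": "upper"}, to_upper)
assert (result, was_cached) == ("HELLO WORLD", True)

Because the key includes str(sorted(tool_settings.items())), hits are deterministic across calls, but any settings change creates a fresh entry; the per-tool TTLs and the scored eviction in ContentHashCache are what keep the store bounded.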