additory-0.1.0a4-py3-none-any.whl → additory-0.1.1a1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- additory/__init__.py +58 -14
- additory/common/__init__.py +31 -147
- additory/common/column_selector.py +255 -0
- additory/common/distributions.py +286 -613
- additory/common/extractors.py +313 -0
- additory/common/knn_imputation.py +332 -0
- additory/common/result.py +380 -0
- additory/common/strategy_parser.py +243 -0
- additory/common/unit_conversions.py +338 -0
- additory/common/validation.py +283 -103
- additory/core/__init__.py +34 -22
- additory/core/backend.py +258 -0
- additory/core/config.py +177 -305
- additory/core/logging.py +230 -24
- additory/core/memory_manager.py +157 -495
- additory/expressions/__init__.py +2 -23
- additory/expressions/compiler.py +457 -0
- additory/expressions/engine.py +264 -487
- additory/expressions/integrity.py +179 -0
- additory/expressions/loader.py +263 -0
- additory/expressions/parser.py +363 -167
- additory/expressions/resolver.py +274 -0
- additory/functions/__init__.py +1 -0
- additory/functions/analyze/__init__.py +144 -0
- additory/functions/analyze/cardinality.py +58 -0
- additory/functions/analyze/correlations.py +66 -0
- additory/functions/analyze/distributions.py +53 -0
- additory/functions/analyze/duplicates.py +49 -0
- additory/functions/analyze/features.py +61 -0
- additory/functions/analyze/imputation.py +66 -0
- additory/functions/analyze/outliers.py +65 -0
- additory/functions/analyze/patterns.py +65 -0
- additory/functions/analyze/presets.py +72 -0
- additory/functions/analyze/quality.py +59 -0
- additory/functions/analyze/timeseries.py +53 -0
- additory/functions/analyze/types.py +45 -0
- additory/functions/expressions/__init__.py +161 -0
- additory/functions/snapshot/__init__.py +82 -0
- additory/functions/snapshot/filter.py +119 -0
- additory/functions/synthetic/__init__.py +113 -0
- additory/functions/synthetic/mode_detector.py +47 -0
- additory/functions/synthetic/strategies/__init__.py +1 -0
- additory/functions/synthetic/strategies/advanced.py +35 -0
- additory/functions/synthetic/strategies/augmentative.py +160 -0
- additory/functions/synthetic/strategies/generative.py +168 -0
- additory/functions/synthetic/strategies/presets.py +116 -0
- additory/functions/to/__init__.py +188 -0
- additory/functions/to/lookup.py +351 -0
- additory/functions/to/merge.py +189 -0
- additory/functions/to/sort.py +91 -0
- additory/functions/to/summarize.py +170 -0
- additory/functions/transform/__init__.py +140 -0
- additory/functions/transform/datetime.py +79 -0
- additory/functions/transform/extract.py +85 -0
- additory/functions/transform/harmonize.py +105 -0
- additory/functions/transform/knn.py +62 -0
- additory/functions/transform/onehotencoding.py +68 -0
- additory/functions/transform/transpose.py +42 -0
- additory-0.1.1a1.dist-info/METADATA +83 -0
- additory-0.1.1a1.dist-info/RECORD +62 -0
- additory/analysis/__init__.py +0 -48
- additory/analysis/cardinality.py +0 -126
- additory/analysis/correlations.py +0 -124
- additory/analysis/distributions.py +0 -376
- additory/analysis/quality.py +0 -158
- additory/analysis/scan.py +0 -400
- additory/common/backend.py +0 -371
- additory/common/column_utils.py +0 -191
- additory/common/exceptions.py +0 -62
- additory/common/lists.py +0 -229
- additory/common/patterns.py +0 -240
- additory/common/resolver.py +0 -567
- additory/common/sample_data.py +0 -182
- additory/core/ast_builder.py +0 -165
- additory/core/backends/__init__.py +0 -23
- additory/core/backends/arrow_bridge.py +0 -483
- additory/core/backends/cudf_bridge.py +0 -355
- additory/core/column_positioning.py +0 -358
- additory/core/compiler_polars.py +0 -166
- additory/core/enhanced_cache_manager.py +0 -1119
- additory/core/enhanced_matchers.py +0 -473
- additory/core/enhanced_version_manager.py +0 -325
- additory/core/executor.py +0 -59
- additory/core/integrity_manager.py +0 -477
- additory/core/loader.py +0 -190
- additory/core/namespace_manager.py +0 -657
- additory/core/parser.py +0 -176
- additory/core/polars_expression_engine.py +0 -601
- additory/core/registry.py +0 -177
- additory/core/sample_data_manager.py +0 -492
- additory/core/user_namespace.py +0 -751
- additory/core/validator.py +0 -27
- additory/dynamic_api.py +0 -352
- additory/expressions/proxy.py +0 -549
- additory/expressions/registry.py +0 -313
- additory/expressions/samples.py +0 -492
- additory/synthetic/__init__.py +0 -13
- additory/synthetic/column_name_resolver.py +0 -149
- additory/synthetic/deduce.py +0 -259
- additory/synthetic/distributions.py +0 -22
- additory/synthetic/forecast.py +0 -1132
- additory/synthetic/linked_list_parser.py +0 -415
- additory/synthetic/namespace_lookup.py +0 -129
- additory/synthetic/smote.py +0 -320
- additory/synthetic/strategies.py +0 -926
- additory/synthetic/synthesizer.py +0 -713
- additory/utilities/__init__.py +0 -53
- additory/utilities/encoding.py +0 -600
- additory/utilities/games.py +0 -300
- additory/utilities/keys.py +0 -8
- additory/utilities/lookup.py +0 -103
- additory/utilities/matchers.py +0 -216
- additory/utilities/resolvers.py +0 -286
- additory/utilities/settings.py +0 -167
- additory/utilities/units.py +0 -749
- additory/utilities/validators.py +0 -153
- additory-0.1.0a4.dist-info/METADATA +0 -311
- additory-0.1.0a4.dist-info/RECORD +0 -72
- additory-0.1.0a4.dist-info/licenses/LICENSE +0 -21
- {additory-0.1.0a4.dist-info → additory-0.1.1a1.dist-info}/WHEEL +0 -0
- {additory-0.1.0a4.dist-info → additory-0.1.1a1.dist-info}/top_level.txt +0 -0
--- a/additory/core/enhanced_cache_manager.py
+++ /dev/null
@@ -1,1119 +0,0 @@
-# enhanced_cache_manager.py
-# Content-hash based cache manager for enhanced expressions system
-
-import os
-import json
-import shutil
-import hashlib
-import threading
-from typing import Dict, List, Optional, Set, Any, Tuple
-from dataclasses import dataclass, field
-from datetime import datetime, timedelta
-from pathlib import Path
-
-from .logging import log_info, log_warning
-from .integrity_manager import IntegrityManager, SecurityError
-from .namespace_manager import NamespaceManager
-from .enhanced_version_manager import EnhancedVersionManager
-
-
-@dataclass
-class CacheEntry:
-    """Represents a cached expression entry"""
-    expression_name: str
-    version: str
-    namespace: str
-    source_path: str
-    cached_path: str
-    content_hash: str
-    cached_at: datetime
-    last_accessed: datetime
-    access_count: int = 0
-    file_size: int = 0
-    integrity_verified: bool = False
-    metadata: Dict[str, Any] = field(default_factory=dict)
-
-
-@dataclass
-class CacheStats:
-    """Cache statistics and metrics"""
-    total_entries: int = 0
-    cache_hits: int = 0
-    cache_misses: int = 0
-    integrity_failures: int = 0
-    cache_size_bytes: int = 0
-    last_cleanup: Optional[datetime] = None
-    corruption_recoveries: int = 0
-
-
-class CacheCorruptionError(Exception):
-    """Raised when cache corruption is detected"""
-    pass
-
-
-class CacheValidationError(Exception):
-    """Raised when cache validation fails"""
-    pass
-
-
-class EnhancedCacheManager:
-    """Content-hash based cache manager with namespace separation"""
-
-    def __init__(self):
-        # Cache paths for different namespaces
-        self.cache_paths = {
-            "builtin": os.path.expanduser("~/.additory/cache/expressions/core/"),
-            "user": os.path.expanduser("~/.additory/cache/expressions/user/")
-        }
-
-        # Component managers
-        self.integrity_manager = IntegrityManager()
-        self.namespace_manager = NamespaceManager()
-        self.version_manager = EnhancedVersionManager()
-
-        # Cache state
-        self.cache_entries: Dict[str, Dict[str, CacheEntry]] = {
-            "builtin": {},
-            "user": {}
-        }
-
-        # Statistics
-        self.stats = CacheStats()
-
-        # Thread safety
-        self._cache_lock = threading.RLock()
-
-        # Configuration
-        self.max_cache_age_days = 30
-        self.max_cache_size_mb = 500
-        self.auto_cleanup_enabled = True
-
-        # Initialize cache directories and load existing cache
-        self._initialize_cache()
-
-        log_info("[cache_manager] Enhanced Cache Manager initialized")
-
-    def _initialize_cache(self):
-        """Initialize cache directories and load existing cache metadata"""
-        try:
-            # Ensure cache directories exist
-            for namespace, cache_path in self.cache_paths.items():
-                os.makedirs(cache_path, exist_ok=True)
-                log_info(f"[cache_manager] Ensured cache directory: {cache_path}")
-
-            # Load existing cache metadata
-            self._load_cache_metadata()
-
-        except Exception as e:
-            log_warning(f"[cache_manager] Failed to initialize cache: {e}")
-
-    def _load_cache_metadata(self):
-        """Load cache metadata from all namespaces"""
-        with self._cache_lock:
-            for namespace in self.cache_paths:
-                try:
-                    metadata_path = os.path.join(self.cache_paths[namespace], "metadata.json")
-
-                    if os.path.exists(metadata_path):
-                        with open(metadata_path, 'r', encoding='utf-8') as f:
-                            metadata = json.load(f)
-
-                        # Load cache entries
-                        expressions = metadata.get("expressions", {})
-                        for filename, entry_data in expressions.items():
-                            cache_entry = self._parse_cache_entry(entry_data, namespace)
-                            if cache_entry:
-                                key = f"{cache_entry.expression_name}_{cache_entry.version}"
-                                self.cache_entries[namespace][key] = cache_entry
-
-                        log_info(f"[cache_manager] Loaded {len(expressions)} cache entries for {namespace}")
-
-                except Exception as e:
-                    log_warning(f"[cache_manager] Failed to load cache metadata for {namespace}: {e}")
-
-    def _parse_cache_entry(self, entry_data: dict, namespace: str) -> Optional[CacheEntry]:
-        """Parse cache entry from metadata"""
-        try:
-            return CacheEntry(
-                expression_name=entry_data.get("expression_name", ""),
-                version=entry_data.get("version", ""),
-                namespace=namespace,
-                source_path=entry_data.get("source_path", ""),
-                cached_path=entry_data.get("cached_path", ""),
-                content_hash=entry_data.get("content_hash", ""),
-                cached_at=datetime.fromisoformat(entry_data.get("cached_at", datetime.now().isoformat())),
-                last_accessed=datetime.fromisoformat(entry_data.get("last_accessed", datetime.now().isoformat())),
-                access_count=entry_data.get("access_count", 0),
-                file_size=entry_data.get("file_size", 0),
-                integrity_verified=entry_data.get("integrity_verified", False),
-                metadata=entry_data.get("metadata", {})
-            )
-        except Exception as e:
-            log_warning(f"[cache_manager] Failed to parse cache entry: {e}")
-            return None
-
-    def cache_expression(self, source_path: str, namespace: str,
-                         expression_name: str, version: str) -> bool:
-        """
-        Cache an expression file with content-hash validation
-
-        Args:
-            source_path: Path to source expression file
-            namespace: Namespace (builtin or user)
-            expression_name: Name of the expression
-            version: Version of the expression
-
-        Returns:
-            True if caching was successful
-
-        Raises:
-            CacheValidationError: If validation fails
-        """
-        with self._cache_lock:
-            try:
-                # Validate inputs
-                if namespace not in self.cache_paths:
-                    raise CacheValidationError(f"Invalid namespace: {namespace}")
-
-                if not os.path.exists(source_path):
-                    raise CacheValidationError(f"Source file not found: {source_path}")
-
-                # Generate cache filename and path
-                cache_filename = f"{expression_name}_{version}.add"
-                cache_path = self.cache_paths[namespace]
-                cached_file_path = os.path.join(cache_path, cache_filename)
-
-                # Calculate content hash before copying
-                source_content_hash = self._calculate_content_hash(source_path)
-
-                # Copy file to cache
-                shutil.copy2(source_path, cached_file_path)
-
-                # Validate integrity of cached file with namespace policy
-                try:
-                    self.integrity_manager.validate_integrity_with_policy(cached_file_path, namespace)
-                    integrity_verified = True
-                except SecurityError as e:
-                    # For built-in namespace, re-raise the error (strict policy)
-                    if namespace == "builtin":
-                        # Clean up the cached file
-                        if os.path.exists(cached_file_path):
-                            os.remove(cached_file_path)
-                        raise CacheValidationError(f"Built-in expression integrity validation failed: {e}")
-                    # For user namespace, log warning and continue (flexible policy)
-                    log_warning(f"[cache_manager] Integrity validation failed for {expression_name}: {e}")
-                    integrity_verified = False
-
-                # Create cache entry
-                cache_entry = CacheEntry(
-                    expression_name=expression_name,
-                    version=version,
-                    namespace=namespace,
-                    source_path=source_path,
-                    cached_path=cached_file_path,
-                    content_hash=source_content_hash,
-                    cached_at=datetime.now(),
-                    last_accessed=datetime.now(),
-                    access_count=0,
-                    file_size=os.path.getsize(cached_file_path),
-                    integrity_verified=integrity_verified
-                )
-
-                # Store cache entry
-                key = f"{expression_name}_{version}"
-                self.cache_entries[namespace][key] = cache_entry
-
-                # Update metadata
-                self._update_cache_metadata(namespace)
-
-                # Update statistics
-                self.stats.total_entries += 1
-                self.stats.cache_size_bytes += cache_entry.file_size
-
-                log_info(f"[cache_manager] Cached expression {expression_name} v{version} in {namespace}")
-                return True
-
-            except Exception as e:
-                # Clean up on failure
-                if 'cached_file_path' in locals() and os.path.exists(cached_file_path):
-                    try:
-                        os.remove(cached_file_path)
-                    except Exception:
-                        pass
-
-                raise CacheValidationError(f"Failed to cache expression {expression_name}: {e}")
-
-    def get_cached_expression(self, expression_name: str, version: str,
-                              namespace: str) -> Optional[str]:
-        """
-        Get cached expression file path with validation
-
-        Args:
-            expression_name: Name of the expression
-            version: Version of the expression
-            namespace: Namespace to search in
-
-        Returns:
-            Path to cached file if valid, None otherwise
-        """
-        with self._cache_lock:
-            try:
-                key = f"{expression_name}_{version}"
-
-                if namespace not in self.cache_entries:
-                    self.stats.cache_misses += 1
-                    return None
-
-                if key not in self.cache_entries[namespace]:
-                    self.stats.cache_misses += 1
-                    return None
-
-                cache_entry = self.cache_entries[namespace][key]
-
-                # Check if cached file exists
-                if not os.path.exists(cache_entry.cached_path):
-                    log_warning(f"[cache_manager] Cached file missing: {cache_entry.cached_path}")
-                    self._remove_cache_entry(namespace, key)
-                    self.stats.cache_misses += 1
-                    return None
-
-                # Validate content hash
-                if not self._validate_content_hash(cache_entry):
-                    log_warning(f"[cache_manager] Content hash validation failed for {expression_name}")
-                    self._remove_cache_entry(namespace, key)
-                    self.stats.integrity_failures += 1
-                    return None
-
-                # Validate integrity if required with namespace policy
-                if cache_entry.integrity_verified:
-                    try:
-                        self.integrity_manager.validate_integrity_with_policy(cache_entry.cached_path, namespace)
-                    except SecurityError as e:
-                        log_warning(f"[cache_manager] Integrity validation failed: {e}")
-                        self._remove_cache_entry(namespace, key)
-                        self.stats.integrity_failures += 1
-                        return None
-
-                # Update access statistics
-                cache_entry.last_accessed = datetime.now()
-                cache_entry.access_count += 1
-                self.stats.cache_hits += 1
-
-                log_info(f"[cache_manager] Cache hit for {expression_name} v{version}")
-                return cache_entry.cached_path
-
-            except Exception as e:
-                log_warning(f"[cache_manager] Failed to get cached expression: {e}")
-                self.stats.cache_misses += 1
-                return None
-
-    def _calculate_content_hash(self, file_path: str) -> str:
-        """Calculate SHA256 hash of file content"""
-        try:
-            with open(file_path, 'rb') as f:
-                content = f.read()
-            return f"sha256:{hashlib.sha256(content).hexdigest()}"
-        except Exception as e:
-            log_warning(f"[cache_manager] Failed to calculate content hash: {e}")
-            return ""
-
-    def _validate_content_hash(self, cache_entry: CacheEntry) -> bool:
-        """Validate content hash of cached file"""
-        try:
-            current_hash = self._calculate_content_hash(cache_entry.cached_path)
-            return current_hash == cache_entry.content_hash
-        except Exception:
-            return False
-
-    def _remove_cache_entry(self, namespace: str, key: str):
-        """Remove cache entry and associated file"""
-        try:
-            if key in self.cache_entries[namespace]:
-                cache_entry = self.cache_entries[namespace][key]
-
-                # Remove file
-                if os.path.exists(cache_entry.cached_path):
-                    os.remove(cache_entry.cached_path)
-
-                # Update statistics
-                self.stats.cache_size_bytes -= cache_entry.file_size
-                self.stats.total_entries -= 1
-
-                # Remove from cache
-                del self.cache_entries[namespace][key]
-
-                log_info(f"[cache_manager] Removed cache entry: {key}")
-        except Exception as e:
-            log_warning(f"[cache_manager] Failed to remove cache entry {key}: {e}")
-
-    def _update_cache_metadata(self, namespace: str):
-        """Update cache metadata file for namespace"""
-        try:
-            metadata_path = os.path.join(self.cache_paths[namespace], "metadata.json")
-
-            # Prepare metadata
-            metadata = {
-                "cache_version": "2.0",
-                "namespace": namespace,
-                "created_at": datetime.now().isoformat(),
-                "last_updated": datetime.now().isoformat(),
-                "expressions": {}
-            }
-
-            # Add cache entries
-            for key, cache_entry in self.cache_entries[namespace].items():
-                filename = os.path.basename(cache_entry.cached_path)
-                metadata["expressions"][filename] = {
-                    "expression_name": cache_entry.expression_name,
-                    "version": cache_entry.version,
-                    "source_path": cache_entry.source_path,
-                    "cached_path": cache_entry.cached_path,
-                    "content_hash": cache_entry.content_hash,
-                    "cached_at": cache_entry.cached_at.isoformat(),
-                    "last_accessed": cache_entry.last_accessed.isoformat(),
-                    "access_count": cache_entry.access_count,
-                    "file_size": cache_entry.file_size,
-                    "integrity_verified": cache_entry.integrity_verified,
-                    "metadata": cache_entry.metadata
-                }
-
-            # Write metadata
-            with open(metadata_path, 'w', encoding='utf-8') as f:
-                json.dump(metadata, f, indent=2, ensure_ascii=False)
-
-        except Exception as e:
-            log_warning(f"[cache_manager] Failed to update cache metadata for {namespace}: {e}")
-
-    def validate_cache_integrity(self, namespace: str) -> bool:
-        """
-        Validate integrity of all cached expressions in namespace
-
-        Args:
-            namespace: Namespace to validate
-
-        Returns:
-            True if all entries are valid
-        """
-        with self._cache_lock:
-            try:
-                if namespace not in self.cache_entries:
-                    return True
-
-                invalid_entries = []
-
-                for key, cache_entry in self.cache_entries[namespace].items():
-                    # Check file exists
-                    if not os.path.exists(cache_entry.cached_path):
-                        invalid_entries.append(key)
-                        continue
-
-                    # Validate content hash
-                    if not self._validate_content_hash(cache_entry):
-                        invalid_entries.append(key)
-                        continue
-
-                    # Validate integrity if required with namespace policy
-                    if cache_entry.integrity_verified:
-                        try:
-                            self.integrity_manager.validate_integrity_with_policy(cache_entry.cached_path, namespace)
-                        except SecurityError:
-                            invalid_entries.append(key)
-                            continue
-
-                # Remove invalid entries
-                for key in invalid_entries:
-                    self._remove_cache_entry(namespace, key)
-
-                if invalid_entries:
-                    log_warning(f"[cache_manager] Removed {len(invalid_entries)} invalid cache entries from {namespace}")
-                    self._update_cache_metadata(namespace)
-
-                return len(invalid_entries) == 0
-
-            except Exception as e:
-                log_warning(f"[cache_manager] Cache integrity validation failed for {namespace}: {e}")
-                return False
-
-    def refresh_cache(self, namespace: str = None) -> Dict[str, int]:
-        """
-        Refresh cache by reloading from source files
-
-        Args:
-            namespace: Specific namespace to refresh, or None for all
-
-        Returns:
-            Dictionary with refresh statistics
-        """
-        with self._cache_lock:
-            stats = {"refreshed": 0, "failed": 0, "removed": 0}
-
-            namespaces = [namespace] if namespace else list(self.cache_paths.keys())
-
-            for ns in namespaces:
-                try:
-                    log_info(f"[cache_manager] Refreshing cache for namespace: {ns}")
-
-                    # Get current cache entries
-                    current_entries = list(self.cache_entries[ns].items())
-
-                    for key, cache_entry in current_entries:
-                        try:
-                            # Check if source file still exists
-                            if not os.path.exists(cache_entry.source_path):
-                                self._remove_cache_entry(ns, key)
-                                stats["removed"] += 1
-                                continue
-
-                            # Check if source has changed
-                            source_hash = self._calculate_content_hash(cache_entry.source_path)
-                            if source_hash != cache_entry.content_hash:
-                                # Re-cache the expression
-                                self._remove_cache_entry(ns, key)
-
-                                if self.cache_expression(
-                                    cache_entry.source_path,
-                                    ns,
-                                    cache_entry.expression_name,
-                                    cache_entry.version
-                                ):
-                                    stats["refreshed"] += 1
-                                else:
-                                    stats["failed"] += 1
-
-                        except Exception as e:
-                            log_warning(f"[cache_manager] Failed to refresh {key}: {e}")
-                            stats["failed"] += 1
-
-                    # Update metadata
-                    self._update_cache_metadata(ns)
-
-                except Exception as e:
-                    log_warning(f"[cache_manager] Failed to refresh namespace {ns}: {e}")
-
-            log_info(f"[cache_manager] Cache refresh completed: {stats}")
-            return stats
-
-    def cleanup_cache(self, max_age_days: int = None, max_size_mb: int = None) -> Dict[str, int]:
-        """
-        Clean up old or excessive cache entries
-
-        Args:
-            max_age_days: Maximum age in days (uses default if None)
-            max_size_mb: Maximum cache size in MB (uses default if None)
-
-        Returns:
-            Cleanup statistics
-        """
-        with self._cache_lock:
-            max_age = max_age_days or self.max_cache_age_days
-            max_size = max_size_mb or self.max_cache_size_mb
-
-            stats = {"removed_old": 0, "removed_excess": 0, "bytes_freed": 0}
-            cutoff_date = datetime.now() - timedelta(days=max_age)
-
-            for namespace in self.cache_paths:
-                # Remove old entries
-                old_entries = []
-                for key, cache_entry in self.cache_entries[namespace].items():
-                    if cache_entry.last_accessed < cutoff_date:
-                        old_entries.append(key)
-
-                for key in old_entries:
-                    cache_entry = self.cache_entries[namespace][key]
-                    stats["bytes_freed"] += cache_entry.file_size
-                    self._remove_cache_entry(namespace, key)
-                    stats["removed_old"] += 1
-
-                # Remove excess entries if cache is too large
-                if self.stats.cache_size_bytes > max_size * 1024 * 1024:
-                    # Sort by last accessed (oldest first)
-                    entries_by_access = sorted(
-                        self.cache_entries[namespace].items(),
-                        key=lambda x: x[1].last_accessed
-                    )
-
-                    while (self.stats.cache_size_bytes > max_size * 1024 * 1024 and
-                           entries_by_access):
-                        key, cache_entry = entries_by_access.pop(0)
-                        stats["bytes_freed"] += cache_entry.file_size
-                        self._remove_cache_entry(namespace, key)
-                        stats["removed_excess"] += 1
-
-                # Update metadata
-                self._update_cache_metadata(namespace)
-
-            self.stats.last_cleanup = datetime.now()
-            log_info(f"[cache_manager] Cache cleanup completed: {stats}")
-            return stats
-
-    def get_cache_stats(self) -> Dict[str, Any]:
-        """Get comprehensive cache statistics"""
-        with self._cache_lock:
-            namespace_stats = {}
-
-            for namespace in self.cache_paths:
-                entries = self.cache_entries[namespace]
-                total_size = sum(entry.file_size for entry in entries.values())
-
-                namespace_stats[namespace] = {
-                    "entry_count": len(entries),
-                    "total_size_bytes": total_size,
-                    "total_size_mb": total_size / (1024 * 1024),
-                    "cache_path": self.cache_paths[namespace],
-                    "most_accessed": self._get_most_accessed_entry(namespace),
-                    "oldest_entry": self._get_oldest_entry(namespace),
-                    "newest_entry": self._get_newest_entry(namespace)
-                }
-
-            return {
-                "global_stats": {
-                    "total_entries": self.stats.total_entries,
-                    "cache_hits": self.stats.cache_hits,
-                    "cache_misses": self.stats.cache_misses,
-                    "hit_rate": self._calculate_hit_rate(),
-                    "integrity_failures": self.stats.integrity_failures,
-                    "cache_size_bytes": self.stats.cache_size_bytes,
-                    "cache_size_mb": self.stats.cache_size_bytes / (1024 * 1024),
-                    "last_cleanup": self.stats.last_cleanup.isoformat() if self.stats.last_cleanup else None,
-                    "corruption_recoveries": self.stats.corruption_recoveries
-                },
-                "namespace_stats": namespace_stats,
-                "configuration": {
-                    "max_cache_age_days": self.max_cache_age_days,
-                    "max_cache_size_mb": self.max_cache_size_mb,
-                    "auto_cleanup_enabled": self.auto_cleanup_enabled
-                }
-            }
-
-    def _calculate_hit_rate(self) -> float:
-        """Calculate cache hit rate"""
-        total_requests = self.stats.cache_hits + self.stats.cache_misses
-        if total_requests == 0:
-            return 0.0
-        return (self.stats.cache_hits / total_requests) * 100.0
-
-    def _get_most_accessed_entry(self, namespace: str) -> Optional[Dict[str, Any]]:
-        """Get most accessed cache entry in namespace"""
-        if not self.cache_entries[namespace]:
-            return None
-
-        most_accessed = max(
-            self.cache_entries[namespace].values(),
-            key=lambda x: x.access_count
-        )
-
-        return {
-            "expression_name": most_accessed.expression_name,
-            "version": most_accessed.version,
-            "access_count": most_accessed.access_count
-        }
-
-    def _get_oldest_entry(self, namespace: str) -> Optional[Dict[str, Any]]:
-        """Get oldest cache entry in namespace"""
-        if not self.cache_entries[namespace]:
-            return None
-
-        oldest = min(
-            self.cache_entries[namespace].values(),
-            key=lambda x: x.cached_at
-        )
-
-        return {
-            "expression_name": oldest.expression_name,
-            "version": oldest.version,
-            "cached_at": oldest.cached_at.isoformat()
-        }
-
-    def _get_newest_entry(self, namespace: str) -> Optional[Dict[str, Any]]:
-        """Get newest cache entry in namespace"""
-        if not self.cache_entries[namespace]:
-            return None
-
-        newest = max(
-            self.cache_entries[namespace].values(),
-            key=lambda x: x.cached_at
-        )
-
-        return {
-            "expression_name": newest.expression_name,
-            "version": newest.version,
-            "cached_at": newest.cached_at.isoformat()
-        }
-
-    def clear_cache(self, namespace: str = None) -> int:
-        """
-        Clear cache entries
-
-        Args:
-            namespace: Specific namespace to clear, or None for all
-
-        Returns:
-            Number of entries removed
-        """
-        with self._cache_lock:
-            removed_count = 0
-
-            namespaces = [namespace] if namespace else list(self.cache_paths.keys())
-
-            for ns in namespaces:
-                # Remove all entries
-                entries_to_remove = list(self.cache_entries[ns].keys())
-                for key in entries_to_remove:
-                    self._remove_cache_entry(ns, key)
-                    removed_count += 1
-
-                # Update metadata
-                self._update_cache_metadata(ns)
-
-            log_info(f"[cache_manager] Cleared {removed_count} cache entries")
-            return removed_count
-
-    def handle_cache_corruption(self, namespace: str, expression_name: str,
-                                version: str) -> bool:
-        """
-        Handle cache corruption by attempting recovery
-
-        Args:
-            namespace: Affected namespace
-            expression_name: Expression name
-            version: Expression version
-
-        Returns:
-            True if recovery was successful
-        """
-        with self._cache_lock:
-            try:
-                key = f"{expression_name}_{version}"
-
-                if key not in self.cache_entries[namespace]:
-                    return False
-
-                cache_entry = self.cache_entries[namespace][key]
-
-                log_warning(f"[cache_manager] Handling cache corruption for {expression_name} v{version}")
-
-                # Remove corrupted entry
-                self._remove_cache_entry(namespace, key)
-
-                # Attempt to re-cache from source if available
-                if os.path.exists(cache_entry.source_path):
-                    success = self.cache_expression(
-                        cache_entry.source_path,
-                        namespace,
-                        expression_name,
-                        version
-                    )
-
-                    if success:
-                        self.stats.corruption_recoveries += 1
-                        log_info(f"[cache_manager] Successfully recovered corrupted cache entry")
-                        return True
-
-                log_warning(f"[cache_manager] Failed to recover corrupted cache entry")
-                return False
-
-            except Exception as e:
-                log_warning(f"[cache_manager] Cache corruption recovery failed: {e}")
-                return False
-
-    def set_configuration(self, **kwargs):
-        """Update cache configuration"""
-        for key, value in kwargs.items():
-            if hasattr(self, key):
-                setattr(self, key, value)
-                log_info(f"[cache_manager] Updated configuration {key} = {value}")
-            else:
-                log_warning(f"[cache_manager] Unknown configuration key: {key}")
-
-    def reset_stats(self):
-        """Reset cache statistics"""
-        self.stats = CacheStats()
-        log_info("[cache_manager] Cache statistics reset")
-
-    # ===== TASK 3.2: Cache Invalidation and Refresh Methods =====
-
-    def invalidate_expression(self, namespace: str, expression_name: str, version: str) -> bool:
-        """
-        Invalidate a specific expression from cache
-
-        Args:
-            namespace: Namespace (builtin or user)
-            expression_name: Name of the expression
-            version: Version of the expression
-
-        Returns:
-            True if expression was invalidated
-        """
-        with self._cache_lock:
-            try:
-                key = f"{expression_name}_{version}"
-
-                if namespace not in self.cache_entries:
-                    log_warning(f"[cache_manager] Invalid namespace for invalidation: {namespace}")
-                    return False
-
-                if key not in self.cache_entries[namespace]:
-                    log_info(f"[cache_manager] Expression {expression_name} v{version} not in cache")
-                    return False
-
-                # Remove the cache entry
-                self._remove_cache_entry(namespace, key)
-
-                # Update metadata
-                self._update_cache_metadata(namespace)
-
-                log_info(f"[cache_manager] Invalidated {expression_name} v{version} from {namespace}")
-                return True
-
-            except Exception as e:
-                log_warning(f"[cache_manager] Failed to invalidate expression {expression_name}: {e}")
-                return False
-
-    def invalidate_version(self, namespace: str, version: str) -> int:
-        """
-        Invalidate all expressions of a specific version from cache
-
-        Args:
-            namespace: Namespace (builtin or user)
-            version: Version to invalidate
-
-        Returns:
-            Number of expressions invalidated
-        """
-        with self._cache_lock:
-            try:
-                if namespace not in self.cache_entries:
-                    log_warning(f"[cache_manager] Invalid namespace for version invalidation: {namespace}")
-                    return 0
-
-                # Find all entries with the specified version
-                entries_to_invalidate = []
-                for key, cache_entry in self.cache_entries[namespace].items():
-                    if cache_entry.version == version:
-                        entries_to_invalidate.append(key)
-
-                # Remove all matching entries
-                invalidated_count = 0
-                for key in entries_to_invalidate:
-                    self._remove_cache_entry(namespace, key)
-                    invalidated_count += 1
-
-                # Update metadata if any entries were removed
-                if invalidated_count > 0:
-                    self._update_cache_metadata(namespace)
-
-                log_info(f"[cache_manager] Invalidated {invalidated_count} expressions of version {version} from {namespace}")
-                return invalidated_count
-
-            except Exception as e:
-                log_warning(f"[cache_manager] Failed to invalidate version {version}: {e}")
-                return 0
-
-    def invalidate_namespace(self, namespace: str) -> int:
-        """
-        Invalidate all expressions in a namespace
-
-        Args:
-            namespace: Namespace to invalidate
-
-        Returns:
-            Number of expressions invalidated
-        """
-        with self._cache_lock:
-            try:
-                if namespace not in self.cache_entries:
-                    log_warning(f"[cache_manager] Invalid namespace for invalidation: {namespace}")
-                    return 0
-
-                # Get count before clearing
-                invalidated_count = len(self.cache_entries[namespace])
-
-                # Remove all entries
-                entries_to_remove = list(self.cache_entries[namespace].keys())
-                for key in entries_to_remove:
-                    self._remove_cache_entry(namespace, key)
-
-                # Update metadata
-                self._update_cache_metadata(namespace)
-
-                log_info(f"[cache_manager] Invalidated {invalidated_count} expressions from {namespace}")
-                return invalidated_count
-
-            except Exception as e:
-                log_warning(f"[cache_manager] Failed to invalidate namespace {namespace}: {e}")
-                return 0
-
-    def cleanup_expired_cache(self, max_age_days: int = None) -> Dict[str, int]:
-        """
-        Clean up expired cache entries based on age
-
-        Args:
-            max_age_days: Maximum age in days (uses default if None)
-
-        Returns:
-            Cleanup statistics
-        """
-        max_age = max_age_days or self.max_cache_age_days
-        return self.cleanup_cache(max_age_days=max_age, max_size_mb=None)
-
-    def cleanup_orphaned_cache(self) -> Dict[str, int]:
-        """
-        Clean up orphaned cache files (cached files without source files)
-
-        Returns:
-            Cleanup statistics
-        """
-        with self._cache_lock:
-            stats = {"removed_orphaned": 0, "bytes_freed": 0}
-
-            for namespace in self.cache_paths:
-                orphaned_entries = []
-
-                # Find entries where source file no longer exists
-                for key, cache_entry in self.cache_entries[namespace].items():
-                    if not os.path.exists(cache_entry.source_path):
-                        orphaned_entries.append(key)
-
-                # Remove orphaned entries
-                for key in orphaned_entries:
-                    cache_entry = self.cache_entries[namespace][key]
-                    stats["bytes_freed"] += cache_entry.file_size
-                    self._remove_cache_entry(namespace, key)
-                    stats["removed_orphaned"] += 1
-
-                # Update metadata if any entries were removed
-                if orphaned_entries:
-                    self._update_cache_metadata(namespace)
-
-            log_info(f"[cache_manager] Cleaned up {stats['removed_orphaned']} orphaned cache entries")
-            return stats
-
-    def get_cache_status(self) -> Dict[str, Any]:
-        """
-        Get comprehensive cache status and health information
-
-        Returns:
-            Detailed cache status information
-        """
-        with self._cache_lock:
-            status = {
-                "health": "healthy",
-                "issues": [],
-                "recommendations": [],
-                "statistics": self.get_cache_stats(),
-                "namespace_health": {},
-                "disk_usage": {},
-                "integrity_status": {}
-            }
-
-            # Check each namespace
-            for namespace in self.cache_paths:
-                namespace_status = {
-                    "entry_count": len(self.cache_entries[namespace]),
-                    "integrity_valid": True,
-                    "orphaned_entries": 0,
-                    "expired_entries": 0,
-                    "corrupted_entries": 0
-                }
-
-                # Check for orphaned entries
-                orphaned_count = 0
-                expired_count = 0
-                corrupted_count = 0
-                cutoff_date = datetime.now() - timedelta(days=self.max_cache_age_days)
-
-                for key, cache_entry in self.cache_entries[namespace].items():
-                    # Check if source exists (orphaned)
-                    if not os.path.exists(cache_entry.source_path):
-                        orphaned_count += 1
-
-                    # Check if expired
-                    if cache_entry.last_accessed < cutoff_date:
-                        expired_count += 1
-
-                    # Check if cached file exists and is valid
-                    if not os.path.exists(cache_entry.cached_path):
-                        corrupted_count += 1
-                    elif not self._validate_content_hash(cache_entry):
-                        corrupted_count += 1
-
-                namespace_status["orphaned_entries"] = orphaned_count
-                namespace_status["expired_entries"] = expired_count
-                namespace_status["corrupted_entries"] = corrupted_count
-
-                # Determine namespace health
-                if corrupted_count > 0:
-                    namespace_status["integrity_valid"] = False
-                    status["issues"].append(f"{namespace}: {corrupted_count} corrupted entries")
-
-                if orphaned_count > 0:
-                    status["issues"].append(f"{namespace}: {orphaned_count} orphaned entries")
-                    status["recommendations"].append(f"Run cleanup_orphaned_cache() to remove orphaned entries")
-
-                if expired_count > 0:
-                    status["recommendations"].append(f"Run cleanup_expired_cache() to remove {expired_count} expired entries")
-
-                status["namespace_health"][namespace] = namespace_status
-
-                # Get disk usage for namespace
-                try:
-                    cache_path = self.cache_paths[namespace]
-                    if os.path.exists(cache_path):
-                        total_size = 0
-                        file_count = 0
-                        for root, dirs, files in os.walk(cache_path):
-                            for file in files:
-                                file_path = os.path.join(root, file)
-                                if os.path.exists(file_path):
-                                    total_size += os.path.getsize(file_path)
-                                    file_count += 1
-
-                        status["disk_usage"][namespace] = {
-                            "total_size_bytes": total_size,
-                            "total_size_mb": total_size / (1024 * 1024),
-                            "file_count": file_count
-                        }
-                except Exception as e:
-                    log_warning(f"[cache_manager] Failed to get disk usage for {namespace}: {e}")
-
-            # Overall health assessment
-            if status["issues"]:
-                status["health"] = "degraded" if len(status["issues"]) < 5 else "unhealthy"
-
-            # Add integrity status
-            for namespace in self.cache_paths:
-                try:
-                    integrity_valid = self.validate_cache_integrity(namespace)
-                    status["integrity_status"][namespace] = {
-                        "valid": integrity_valid,
-                        "last_checked": datetime.now().isoformat()
-                    }
-                except Exception as e:
-                    status["integrity_status"][namespace] = {
-                        "valid": False,
-                        "error": str(e),
-                        "last_checked": datetime.now().isoformat()
-                    }
-
-            return status
-
-    def get_expression_cache_info(self, expression_name: str, version: str, namespace: str) -> Optional[Dict[str, Any]]:
-        """
-        Get detailed cache information for a specific expression
-
-        Args:
-            expression_name: Name of the expression
-            version: Version of the expression
-            namespace: Namespace to search in
-
-        Returns:
-            Detailed cache information or None if not cached
-        """
-        with self._cache_lock:
-            try:
-                key = f"{expression_name}_{version}"
-
-                if namespace not in self.cache_entries or key not in self.cache_entries[namespace]:
-                    return None
-
-                cache_entry = self.cache_entries[namespace][key]
-
-                # Check file status
-                cached_file_exists = os.path.exists(cache_entry.cached_path)
-                source_file_exists = os.path.exists(cache_entry.source_path)
-                content_hash_valid = self._validate_content_hash(cache_entry) if cached_file_exists else False
-
-                # Calculate age
-                age_days = (datetime.now() - cache_entry.cached_at).days
-                last_access_days = (datetime.now() - cache_entry.last_accessed).days
-
-                return {
-                    "expression_name": cache_entry.expression_name,
-                    "version": cache_entry.version,
-                    "namespace": cache_entry.namespace,
-                    "source_path": cache_entry.source_path,
-                    "cached_path": cache_entry.cached_path,
-                    "content_hash": cache_entry.content_hash,
-                    "cached_at": cache_entry.cached_at.isoformat(),
-                    "last_accessed": cache_entry.last_accessed.isoformat(),
-                    "access_count": cache_entry.access_count,
-                    "file_size": cache_entry.file_size,
-                    "file_size_mb": cache_entry.file_size / (1024 * 1024),
-                    "integrity_verified": cache_entry.integrity_verified,
-                    "age_days": age_days,
-                    "last_access_days": last_access_days,
-                    "status": {
-                        "cached_file_exists": cached_file_exists,
-                        "source_file_exists": source_file_exists,
-                        "content_hash_valid": content_hash_valid,
-                        "is_orphaned": not source_file_exists,
-                        "is_expired": age_days > self.max_cache_age_days,
-                        "is_corrupted": not (cached_file_exists and content_hash_valid)
-                    },
-                    "metadata": cache_entry.metadata
-                }
-
-            except Exception as e:
-                log_warning(f"[cache_manager] Failed to get cache info for {expression_name}: {e}")
-                return None
-
-    def force_refresh_expression(self, expression_name: str, version: str, namespace: str) -> bool:
-        """
-        Force refresh a specific expression from its source file
-
-        Args:
-            expression_name: Name of the expression
-            version: Version of the expression
-            namespace: Namespace of the expression
-
-        Returns:
-            True if refresh was successful
-        """
-        with self._cache_lock:
-            try:
-                key = f"{expression_name}_{version}"
-
-                if namespace not in self.cache_entries or key not in self.cache_entries[namespace]:
-                    log_warning(f"[cache_manager] Expression {expression_name} v{version} not in cache")
-                    return False
-
-                cache_entry = self.cache_entries[namespace][key]
-
-                # Check if source file exists
-                if not os.path.exists(cache_entry.source_path):
-                    log_warning(f"[cache_manager] Source file not found for refresh: {cache_entry.source_path}")
-                    return False
-
-                # Remove current cache entry
-                self._remove_cache_entry(namespace, key)
-
-                # Re-cache from source
-                success = self.cache_expression(
-                    cache_entry.source_path,
-                    namespace,
-                    expression_name,
-                    version
-                )
-
-                if success:
-                    log_info(f"[cache_manager] Force refreshed {expression_name} v{version}")
-                else:
-                    log_warning(f"[cache_manager] Failed to force refresh {expression_name} v{version}")
-
-                return success
-
-            except Exception as e:
-                log_warning(f"[cache_manager] Force refresh failed for {expression_name}: {e}")
-                return False
-
-
-# Global cache manager instance
-_global_cache_manager = None
-
-
-def get_cache_manager() -> EnhancedCacheManager:
-    """Get the global cache manager instance"""
-    global _global_cache_manager
-    if _global_cache_manager is None:
-        _global_cache_manager = EnhancedCacheManager()
-    return _global_cache_manager
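For reference, here is a minimal usage sketch of the cache-manager API removed above. It is written against 0.1.0a4 (the module no longer exists in 0.1.1a1), and the source path and expression name are hypothetical, chosen only to exercise the calls visible in the deleted file.

```python
# Usage sketch (hypothetical file path and expression name) for the
# EnhancedCacheManager removed in 0.1.1a1; only runs against additory 0.1.0a4.
import os

from additory.core.enhanced_cache_manager import get_cache_manager

manager = get_cache_manager()  # lazily constructs the process-wide singleton

# Copy a source .add file into the "user" cache namespace; the manager stores
# it under ~/.additory/cache/expressions/user/ as <name>_<version>.add and
# records a "sha256:..." content hash in that namespace's metadata.json.
source = os.path.expanduser("~/expressions/my_expr.add")  # hypothetical file
manager.cache_expression(source, "user", "my_expr", "1.0")

# Lookups re-hash the cached file; on mismatch the entry is evicted and None
# is returned, so a miss must be handled by re-caching from the source file.
cached_path = manager.get_cached_expression("my_expr", "1.0", "user")
if cached_path is None:
    manager.cache_expression(source, "user", "my_expr", "1.0")  # re-cache

# Housekeeping falls back to the defaults set in __init__ (30 days, 500 MB).
manager.cleanup_cache()
print(manager.get_cache_stats()["global_stats"]["hit_rate"])
```

Note that every public entry point takes the namespace explicitly: per the removed code, `builtin` enforces the strict integrity policy (failures raise `CacheValidationError`), while `user` merely logs the failure and caches the file anyway.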