abstractcore-2.5.0-py3-none-any.whl → abstractcore-2.5.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. abstractcore/__init__.py +12 -0
  2. abstractcore/apps/__main__.py +8 -1
  3. abstractcore/apps/deepsearch.py +644 -0
  4. abstractcore/apps/intent.py +614 -0
  5. abstractcore/architectures/detection.py +250 -4
  6. abstractcore/assets/architecture_formats.json +14 -1
  7. abstractcore/assets/model_capabilities.json +583 -44
  8. abstractcore/compression/__init__.py +29 -0
  9. abstractcore/compression/analytics.py +420 -0
  10. abstractcore/compression/cache.py +250 -0
  11. abstractcore/compression/config.py +279 -0
  12. abstractcore/compression/exceptions.py +30 -0
  13. abstractcore/compression/glyph_processor.py +381 -0
  14. abstractcore/compression/optimizer.py +388 -0
  15. abstractcore/compression/orchestrator.py +380 -0
  16. abstractcore/compression/pil_text_renderer.py +818 -0
  17. abstractcore/compression/quality.py +226 -0
  18. abstractcore/compression/text_formatter.py +666 -0
  19. abstractcore/compression/vision_compressor.py +371 -0
  20. abstractcore/config/main.py +66 -1
  21. abstractcore/config/manager.py +111 -5
  22. abstractcore/core/session.py +105 -5
  23. abstractcore/events/__init__.py +1 -1
  24. abstractcore/media/auto_handler.py +312 -18
  25. abstractcore/media/handlers/local_handler.py +14 -2
  26. abstractcore/media/handlers/openai_handler.py +62 -3
  27. abstractcore/media/processors/__init__.py +11 -1
  28. abstractcore/media/processors/direct_pdf_processor.py +210 -0
  29. abstractcore/media/processors/glyph_pdf_processor.py +227 -0
  30. abstractcore/media/processors/image_processor.py +7 -1
  31. abstractcore/media/processors/text_processor.py +18 -3
  32. abstractcore/media/types.py +164 -7
  33. abstractcore/processing/__init__.py +5 -1
  34. abstractcore/processing/basic_deepsearch.py +2173 -0
  35. abstractcore/processing/basic_intent.py +690 -0
  36. abstractcore/providers/__init__.py +18 -0
  37. abstractcore/providers/anthropic_provider.py +29 -2
  38. abstractcore/providers/base.py +279 -6
  39. abstractcore/providers/huggingface_provider.py +658 -27
  40. abstractcore/providers/lmstudio_provider.py +52 -2
  41. abstractcore/providers/mlx_provider.py +103 -4
  42. abstractcore/providers/model_capabilities.py +352 -0
  43. abstractcore/providers/ollama_provider.py +44 -6
  44. abstractcore/providers/openai_provider.py +29 -2
  45. abstractcore/providers/registry.py +91 -19
  46. abstractcore/server/app.py +91 -81
  47. abstractcore/structured/handler.py +161 -1
  48. abstractcore/tools/common_tools.py +98 -3
  49. abstractcore/utils/__init__.py +4 -1
  50. abstractcore/utils/cli.py +114 -1
  51. abstractcore/utils/trace_export.py +287 -0
  52. abstractcore/utils/version.py +1 -1
  53. abstractcore/utils/vlm_token_calculator.py +655 -0
  54. {abstractcore-2.5.0.dist-info → abstractcore-2.5.3.dist-info}/METADATA +140 -23
  55. abstractcore-2.5.3.dist-info/RECORD +107 -0
  56. {abstractcore-2.5.0.dist-info → abstractcore-2.5.3.dist-info}/entry_points.txt +4 -0
  57. abstractcore-2.5.0.dist-info/RECORD +0 -86
  58. {abstractcore-2.5.0.dist-info → abstractcore-2.5.3.dist-info}/WHEEL +0 -0
  59. {abstractcore-2.5.0.dist-info → abstractcore-2.5.3.dist-info}/licenses/LICENSE +0 -0
  60. {abstractcore-2.5.0.dist-info → abstractcore-2.5.3.dist-info}/top_level.txt +0 -0
abstractcore/compression/__init__.py
@@ -0,0 +1,29 @@
+"""
+Glyph visual-text compression system for AbstractCore.
+
+This module provides visual-text compression capabilities that transform long textual
+sequences into optimized images for processing by Vision-Language Models (VLMs),
+achieving 3-4x token compression without accuracy loss.
+
+Based on the Glyph framework by Z.ai/THU-COAI with AbstractCore-specific enhancements.
+"""
+
+from .glyph_processor import GlyphProcessor
+from .orchestrator import CompressionOrchestrator
+from .config import GlyphConfig, RenderingConfig
+from .quality import QualityValidator, CompressionStats
+from .cache import CompressionCache
+from .exceptions import CompressionError, CompressionQualityError
+
+__all__ = [
+    'GlyphProcessor',
+    'CompressionOrchestrator',
+    'GlyphConfig',
+    'RenderingConfig',
+    'QualityValidator',
+    'CompressionStats',
+    'CompressionCache',
+    'CompressionError',
+    'CompressionQualityError'
+]
+
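The new abstractcore.compression package re-exports the classes listed in __all__ above. As a quick orientation, here is a minimal sketch of importing that surface in 2.5.3 (imports only; constructor signatures for GlyphProcessor and CompressionOrchestrator are not part of this hunk, so nothing beyond the names should be read as API):

# Minimal sketch: the public surface added by abstractcore.compression in 2.5.3.
# Guarded import so the sketch degrades gracefully if the subpackage cannot be imported.
try:
    from abstractcore.compression import (
        GlyphProcessor,
        CompressionOrchestrator,
        GlyphConfig,
        RenderingConfig,
        QualityValidator,
        CompressionStats,
        CompressionCache,
        CompressionError,
        CompressionQualityError,
    )
except ImportError:
    GlyphProcessor = None  # compression support unavailable in this environment

The heavier pieces (rendering, orchestration, quality validation) live in the other compression modules added in this release; the analytics and cache modules follow below.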
abstractcore/compression/analytics.py
@@ -0,0 +1,420 @@
+"""
+Compression analytics system for tracking and analyzing compression performance.
+"""
+
+import time
+import json
+from pathlib import Path
+from typing import Dict, Any, List, Optional
+from dataclasses import dataclass, field
+from datetime import datetime
+import statistics
+
+from ..utils.structured_logging import get_logger
+
+
+@dataclass
+class CompressionMetrics:
+    """Metrics for a single compression operation."""
+    timestamp: float
+    provider: str
+    model: str
+    original_tokens: int
+    compressed_tokens: int
+    compression_ratio: float
+    quality_score: float
+    processing_time: float
+    images_created: int
+    method: str  # "glyph", "hybrid", etc.
+    success: bool
+    metadata: Dict[str, Any] = field(default_factory=dict)
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert to dictionary."""
+        return {
+            "timestamp": self.timestamp,
+            "provider": self.provider,
+            "model": self.model,
+            "original_tokens": self.original_tokens,
+            "compressed_tokens": self.compressed_tokens,
+            "compression_ratio": self.compression_ratio,
+            "quality_score": self.quality_score,
+            "processing_time": self.processing_time,
+            "images_created": self.images_created,
+            "method": self.method,
+            "success": self.success,
+            "metadata": self.metadata
+        }
+
+
+class CompressionAnalytics:
+    """
+    Analytics system for tracking compression performance.
+
+    Collects metrics, analyzes trends, and provides insights for optimization.
+    """
+
+    def __init__(self, storage_path: Optional[Path] = None):
+        """
+        Initialize analytics system.
+
+        Args:
+            storage_path: Path to store analytics data
+        """
+        self.logger = get_logger(self.__class__.__name__)
+        self.metrics: List[CompressionMetrics] = []
+        self.storage_path = storage_path or Path.home() / ".abstractcore" / "analytics" / "compression.json"
+        self.storage_path.parent.mkdir(parents=True, exist_ok=True)
+
+        # Load existing metrics
+        self._load_metrics()
+
+    def _load_metrics(self):
+        """Load existing metrics from storage."""
+        if self.storage_path.exists():
+            try:
+                with open(self.storage_path, 'r') as f:
+                    data = json.load(f)
+                for item in data:
+                    self.metrics.append(CompressionMetrics(**item))
+                self.logger.debug(f"Loaded {len(self.metrics)} historical metrics")
+            except Exception as e:
+                self.logger.warning(f"Failed to load metrics: {e}")
+
+    def _save_metrics(self):
+        """Save metrics to storage."""
+        try:
+            data = [m.to_dict() for m in self.metrics]
+            with open(self.storage_path, 'w') as f:
+                json.dump(data, f, indent=2)
+            self.logger.debug(f"Saved {len(self.metrics)} metrics")
+        except Exception as e:
+            self.logger.error(f"Failed to save metrics: {e}")
+
+    def record_compression(
+        self,
+        provider: str,
+        model: str,
+        original_tokens: int,
+        compressed_tokens: int,
+        quality_score: float,
+        processing_time: float,
+        images_created: int = 0,
+        method: str = "glyph",
+        success: bool = True,
+        metadata: Dict[str, Any] = None
+    ) -> CompressionMetrics:
+        """
+        Record a compression operation.
+
+        Args:
+            provider: Provider used
+            model: Model used
+            original_tokens: Original token count
+            compressed_tokens: Compressed token count
+            quality_score: Quality score (0-1)
+            processing_time: Processing time in seconds
+            images_created: Number of images created
+            method: Compression method used
+            success: Whether compression succeeded
+            metadata: Additional metadata
+
+        Returns:
+            Created CompressionMetrics object
+        """
+        compression_ratio = original_tokens / compressed_tokens if compressed_tokens > 0 else 1.0
+
+        metric = CompressionMetrics(
+            timestamp=time.time(),
+            provider=provider,
+            model=model,
+            original_tokens=original_tokens,
+            compressed_tokens=compressed_tokens,
+            compression_ratio=compression_ratio,
+            quality_score=quality_score,
+            processing_time=processing_time,
+            images_created=images_created,
+            method=method,
+            success=success,
+            metadata=metadata or {}
+        )
+
+        self.metrics.append(metric)
+        self._save_metrics()
+
+        self.logger.info(
+            f"Recorded compression: {provider}/{model}, "
+            f"{compression_ratio:.1f}x, {quality_score:.2%} quality"
+        )
+
+        return metric
+
+    def get_provider_stats(self, provider: str) -> Dict[str, Any]:
+        """
+        Get statistics for a specific provider.
+
+        Args:
+            provider: Provider name
+
+        Returns:
+            Statistics dictionary
+        """
+        provider_metrics = [m for m in self.metrics if m.provider == provider and m.success]
+
+        if not provider_metrics:
+            return {"provider": provider, "no_data": True}
+
+        ratios = [m.compression_ratio for m in provider_metrics]
+        qualities = [m.quality_score for m in provider_metrics]
+        times = [m.processing_time for m in provider_metrics]
+
+        return {
+            "provider": provider,
+            "total_compressions": len(provider_metrics),
+            "avg_compression_ratio": statistics.mean(ratios),
+            "median_compression_ratio": statistics.median(ratios),
+            "best_compression_ratio": max(ratios),
+            "avg_quality_score": statistics.mean(qualities),
+            "avg_processing_time": statistics.mean(times),
+            "success_rate": len(provider_metrics) / len([m for m in self.metrics if m.provider == provider])
+        }
+
+    def get_model_stats(self, provider: str, model: str) -> Dict[str, Any]:
+        """
+        Get statistics for a specific model.
+
+        Args:
+            provider: Provider name
+            model: Model name
+
+        Returns:
+            Statistics dictionary
+        """
+        model_metrics = [
+            m for m in self.metrics
+            if m.provider == provider and m.model == model and m.success
+        ]
+
+        if not model_metrics:
+            return {"provider": provider, "model": model, "no_data": True}
+
+        ratios = [m.compression_ratio for m in model_metrics]
+        qualities = [m.quality_score for m in model_metrics]
+        times = [m.processing_time for m in model_metrics]
+        images = [m.images_created for m in model_metrics]
+
+        return {
+            "provider": provider,
+            "model": model,
+            "total_compressions": len(model_metrics),
+            "avg_compression_ratio": statistics.mean(ratios),
+            "std_compression_ratio": statistics.stdev(ratios) if len(ratios) > 1 else 0,
+            "avg_quality_score": statistics.mean(qualities),
+            "avg_processing_time": statistics.mean(times),
+            "avg_images_created": statistics.mean(images),
+            "percentiles": {
+                "p25": statistics.quantiles(ratios, n=4)[0] if len(ratios) > 1 else ratios[0],
+                "p50": statistics.median(ratios),
+                "p75": statistics.quantiles(ratios, n=4)[2] if len(ratios) > 1 else ratios[0],
+            } if ratios else {}
+        }
+
+    def get_method_comparison(self) -> Dict[str, Any]:
+        """
+        Compare different compression methods.
+
+        Returns:
+            Comparison statistics
+        """
+        methods = {}
+
+        for method in set(m.method for m in self.metrics):
+            method_metrics = [m for m in self.metrics if m.method == method and m.success]
+
+            if method_metrics:
+                ratios = [m.compression_ratio for m in method_metrics]
+                qualities = [m.quality_score for m in method_metrics]
+                times = [m.processing_time for m in method_metrics]
+
+                methods[method] = {
+                    "count": len(method_metrics),
+                    "avg_compression": statistics.mean(ratios),
+                    "avg_quality": statistics.mean(qualities),
+                    "avg_time": statistics.mean(times),
+                    "efficiency": statistics.mean(ratios) / statistics.mean(times) if times else 0
+                }
+
+        return methods
+
+    def get_trends(self, hours: int = 24) -> Dict[str, Any]:
+        """
+        Get compression trends over time.
+
+        Args:
+            hours: Number of hours to analyze
+
+        Returns:
+            Trend analysis
+        """
+        cutoff_time = time.time() - (hours * 3600)
+        recent_metrics = [m for m in self.metrics if m.timestamp > cutoff_time and m.success]
+
+        if not recent_metrics:
+            return {"no_recent_data": True}
+
+        # Sort by timestamp
+        recent_metrics.sort(key=lambda m: m.timestamp)
+
+        # Calculate rolling averages
+        window_size = max(1, len(recent_metrics) // 10)
+        rolling_ratios = []
+        rolling_qualities = []
+
+        for i in range(len(recent_metrics) - window_size + 1):
+            window = recent_metrics[i:i + window_size]
+            rolling_ratios.append(statistics.mean([m.compression_ratio for m in window]))
+            rolling_qualities.append(statistics.mean([m.quality_score for m in window]))
+
+        # Detect trends
+        if len(rolling_ratios) > 1:
+            ratio_trend = "improving" if rolling_ratios[-1] > rolling_ratios[0] else "declining"
+            quality_trend = "improving" if rolling_qualities[-1] > rolling_qualities[0] else "declining"
+        else:
+            ratio_trend = "stable"
+            quality_trend = "stable"
+
+        return {
+            "period_hours": hours,
+            "total_compressions": len(recent_metrics),
+            "ratio_trend": ratio_trend,
+            "quality_trend": quality_trend,
+            "current_avg_ratio": statistics.mean([m.compression_ratio for m in recent_metrics[-window_size:]]) if recent_metrics else 0,
+            "current_avg_quality": statistics.mean([m.quality_score for m in recent_metrics[-window_size:]]) if recent_metrics else 0
+        }
+
+    def get_optimization_suggestions(self) -> List[str]:
+        """
+        Generate optimization suggestions based on analytics.
+
+        Returns:
+            List of suggestions
+        """
+        suggestions = []
+
+        # Analyze recent performance
+        recent_metrics = [m for m in self.metrics[-100:] if m.success]  # Last 100 compressions
+
+        if recent_metrics:
+            avg_ratio = statistics.mean([m.compression_ratio for m in recent_metrics])
+            avg_quality = statistics.mean([m.quality_score for m in recent_metrics])
+
+            # Compression ratio suggestions
+            if avg_ratio < 3.0:
+                suggestions.append("Consider using more aggressive compression settings (smaller fonts, more columns)")
+            elif avg_ratio > 5.0 and avg_quality < 0.90:
+                suggestions.append("High compression may be affecting quality - consider balancing settings")
+
+            # Quality suggestions
+            if avg_quality < 0.90:
+                suggestions.append("Quality scores are low - increase DPI or font size")
+            elif avg_quality > 0.95 and avg_ratio < 4.0:
+                suggestions.append("Quality is very high - room for more aggressive compression")
+
+            # Provider-specific suggestions
+            provider_stats = {}
+            for provider in set(m.provider for m in recent_metrics):
+                stats = self.get_provider_stats(provider)
+                if not stats.get("no_data"):
+                    provider_stats[provider] = stats
+
+            if provider_stats:
+                best_provider = max(provider_stats.items(), key=lambda x: x[1].get("avg_compression_ratio", 0))
+                if best_provider[1]["avg_compression_ratio"] > avg_ratio * 1.2:
+                    suggestions.append(f"Provider '{best_provider[0]}' shows better compression - consider using it more")
+
+        # Method suggestions
+        method_comparison = self.get_method_comparison()
+        if "hybrid" in method_comparison and "glyph" in method_comparison:
+            if method_comparison["hybrid"]["avg_compression"] > method_comparison["glyph"]["avg_compression"] * 2:
+                suggestions.append("Hybrid compression shows significant improvement - use it for large documents")
+
+        if not suggestions:
+            suggestions.append("Performance is optimal - no specific improvements recommended")
+
+        return suggestions
+
+    def generate_report(self) -> str:
+        """
+        Generate a comprehensive analytics report.
+
+        Returns:
+            Formatted report string
+        """
+        report = ["=" * 80]
+        report.append("COMPRESSION ANALYTICS REPORT")
+        report.append("=" * 80)
+        report.append(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+        report.append(f"Total compressions: {len(self.metrics)}")
+
+        # Overall statistics
+        if self.metrics:
+            successful = [m for m in self.metrics if m.success]
+            if successful:
+                report.append(f"\nOVERALL STATISTICS:")
+                report.append(f" Success rate: {len(successful)/len(self.metrics):.1%}")
+                report.append(f" Avg compression: {statistics.mean([m.compression_ratio for m in successful]):.1f}x")
+                report.append(f" Avg quality: {statistics.mean([m.quality_score for m in successful]):.1%}")
+                report.append(f" Avg time: {statistics.mean([m.processing_time for m in successful]):.2f}s")
+
+        # Provider breakdown
+        providers = set(m.provider for m in self.metrics)
+        if providers:
+            report.append(f"\nPROVIDER PERFORMANCE:")
+            for provider in sorted(providers):
+                stats = self.get_provider_stats(provider)
+                if not stats.get("no_data"):
+                    report.append(f" {provider}:")
+                    report.append(f" Compressions: {stats['total_compressions']}")
+                    report.append(f" Avg ratio: {stats['avg_compression_ratio']:.1f}x")
+                    report.append(f" Avg quality: {stats['avg_quality_score']:.1%}")
+
+        # Method comparison
+        methods = self.get_method_comparison()
+        if methods:
+            report.append(f"\nMETHOD COMPARISON:")
+            for method, stats in methods.items():
+                report.append(f" {method}:")
+                report.append(f" Avg compression: {stats['avg_compression']:.1f}x")
+                report.append(f" Avg quality: {stats['avg_quality']:.1%}")
+                report.append(f" Efficiency: {stats['efficiency']:.2f}")
+
+        # Trends
+        trends = self.get_trends(24)
+        if not trends.get("no_recent_data"):
+            report.append(f"\nRECENT TRENDS (24h):")
+            report.append(f" Compression trend: {trends['ratio_trend']}")
+            report.append(f" Quality trend: {trends['quality_trend']}")
+            report.append(f" Current avg ratio: {trends['current_avg_ratio']:.1f}x")
+
+        # Suggestions
+        suggestions = self.get_optimization_suggestions()
+        if suggestions:
+            report.append(f"\nOPTIMIZATION SUGGESTIONS:")
+            for i, suggestion in enumerate(suggestions, 1):
+                report.append(f" {i}. {suggestion}")
+
+        report.append("=" * 80)
+
+        return "\n".join(report)
+
+
+# Global analytics instance
+_analytics_instance = None
+
+
+def get_analytics() -> CompressionAnalytics:
+    """Get global analytics instance."""
+    global _analytics_instance
+    if _analytics_instance is None:
+        _analytics_instance = CompressionAnalytics()
+    return _analytics_instance
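CompressionMetrics plus CompressionAnalytics amount to an append-only JSON metrics log (default: ~/.abstractcore/analytics/compression.json) with aggregation helpers on top, and get_analytics() returns a shared module-level instance. A usage sketch, with method names and arguments taken from the code above; the provider, model, and numbers are purely illustrative:

from pathlib import Path

from abstractcore.compression.analytics import CompressionAnalytics

# Point at a scratch file so the default ~/.abstractcore/analytics store stays untouched.
analytics = CompressionAnalytics(storage_path=Path("/tmp/compression_metrics.json"))

# Record one (illustrative) compression run; compression_ratio is derived internally.
analytics.record_compression(
    provider="openai",       # illustrative provider/model, not a recommendation
    model="gpt-4o-mini",
    original_tokens=12000,
    compressed_tokens=3400,
    quality_score=0.94,
    processing_time=2.7,
    images_created=3,
    method="glyph",
)

stats = analytics.get_provider_stats("openai")
print(f"{stats['avg_compression_ratio']:.1f}x across {stats['total_compressions']} runs")
print(analytics.generate_report())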
abstractcore/compression/cache.py
@@ -0,0 +1,250 @@
+"""
+Compression cache system for Glyph.
+"""
+
+import os
+import json
+import time
+import hashlib
+import shutil
+from pathlib import Path
+from typing import List, Optional, Dict, Any
+from dataclasses import asdict
+
+from .exceptions import CompressionCacheError
+from ..utils.structured_logging import get_logger
+
+
+class CompressionCache:
+    """Cache system for compressed content."""
+
+    def __init__(self, cache_dir: str = None, max_size_gb: float = 1.0, ttl_days: int = 7):
+        """
+        Initialize compression cache.
+
+        Args:
+            cache_dir: Cache directory path
+            max_size_gb: Maximum cache size in GB
+            ttl_days: Time-to-live in days
+        """
+        self.logger = get_logger(self.__class__.__name__)
+
+        if cache_dir is None:
+            cache_dir = str(Path.home() / ".abstractcore" / "glyph_cache")
+
+        self.cache_dir = Path(cache_dir)
+        self.max_size_bytes = int(max_size_gb * 1024 * 1024 * 1024)
+        self.ttl_seconds = ttl_days * 24 * 3600
+
+        # Create cache directory (thread-safe for parallel executions)
+        try:
+            self.cache_dir.mkdir(parents=True, exist_ok=True)
+            self.logger.debug(f"Cache directory created/verified: {self.cache_dir}")
+        except Exception as e:
+            self.logger.error(f"Failed to create cache directory {self.cache_dir}: {e}")
+            raise CompressionCacheError(f"Cannot create cache directory: {e}")
+
+        # Initialize metadata
+        self.metadata_file = self.cache_dir / "cache_metadata.json"
+        self.metadata = self._load_metadata()
+
+        self.logger.debug(f"Initialized compression cache at {self.cache_dir}")
+
+    def _load_metadata(self) -> Dict[str, Any]:
+        """Load cache metadata."""
+        if self.metadata_file.exists():
+            try:
+                with open(self.metadata_file, 'r') as f:
+                    return json.load(f)
+            except Exception as e:
+                self.logger.warning(f"Failed to load cache metadata: {e}")
+
+        return {
+            'entries': {},
+            'created': time.time(),
+            'last_cleanup': time.time()
+        }
+
+    def _save_metadata(self):
+        """Save cache metadata."""
+        try:
+            with open(self.metadata_file, 'w') as f:
+                json.dump(self.metadata, f, indent=2)
+        except Exception as e:
+            self.logger.error(f"Failed to save cache metadata: {e}")
+
+    def _generate_cache_key(self, content: str, config: Dict[str, Any]) -> str:
+        """Generate cache key from content and configuration."""
+        # Create a hash of content + configuration
+        content_hash = hashlib.sha256(content.encode('utf-8')).hexdigest()[:16]
+        config_str = json.dumps(config, sort_keys=True)
+        config_hash = hashlib.sha256(config_str.encode('utf-8')).hexdigest()[:8]
+
+        return f"{content_hash}_{config_hash}"
+
+    def get(self, cache_key: str) -> Optional[List[Path]]:
+        """
+        Get cached compression result.
+
+        Args:
+            cache_key: Cache key
+
+        Returns:
+            List of image paths if cached, None otherwise
+        """
+        if cache_key not in self.metadata['entries']:
+            return None
+
+        entry = self.metadata['entries'][cache_key]
+
+        # Check TTL
+        if time.time() - entry['created'] > self.ttl_seconds:
+            self._remove_entry(cache_key)
+            return None
+
+        # Check if files still exist
+        image_paths = [Path(path) for path in entry['image_paths']]
+        if not all(path.exists() for path in image_paths):
+            self._remove_entry(cache_key)
+            return None
+
+        # Update access time
+        entry['last_accessed'] = time.time()
+        self._save_metadata()
+
+        self.logger.debug(f"Cache hit for key {cache_key}")
+        return image_paths
+
+    def set(self, cache_key: str, image_paths: List[Path],
+            compression_stats: Dict[str, Any] = None):
+        """
+        Store compression result in cache.
+
+        Args:
+            cache_key: Cache key
+            image_paths: List of rendered image paths
+            compression_stats: Optional compression statistics
+        """
+        try:
+            # Create cache entry directory
+            entry_dir = self.cache_dir / cache_key
+            entry_dir.mkdir(exist_ok=True)
+
+            # Copy images to cache
+            cached_paths = []
+            for i, image_path in enumerate(image_paths):
+                if image_path.exists():
+                    cached_path = entry_dir / f"image_{i}{image_path.suffix}"
+                    shutil.copy2(image_path, cached_path)
+                    cached_paths.append(str(cached_path))
+
+            # Store metadata
+            entry = {
+                'created': time.time(),
+                'last_accessed': time.time(),
+                'image_paths': cached_paths,
+                'compression_stats': compression_stats or {},
+                'size_bytes': sum(Path(path).stat().st_size for path in cached_paths)
+            }
+
+            self.metadata['entries'][cache_key] = entry
+            self._save_metadata()
+
+            # Cleanup if needed
+            self._cleanup_if_needed()
+
+            self.logger.debug(f"Cached compression result for key {cache_key}")
+
+        except Exception as e:
+            self.logger.error(f"Failed to cache compression result: {e}")
+            raise CompressionCacheError(f"Cache storage failed: {e}")
+
+    def _remove_entry(self, cache_key: str):
+        """Remove cache entry."""
+        if cache_key in self.metadata['entries']:
+            # Remove files
+            entry_dir = self.cache_dir / cache_key
+            if entry_dir.exists():
+                shutil.rmtree(entry_dir, ignore_errors=True)
+
+            # Remove metadata
+            del self.metadata['entries'][cache_key]
+            self._save_metadata()
+
+    def _cleanup_if_needed(self):
+        """Cleanup cache if size or TTL limits exceeded."""
+        now = time.time()
+
+        # Skip if cleaned up recently
+        if now - self.metadata.get('last_cleanup', 0) < 3600:  # 1 hour
+            return
+
+        self.logger.debug("Starting cache cleanup")
+
+        # Remove expired entries
+        expired_keys = []
+        for key, entry in self.metadata['entries'].items():
+            if now - entry['created'] > self.ttl_seconds:
+                expired_keys.append(key)
+
+        for key in expired_keys:
+            self._remove_entry(key)
+
+        # Check size limit
+        total_size = sum(entry.get('size_bytes', 0) for entry in self.metadata['entries'].values())
+
+        if total_size > self.max_size_bytes:
+            # Remove least recently accessed entries
+            entries_by_access = sorted(
+                self.metadata['entries'].items(),
+                key=lambda x: x[1].get('last_accessed', 0)
+            )
+
+            while total_size > self.max_size_bytes and entries_by_access:
+                key, entry = entries_by_access.pop(0)
+                total_size -= entry.get('size_bytes', 0)
+                self._remove_entry(key)
+
+        self.metadata['last_cleanup'] = now
+        self._save_metadata()
+
+        self.logger.debug(f"Cache cleanup completed. Entries: {len(self.metadata['entries'])}")
+
+    def clear(self):
+        """Clear all cache entries."""
+        try:
+            # Remove all entry directories
+            for entry_dir in self.cache_dir.iterdir():
+                if entry_dir.is_dir() and entry_dir.name != "cache_metadata.json":
+                    shutil.rmtree(entry_dir, ignore_errors=True)
+
+            # Reset metadata
+            self.metadata = {
+                'entries': {},
+                'created': time.time(),
+                'last_cleanup': time.time()
+            }
+            self._save_metadata()
+
+            self.logger.info("Cache cleared")
+
+        except Exception as e:
+            self.logger.error(f"Failed to clear cache: {e}")
+            raise CompressionCacheError(f"Cache clear failed: {e}")
+
+    def get_stats(self) -> Dict[str, Any]:
+        """Get cache statistics."""
+        entries = self.metadata['entries']
+        total_size = sum(entry.get('size_bytes', 0) for entry in entries.values())
+
+        return {
+            'total_entries': len(entries),
+            'total_size_mb': total_size / (1024 * 1024),
+            'max_size_mb': self.max_size_bytes / (1024 * 1024),
+            'utilization': total_size / self.max_size_bytes if self.max_size_bytes > 0 else 0,
+            'cache_dir': str(self.cache_dir),
+            'ttl_days': self.ttl_seconds / (24 * 3600),
+            'oldest_entry': min((entry['created'] for entry in entries.values()), default=time.time()),
+            'newest_entry': max((entry['created'] for entry in entries.values()), default=time.time())
+        }
+
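CompressionCache keys each result on a SHA-256 digest of the source text combined with a digest of the rendering configuration, copies the rendered images into ~/.abstractcore/glyph_cache (or a supplied directory), and evicts entries by TTL and then by least-recent access once the size cap is exceeded. A sketch of the round trip using only calls defined above; the text, config keys, and image path are placeholders:

from pathlib import Path

from abstractcore.compression.cache import CompressionCache

cache = CompressionCache(cache_dir="/tmp/glyph_cache_demo", max_size_gb=0.5, ttl_days=1)

# _generate_cache_key is internal, but it shows how identical text/config pairs map
# to the same key; the config dict here is a placeholder, not a documented schema.
key = cache._generate_cache_key("a long document ...", {"dpi": 120, "columns": 2})

images = cache.get(key)
if images is None:
    rendered = [Path("/tmp/rendered_page_0.png")]  # placeholder for renderer output
    cache.set(key, rendered, compression_stats={"compression_ratio": 3.5})

print(cache.get_stats())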