abstractcore-2.5.0-py3-none-any.whl → abstractcore-2.5.3-py3-none-any.whl
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their public registries.
- abstractcore/__init__.py +12 -0
- abstractcore/apps/__main__.py +8 -1
- abstractcore/apps/deepsearch.py +644 -0
- abstractcore/apps/intent.py +614 -0
- abstractcore/architectures/detection.py +250 -4
- abstractcore/assets/architecture_formats.json +14 -1
- abstractcore/assets/model_capabilities.json +583 -44
- abstractcore/compression/__init__.py +29 -0
- abstractcore/compression/analytics.py +420 -0
- abstractcore/compression/cache.py +250 -0
- abstractcore/compression/config.py +279 -0
- abstractcore/compression/exceptions.py +30 -0
- abstractcore/compression/glyph_processor.py +381 -0
- abstractcore/compression/optimizer.py +388 -0
- abstractcore/compression/orchestrator.py +380 -0
- abstractcore/compression/pil_text_renderer.py +818 -0
- abstractcore/compression/quality.py +226 -0
- abstractcore/compression/text_formatter.py +666 -0
- abstractcore/compression/vision_compressor.py +371 -0
- abstractcore/config/main.py +66 -1
- abstractcore/config/manager.py +111 -5
- abstractcore/core/session.py +105 -5
- abstractcore/events/__init__.py +1 -1
- abstractcore/media/auto_handler.py +312 -18
- abstractcore/media/handlers/local_handler.py +14 -2
- abstractcore/media/handlers/openai_handler.py +62 -3
- abstractcore/media/processors/__init__.py +11 -1
- abstractcore/media/processors/direct_pdf_processor.py +210 -0
- abstractcore/media/processors/glyph_pdf_processor.py +227 -0
- abstractcore/media/processors/image_processor.py +7 -1
- abstractcore/media/processors/text_processor.py +18 -3
- abstractcore/media/types.py +164 -7
- abstractcore/processing/__init__.py +5 -1
- abstractcore/processing/basic_deepsearch.py +2173 -0
- abstractcore/processing/basic_intent.py +690 -0
- abstractcore/providers/__init__.py +18 -0
- abstractcore/providers/anthropic_provider.py +29 -2
- abstractcore/providers/base.py +279 -6
- abstractcore/providers/huggingface_provider.py +658 -27
- abstractcore/providers/lmstudio_provider.py +52 -2
- abstractcore/providers/mlx_provider.py +103 -4
- abstractcore/providers/model_capabilities.py +352 -0
- abstractcore/providers/ollama_provider.py +44 -6
- abstractcore/providers/openai_provider.py +29 -2
- abstractcore/providers/registry.py +91 -19
- abstractcore/server/app.py +91 -81
- abstractcore/structured/handler.py +161 -1
- abstractcore/tools/common_tools.py +98 -3
- abstractcore/utils/__init__.py +4 -1
- abstractcore/utils/cli.py +114 -1
- abstractcore/utils/trace_export.py +287 -0
- abstractcore/utils/version.py +1 -1
- abstractcore/utils/vlm_token_calculator.py +655 -0
- {abstractcore-2.5.0.dist-info → abstractcore-2.5.3.dist-info}/METADATA +140 -23
- abstractcore-2.5.3.dist-info/RECORD +107 -0
- {abstractcore-2.5.0.dist-info → abstractcore-2.5.3.dist-info}/entry_points.txt +4 -0
- abstractcore-2.5.0.dist-info/RECORD +0 -86
- {abstractcore-2.5.0.dist-info → abstractcore-2.5.3.dist-info}/WHEEL +0 -0
- {abstractcore-2.5.0.dist-info → abstractcore-2.5.3.dist-info}/licenses/LICENSE +0 -0
- {abstractcore-2.5.0.dist-info → abstractcore-2.5.3.dist-info}/top_level.txt +0 -0

abstractcore/compression/__init__.py
@@ -0,0 +1,29 @@
+"""
+Glyph visual-text compression system for AbstractCore.
+
+This module provides visual-text compression capabilities that transform long textual
+sequences into optimized images for processing by Vision-Language Models (VLMs),
+achieving 3-4x token compression without accuracy loss.
+
+Based on the Glyph framework by Z.ai/THU-COAI with AbstractCore-specific enhancements.
+"""
+
+from .glyph_processor import GlyphProcessor
+from .orchestrator import CompressionOrchestrator
+from .config import GlyphConfig, RenderingConfig
+from .quality import QualityValidator, CompressionStats
+from .cache import CompressionCache
+from .exceptions import CompressionError, CompressionQualityError
+
+__all__ = [
+    'GlyphProcessor',
+    'CompressionOrchestrator',
+    'GlyphConfig',
+    'RenderingConfig',
+    'QualityValidator',
+    'CompressionStats',
+    'CompressionCache',
+    'CompressionError',
+    'CompressionQualityError'
+]
+
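The docstring above is the only prose description of the feature in this section. Purely as orientation, and with the caveat that the constructor arguments and the compress(...) method name below are hypothetical placeholders (orchestrator.py and config.py are not shown in this hunk), the re-exported API is presumably wired together along these lines:

# Hypothetical sketch only: GlyphConfig() defaults and CompressionOrchestrator.compress()
# are assumed names, not confirmed by this diff.
from abstractcore.compression import CompressionOrchestrator, GlyphConfig, CompressionError

long_text = "Background section ... " * 5000   # stand-in for a long prompt/context

try:
    orchestrator = CompressionOrchestrator(GlyphConfig())
    images = orchestrator.compress(long_text)   # hypothetical: render text into VLM-ready images
except CompressionError:
    images = None                               # fall back to sending plain text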

abstractcore/compression/analytics.py
@@ -0,0 +1,420 @@
+"""
+Compression analytics system for tracking and analyzing compression performance.
+"""
+
+import time
+import json
+from pathlib import Path
+from typing import Dict, Any, List, Optional
+from dataclasses import dataclass, field
+from datetime import datetime
+import statistics
+
+from ..utils.structured_logging import get_logger
+
+
+@dataclass
+class CompressionMetrics:
+    """Metrics for a single compression operation."""
+    timestamp: float
+    provider: str
+    model: str
+    original_tokens: int
+    compressed_tokens: int
+    compression_ratio: float
+    quality_score: float
+    processing_time: float
+    images_created: int
+    method: str  # "glyph", "hybrid", etc.
+    success: bool
+    metadata: Dict[str, Any] = field(default_factory=dict)
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert to dictionary."""
+        return {
+            "timestamp": self.timestamp,
+            "provider": self.provider,
+            "model": self.model,
+            "original_tokens": self.original_tokens,
+            "compressed_tokens": self.compressed_tokens,
+            "compression_ratio": self.compression_ratio,
+            "quality_score": self.quality_score,
+            "processing_time": self.processing_time,
+            "images_created": self.images_created,
+            "method": self.method,
+            "success": self.success,
+            "metadata": self.metadata
+        }
+
+
+class CompressionAnalytics:
+    """
+    Analytics system for tracking compression performance.
+
+    Collects metrics, analyzes trends, and provides insights for optimization.
+    """
+
+    def __init__(self, storage_path: Optional[Path] = None):
+        """
+        Initialize analytics system.
+
+        Args:
+            storage_path: Path to store analytics data
+        """
+        self.logger = get_logger(self.__class__.__name__)
+        self.metrics: List[CompressionMetrics] = []
+        self.storage_path = storage_path or Path.home() / ".abstractcore" / "analytics" / "compression.json"
+        self.storage_path.parent.mkdir(parents=True, exist_ok=True)
+
+        # Load existing metrics
+        self._load_metrics()
+
+    def _load_metrics(self):
+        """Load existing metrics from storage."""
+        if self.storage_path.exists():
+            try:
+                with open(self.storage_path, 'r') as f:
+                    data = json.load(f)
+                    for item in data:
+                        self.metrics.append(CompressionMetrics(**item))
+                self.logger.debug(f"Loaded {len(self.metrics)} historical metrics")
+            except Exception as e:
+                self.logger.warning(f"Failed to load metrics: {e}")
+
+    def _save_metrics(self):
+        """Save metrics to storage."""
+        try:
+            data = [m.to_dict() for m in self.metrics]
+            with open(self.storage_path, 'w') as f:
+                json.dump(data, f, indent=2)
+            self.logger.debug(f"Saved {len(self.metrics)} metrics")
+        except Exception as e:
+            self.logger.error(f"Failed to save metrics: {e}")
+
+    def record_compression(
+        self,
+        provider: str,
+        model: str,
+        original_tokens: int,
+        compressed_tokens: int,
+        quality_score: float,
+        processing_time: float,
+        images_created: int = 0,
+        method: str = "glyph",
+        success: bool = True,
+        metadata: Dict[str, Any] = None
+    ) -> CompressionMetrics:
+        """
+        Record a compression operation.
+
+        Args:
+            provider: Provider used
+            model: Model used
+            original_tokens: Original token count
+            compressed_tokens: Compressed token count
+            quality_score: Quality score (0-1)
+            processing_time: Processing time in seconds
+            images_created: Number of images created
+            method: Compression method used
+            success: Whether compression succeeded
+            metadata: Additional metadata
+
+        Returns:
+            Created CompressionMetrics object
+        """
+        compression_ratio = original_tokens / compressed_tokens if compressed_tokens > 0 else 1.0
+
+        metric = CompressionMetrics(
+            timestamp=time.time(),
+            provider=provider,
+            model=model,
+            original_tokens=original_tokens,
+            compressed_tokens=compressed_tokens,
+            compression_ratio=compression_ratio,
+            quality_score=quality_score,
+            processing_time=processing_time,
+            images_created=images_created,
+            method=method,
+            success=success,
+            metadata=metadata or {}
+        )
+
+        self.metrics.append(metric)
+        self._save_metrics()
+
+        self.logger.info(
+            f"Recorded compression: {provider}/{model}, "
+            f"{compression_ratio:.1f}x, {quality_score:.2%} quality"
+        )
+
+        return metric
+
+    def get_provider_stats(self, provider: str) -> Dict[str, Any]:
+        """
+        Get statistics for a specific provider.
+
+        Args:
+            provider: Provider name
+
+        Returns:
+            Statistics dictionary
+        """
+        provider_metrics = [m for m in self.metrics if m.provider == provider and m.success]
+
+        if not provider_metrics:
+            return {"provider": provider, "no_data": True}
+
+        ratios = [m.compression_ratio for m in provider_metrics]
+        qualities = [m.quality_score for m in provider_metrics]
+        times = [m.processing_time for m in provider_metrics]
+
+        return {
+            "provider": provider,
+            "total_compressions": len(provider_metrics),
+            "avg_compression_ratio": statistics.mean(ratios),
+            "median_compression_ratio": statistics.median(ratios),
+            "best_compression_ratio": max(ratios),
+            "avg_quality_score": statistics.mean(qualities),
+            "avg_processing_time": statistics.mean(times),
+            "success_rate": len(provider_metrics) / len([m for m in self.metrics if m.provider == provider])
+        }
+
+    def get_model_stats(self, provider: str, model: str) -> Dict[str, Any]:
+        """
+        Get statistics for a specific model.
+
+        Args:
+            provider: Provider name
+            model: Model name
+
+        Returns:
+            Statistics dictionary
+        """
+        model_metrics = [
+            m for m in self.metrics
+            if m.provider == provider and m.model == model and m.success
+        ]
+
+        if not model_metrics:
+            return {"provider": provider, "model": model, "no_data": True}
+
+        ratios = [m.compression_ratio for m in model_metrics]
+        qualities = [m.quality_score for m in model_metrics]
+        times = [m.processing_time for m in model_metrics]
+        images = [m.images_created for m in model_metrics]
+
+        return {
+            "provider": provider,
+            "model": model,
+            "total_compressions": len(model_metrics),
+            "avg_compression_ratio": statistics.mean(ratios),
+            "std_compression_ratio": statistics.stdev(ratios) if len(ratios) > 1 else 0,
+            "avg_quality_score": statistics.mean(qualities),
+            "avg_processing_time": statistics.mean(times),
+            "avg_images_created": statistics.mean(images),
+            "percentiles": {
+                "p25": statistics.quantiles(ratios, n=4)[0] if len(ratios) > 1 else ratios[0],
+                "p50": statistics.median(ratios),
+                "p75": statistics.quantiles(ratios, n=4)[2] if len(ratios) > 1 else ratios[0],
+            } if ratios else {}
+        }
+
+    def get_method_comparison(self) -> Dict[str, Any]:
+        """
+        Compare different compression methods.
+
+        Returns:
+            Comparison statistics
+        """
+        methods = {}
+
+        for method in set(m.method for m in self.metrics):
+            method_metrics = [m for m in self.metrics if m.method == method and m.success]
+
+            if method_metrics:
+                ratios = [m.compression_ratio for m in method_metrics]
+                qualities = [m.quality_score for m in method_metrics]
+                times = [m.processing_time for m in method_metrics]
+
+                methods[method] = {
+                    "count": len(method_metrics),
+                    "avg_compression": statistics.mean(ratios),
+                    "avg_quality": statistics.mean(qualities),
+                    "avg_time": statistics.mean(times),
+                    "efficiency": statistics.mean(ratios) / statistics.mean(times) if times else 0
+                }
+
+        return methods
+
+    def get_trends(self, hours: int = 24) -> Dict[str, Any]:
+        """
+        Get compression trends over time.
+
+        Args:
+            hours: Number of hours to analyze
+
+        Returns:
+            Trend analysis
+        """
+        cutoff_time = time.time() - (hours * 3600)
+        recent_metrics = [m for m in self.metrics if m.timestamp > cutoff_time and m.success]
+
+        if not recent_metrics:
+            return {"no_recent_data": True}
+
+        # Sort by timestamp
+        recent_metrics.sort(key=lambda m: m.timestamp)
+
+        # Calculate rolling averages
+        window_size = max(1, len(recent_metrics) // 10)
+        rolling_ratios = []
+        rolling_qualities = []
+
+        for i in range(len(recent_metrics) - window_size + 1):
+            window = recent_metrics[i:i + window_size]
+            rolling_ratios.append(statistics.mean([m.compression_ratio for m in window]))
+            rolling_qualities.append(statistics.mean([m.quality_score for m in window]))
+
+        # Detect trends
+        if len(rolling_ratios) > 1:
+            ratio_trend = "improving" if rolling_ratios[-1] > rolling_ratios[0] else "declining"
+            quality_trend = "improving" if rolling_qualities[-1] > rolling_qualities[0] else "declining"
+        else:
+            ratio_trend = "stable"
+            quality_trend = "stable"
+
+        return {
+            "period_hours": hours,
+            "total_compressions": len(recent_metrics),
+            "ratio_trend": ratio_trend,
+            "quality_trend": quality_trend,
+            "current_avg_ratio": statistics.mean([m.compression_ratio for m in recent_metrics[-window_size:]]) if recent_metrics else 0,
+            "current_avg_quality": statistics.mean([m.quality_score for m in recent_metrics[-window_size:]]) if recent_metrics else 0
+        }
+
+    def get_optimization_suggestions(self) -> List[str]:
+        """
+        Generate optimization suggestions based on analytics.
+
+        Returns:
+            List of suggestions
+        """
+        suggestions = []
+
+        # Analyze recent performance
+        recent_metrics = [m for m in self.metrics[-100:] if m.success]  # Last 100 compressions
+
+        if recent_metrics:
+            avg_ratio = statistics.mean([m.compression_ratio for m in recent_metrics])
+            avg_quality = statistics.mean([m.quality_score for m in recent_metrics])
+
+            # Compression ratio suggestions
+            if avg_ratio < 3.0:
+                suggestions.append("Consider using more aggressive compression settings (smaller fonts, more columns)")
+            elif avg_ratio > 5.0 and avg_quality < 0.90:
+                suggestions.append("High compression may be affecting quality - consider balancing settings")
+
+            # Quality suggestions
+            if avg_quality < 0.90:
+                suggestions.append("Quality scores are low - increase DPI or font size")
+            elif avg_quality > 0.95 and avg_ratio < 4.0:
+                suggestions.append("Quality is very high - room for more aggressive compression")
+
+            # Provider-specific suggestions
+            provider_stats = {}
+            for provider in set(m.provider for m in recent_metrics):
+                stats = self.get_provider_stats(provider)
+                if not stats.get("no_data"):
+                    provider_stats[provider] = stats
+
+            if provider_stats:
+                best_provider = max(provider_stats.items(), key=lambda x: x[1].get("avg_compression_ratio", 0))
+                if best_provider[1]["avg_compression_ratio"] > avg_ratio * 1.2:
+                    suggestions.append(f"Provider '{best_provider[0]}' shows better compression - consider using it more")
+
+        # Method suggestions
+        method_comparison = self.get_method_comparison()
+        if "hybrid" in method_comparison and "glyph" in method_comparison:
+            if method_comparison["hybrid"]["avg_compression"] > method_comparison["glyph"]["avg_compression"] * 2:
+                suggestions.append("Hybrid compression shows significant improvement - use it for large documents")
+
+        if not suggestions:
+            suggestions.append("Performance is optimal - no specific improvements recommended")
+
+        return suggestions
+
+    def generate_report(self) -> str:
+        """
+        Generate a comprehensive analytics report.
+
+        Returns:
+            Formatted report string
+        """
+        report = ["=" * 80]
+        report.append("COMPRESSION ANALYTICS REPORT")
+        report.append("=" * 80)
+        report.append(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+        report.append(f"Total compressions: {len(self.metrics)}")
+
+        # Overall statistics
+        if self.metrics:
+            successful = [m for m in self.metrics if m.success]
+            if successful:
+                report.append(f"\nOVERALL STATISTICS:")
+                report.append(f"  Success rate: {len(successful)/len(self.metrics):.1%}")
+                report.append(f"  Avg compression: {statistics.mean([m.compression_ratio for m in successful]):.1f}x")
+                report.append(f"  Avg quality: {statistics.mean([m.quality_score for m in successful]):.1%}")
+                report.append(f"  Avg time: {statistics.mean([m.processing_time for m in successful]):.2f}s")
+
+        # Provider breakdown
+        providers = set(m.provider for m in self.metrics)
+        if providers:
+            report.append(f"\nPROVIDER PERFORMANCE:")
+            for provider in sorted(providers):
+                stats = self.get_provider_stats(provider)
+                if not stats.get("no_data"):
+                    report.append(f"  {provider}:")
+                    report.append(f"    Compressions: {stats['total_compressions']}")
+                    report.append(f"    Avg ratio: {stats['avg_compression_ratio']:.1f}x")
+                    report.append(f"    Avg quality: {stats['avg_quality_score']:.1%}")
+
+        # Method comparison
+        methods = self.get_method_comparison()
+        if methods:
+            report.append(f"\nMETHOD COMPARISON:")
+            for method, stats in methods.items():
+                report.append(f"  {method}:")
+                report.append(f"    Avg compression: {stats['avg_compression']:.1f}x")
+                report.append(f"    Avg quality: {stats['avg_quality']:.1%}")
+                report.append(f"    Efficiency: {stats['efficiency']:.2f}")
+
+        # Trends
+        trends = self.get_trends(24)
+        if not trends.get("no_recent_data"):
+            report.append(f"\nRECENT TRENDS (24h):")
+            report.append(f"  Compression trend: {trends['ratio_trend']}")
+            report.append(f"  Quality trend: {trends['quality_trend']}")
+            report.append(f"  Current avg ratio: {trends['current_avg_ratio']:.1f}x")
+
+        # Suggestions
+        suggestions = self.get_optimization_suggestions()
+        if suggestions:
+            report.append(f"\nOPTIMIZATION SUGGESTIONS:")
+            for i, suggestion in enumerate(suggestions, 1):
+                report.append(f"  {i}. {suggestion}")
+
+        report.append("=" * 80)
+
+        return "\n".join(report)
+
+
+# Global analytics instance
+_analytics_instance = None
+
+
+def get_analytics() -> CompressionAnalytics:
+    """Get global analytics instance."""
+    global _analytics_instance
+    if _analytics_instance is None:
+        _analytics_instance = CompressionAnalytics()
+    return _analytics_instance
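Since analytics.py depends only on the standard library plus the package's internal logger, a short usage sketch follows directly from the code above (the provider, model, and token counts are fabricated for illustration; the singleton persists to ~/.abstractcore/analytics/compression.json as shown):

from abstractcore.compression.analytics import get_analytics

analytics = get_analytics()  # module-level singleton, saved to JSON after every record

# Record one fabricated run: 12,000 text tokens rendered into 3 images worth ~3,000
# vision tokens; the 4.0x compression_ratio is derived inside record_compression().
analytics.record_compression(
    provider="openai",
    model="gpt-4o",
    original_tokens=12_000,
    compressed_tokens=3_000,
    quality_score=0.96,
    processing_time=1.8,
    images_created=3,
    method="glyph",
)

print(analytics.get_provider_stats("openai"))   # per-provider averages and success rate
print(analytics.generate_report())              # the formatted report built above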

abstractcore/compression/cache.py
@@ -0,0 +1,250 @@
+"""
+Compression cache system for Glyph.
+"""
+
+import os
+import json
+import time
+import hashlib
+import shutil
+from pathlib import Path
+from typing import List, Optional, Dict, Any
+from dataclasses import asdict
+
+from .exceptions import CompressionCacheError
+from ..utils.structured_logging import get_logger
+
+
+class CompressionCache:
+    """Cache system for compressed content."""
+
+    def __init__(self, cache_dir: str = None, max_size_gb: float = 1.0, ttl_days: int = 7):
+        """
+        Initialize compression cache.
+
+        Args:
+            cache_dir: Cache directory path
+            max_size_gb: Maximum cache size in GB
+            ttl_days: Time-to-live in days
+        """
+        self.logger = get_logger(self.__class__.__name__)
+
+        if cache_dir is None:
+            cache_dir = str(Path.home() / ".abstractcore" / "glyph_cache")
+
+        self.cache_dir = Path(cache_dir)
+        self.max_size_bytes = int(max_size_gb * 1024 * 1024 * 1024)
+        self.ttl_seconds = ttl_days * 24 * 3600
+
+        # Create cache directory (thread-safe for parallel executions)
+        try:
+            self.cache_dir.mkdir(parents=True, exist_ok=True)
+            self.logger.debug(f"Cache directory created/verified: {self.cache_dir}")
+        except Exception as e:
+            self.logger.error(f"Failed to create cache directory {self.cache_dir}: {e}")
+            raise CompressionCacheError(f"Cannot create cache directory: {e}")
+
+        # Initialize metadata
+        self.metadata_file = self.cache_dir / "cache_metadata.json"
+        self.metadata = self._load_metadata()
+
+        self.logger.debug(f"Initialized compression cache at {self.cache_dir}")
+
+    def _load_metadata(self) -> Dict[str, Any]:
+        """Load cache metadata."""
+        if self.metadata_file.exists():
+            try:
+                with open(self.metadata_file, 'r') as f:
+                    return json.load(f)
+            except Exception as e:
+                self.logger.warning(f"Failed to load cache metadata: {e}")
+
+        return {
+            'entries': {},
+            'created': time.time(),
+            'last_cleanup': time.time()
+        }
+
+    def _save_metadata(self):
+        """Save cache metadata."""
+        try:
+            with open(self.metadata_file, 'w') as f:
+                json.dump(self.metadata, f, indent=2)
+        except Exception as e:
+            self.logger.error(f"Failed to save cache metadata: {e}")
+
+    def _generate_cache_key(self, content: str, config: Dict[str, Any]) -> str:
+        """Generate cache key from content and configuration."""
+        # Create a hash of content + configuration
+        content_hash = hashlib.sha256(content.encode('utf-8')).hexdigest()[:16]
+        config_str = json.dumps(config, sort_keys=True)
+        config_hash = hashlib.sha256(config_str.encode('utf-8')).hexdigest()[:8]
+
+        return f"{content_hash}_{config_hash}"
+
+    def get(self, cache_key: str) -> Optional[List[Path]]:
+        """
+        Get cached compression result.
+
+        Args:
+            cache_key: Cache key
+
+        Returns:
+            List of image paths if cached, None otherwise
+        """
+        if cache_key not in self.metadata['entries']:
+            return None
+
+        entry = self.metadata['entries'][cache_key]
+
+        # Check TTL
+        if time.time() - entry['created'] > self.ttl_seconds:
+            self._remove_entry(cache_key)
+            return None
+
+        # Check if files still exist
+        image_paths = [Path(path) for path in entry['image_paths']]
+        if not all(path.exists() for path in image_paths):
+            self._remove_entry(cache_key)
+            return None
+
+        # Update access time
+        entry['last_accessed'] = time.time()
+        self._save_metadata()
+
+        self.logger.debug(f"Cache hit for key {cache_key}")
+        return image_paths
+
+    def set(self, cache_key: str, image_paths: List[Path],
+            compression_stats: Dict[str, Any] = None):
+        """
+        Store compression result in cache.
+
+        Args:
+            cache_key: Cache key
+            image_paths: List of rendered image paths
+            compression_stats: Optional compression statistics
+        """
+        try:
+            # Create cache entry directory
+            entry_dir = self.cache_dir / cache_key
+            entry_dir.mkdir(exist_ok=True)
+
+            # Copy images to cache
+            cached_paths = []
+            for i, image_path in enumerate(image_paths):
+                if image_path.exists():
+                    cached_path = entry_dir / f"image_{i}{image_path.suffix}"
+                    shutil.copy2(image_path, cached_path)
+                    cached_paths.append(str(cached_path))
+
+            # Store metadata
+            entry = {
+                'created': time.time(),
+                'last_accessed': time.time(),
+                'image_paths': cached_paths,
+                'compression_stats': compression_stats or {},
+                'size_bytes': sum(Path(path).stat().st_size for path in cached_paths)
+            }
+
+            self.metadata['entries'][cache_key] = entry
+            self._save_metadata()
+
+            # Cleanup if needed
+            self._cleanup_if_needed()
+
+            self.logger.debug(f"Cached compression result for key {cache_key}")
+
+        except Exception as e:
+            self.logger.error(f"Failed to cache compression result: {e}")
+            raise CompressionCacheError(f"Cache storage failed: {e}")
+
+    def _remove_entry(self, cache_key: str):
+        """Remove cache entry."""
+        if cache_key in self.metadata['entries']:
+            # Remove files
+            entry_dir = self.cache_dir / cache_key
+            if entry_dir.exists():
+                shutil.rmtree(entry_dir, ignore_errors=True)
+
+            # Remove metadata
+            del self.metadata['entries'][cache_key]
+            self._save_metadata()
+
+    def _cleanup_if_needed(self):
+        """Cleanup cache if size or TTL limits exceeded."""
+        now = time.time()
+
+        # Skip if cleaned up recently
+        if now - self.metadata.get('last_cleanup', 0) < 3600:  # 1 hour
+            return
+
+        self.logger.debug("Starting cache cleanup")
+
+        # Remove expired entries
+        expired_keys = []
+        for key, entry in self.metadata['entries'].items():
+            if now - entry['created'] > self.ttl_seconds:
+                expired_keys.append(key)
+
+        for key in expired_keys:
+            self._remove_entry(key)
+
+        # Check size limit
+        total_size = sum(entry.get('size_bytes', 0) for entry in self.metadata['entries'].values())
+
+        if total_size > self.max_size_bytes:
+            # Remove least recently accessed entries
+            entries_by_access = sorted(
+                self.metadata['entries'].items(),
+                key=lambda x: x[1].get('last_accessed', 0)
+            )
+
+            while total_size > self.max_size_bytes and entries_by_access:
+                key, entry = entries_by_access.pop(0)
+                total_size -= entry.get('size_bytes', 0)
+                self._remove_entry(key)
+
+        self.metadata['last_cleanup'] = now
+        self._save_metadata()
+
+        self.logger.debug(f"Cache cleanup completed. Entries: {len(self.metadata['entries'])}")
+
+    def clear(self):
+        """Clear all cache entries."""
+        try:
+            # Remove all entry directories
+            for entry_dir in self.cache_dir.iterdir():
+                if entry_dir.is_dir() and entry_dir.name != "cache_metadata.json":
+                    shutil.rmtree(entry_dir, ignore_errors=True)
+
+            # Reset metadata
+            self.metadata = {
+                'entries': {},
+                'created': time.time(),
+                'last_cleanup': time.time()
+            }
+            self._save_metadata()
+
+            self.logger.info("Cache cleared")
+
+        except Exception as e:
+            self.logger.error(f"Failed to clear cache: {e}")
+            raise CompressionCacheError(f"Cache clear failed: {e}")
+
+    def get_stats(self) -> Dict[str, Any]:
+        """Get cache statistics."""
+        entries = self.metadata['entries']
+        total_size = sum(entry.get('size_bytes', 0) for entry in entries.values())
+
+        return {
+            'total_entries': len(entries),
+            'total_size_mb': total_size / (1024 * 1024),
+            'max_size_mb': self.max_size_bytes / (1024 * 1024),
+            'utilization': total_size / self.max_size_bytes if self.max_size_bytes > 0 else 0,
+            'cache_dir': str(self.cache_dir),
+            'ttl_days': self.ttl_seconds / (24 * 3600),
+            'oldest_entry': min((entry['created'] for entry in entries.values()), default=time.time()),
+            'newest_entry': max((entry['created'] for entry in entries.values()), default=time.time())
+        }
+
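A comparable sketch for the cache, grounded in the code above (assumes abstractcore is installed; the placeholder PNG bytes stand in for pages rendered by the glyph pipeline, and _generate_cache_key is the internal key builder shown in this hunk):

import tempfile
from pathlib import Path

from abstractcore.compression.cache import CompressionCache

cache = CompressionCache(max_size_gb=0.5, ttl_days=3)

content = "a very long document ... " * 500
render_config = {"dpi": 150, "font_size": 9}              # illustrative config dict
key = cache._generate_cache_key(content, render_config)   # sha256(content) + sha256(config)

if cache.get(key) is None:
    # Fake two rendered pages so the example is self-contained.
    tmp = Path(tempfile.mkdtemp())
    pages = []
    for i in range(2):
        page = tmp / f"page_{i}.png"
        page.write_bytes(b"\x89PNG\r\n\x1a\n")  # placeholder bytes, not a real render
        pages.append(page)
    cache.set(key, pages, compression_stats={"ratio": 3.8})

print(cache.get(key))        # cached copies under ~/.abstractcore/glyph_cache/<key>/
print(cache.get_stats())     # entry count, size, utilization, TTL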