agmem 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,427 @@
+ """
+ Compliance Dashboard - Privacy, Encryption, and Audit verification.
+
+ This module provides compliance monitoring capabilities:
+ - Privacy budget tracking (ε/δ for differential privacy)
+ - Encryption status verification
+ - Tamper detection via Merkle tree verification
+ - Audit trail analysis
+ """
+
+ import hashlib
+ import json
+ import os
+ from dataclasses import dataclass, field
+ from datetime import datetime, timezone
+ from pathlib import Path
+ from typing import Any, Dict, List, Optional, Tuple
+
+
+ @dataclass
+ class PrivacyBudget:
+     """Tracks differential privacy budget consumption."""
+
+     epsilon: float  # Privacy loss parameter
+     delta: float  # Failure probability
+     queries_made: int = 0
+     budget_consumed: float = 0.0
+     budget_limit: float = 1.0
+     last_query: Optional[str] = None
+
+     def consume(self, epsilon_cost: float) -> bool:
+         """Consume privacy budget. Returns True if within limit."""
+         if self.budget_consumed + epsilon_cost > self.budget_limit:
+             return False
+         self.budget_consumed += epsilon_cost
+         self.queries_made += 1
+         self.last_query = datetime.now(timezone.utc).isoformat()
+         return True
+
+     def remaining(self) -> float:
+         """Get remaining privacy budget."""
+         return max(0, self.budget_limit - self.budget_consumed)
+
+     def is_exhausted(self) -> bool:
+         """Check if budget is exhausted."""
+         return self.budget_consumed >= self.budget_limit
+
+     def to_dict(self) -> Dict[str, Any]:
+         return {
+             "epsilon": self.epsilon,
+             "delta": self.delta,
+             "queries_made": self.queries_made,
+             "budget_consumed": self.budget_consumed,
+             "budget_limit": self.budget_limit,
+             "remaining": self.remaining(),
+             "exhausted": self.is_exhausted(),
+             "last_query": self.last_query,
+         }
+
+
+ class PrivacyManager:
+     """Manages privacy budgets for different data sources."""
+
+     def __init__(self, mem_dir: Path):
+         self.mem_dir = Path(mem_dir)
+         self.privacy_file = self.mem_dir / "privacy.json"
+         self._budgets: Dict[str, PrivacyBudget] = {}
+         self._load()
+
+     def _load(self) -> None:
+         """Load privacy budgets from disk."""
+         if self.privacy_file.exists():
+             try:
+                 data = json.loads(self.privacy_file.read_text())
+                 for name, budget_data in data.get("budgets", {}).items():
+                     self._budgets[name] = PrivacyBudget(
+                         epsilon=budget_data["epsilon"],
+                         delta=budget_data["delta"],
+                         queries_made=budget_data.get("queries_made", 0),
+                         budget_consumed=budget_data.get("budget_consumed", 0.0),
+                         budget_limit=budget_data.get("budget_limit", 1.0),
+                         last_query=budget_data.get("last_query"),
+                     )
+             except Exception:
+                 pass
+
+     def _save(self) -> None:
+         """Save privacy budgets to disk."""
+         self.mem_dir.mkdir(parents=True, exist_ok=True)
+         data = {"budgets": {name: b.to_dict() for name, b in self._budgets.items()}}
+         self.privacy_file.write_text(json.dumps(data, indent=2))
+
+     def create_budget(
+         self, name: str, epsilon: float = 0.1, delta: float = 1e-5, limit: float = 1.0
+     ) -> PrivacyBudget:
+         """Create a new privacy budget."""
+         budget = PrivacyBudget(epsilon=epsilon, delta=delta, budget_limit=limit)
+         self._budgets[name] = budget
+         self._save()
+         return budget
+
+     def consume(self, name: str, epsilon_cost: float) -> Tuple[bool, Optional[PrivacyBudget]]:
+         """Consume budget for a data source. Returns (success, budget)."""
+         budget = self._budgets.get(name)
+         if not budget:
+             return False, None
+         success = budget.consume(epsilon_cost)
+         self._save()
+         return success, budget
+
+     def get_budget(self, name: str) -> Optional[PrivacyBudget]:
+         """Get a privacy budget by name."""
+         return self._budgets.get(name)
+
+     def get_all_budgets(self) -> Dict[str, PrivacyBudget]:
+         """Get all privacy budgets."""
+         return self._budgets.copy()
+
+     def get_dashboard_data(self) -> Dict[str, Any]:
+         """Get data for privacy dashboard."""
+         return {
+             "budgets": [
+                 {"name": name, **budget.to_dict()} for name, budget in self._budgets.items()
+             ],
+             "total_queries": sum(b.queries_made for b in self._budgets.values()),
+             "total_consumed": sum(b.budget_consumed for b in self._budgets.values()),
+         }
+
+
+ @dataclass
+ class EncryptionStatus:
+     """Status of an encrypted file."""
+
+     path: str
+     is_encrypted: bool
+     algorithm: Optional[str] = None
+     key_id: Optional[str] = None
+     encrypted_at: Optional[str] = None
+     can_decrypt: bool = False
+
+
+ class EncryptionVerifier:
+     """Verifies encryption status of memory files."""
+
+     ENCRYPTION_MARKERS = [b"-----BEGIN ENCRYPTED", b"$ENCRYPTED$", b"\x00AGMEM-ENC"]
+
+     def __init__(self, mem_dir: Path, current_dir: Path):
+         self.mem_dir = Path(mem_dir)
+         self.current_dir = Path(current_dir)
+         self.key_file = self.mem_dir / "encryption_keys.json"
+
+     def check_file(self, filepath: Path) -> EncryptionStatus:
+         """Check encryption status of a file."""
+         if not filepath.exists():
+             return EncryptionStatus(path=str(filepath), is_encrypted=False)
+
+         try:
+             content = filepath.read_bytes()[:100]
+             is_encrypted = any(marker in content for marker in self.ENCRYPTION_MARKERS)
+
+             if is_encrypted:
+                 algorithm = self._detect_algorithm(content)
+                 return EncryptionStatus(
+                     path=str(filepath),
+                     is_encrypted=True,
+                     algorithm=algorithm,
+                     can_decrypt=self._can_decrypt(filepath),
+                 )
+             else:
+                 return EncryptionStatus(
+                     path=str(filepath),
+                     is_encrypted=False,
+                 )
+         except Exception:
+             return EncryptionStatus(path=str(filepath), is_encrypted=False)
+
+     def _detect_algorithm(self, content: bytes) -> str:
+         """Detect encryption algorithm from header."""
+         if b"AES-256" in content:
+             return "AES-256-GCM"
+         elif b"CHACHA20" in content:
+             return "ChaCha20-Poly1305"
+         elif b"FERNET" in content:
+             return "Fernet"
+         return "Unknown"
+
+     def _can_decrypt(self, filepath: Path) -> bool:
+         """Check if we have the key to decrypt."""
+         if not self.key_file.exists():
+             return False
+         # Simplified check - just verify key file exists
+         return True
+
+     def scan_directory(self) -> Dict[str, Any]:
+         """Scan current directory for encryption status."""
+         results = {"encrypted": [], "unencrypted": [], "errors": []}
+
+         for filepath in self.current_dir.rglob("*"):
+             if filepath.is_file():
+                 try:
+                     status = self.check_file(filepath)
+                     if status.is_encrypted:
+                         results["encrypted"].append(status)
+                     else:
+                         results["unencrypted"].append(status)
+                 except Exception as e:
+                     results["errors"].append({"path": str(filepath), "error": str(e)})
+
+         return {
+             "total": len(results["encrypted"]) + len(results["unencrypted"]),
+             "encrypted_count": len(results["encrypted"]),
+             "unencrypted_count": len(results["unencrypted"]),
+             "error_count": len(results["errors"]),
+             "encrypted_files": [e.path for e in results["encrypted"]],
+             "encryption_coverage": len(results["encrypted"])
+             / max(1, len(results["encrypted"]) + len(results["unencrypted"]))
+             * 100,
+         }
+
+
+ class TamperDetector:
+     """Detects tampering via Merkle tree verification."""
+
+     def __init__(self, mem_dir: Path):
+         self.mem_dir = Path(mem_dir)
+         self.merkle_file = self.mem_dir / "merkle_root.json"
+
+     def compute_file_hash(self, filepath: Path) -> str:
+         """Compute SHA-256 hash of a file."""
+         hasher = hashlib.sha256()
+         try:
+             with open(filepath, "rb") as f:
+                 for chunk in iter(lambda: f.read(8192), b""):
+                     hasher.update(chunk)
+             return hasher.hexdigest()
+         except Exception:
+             return ""
+
+     def compute_merkle_root(self, file_hashes: List[str]) -> str:
+         """Compute Merkle root from file hashes."""
+         if not file_hashes:
+             return hashlib.sha256(b"").hexdigest()
+
+         # Pad to power of 2
+         while len(file_hashes) & (len(file_hashes) - 1):
+             file_hashes.append(file_hashes[-1])
+
+         # Build tree
+         level = file_hashes
+         while len(level) > 1:
+             next_level = []
+             for i in range(0, len(level), 2):
+                 combined = level[i] + level[i + 1]
+                 next_level.append(hashlib.sha256(combined.encode()).hexdigest())
+             level = next_level
+
+         return level[0]
+
+     def store_merkle_state(self, directory: Path) -> Dict[str, Any]:
+         """Store current Merkle state for later verification."""
+         file_hashes = []
+         file_paths = []
+
+         for filepath in sorted(directory.rglob("*")):
+             if filepath.is_file():
+                 file_hash = self.compute_file_hash(filepath)
+                 if file_hash:
+                     file_hashes.append(file_hash)
+                     file_paths.append(str(filepath.relative_to(directory)))
+
+         merkle_root = self.compute_merkle_root(file_hashes)
+
+         state = {
+             "merkle_root": merkle_root,
+             "file_count": len(file_hashes),
+             "computed_at": datetime.now(timezone.utc).isoformat(),
+             "file_hashes": dict(zip(file_paths, file_hashes)),
+         }
+
+         self.mem_dir.mkdir(parents=True, exist_ok=True)
+         self.merkle_file.write_text(json.dumps(state, indent=2))
+
+         return state
+
+     def verify_integrity(self, directory: Path) -> Dict[str, Any]:
+         """Verify current state against stored Merkle root."""
+         if not self.merkle_file.exists():
+             return {"verified": False, "error": "No stored Merkle state found"}
+
+         stored = json.loads(self.merkle_file.read_text())
+         stored_hashes = stored.get("file_hashes", {})
+
+         current_hashes = {}
+         for filepath in sorted(directory.rglob("*")):
+             if filepath.is_file():
+                 rel_path = str(filepath.relative_to(directory))
+                 current_hashes[rel_path] = self.compute_file_hash(filepath)
+
+         # Compare
+         modified = []
+         added = []
+         deleted = []
+
+         for path, hash_value in current_hashes.items():
+             if path not in stored_hashes:
+                 added.append(path)
+             elif stored_hashes[path] != hash_value:
+                 modified.append(path)
+
+         for path in stored_hashes:
+             if path not in current_hashes:
+                 deleted.append(path)
+
+         current_root = self.compute_merkle_root(list(current_hashes.values()))
+
+         return {
+             "verified": len(modified) == 0 and len(added) == 0 and len(deleted) == 0,
+             "stored_root": stored.get("merkle_root"),
+             "current_root": current_root,
+             "roots_match": stored.get("merkle_root") == current_root,
+             "modified_files": modified,
+             "added_files": added,
+             "deleted_files": deleted,
+             "stored_at": stored.get("computed_at"),
+         }
+
+
+ class AuditAnalyzer:
+     """Analyzes audit trail for compliance."""
+
+     def __init__(self, mem_dir: Path):
+         self.mem_dir = Path(mem_dir)
+         self.audit_file = self.mem_dir / "audit.log"
+
+     def load_audit_entries(self) -> List[Dict[str, Any]]:
+         """Load audit log entries."""
+         if not self.audit_file.exists():
+             return []
+
+         entries = []
+         try:
+             for line in self.audit_file.read_text().strip().split("\n"):
+                 if line:
+                     try:
+                         entries.append(json.loads(line))
+                     except Exception:
+                         pass
+         except Exception:
+             pass
+
+         return entries
+
+     def verify_chain(self) -> Dict[str, Any]:
+         """Verify audit chain integrity."""
+         entries = self.load_audit_entries()
+         if not entries:
+             return {"valid": True, "entries": 0, "message": "No audit entries"}
+
+         valid = True
+         errors = []
+         prev_hash = None
+
+         for i, entry in enumerate(entries):
+             # Verify hash chain
+             entry_hash = entry.get("hash")
+             entry_prev = entry.get("prev_hash")
+
+             if i > 0 and entry_prev != prev_hash:
+                 valid = False
+                 errors.append(f"Chain break at entry {i}")
+
+             prev_hash = entry_hash
+
+         return {
+             "valid": valid,
+             "entries": len(entries),
+             "errors": errors,
+             "first_entry": entries[0].get("timestamp") if entries else None,
+             "last_entry": entries[-1].get("timestamp") if entries else None,
+         }
+
+     def get_statistics(self) -> Dict[str, Any]:
+         """Get audit statistics."""
+         entries = self.load_audit_entries()
+
+         operations = {}
+         agents = {}
+         by_day = {}
+
+         for entry in entries:
+             op = entry.get("operation", "unknown")
+             operations[op] = operations.get(op, 0) + 1
+
+             agent = entry.get("agent", "unknown")
+             agents[agent] = agents.get(agent, 0) + 1
+
+             ts = entry.get("timestamp", "")[:10]
+             if ts:
+                 by_day[ts] = by_day.get(ts, 0) + 1
+
+         return {
+             "total_entries": len(entries),
+             "operations": operations,
+             "agents": agents,
+             "by_day": by_day,
+         }
+
+
+ # --- Dashboard Helper ---
+
+
+ def get_compliance_dashboard(mem_dir: Path, current_dir: Path) -> Dict[str, Any]:
+     """Get data for compliance dashboard."""
+     privacy_mgr = PrivacyManager(mem_dir)
+     encryption_verifier = EncryptionVerifier(mem_dir, current_dir)
+     tamper_detector = TamperDetector(mem_dir)
+     audit_analyzer = AuditAnalyzer(mem_dir)
+
+     return {
+         "privacy": privacy_mgr.get_dashboard_data(),
+         "encryption": encryption_verifier.scan_directory(),
+         "integrity": tamper_detector.verify_integrity(current_dir),
+         "audit": {
+             "chain_valid": audit_analyzer.verify_chain(),
+             "statistics": audit_analyzer.get_statistics(),
+         },
+     }
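For orientation, here is a minimal usage sketch of the compliance helpers added in this file. It only calls classes and functions shown in the hunk above; the import path agmem.compliance and the directory names are assumptions, not something confirmed by the diff.

    from pathlib import Path

    # Assumed import path; the actual module name inside the wheel may differ.
    from agmem.compliance import PrivacyManager, TamperDetector, get_compliance_dashboard

    mem_dir = Path(".agmem")      # hypothetical metadata directory
    current_dir = Path("memory")  # hypothetical directory being monitored

    # Track and spend a differential-privacy budget for one data source.
    privacy = PrivacyManager(mem_dir)
    privacy.create_budget("user_logs", epsilon=0.1, delta=1e-5, limit=1.0)
    ok, budget = privacy.consume("user_logs", epsilon_cost=0.05)
    print(ok, budget.remaining() if budget else None)

    # Snapshot a Merkle root now; verify it later to detect tampering.
    detector = TamperDetector(mem_dir)
    detector.store_merkle_state(current_dir)
    print(detector.verify_integrity(current_dir)["verified"])

    # Aggregate privacy, encryption, integrity, and audit data in one call.
    print(get_compliance_dashboard(mem_dir, current_dir))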
@@ -0,0 +1,248 @@
+ """
+ Delta compression metrics and observability.
+
+ Tracks compression effectiveness across object types to enable future
+ optimization and auto-tuning of delta encoding parameters.
+
+ Provides:
+ - DeltaCompressionMetrics: Tracks compression ratio, object types, benefits
+ - CompressionHeatmap: Visualizes which types compress best
+ - Statistics reporting for gc --repack operations
+ """
+
+ from dataclasses import dataclass, field
+ from typing import Dict, List, Any, Optional, Tuple
+ from collections import defaultdict
+
+
+ @dataclass
+ class ObjectCompressionStats:
+     """Statistics for a single object's compression."""
+
+     object_id: str
+     object_type: str  # "semantic", "episodic", "procedural"
+     original_size: int  # bytes
+     compressed_size: int  # bytes after delta encoding
+     compression_ratio: float  # compressed_size / original_size (0.0 = 100% compression)
+     delta_used: bool  # Whether delta encoding was applied
+     compression_benefit: float  # original_size - compressed_size
+
+
+ @dataclass
+ class TypeCompressionStats:
+     """Aggregated statistics for an object type."""
+
+     object_type: str
+     count: int = 0
+     total_original_size: int = 0
+     total_compressed_size: int = 0
+     avg_compression_ratio: float = 0.0
+     total_benefit: int = 0  # Total bytes saved
+     objects_with_delta: int = 0  # How many used delta encoding
+     min_ratio: float = 1.0
+     max_ratio: float = 0.0
+
+     def update_from_object(self, obj_stats: ObjectCompressionStats) -> None:
+         """Update type stats with a single object's stats."""
+         self.count += 1
+         self.total_original_size += obj_stats.original_size
+         self.total_compressed_size += obj_stats.compressed_size
+         self.total_benefit += int(obj_stats.compression_benefit)
+         if obj_stats.delta_used:
+             self.objects_with_delta += 1
+         self.min_ratio = min(self.min_ratio, obj_stats.compression_ratio)
+         self.max_ratio = max(self.max_ratio, obj_stats.compression_ratio)
+
+         # Recalculate average
+         if self.total_original_size > 0:
+             self.avg_compression_ratio = self.total_compressed_size / self.total_original_size
+
+     def to_dict(self) -> Dict[str, Any]:
+         """Convert to dict for reporting."""
+         savings_pct = 0.0
+         if self.total_original_size > 0:
+             savings_pct = (self.total_benefit / self.total_original_size) * 100
+
+         return {
+             "object_type": self.object_type,
+             "count": self.count,
+             "total_original_bytes": self.total_original_size,
+             "total_compressed_bytes": self.total_compressed_size,
+             "avg_compression_ratio": round(self.avg_compression_ratio, 3),
+             "compression_range": f"{self.min_ratio:.1%} - {self.max_ratio:.1%}",
+             "total_bytes_saved": self.total_benefit,
+             "savings_percentage": round(savings_pct, 1),
+             "objects_using_delta": self.objects_with_delta,
+             "delta_adoption_rate": (
+                 round((self.objects_with_delta / self.count * 100), 1) if self.count > 0 else 0
+             ),
+         }
+
+
+ class DeltaCompressionMetrics:
+     """Tracks delta compression statistics across all objects.
+
+     Usage:
+         metrics = DeltaCompressionMetrics()
+         # ... during packing ...
+         metrics.record_object(ObjectCompressionStats(...))
+         # ... after packing ...
+         report = metrics.get_report()
+     """
+
+     def __init__(self):
+         self.objects: List[ObjectCompressionStats] = []
+         self.type_stats: Dict[str, TypeCompressionStats] = {}
+         self.total_original_size: int = 0
+         self.total_compressed_size: int = 0
+
+     def record_object(self, obj_stats: ObjectCompressionStats) -> None:
+         """Record compression stats for a single object."""
+         self.objects.append(obj_stats)
+         self.total_original_size += obj_stats.original_size
+         self.total_compressed_size += obj_stats.compressed_size
+
+         # Update type-specific stats
+         if obj_stats.object_type not in self.type_stats:
+             self.type_stats[obj_stats.object_type] = TypeCompressionStats(
+                 object_type=obj_stats.object_type
+             )
+         self.type_stats[obj_stats.object_type].update_from_object(obj_stats)
+
+     def get_type_stats(self, object_type: str) -> Optional[TypeCompressionStats]:
+         """Get stats for a specific object type."""
+         return self.type_stats.get(object_type)
+
+     def get_overall_ratio(self) -> float:
+         """Get overall compression ratio across all objects."""
+         if self.total_original_size == 0:
+             return 0.0
+         return self.total_compressed_size / self.total_original_size
+
+     def get_overall_savings(self) -> int:
+         """Get total bytes saved across all objects."""
+         return self.total_original_size - self.total_compressed_size
+
+     def get_report(self) -> Dict[str, Any]:
+         """Generate a comprehensive compression report."""
+         overall_ratio = self.get_overall_ratio()
+         overall_savings = self.get_overall_savings()
+         savings_pct = (
+             (overall_savings / self.total_original_size * 100)
+             if self.total_original_size > 0
+             else 0
+         )
+
+         return {
+             "timestamp": None,  # Set by caller if needed
+             "total_objects": len(self.objects),
+             "total_original_bytes": self.total_original_size,
+             "total_compressed_bytes": self.total_compressed_size,
+             "overall_compression_ratio": round(overall_ratio, 3),
+             "total_bytes_saved": overall_savings,
+             "compression_percentage": round(savings_pct, 1),
+             "type_statistics": {otype: stats.to_dict() for otype, stats in self.type_stats.items()},
+             "recommendations": self._generate_recommendations(),
+         }
+
+     def _generate_recommendations(self) -> List[str]:
+         """Generate optimization recommendations based on compression stats."""
+         recommendations = []
+
+         # Check if delta encoding is worth it
+         objects_with_delta = sum(s.objects_with_delta for s in self.type_stats.values())
+         if objects_with_delta == 0:
+             recommendations.append("No objects used delta encoding. Check similarity thresholds.")
+
+         # Check for types with poor compression
+         for otype, stats in self.type_stats.items():
+             if stats.count > 0 and stats.avg_compression_ratio > 0.9:
+                 recommendations.append(
+                     f"Type '{otype}' compresses poorly (ratio: {stats.avg_compression_ratio:.1%}). "
+                     f"Consider increasing similarity threshold or reducing delta cost."
+                 )
+
+         # Check for types with excellent compression
+         for otype, stats in self.type_stats.items():
+             if stats.count > 0 and stats.avg_compression_ratio < 0.5:
+                 recommendations.append(
+                     f"Type '{otype}' compresses very well (ratio: {stats.avg_compression_ratio:.1%}). "
+                     f"Consider aggressive delta encoding or reduced threshold."
+                 )
+
+         if not recommendations:
+             recommendations.append("Compression is operating normally.")
+
+         return recommendations
+
+     def get_heatmap(self) -> str:
+         """Generate a text-based compression heatmap."""
+         lines = ["Delta Compression Heatmap", "=" * 50]
+
+         if not self.type_stats:
+             lines.append("No compression data available")
+             return "\n".join(lines)
+
+         # Sort by compression ratio
+         sorted_types = sorted(
+             self.type_stats.values(),
+             key=lambda s: s.avg_compression_ratio,
+         )
+
+         for stats in sorted_types:
+             if stats.count == 0:
+                 continue
+             ratio = stats.avg_compression_ratio
+             # Create a simple bar chart
+             bar_width = 30
+             filled = int(bar_width * ratio)
+             bar = "█" * filled + "░" * (bar_width - filled)
+             saved_pct = (
+                 (stats.total_benefit / stats.total_original_size * 100)
+                 if stats.total_original_size > 0
+                 else 0
+             )
+             lines.append(
+                 f"{stats.object_type:12} {bar} {saved_pct:5.1f}% saved ({stats.objects_with_delta}/{stats.count} using delta)"
+             )
+
+         return "\n".join(lines)
+
+     def log_report(self, logger: Any = None) -> None:
+         """Log the compression report."""
+         report = self.get_report()
+         heatmap = self.get_heatmap()
+
+         output = [
+             "=" * 70,
+             "Delta Compression Report",
+             "=" * 70,
+             f"Total Objects: {report['total_objects']}",
+             f"Total Original: {report['total_original_bytes']:,} bytes",
+             f"Total Compressed: {report['total_compressed_bytes']:,} bytes",
+             f"Overall Ratio: {report['overall_compression_ratio']:.1%}",
+             f"Bytes Saved: {report['total_bytes_saved']:,} ({report['compression_percentage']:.1f}%)",
+             "",
+             heatmap,
+             "",
+             "Type Breakdown:",
+         ]
+
+         for otype, stats in sorted(report["type_statistics"].items()):
+             output.append(f" {otype}:")
+             output.append(f" Count: {stats['count']}")
+             output.append(f" Compression: {stats['avg_compression_ratio']:.1%}")
+             output.append(f" Saved: {stats['total_bytes_saved']:,} bytes")
+             output.append(f" Delta adoption: {stats['delta_adoption_rate']:.0f}%")
+
+         output.extend(["", "Recommendations:"])
+         for rec in report["recommendations"]:
+             output.append(f" - {rec}")
+
+         output.append("=" * 70)
+
+         full_output = "\n".join(output)
+         if logger:
+             logger.info(full_output)
+         else:
+             print(full_output)
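Likewise, a small sketch of how this metrics collector might be driven during a repack. The agmem.delta_metrics import path and the example object sizes are assumptions; the constructor fields and method calls are the ones defined above.

    # Assumed import path; adjust to wherever the wheel places this module.
    from agmem.delta_metrics import DeltaCompressionMetrics, ObjectCompressionStats

    metrics = DeltaCompressionMetrics()

    # Record one object's compression result (sizes here are made up).
    metrics.record_object(
        ObjectCompressionStats(
            object_id="obj-001",
            object_type="episodic",
            original_size=4096,
            compressed_size=1024,
            compression_ratio=1024 / 4096,
            delta_used=True,
            compression_benefit=4096 - 1024,
        )
    )

    report = metrics.get_report()  # dict with totals, per-type stats, recommendations
    print(metrics.get_heatmap())   # text bar chart, best-compressing types first
    metrics.log_report()           # prints the full report when no logger is given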