agmem 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: agmem
3
- Version: 0.2.0
3
+ Version: 0.2.1
4
4
  Summary: Agentic Memory Version Control System - Git for AI agent memories
5
5
  Home-page: https://github.com/vivek-tiwari-vt/agmem
6
6
  Author: agmem Team
@@ -137,14 +137,15 @@ agmem solves all of these problems with a familiar Git-like interface.
137
137
  - ✅ **Tamper-evident audit trail** — Append-only hash-chained log (init, add, commit, checkout, merge, push, pull, config); `agmem audit` and `agmem audit --verify`
138
138
  - ✅ **Multi-agent trust** — Trust store (full / conditional / untrusted) per public key; applied on pull/merge; clone copies remote keys
139
139
  - ✅ **Conflict resolution** — `agmem resolve` with ours/theirs/both; conflicts persisted in `.mem/merge/`; path-safe
140
- - ✅ **Differential privacy** — Epsilon/delta budget in `.mem/privacy_budget.json`; `--private` on `agmem distill` and `agmem garden`; noise applied to counts and frontmatter
140
+ - ✅ **Differential privacy** — Epsilon/delta budget in `.mem/privacy_budget.json`; `--private` on `agmem distill` and `agmem garden`; noise applies to fact-level data only (metadata fields excluded)
141
141
  - ✅ **Pack files & GC** — `agmem gc [--repack]` (reachable from refs, prune loose, optional pack file + index); ObjectStore reads from pack when loose missing
142
142
  - ✅ **Multi-provider LLM** — OpenAI and Anthropic via `memvcs.core.llm`; config/repo or env; used by gardener, distiller, consistency, merge
143
143
  - ✅ **Temporal querying** — Point-in-time and range queries in temporal index; frontmatter timestamps
144
- - ✅ **Federated collaboration** — `agmem federated push|pull`; real summaries (topic counts, fact hashes); optional DP on outbound; coordinator API in docs/FEDERATED.md
144
+ - ✅ **Federated collaboration** — `agmem federated push|pull`; protocol-compliant summaries (agent_id, timestamp, topic_counts, fact_hashes); optional DP on outbound; coordinator API in docs/FEDERATED.md
145
145
  - ✅ **Zero-knowledge proofs** — `agmem prove` (hash/signature-based): keyword containment (Merkle set membership), memory freshness (signed timestamp). **Note:** Current implementation is proof-of-knowledge with known limitations; see docs for migration to true zk-SNARKs.
146
146
  - ✅ **Daemon health** — 4-point health monitoring (storage, redundancy, staleness, graph consistency) with periodic checks; visible warnings and JSON reports
147
- - ✅ **Delta encoding** — 5-10x compression for similar objects using Levenshtein distance and SequenceMatcher; optional feature in pack files
147
+ - ✅ **Delta encoding** — 5-10x compression for similar objects using Levenshtein distance and SequenceMatcher; enabled in GC repack with multi-tier similarity filtering
148
+ - ✅ **Performance safeguards** — Multi-tier similarity filter (length ratio + SimHash) avoids O(n²×m²) worst-case comparisons
148
149
  - ✅ **GPU acceleration** — Vector store detects GPU for embedding model when available
149
150
  - ✅ **Optional** — `serve`, `daemon` (watch + auto-commit), `garden` (episode archival), MCP server; install extras as needed
150
151
 
@@ -1,6 +1,6 @@
1
- agmem-0.2.0.dist-info/licenses/LICENSE,sha256=X_S6RBErW-F0IDbM3FAEoDB-zxExFnl2m8640rTXphM,1067
2
- memvcs/__init__.py,sha256=pheWPxubHVcp2N6vk6M7hGXgkJQ06KajbWgCpOlUSJ8,193
3
- memvcs/cli.py,sha256=YF06oMNjKWUmiNahILmfjrIXgoXzU-5BJFmbunSb8Sc,6075
1
+ agmem-0.2.1.dist-info/licenses/LICENSE,sha256=X_S6RBErW-F0IDbM3FAEoDB-zxExFnl2m8640rTXphM,1067
2
+ memvcs/__init__.py,sha256=PwF2IkjOfw5nZCDcZdsNKns-h-FEvRahAqNd37Ti8_8,193
3
+ memvcs/cli.py,sha256=WPjhbevcOc_w_7SEXV5oitbEA5kYY5lHWgyTOq6x8sU,6075
4
4
  memvcs/commands/__init__.py,sha256=A2D6xWaO6epU7iV4QSvqvF5TspnwRyDN7NojmGatPrE,510
5
5
  memvcs/commands/add.py,sha256=k9eM7qf2NFvneiJkFQNiAYFB2GgKmyPw_NXmkCxblQE,8736
6
6
  memvcs/commands/audit.py,sha256=E6m54B726tqDQR3rrgRXWrjE-seu2UocqrFxN1aHkY4,1680
@@ -47,10 +47,11 @@ memvcs/commands/tree.py,sha256=vdULq4vIXA_4gNfMnHn_Y78BwE0sJoeTBOnFJR3WsZ4,4927
47
47
  memvcs/commands/verify.py,sha256=04CVW5NYWkUlPJ5z1Kci6dfQFM6UmPTGZh9ZextFLMc,3887
48
48
  memvcs/commands/when.py,sha256=bxG_tEYnZNBTl2IPkoxpc2LUEbO_5ev1hRvEzxQQDmc,4773
49
49
  memvcs/coordinator/__init__.py,sha256=XJEXEXJFvvhtRInPeyAC9bFNXGbshSrtuK6wZo3wS6g,139
50
- memvcs/coordinator/server.py,sha256=-kCEyqUi7eLC45qqkH1KCk8zZk8T1NdJyffOY8133ec,7045
50
+ memvcs/coordinator/server.py,sha256=M0wnww0EbtxuDaunP29LJDCnsTm1mcOn7h_fqZbQy5c,7550
51
51
  memvcs/core/__init__.py,sha256=dkIC-4tS0GhwV2mZIbofEe8xR8uiFwrxslGf1aXwhYg,493
52
52
  memvcs/core/access_index.py,sha256=HhacnzSUASzRV2jhDHkwRFoPS3rtqh9n9yE1VV7JXpk,5596
53
53
  memvcs/core/audit.py,sha256=8APkm9Spl_-1rIdyRQz1elyxOeK3nlpwm0CLkpLlhTE,3732
54
+ memvcs/core/compression_metrics.py,sha256=0JrbkCGr0hnaKlmPLqv5WVLwO3emOEz2iFhdMTDNTNY,9835
54
55
  memvcs/core/compression_pipeline.py,sha256=Vzr5v_0pgAG20C8znC0-Ho5fEwBoaTOLddxMTldd64M,5564
55
56
  memvcs/core/config_loader.py,sha256=j-jgLDp2TRzWN9ZEZebfWSfatevBNYs0FEb3ud1SIR8,8277
56
57
  memvcs/core/consistency.py,sha256=YOG8xhqZLKZCLbai2rdcP0KxYPNGFv5RRMwrQ6qCeyc,7462
@@ -59,18 +60,21 @@ memvcs/core/crypto_verify.py,sha256=DTuC7Kfx6z2b8UWOWziBTqP633LrjXbdtGmBBqrJTF0,
59
60
  memvcs/core/decay.py,sha256=ROGwnqngs7eJNkbKmwyOdij607m73vpmoJqzrIDLBzk,6581
60
61
  memvcs/core/delta.py,sha256=obXzojUSc2HaEUqH3L_1LF-GcJ63Wr_yYvIPM8iyeSg,7865
61
62
  memvcs/core/diff.py,sha256=koEHTLciIUxYKVJVuvmY0GDXMgDgGZP_qg5RayhF-iE,13226
62
- memvcs/core/distiller.py,sha256=QA4acLc005cLac09IvIaog1fJt5IGXWRiSdZq_Ya27g,14086
63
+ memvcs/core/distiller.py,sha256=wwY3xQVRBjVfxnOUIwMsQCSeQ2tlG68w2-KiCwkF9yo,13844
63
64
  memvcs/core/encryption.py,sha256=epny_nlW6ylllv1qxs1mAcFq-PrLIisgfot4llOoAqw,5289
64
- memvcs/core/federated.py,sha256=vUYMZ0xv80hqGDRKq645Od1i8N33l-pIAkklJbJUlVg,5445
65
- memvcs/core/gardener.py,sha256=lBWkyE72O-JMiHM-oqrnex9k_xSv7FvztjkOdLdB0Kk,18610
65
+ memvcs/core/fast_similarity.py,sha256=phgjxkSchJg7om9AFFSMbtP6bSidyRy-vVrR3XyMmDQ,13934
66
+ memvcs/core/federated.py,sha256=qwvfhNgga-lHadbinAfKPI4oAl0RMn5ab01ChmQTP1s,5863
67
+ memvcs/core/gardener.py,sha256=bpoJbK6PJ6nvK3ytj23jpMUBUB7Nn_fB80Ap1E7-Nv8,17041
66
68
  memvcs/core/hooks.py,sha256=XF9z8J5sWjAcuOyWQ2nuvEzK0UV8s4ThrcltaBZttzw,5448
67
69
  memvcs/core/ipfs_remote.py,sha256=xmEO14bn_7Ej-W5jhx2QJyBd-ljj9S2COOxMmcZBiTs,6643
68
70
  memvcs/core/knowledge_graph.py,sha256=GY27e1rgraF2zMpz_jsumdUtpgTRk48yH5CAEQ3TDl4,16416
69
71
  memvcs/core/merge.py,sha256=x2eSaxr4f63Eq00FCJ6DDe2TZU8H5yHQpzKzMhYsaFw,19871
70
72
  memvcs/core/objects.py,sha256=Xgw1IpQnJLCG5o_7gDHVQ-TNGR9CSpDYWRXzLgLSuec,11006
71
- memvcs/core/pack.py,sha256=Kq0hyMNroT0MwiS4pVJVuJO9nZ04P3wssep2tADvnpQ,15950
73
+ memvcs/core/pack.py,sha256=jtbeBh625K6nshPgBGf7zelU-BhvK5-t5NYBJPoYfgs,15961
72
74
  memvcs/core/pii_scanner.py,sha256=T6gQ1APFrSDk980fjnv4ZMF-UztbJgmUFSwGrwWixEw,10802
73
75
  memvcs/core/privacy_budget.py,sha256=fOPlxoKEAmsKtda-OJCrSaKjTyw7ekcqdN7KfRBw1CY,2113
76
+ memvcs/core/privacy_validator.py,sha256=g3l1zxSIxkjMYJMwL5yfuDY5FFjmkm6HZ2Wo4xBiEkQ,6795
77
+ memvcs/core/protocol_builder.py,sha256=b_5FphgmMdp7qP34ws3U2agXEoeYzTBjSgsQqd2Jx6Y,7713
74
78
  memvcs/core/refs.py,sha256=4Nx2ZVRa_DzfUZ4O1AwzOHEjoGAEICJKqSd9GxaiD_g,16754
75
79
  memvcs/core/remote.py,sha256=sZbAO9JEaDJM96PylB0CjpmR5UxWYdoXlq86sj3R2gU,22228
76
80
  memvcs/core/repository.py,sha256=NzC2UFPv6ePxi5lfiSKyZFLclH4bJpWJz88pY7tDiv4,20605
@@ -104,8 +108,8 @@ memvcs/retrieval/recaller.py,sha256=8KY-XjMUz5_vcKf46zI64uk1DEM__u7wM92ShukOtsY,
104
108
  memvcs/retrieval/strategies.py,sha256=26yxQQubQfjxWQXknfVMxuzPHf2EcZxJg_B99BEdl5c,11458
105
109
  memvcs/utils/__init__.py,sha256=8psUzz4Ntv2GzbRebkeVsoyC6Ck-FIwi0_lfYdj5oho,185
106
110
  memvcs/utils/helpers.py,sha256=37zg_DcQ2y99b9NSLqxFkglHe13rJXKhFDpEbQ7iLhM,4121
107
- agmem-0.2.0.dist-info/METADATA,sha256=Oh9LOeoQR_A9ZXrdrkt-sTFoiYA-peWVHvpVaUYncns,42100
108
- agmem-0.2.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
109
- agmem-0.2.0.dist-info/entry_points.txt,sha256=at7eWycgjqOo1wbUMECnXUsNo3gpCkJTU71OzrGLHu0,42
110
- agmem-0.2.0.dist-info/top_level.txt,sha256=HtMMsKuwLKLOdgF1GxqQztqFM54tTJctVdJuOec6B-4,7
111
- agmem-0.2.0.dist-info/RECORD,,
111
+ agmem-0.2.1.dist-info/METADATA,sha256=6UV86NAOpGnnqpRJJE_9XkU-7j2aoLSIf3TB1oQ3dC0,42320
112
+ agmem-0.2.1.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
113
+ agmem-0.2.1.dist-info/entry_points.txt,sha256=at7eWycgjqOo1wbUMECnXUsNo3gpCkJTU71OzrGLHu0,42
114
+ agmem-0.2.1.dist-info/top_level.txt,sha256=HtMMsKuwLKLOdgF1GxqQztqFM54tTJctVdJuOec6B-4,7
115
+ agmem-0.2.1.dist-info/RECORD,,
memvcs/__init__.py CHANGED
@@ -4,6 +4,6 @@ agmem - Agentic Memory Version Control System
4
4
  A Git-inspired version control system for AI agent memory artifacts.
5
5
  """
6
6
 
7
- __version__ = "0.1.6"
7
+ __version__ = "0.2.1"
8
8
  __author__ = "agmem Team"
9
9
  __license__ = "MIT"
memvcs/cli.py CHANGED
@@ -141,7 +141,7 @@ For more information: https://github.com/vivek-tiwari-vt/agmem
141
141
  """,
142
142
  )
143
143
 
144
- parser.add_argument("--version", "-v", action="version", version="%(prog)s 0.1.0")
144
+ parser.add_argument("--version", "-v", action="version", version="%(prog)s 0.2.1")
145
145
 
146
146
  parser.add_argument("--verbose", action="store_true", help="Enable verbose output")
147
147
 
@@ -21,6 +21,7 @@ from typing import Dict, List, Optional, Any
21
21
  from pathlib import Path
22
22
  import json
23
23
  import hashlib
24
+ import re
24
25
 
25
26
  try:
26
27
  from fastapi import FastAPI, HTTPException, Request
@@ -39,10 +40,25 @@ except ImportError:
39
40
  return None
40
41
 
41
42
 
43
def _get_version() -> str:
    """Return the agmem version string.

    Resolution order:
    1. Installed package metadata via ``importlib.metadata`` — the only
       source that exists for a wheel install (pyproject.toml is not
       shipped inside the wheel, so the original pyproject-only lookup
       always fell through to the hardcoded fallback when installed).
    2. ``pyproject.toml`` located relative to this file — works for
       source checkouts / editable installs.
    3. Hardcoded fallback "0.2.1".

    Never raises; any lookup failure falls through to the next source.
    """
    # Preferred: ask the installed distribution itself.
    try:
        from importlib.metadata import version

        return version("agmem")
    except Exception:
        pass
    # Source checkout: parse the version out of pyproject.toml.
    try:
        pyproject_path = Path(__file__).parent.parent.parent / "pyproject.toml"
        if pyproject_path.exists():
            content = pyproject_path.read_text()
            match = re.search(r'version\s*=\s*"([^"]+)"', content)
            if match:
                return match.group(1)
    except Exception:
        pass
    return "0.2.1"
55
+
56
+
42
57
  # Storage: In-memory for simplicity (use Redis/PostgreSQL for production)
43
58
  summaries_store: Dict[str, List[Dict[str, Any]]] = {}
59
+ _version = _get_version()
44
60
  metadata_store: Dict[str, Any] = {
45
- "coordinator_version": "0.1.6",
61
+ "coordinator_version": _version,
46
62
  "started_at": datetime.now(timezone.utc).isoformat(),
47
63
  "total_pushes": 0,
48
64
  "total_agents": 0,
@@ -79,7 +95,7 @@ if FASTAPI_AVAILABLE:
79
95
  app = FastAPI(
80
96
  title="agmem Federated Coordinator",
81
97
  description="Minimal coordinator for federated agent memory collaboration",
82
- version="0.1.6",
98
+ version=_version,
83
99
  )
84
100
 
85
101
  @app.get("/")
@@ -0,0 +1,248 @@
1
+ """
2
+ Delta compression metrics and observability.
3
+
4
+ Tracks compression effectiveness across object types to enable future
5
+ optimization and auto-tuning of delta encoding parameters.
6
+
7
+ Provides:
8
+ - DeltaCompressionMetrics: Tracks compression ratio, object types, benefits
9
+ - CompressionHeatmap: Visualizes which types compress best
10
+ - Statistics reporting for gc --repack operations
11
+ """
12
+
13
+ from dataclasses import dataclass, field
14
+ from typing import Dict, List, Any, Optional, Tuple
15
+ from collections import defaultdict
16
+
17
+
18
@dataclass
class ObjectCompressionStats:
    """Per-object record of how well a single object compressed.

    One instance is produced per packed object; aggregate views are
    derived elsewhere from collections of these records.
    """

    object_id: str  # identifier / content hash of the object
    object_type: str  # one of "semantic", "episodic", "procedural"
    original_size: int  # size in bytes before delta encoding
    compressed_size: int  # size in bytes after delta encoding
    compression_ratio: float  # compressed_size / original_size (0.0 = 100% compression)
    delta_used: bool  # True when delta encoding was actually applied
    compression_benefit: float  # bytes saved: original_size - compressed_size
29
+
30
+
31
@dataclass
class TypeCompressionStats:
    """Aggregated compression statistics for one object type.

    Accumulates per-object stats (ObjectCompressionStats) into running
    totals, a size-weighted average ratio, and the observed min/max
    ratio range.
    """

    object_type: str
    count: int = 0
    total_original_size: int = 0
    total_compressed_size: int = 0
    avg_compression_ratio: float = 0.0
    total_benefit: int = 0  # Total bytes saved
    objects_with_delta: int = 0  # How many used delta encoding
    min_ratio: float = 1.0  # meaningful only once count > 0
    max_ratio: float = 0.0  # meaningful only once count > 0

    def update_from_object(self, obj_stats: "ObjectCompressionStats") -> None:
        """Fold a single object's stats into this type's aggregates.

        Bug fix: the first recorded object now sets min_ratio/max_ratio
        directly. The previous seeding of min_ratio=1.0 silently clamped
        ratios above 1.0 (i.e. delta encoding made the object *larger*)
        out of the reported minimum via min(1.0, ratio).
        """
        self.count += 1
        self.total_original_size += obj_stats.original_size
        self.total_compressed_size += obj_stats.compressed_size
        self.total_benefit += int(obj_stats.compression_benefit)
        if obj_stats.delta_used:
            self.objects_with_delta += 1
        if self.count == 1:
            # First sample defines the range outright; no sentinel clamp.
            self.min_ratio = obj_stats.compression_ratio
            self.max_ratio = obj_stats.compression_ratio
        else:
            self.min_ratio = min(self.min_ratio, obj_stats.compression_ratio)
            self.max_ratio = max(self.max_ratio, obj_stats.compression_ratio)

        # Recalculate the size-weighted average from the running totals.
        if self.total_original_size > 0:
            self.avg_compression_ratio = self.total_compressed_size / self.total_original_size

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dict for reporting."""
        savings_pct = 0.0
        if self.total_original_size > 0:
            savings_pct = (self.total_benefit / self.total_original_size) * 100

        return {
            "object_type": self.object_type,
            "count": self.count,
            "total_original_bytes": self.total_original_size,
            "total_compressed_bytes": self.total_compressed_size,
            "avg_compression_ratio": round(self.avg_compression_ratio, 3),
            "compression_range": f"{self.min_ratio:.1%} - {self.max_ratio:.1%}",
            "total_bytes_saved": self.total_benefit,
            "savings_percentage": round(savings_pct, 1),
            "objects_using_delta": self.objects_with_delta,
            "delta_adoption_rate": (
                round((self.objects_with_delta / self.count * 100), 1) if self.count > 0 else 0
            ),
        }
80
+
81
+
82
class DeltaCompressionMetrics:
    """Tracks delta compression statistics across all objects.

    Usage:
        metrics = DeltaCompressionMetrics()
        # ... during packing ...
        metrics.record_object(ObjectCompressionStats(...))
        # ... after packing ...
        report = metrics.get_report()
    """

    def __init__(self):
        # Per-object records plus running byte totals; per-type aggregates
        # live in their own dict so reports can break down by object type.
        self.objects: List["ObjectCompressionStats"] = []
        self.type_stats: Dict[str, "TypeCompressionStats"] = {}
        self.total_original_size: int = 0
        self.total_compressed_size: int = 0

    def record_object(self, obj_stats: "ObjectCompressionStats") -> None:
        """Record compression stats for a single object."""
        self.objects.append(obj_stats)
        self.total_original_size += obj_stats.original_size
        self.total_compressed_size += obj_stats.compressed_size

        # Lazily create the per-type aggregate, then fold this object in.
        bucket = self.type_stats.get(obj_stats.object_type)
        if bucket is None:
            bucket = TypeCompressionStats(object_type=obj_stats.object_type)
            self.type_stats[obj_stats.object_type] = bucket
        bucket.update_from_object(obj_stats)

    def get_type_stats(self, object_type: str) -> Optional["TypeCompressionStats"]:
        """Get stats for a specific object type."""
        return self.type_stats.get(object_type)

    def get_overall_ratio(self) -> float:
        """Get overall compression ratio across all objects."""
        if not self.total_original_size:
            return 0.0
        return self.total_compressed_size / self.total_original_size

    def get_overall_savings(self) -> int:
        """Get total bytes saved across all objects."""
        return self.total_original_size - self.total_compressed_size

    def get_report(self) -> Dict[str, Any]:
        """Generate a comprehensive compression report."""
        saved = self.get_overall_savings()
        if self.total_original_size > 0:
            saved_pct = saved / self.total_original_size * 100
        else:
            saved_pct = 0

        return {
            "timestamp": None,  # Set by caller if needed
            "total_objects": len(self.objects),
            "total_original_bytes": self.total_original_size,
            "total_compressed_bytes": self.total_compressed_size,
            "overall_compression_ratio": round(self.get_overall_ratio(), 3),
            "total_bytes_saved": saved,
            "compression_percentage": round(saved_pct, 1),
            "type_statistics": {otype: stats.to_dict() for otype, stats in self.type_stats.items()},
            "recommendations": self._generate_recommendations(),
        }

    def _generate_recommendations(self) -> List[str]:
        """Generate optimization recommendations based on compression stats."""
        recommendations: List[str] = []

        # Nothing used delta encoding at all: the similarity gate never fired.
        if not any(s.objects_with_delta for s in self.type_stats.values()):
            recommendations.append("No objects used delta encoding. Check similarity thresholds.")

        # Types that barely compress (ratio > 0.9).
        recommendations.extend(
            f"Type '{otype}' compresses poorly (ratio: {stats.avg_compression_ratio:.1%}). "
            f"Consider increasing similarity threshold or reducing delta cost."
            for otype, stats in self.type_stats.items()
            if stats.count > 0 and stats.avg_compression_ratio > 0.9
        )

        # Types that compress extremely well (ratio < 0.5).
        recommendations.extend(
            f"Type '{otype}' compresses very well (ratio: {stats.avg_compression_ratio:.1%}). "
            f"Consider aggressive delta encoding or reduced threshold."
            for otype, stats in self.type_stats.items()
            if stats.count > 0 and stats.avg_compression_ratio < 0.5
        )

        if not recommendations:
            recommendations.append("Compression is operating normally.")

        return recommendations

    def get_heatmap(self) -> str:
        """Generate a text-based compression heatmap."""
        lines = ["Delta Compression Heatmap", "=" * 50]

        if not self.type_stats:
            lines.append("No compression data available")
            return "\n".join(lines)

        # Best-compressing types (lowest average ratio) are listed first.
        for stats in sorted(self.type_stats.values(), key=lambda s: s.avg_compression_ratio):
            if not stats.count:
                continue
            # Render the ratio as a simple fixed-width bar chart.
            bar_width = 30
            filled = int(bar_width * stats.avg_compression_ratio)
            bar = "█" * filled + "░" * (bar_width - filled)
            if stats.total_original_size > 0:
                saved_pct = stats.total_benefit / stats.total_original_size * 100
            else:
                saved_pct = 0
            lines.append(
                f"{stats.object_type:12} {bar} {saved_pct:5.1f}% saved ({stats.objects_with_delta}/{stats.count} using delta)"
            )

        return "\n".join(lines)

    def log_report(self, logger: Any = None) -> None:
        """Log the compression report.

        Writes via ``logger.info`` when a logger is supplied, otherwise
        prints to stdout.
        """
        report = self.get_report()

        output = [
            "=" * 70,
            "Delta Compression Report",
            "=" * 70,
            f"Total Objects: {report['total_objects']}",
            f"Total Original: {report['total_original_bytes']:,} bytes",
            f"Total Compressed: {report['total_compressed_bytes']:,} bytes",
            f"Overall Ratio: {report['overall_compression_ratio']:.1%}",
            f"Bytes Saved: {report['total_bytes_saved']:,} ({report['compression_percentage']:.1f}%)",
            "",
            self.get_heatmap(),
            "",
            "Type Breakdown:",
        ]

        for otype, stats in sorted(report["type_statistics"].items()):
            output.append(f" {otype}:")
            output.append(f" Count: {stats['count']}")
            output.append(f" Compression: {stats['avg_compression_ratio']:.1%}")
            output.append(f" Saved: {stats['total_bytes_saved']:,} bytes")
            output.append(f" Delta adoption: {stats['delta_adoption_rate']:.0f}%")

        output.extend(["", "Recommendations:"])
        for rec in report["recommendations"]:
            output.append(f" - {rec}")

        output.append("=" * 70)

        full_output = "\n".join(output)
        if logger:
            logger.info(full_output)
        else:
            print(full_output)
memvcs/core/distiller.py CHANGED
@@ -211,7 +211,6 @@ class Distiller:
211
211
  # Sample facts with noise - prevents any single episode from dominating
212
212
  import random
213
213
 
214
- random.seed(42) # Deterministic but different per cluster due to content
215
214
  sampled = random.sample(facts, min(noisy_count, len(facts)))
216
215
 
217
216
  # Optional: Add slight noise to fact embeddings if vector store available
@@ -233,17 +232,9 @@ class Distiller:
233
232
  out_path = self.target_dir / f"consolidated-{ts}.md"
234
233
 
235
234
  confidence_score = self.config.extraction_confidence_threshold
236
- if (
237
- self.config.use_dp
238
- and self.config.dp_epsilon is not None
239
- and self.config.dp_delta is not None
240
- ):
241
- from .privacy_budget import add_noise
242
-
243
- confidence_score = add_noise(
244
- confidence_score, 0.1, self.config.dp_epsilon, self.config.dp_delta
245
- )
246
- confidence_score = max(0.0, min(1.0, confidence_score))
235
+ # Metadata noise removed: confidence_score is a metadata field (threshold setting),
236
+ # not an individual fact. Adding noise to metadata doesn't provide meaningful
237
+ # privacy guarantees. See privacy_validator.py for the distinction.
247
238
  frontmatter = {
248
239
  "schema_version": "1.0",
249
240
  "last_updated": datetime.utcnow().isoformat() + "Z",