agmem 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {agmem-0.2.0.dist-info → agmem-0.2.1.dist-info}/METADATA +5 -4
- {agmem-0.2.0.dist-info → agmem-0.2.1.dist-info}/RECORD +17 -13
- memvcs/__init__.py +1 -1
- memvcs/cli.py +1 -1
- memvcs/coordinator/server.py +18 -2
- memvcs/core/compression_metrics.py +248 -0
- memvcs/core/distiller.py +3 -12
- memvcs/core/fast_similarity.py +404 -0
- memvcs/core/federated.py +13 -2
- memvcs/core/gardener.py +8 -68
- memvcs/core/pack.py +1 -1
- memvcs/core/privacy_validator.py +187 -0
- memvcs/core/protocol_builder.py +198 -0
- {agmem-0.2.0.dist-info → agmem-0.2.1.dist-info}/WHEEL +0 -0
- {agmem-0.2.0.dist-info → agmem-0.2.1.dist-info}/entry_points.txt +0 -0
- {agmem-0.2.0.dist-info → agmem-0.2.1.dist-info}/licenses/LICENSE +0 -0
- {agmem-0.2.0.dist-info → agmem-0.2.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Privacy field validation and auditing.
|
|
3
|
+
|
|
4
|
+
Ensures differential privacy noise is only applied to fact data, not metadata.
|
|
5
|
+
Prevents accidental privacy overhead on metadata fields and provides audit trail.
|
|
6
|
+
|
|
7
|
+
Provides:
|
|
8
|
+
- @privacy_exempt: Decorator to mark metadata fields as privacy-exempt
|
|
9
|
+
- PrivacyFieldValidator: Runtime validation that noise is applied correctly
|
|
10
|
+
- PrivacyAuditReport: Audit trail of which fields received noise
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from typing import Any, Callable, Dict, List, Optional, Set
|
|
14
|
+
from functools import wraps
|
|
15
|
+
from dataclasses import dataclass, field
|
|
16
|
+
from datetime import datetime, timezone
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
class PrivacyAuditReport:
    """Record of which fields received DP noise and which were exempt.

    Attributes:
        timestamp: ISO-8601 time at which the audit started.
        noised_fields: field name -> value, for fields that received noise.
        exempt_fields: field name -> value, for fields exempt from noise.
        validation_errors: human-readable validation failures, if any.
    """

    timestamp: str
    noised_fields: Dict[str, Any] = field(default_factory=dict)
    exempt_fields: Dict[str, Any] = field(default_factory=dict)
    validation_errors: List[str] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dict for logging/serialization."""
        # Aggregate counts up front so the serialized form carries a
        # quick pass/fail summary alongside the raw field maps.
        summary = {
            "total_noised": len(self.noised_fields),
            "total_exempt": len(self.exempt_fields),
            "validation_passed": not self.validation_errors,
        }
        report: Dict[str, Any] = {
            "timestamp": self.timestamp,
            "noised_fields": self.noised_fields,
            "exempt_fields": self.exempt_fields,
            "validation_errors": self.validation_errors,
            "summary": summary,
        }
        return report
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class PrivacyFieldValidator:
    """Validates that differential-privacy noise is applied correctly.

    Keeps a running PrivacyAuditReport of which fields were noised vs.
    exempt, and fails loudly (RuntimeError) when noise lands on a
    metadata field that must never receive it.
    """

    # Metadata fields that must NEVER receive noise: they describe the
    # summarization run itself, not any individual fact.
    EXEMPT_FIELDS = {
        "clusters_found",  # count of clusters, not individual facts
        "insights_generated",  # count of insights generated
        "episodes_archived",  # count of archived episodes
        "confidence_score",  # overall quality metric, not a fact
        "summary_version",  # schema version
        "created_at",  # timestamp
        "updated_at",  # timestamp
        "agent_version",  # software version
    }

    # Fact-bearing fields that SHOULD receive noise.
    FACT_FIELDS = {
        "facts",  # list of actual facts
        "memories",  # memory content
        "semantic_content",  # semantic memory content
        "episodic_content",  # episodic memory content
        "procedural_content",  # procedural memory content
        "embeddings",  # vector representations of facts
        "fact_count",  # count of individual facts (not metadata)
        "memory_count",  # count of individual memories
    }

    def __init__(self):
        started = datetime.now(timezone.utc).isoformat()
        self.audit_report = PrivacyAuditReport(timestamp=started)

    def validate_noised_field(
        self, field_name: str, field_value: Any, is_noised: bool = True
    ) -> None:
        """Record one field's noise status, rejecting noise on exempt fields.

        Args:
            field_name: Name of the field
            field_value: Value of the field
            is_noised: Whether noise was applied to this field

        Raises:
            RuntimeError: If noise is applied to an exempt metadata field
        """
        if is_noised and field_name in self.EXEMPT_FIELDS:
            error = (
                f"ERROR: Noise applied to exempt metadata field '{field_name}'. "
                f"Metadata fields do not reveal individual facts and should not receive noise. "
                f"Remove noise from: {field_name}"
            )
            self.audit_report.validation_errors.append(error)
            raise RuntimeError(error)

        # Route the value into the matching bucket of the audit report.
        bucket = (
            self.audit_report.noised_fields
            if is_noised
            else self.audit_report.exempt_fields
        )
        bucket[field_name] = field_value

    def validate_result_dict(self, result: Dict[str, Any]) -> None:
        """Record exempt fields present in a result dict (e.g. DistillerResult).

        Any exempt field found in the result is assumed un-noised and is
        added to the audit trail.

        Args:
            result: The result dict to validate
        """
        for name in result.keys() & self.EXEMPT_FIELDS:
            self.audit_report.exempt_fields[name] = result[name]

    def get_report(self) -> PrivacyAuditReport:
        """Return the audit report, echoing any validation errors to stdout."""
        errors = self.audit_report.validation_errors
        if errors:
            print("Privacy Validation Report:\n" + "\n".join(f"  {e}" for e in errors))
        return self.audit_report
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def privacy_exempt(func: Callable) -> Callable:
    """Decorator marking a function as exempt from DP noise.

    The decorated function should not apply DP noise to its result.
    Dict results are tagged in-place with a ``_privacy_exempt`` key, and
    the wrapper itself carries a ``_privacy_exempt_function`` attribute
    for introspection.

    Example:
        @privacy_exempt
        def get_metadata() -> Dict[str, Any]:
            return {"clusters_found": 42, "created_at": "2024-01-01T00:00:00Z"}
    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        outcome = func(*args, **kwargs)
        # Only dicts can carry the marker; other results pass through untouched.
        if isinstance(outcome, dict):
            outcome["_privacy_exempt"] = True
        return outcome

    wrapper._privacy_exempt_function = True  # introspection marker
    return wrapper
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
class PrivacyGuard:
    """Context manager for privacy-aware code blocks.

    Wraps a PrivacyFieldValidator so callers can record which fields
    received DP noise (or were exempt) while a block of code runs.

    Usage:
        with PrivacyGuard() as pg:
            result = process_facts(data)
            pg.mark_noised("fact_count")
    """

    def __init__(self, strict: bool = True):
        # strict=True routes mark_noised() through full validation,
        # which raises on noise applied to exempt metadata fields.
        self.strict = strict
        self.validator = PrivacyFieldValidator()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Never suppress exceptions: return False (propagate) when one occurred.
        return exc_type is None

    def mark_noised(self, field_name: str, value: Any = None) -> None:
        """Mark a field as having received DP noise."""
        if not self.strict:
            self.validator.audit_report.noised_fields[field_name] = value
            return
        self.validator.validate_noised_field(field_name, value, is_noised=True)

    def mark_exempt(self, field_name: str, value: Any = None) -> None:
        """Mark a field as exempt from DP noise."""
        self.validator.audit_report.exempt_fields[field_name] = value

    def get_report(self) -> PrivacyAuditReport:
        """Return the accumulated privacy audit report."""
        return self.validator.get_report()
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Protocol Builder for federated agent summaries.
|
|
3
|
+
|
|
4
|
+
Ensures client-side summaries conform to the server's PushRequest schema
|
|
5
|
+
before transmission, preventing 422 Validation Errors and protocol mismatches.
|
|
6
|
+
|
|
7
|
+
Provides:
|
|
8
|
+
- ClientSummaryBuilder: Constructs AgentSummary from raw produce_local_summary output
|
|
9
|
+
- SchemaValidationError: Raised when summary doesn't match server schema
|
|
10
|
+
- Deterministic agent_id generation from repository content
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import hashlib
|
|
14
|
+
import json
|
|
15
|
+
from datetime import datetime, timezone
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from typing import Any, Dict, List, Optional
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class SchemaValidationError(Exception):
    """Raised when client summary doesn't match server schema."""
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class ClientSummaryBuilder:
    """Build protocol-compliant AgentSummary from raw produce_local_summary output.

    Handles:
    - Key name mapping (topics -> topic_counts)
    - Fact count to fact_hashes conversion (int -> list of hash strings)
    - Auto-generation of agent_id from repo hash (deterministic, replayable)
    - ISO-8601 timestamp addition
    - Schema validation against server expectations
    - Wrapping in {"summary": {...}} envelope
    """

    REQUIRED_FIELDS = {"agent_id", "timestamp", "topic_counts", "fact_hashes"}

    @staticmethod
    def generate_agent_id(repo_root: Path) -> str:
        """Generate deterministic agent_id from repository content.

        Uses SHA-256 hash of the resolved repo root path, so the same
        repository always gets the same agent_id (deterministic and
        replayable) while distinct repositories get distinct ids.

        Args:
            repo_root: Path to the repository root

        Returns:
            Unique agent identifier in format: "agent-<first-16-chars-of-hash>"
        """
        repo_hash = hashlib.sha256(str(repo_root.resolve()).encode()).hexdigest()[:16]
        return f"agent-{repo_hash}"

    @staticmethod
    def build(
        repo_root: Path,
        raw_summary: Dict[str, Any],
        strict_mode: bool = False,
    ) -> Dict[str, Any]:
        """Build protocol-compliant summary from raw produce_local_summary output.

        Transforms the client's produce_local_summary() output into the format
        expected by the server's PushRequest model.

        Args:
            repo_root: Path to repository root (used for agent_id generation)
            raw_summary: Output from produce_local_summary()
            strict_mode: If True, raise on validation error; if False, warn and repair

        Returns:
            Dict with structure: {"summary": {"agent_id": "...", "timestamp": "...",
            "topic_counts": {...}, "fact_hashes": [...]}}

        Raises:
            SchemaValidationError: If strict_mode=True and schema validation fails
        """
        # In strict mode, validate raw input has required fields BEFORE transformation
        if strict_mode:
            required_raw_fields = {"memory_types", "topics", "topic_hashes", "fact_count"}
            missing = required_raw_fields - set(raw_summary.keys())
            if missing:
                raise SchemaValidationError(
                    f"Raw summary missing required fields: {', '.join(sorted(missing))}"
                )

        # Generate required fields
        agent_id = ClientSummaryBuilder.generate_agent_id(repo_root)
        timestamp = datetime.now(timezone.utc).isoformat()

        # Transform key names and structure (topics -> topic_counts)
        topic_counts = raw_summary.get("topics", {})
        if not isinstance(topic_counts, dict):
            topic_counts = {}

        # Prefer real per-topic hashes: flatten all topic hash lists into one list.
        fact_hashes: List[str] = []
        topic_hashes = raw_summary.get("topic_hashes")
        if isinstance(topic_hashes, dict):
            for hash_list in topic_hashes.values():
                if isinstance(hash_list, list):
                    fact_hashes.extend(hash_list)

        # Fallback: synthesize placeholder hashes from fact_count.
        # NOTE(review): these placeholders are identical for every agent with the
        # same fact_count -- confirm the server does not deduplicate across agents.
        if not fact_hashes and "fact_count" in raw_summary:
            fact_count = raw_summary["fact_count"]
            if isinstance(fact_count, int):
                fact_hashes = [
                    hashlib.sha256(f"fact-{i}".encode()).hexdigest() for i in range(fact_count)
                ]

        # Build AgentSummary structure
        agent_summary = {
            "agent_id": agent_id,
            "timestamp": timestamp,
            "topic_counts": topic_counts,
            "fact_hashes": fact_hashes,
        }

        # Validate schema; non-strict mode warns and ships the summary anyway.
        errors = ClientSummaryBuilder._validate_schema(agent_summary)
        if errors:
            # Plain string concat (the original used an f-string with no placeholders).
            error_msg = "Schema validation failed:\n" + "\n".join(f"  - {e}" for e in errors)
            if strict_mode:
                raise SchemaValidationError(error_msg)
            print(f"Warning: {error_msg}")

        # Return wrapped in the envelope expected by the server's PushRequest
        return {"summary": agent_summary}

    @staticmethod
    def _validate_schema(agent_summary: Dict[str, Any]) -> List[str]:
        """Validate agent_summary against expected schema.

        Args:
            agent_summary: The summary dict to validate

        Returns:
            List of error messages (empty if valid)
        """
        errors: List[str] = []

        # Check required fields (renamed loop var: 'field' collided conceptually
        # with dataclasses.field used elsewhere in the package)
        for required in ClientSummaryBuilder.REQUIRED_FIELDS:
            if required not in agent_summary:
                errors.append(f"Missing required field: {required}")

        # agent_id must be a string
        if "agent_id" in agent_summary and not isinstance(agent_summary["agent_id"], str):
            errors.append(f"agent_id must be string, got {type(agent_summary['agent_id'])}")

        # timestamp must be an ISO-8601 string
        if "timestamp" in agent_summary:
            ts = agent_summary["timestamp"]
            if not isinstance(ts, str):
                errors.append(f"timestamp must be string, got {type(ts)}")
            elif not ClientSummaryBuilder._is_valid_timestamp(ts):
                errors.append(f"timestamp not in ISO-8601 format: {ts}")

        # topic_counts must be a dict of str -> int
        if "topic_counts" in agent_summary:
            tc = agent_summary["topic_counts"]
            if not isinstance(tc, dict):
                errors.append(f"topic_counts must be dict, got {type(tc)}")
            else:
                for k, v in tc.items():
                    if not isinstance(k, str):
                        errors.append(f"topic_counts key must be string, got {type(k)}")
                    if not isinstance(v, int):
                        errors.append(f"topic_counts value must be int, got {type(v)}")

        # fact_hashes must be a list of strings
        if "fact_hashes" in agent_summary:
            fh = agent_summary["fact_hashes"]
            if not isinstance(fh, list):
                errors.append(f"fact_hashes must be list, got {type(fh)}")
            else:
                for h in fh:
                    if not isinstance(h, str):
                        errors.append(f"fact_hashes element must be string, got {type(h)}")

        return errors

    @staticmethod
    def _is_valid_timestamp(timestamp: str) -> bool:
        """Return True if *timestamp* parses as ISO-8601 ('Z' suffix allowed).

        Self-contained replacement for the module-level _is_iso8601 helper so
        the class has no hidden module-level dependency; same check.
        """
        try:
            if timestamp.endswith("Z"):
                # fromisoformat() rejected a trailing 'Z' before Python 3.11
                datetime.fromisoformat(timestamp.replace("Z", "+00:00"))
            else:
                datetime.fromisoformat(timestamp)
            return True
        except (ValueError, TypeError):
            return False
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def _is_iso8601(timestamp: str) -> bool:
    """Return True if *timestamp* parses as an ISO-8601 datetime string.

    A trailing 'Z' (Zulu/UTC) suffix is normalized to '+00:00' before
    parsing, since datetime.fromisoformat() rejected 'Z' before Python 3.11.
    """
    try:
        candidate = timestamp.replace("Z", "+00:00") if timestamp.endswith("Z") else timestamp
        datetime.fromisoformat(candidate)
    except (ValueError, TypeError):
        return False
    return True
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|