agmem 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,187 @@
1
+ """
2
+ Privacy field validation and auditing.
3
+
4
+ Ensures differential privacy noise is only applied to fact data, not metadata.
5
+ Prevents accidental privacy overhead on metadata fields and provides audit trail.
6
+
7
+ Provides:
8
+ - @privacy_exempt: Decorator to mark functions that return metadata as privacy-exempt
9
+ - PrivacyFieldValidator: Runtime validation that noise is applied correctly
10
+ - PrivacyAuditReport: Audit trail of which fields received noise
11
+ """
12
+
13
+ from typing import Any, Callable, Dict, List, Optional, Set
14
+ from functools import wraps
15
+ from dataclasses import dataclass, field
16
+ from datetime import datetime, timezone
17
+
18
+
19
@dataclass
class PrivacyAuditReport:
    """Audit trail describing where privacy noise was and was not applied.

    The three collections start empty for every instance and are filled in
    by whoever owns the report.
    """

    # Creation time of the report, stored as a string.
    timestamp: str
    # Field name -> value for every field recorded as noised.
    noised_fields: Dict[str, Any] = field(default_factory=dict)
    # Field name -> value for every field recorded as exempt from noise.
    exempt_fields: Dict[str, Any] = field(default_factory=dict)
    # Human-readable descriptions of validation failures.
    validation_errors: List[str] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Return the report as a plain dict suitable for logging/serialization.

        The returned mapping references the report's own collections (they
        are not copied) and adds a ``summary`` entry with the two field
        counts and a ``validation_passed`` flag that is true only when no
        validation errors were recorded.
        """
        summary = {
            "total_noised": len(self.noised_fields),
            "total_exempt": len(self.exempt_fields),
            "validation_passed": len(self.validation_errors) == 0,
        }
        payload: Dict[str, Any] = {
            attr: getattr(self, attr)
            for attr in ("timestamp", "noised_fields", "exempt_fields", "validation_errors")
        }
        payload["summary"] = summary
        return payload
41
+
42
+
43
class PrivacyFieldValidator:
    """Validates that privacy noise is applied correctly.

    Tracks which fields receive noise vs. are exempt from noise in an audit
    report, and fails loudly (``RuntimeError``) if noise is reported on a
    field listed in ``EXEMPT_FIELDS``.
    """

    # Metadata fields that should NEVER receive noise (they don't reveal facts)
    EXEMPT_FIELDS = {
        "clusters_found",  # Metadata: count of clusters, not individual facts
        "insights_generated",  # Metadata: count of insights generated
        "episodes_archived",  # Metadata: count of archived episodes
        "confidence_score",  # Metadata: overall quality metric, not a fact
        "summary_version",  # Metadata: schema version
        "created_at",  # Metadata: timestamp
        "updated_at",  # Metadata: timestamp
        "agent_version",  # Metadata: software version
    }

    # Fact-related fields that SHOULD receive noise.
    # NOTE: no method of this class consults this set; it is declarative only.
    FACT_FIELDS = {
        "facts",  # List of actual facts
        "memories",  # Memory content
        "semantic_content",  # Semantic memory content
        "episodic_content",  # Episodic memory content
        "procedural_content",  # Procedural memory content
        "embeddings",  # Vector representations of facts
        "fact_count",  # Count of individual facts (not metadata)
        "memory_count",  # Count of individual memories
    }

    def __init__(self):
        """Start a fresh audit report stamped with the current UTC time."""
        self.audit_report = PrivacyAuditReport(timestamp=datetime.now(timezone.utc).isoformat())

    def validate_noised_field(
        self, field_name: str, field_value: Any, is_noised: bool = True
    ) -> None:
        """Validate that noise application is correct for a field and record it.

        Args:
            field_name: Name of the field
            field_value: Value of the field (stored in the audit report)
            is_noised: Whether noise was applied to this field

        Raises:
            RuntimeError: If noise is applied to an exempt field. The error
                text is appended to the report's ``validation_errors`` before
                raising, and the field is not recorded as noised.
        """
        if is_noised and field_name in self.EXEMPT_FIELDS:
            error = (
                f"ERROR: Noise applied to exempt metadata field '{field_name}'. "
                "Metadata fields do not reveal individual facts and should not receive noise. "
                f"Remove noise from: {field_name}"
            )
            self.audit_report.validation_errors.append(error)
            raise RuntimeError(error)

        # Anything not noised is filed under exempt, whether or not its name
        # appears in EXEMPT_FIELDS.
        target = self.audit_report.noised_fields if is_noised else self.audit_report.exempt_fields
        target[field_name] = field_value

    def validate_result_dict(self, result: Dict[str, Any]) -> None:
        """Record the exempt metadata fields present in a result dict.

        Every key of ``result`` that is listed in ``EXEMPT_FIELDS`` is copied
        into the audit report's ``exempt_fields``; other keys are ignored.
        Entries are recorded in ``result``'s own key order so the audit trail
        is reproducible between runs. This method only records; it never
        raises a validation error.

        Args:
            result: The result dict to inspect
        """
        exempt_names = self.EXEMPT_FIELDS
        recorded = self.audit_report.exempt_fields
        for field_name, field_value in result.items():
            if field_name in exempt_names:
                # These fields should not have been noised
                recorded[field_name] = field_value

    def get_report(self) -> "PrivacyAuditReport":
        """Return the audit report, printing any recorded validation errors first."""
        if self.audit_report.validation_errors:
            print(
                "Privacy Validation Report:\n"
                + "\n".join(f" {e}" for e in self.audit_report.validation_errors)
            )
        return self.audit_report
126
+
127
+
128
def privacy_exempt(func: Callable) -> Callable:
    """Wrap ``func`` so that it is flagged as exempt from privacy noise.

    The returned wrapper calls ``func`` unchanged. When the call produces a
    dict, that same dict gets a ``"_privacy_exempt": True`` entry added in
    place; any other return value is passed through untouched. The wrapper
    itself carries a ``_privacy_exempt_function`` attribute set to ``True``
    so the marking can be detected without calling it.

    Example:
        @privacy_exempt
        def get_metadata() -> Dict[str, Any]:
            return {"clusters_found": 42, "created_at": "2024-01-01T00:00:00Z"}
    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        outcome = func(*args, **kwargs)
        if not isinstance(outcome, dict):
            # Nothing to tag on non-dict results.
            return outcome
        outcome["_privacy_exempt"] = True
        return outcome

    # Flag on the callable itself, independent of what it returns.
    wrapper._privacy_exempt_function = True
    return wrapper
151
+
152
+
153
class PrivacyGuard:
    """Context manager for privacy-aware code blocks.

    Wraps a ``PrivacyFieldValidator`` and exposes ``mark_noised`` /
    ``mark_exempt`` to record what happened to each field. With
    ``strict=True`` (the default) marking an exempt metadata field as noised
    raises; with ``strict=False`` the violation is still written to the audit
    report but execution continues.

    Usage:
        with PrivacyGuard() as pg:
            result = process_facts(data)
            pg.mark_noised("fact_count")
    """

    def __init__(self, strict: bool = True):
        """Create a guard with its own validator and audit report.

        Args:
            strict: Whether ``mark_noised`` raises on a violation.
        """
        self.strict = strict
        self.validator = PrivacyFieldValidator()

    def __enter__(self):
        """Return the guard itself for use as the ``with`` target."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Leave the block without ever suppressing an exception.

        A true return value from ``__exit__`` tells Python to swallow the
        in-flight exception, so this always returns ``False``.
        """
        return False

    def mark_noised(self, field_name: str, value: Any = None) -> None:
        """Mark a field as having received DP noise.

        Args:
            field_name: Name of the noised field
            value: Optional value to store in the audit report

        Raises:
            RuntimeError: In strict mode, if the validator rejects the field.
        """
        try:
            self.validator.validate_noised_field(field_name, value, is_noised=True)
        except RuntimeError:
            if self.strict:
                raise
            # Non-strict: the validator has already appended the violation to
            # validation_errors before raising; keep going and still record
            # the field so the report shows both the noise and the error.
            self.validator.audit_report.noised_fields[field_name] = value

    def mark_exempt(self, field_name: str, value: Any = None) -> None:
        """Mark a field as exempt from DP noise."""
        self.validator.audit_report.exempt_fields[field_name] = value

    def get_report(self) -> "PrivacyAuditReport":
        """Get the privacy audit report from the underlying validator."""
        return self.validator.get_report()
@@ -0,0 +1,198 @@
1
+ """
2
+ Protocol Builder for federated agent summaries.
3
+
4
+ Ensures client-side summaries conform to the server's PushRequest schema
5
+ before transmission, preventing 422 Validation Errors and protocol mismatches.
6
+
7
+ Provides:
8
+ - ClientSummaryBuilder: Constructs AgentSummary from raw produce_local_summary output
9
+ - SchemaValidationError: Raised when summary doesn't match server schema
10
+ - Deterministic agent_id generation from the repository root path
11
+ """
12
+
13
+ import hashlib
14
+ import json
15
+ from datetime import datetime, timezone
16
+ from pathlib import Path
17
+ from typing import Any, Dict, List, Optional
18
+
19
+
20
class SchemaValidationError(Exception):
    """Signals that a client summary does not match the server schema."""
24
+
25
+
26
class ClientSummaryBuilder:
    """Build protocol-compliant AgentSummary from raw produce_local_summary output.

    Handles:
    - Key name mapping (topics -> topic_counts)
    - Fact count to fact_hashes conversion (int -> list of hash strings)
    - Auto-generation of agent_id from a hash of the resolved repo root path
      (deterministic, replayable)
    - ISO-8601 timestamp addition
    - Schema validation against server expectations
    - Wrapping in {"summary": {...}} envelope
    """

    REQUIRED_FIELDS = {"agent_id", "timestamp", "topic_counts", "fact_hashes"}

    @staticmethod
    def generate_agent_id(repo_root: Path) -> str:
        """Generate a deterministic agent_id from the repository location.

        Uses the SHA-256 hash of the resolved repo root *path* (not the
        repository's contents), so the same location always yields the same
        agent_id while different locations yield different ones.

        Args:
            repo_root: Path to the repository root. A ``str`` or other
                ``os.PathLike`` is accepted and converted to ``Path``.

        Returns:
            Unique agent identifier in format: "agent-<first-16-chars-of-hash>"
        """
        resolved = Path(repo_root).resolve()
        repo_hash = hashlib.sha256(str(resolved).encode()).hexdigest()[:16]
        return f"agent-{repo_hash}"

    @staticmethod
    def build(
        repo_root: Path,
        raw_summary: Dict[str, Any],
        strict_mode: bool = False,
    ) -> Dict[str, Any]:
        """Build protocol-compliant summary from raw produce_local_summary output.

        Transforms the client's produce_local_summary() output into the format
        expected by the server's PushRequest model.

        Args:
            repo_root: Path to repository root (used for agent_id generation)
            raw_summary: Output from produce_local_summary()
            strict_mode: If True, raise on missing raw fields or on a schema
                validation error. If False, print a warning for schema errors
                and return the summary anyway. In both modes a non-dict
                ``topics`` is replaced by ``{}`` and a non-dict
                ``topic_hashes`` is ignored.

        Returns:
            Dict with structure: {"summary": {"agent_id": "...", "timestamp": "...",
            "topic_counts": {...}, "fact_hashes": [...]}}

        Raises:
            SchemaValidationError: If strict_mode=True and schema validation fails
        """
        # In strict mode, validate raw input has required fields BEFORE transformation
        if strict_mode:
            required_raw_fields = {"memory_types", "topics", "topic_hashes", "fact_count"}
            missing = required_raw_fields - set(raw_summary.keys())
            if missing:
                raise SchemaValidationError(
                    f"Raw summary missing required fields: {', '.join(sorted(missing))}"
                )

        # Generate required fields
        agent_id = ClientSummaryBuilder.generate_agent_id(repo_root)
        timestamp = datetime.now(timezone.utc).isoformat()

        # Transform key names and structure
        topic_counts = raw_summary.get("topics", {})
        if not isinstance(topic_counts, dict):
            topic_counts = {}

        # Prefer the real hashes from topic_hashes: flatten every per-topic
        # list into one list, skipping values that are not lists.
        fact_hashes: List[str] = []
        topic_hashes = raw_summary.get("topic_hashes")
        if isinstance(topic_hashes, dict):
            for topic_hash_list in topic_hashes.values():
                if isinstance(topic_hash_list, list):
                    fact_hashes.extend(topic_hash_list)

        # If fact_hashes is still empty but we have fact_count, generate placeholder hashes
        if not fact_hashes and "fact_count" in raw_summary:
            fact_count = raw_summary["fact_count"]
            if isinstance(fact_count, int):
                # Placeholders are sha256("fact-<i>"); they only preserve the
                # count (in real scenario, client would preserve actual hashes)
                fact_hashes = [
                    hashlib.sha256(f"fact-{i}".encode()).hexdigest() for i in range(fact_count)
                ]

        # Build AgentSummary structure
        agent_summary = {
            "agent_id": agent_id,
            "timestamp": timestamp,
            "topic_counts": topic_counts,
            "fact_hashes": fact_hashes,
        }

        # Validate schema
        errors = ClientSummaryBuilder._validate_schema(agent_summary)
        if errors:
            error_msg = "Schema validation failed:\n" + "\n".join(f" - {e}" for e in errors)
            if strict_mode:
                raise SchemaValidationError(error_msg)
            print(f"Warning: {error_msg}")

        # Return wrapped in envelope
        return {"summary": agent_summary}

    @staticmethod
    def _validate_schema(agent_summary: Dict[str, Any]) -> List[str]:
        """Validate agent_summary against expected schema.

        Args:
            agent_summary: The summary dict to validate

        Returns:
            List of error messages (empty if valid). Missing-field errors come
            first in alphabetical field order, followed by type errors for
            agent_id, timestamp, topic_counts and fact_hashes in that order.
        """
        errors: List[str] = []

        # Check required fields; sorted so the message order is reproducible
        # rather than following set iteration order.
        for name in sorted(ClientSummaryBuilder.REQUIRED_FIELDS):
            if name not in agent_summary:
                errors.append(f"Missing required field: {name}")

        # Validate field types
        if "agent_id" in agent_summary and not isinstance(agent_summary["agent_id"], str):
            errors.append(f"agent_id must be string, got {type(agent_summary['agent_id'])}")

        if "timestamp" in agent_summary:
            ts = agent_summary["timestamp"]
            if not isinstance(ts, str):
                errors.append(f"timestamp must be string, got {type(ts)}")
            # Validate ISO-8601 format
            elif not _is_iso8601(ts):
                errors.append(f"timestamp not in ISO-8601 format: {ts}")

        if "topic_counts" in agent_summary:
            tc = agent_summary["topic_counts"]
            if not isinstance(tc, dict):
                errors.append(f"topic_counts must be dict, got {type(tc)}")
            else:
                for k, v in tc.items():
                    if not isinstance(k, str):
                        errors.append(f"topic_counts key must be string, got {type(k)}")
                    if not isinstance(v, int):
                        errors.append(f"topic_counts value must be int, got {type(v)}")

        if "fact_hashes" in agent_summary:
            fh = agent_summary["fact_hashes"]
            if not isinstance(fh, list):
                errors.append(f"fact_hashes must be list, got {type(fh)}")
            else:
                for h in fh:
                    if not isinstance(h, str):
                        errors.append(f"fact_hashes element must be string, got {type(h)}")

        return errors
186
+
187
+
188
+ def _is_iso8601(timestamp: str) -> bool:
189
+ """Check if timestamp is in ISO-8601 format."""
190
+ try:
191
+ # Try parsing with common ISO-8601 formats
192
+ if timestamp.endswith("Z"):
193
+ datetime.fromisoformat(timestamp.replace("Z", "+00:00"))
194
+ else:
195
+ datetime.fromisoformat(timestamp)
196
+ return True
197
+ except (ValueError, TypeError):
198
+ return False
File without changes