agmem 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {agmem-0.2.0.dist-info → agmem-0.3.0.dist-info}/METADATA +338 -26
- {agmem-0.2.0.dist-info → agmem-0.3.0.dist-info}/RECORD +32 -16
- memvcs/__init__.py +1 -1
- memvcs/cli.py +1 -1
- memvcs/coordinator/server.py +18 -2
- memvcs/core/agents.py +411 -0
- memvcs/core/archaeology.py +410 -0
- memvcs/core/collaboration.py +435 -0
- memvcs/core/compliance.py +427 -0
- memvcs/core/compression_metrics.py +248 -0
- memvcs/core/confidence.py +379 -0
- memvcs/core/daemon.py +735 -0
- memvcs/core/delta.py +45 -23
- memvcs/core/distiller.py +3 -12
- memvcs/core/fast_similarity.py +404 -0
- memvcs/core/federated.py +13 -2
- memvcs/core/gardener.py +8 -68
- memvcs/core/pack.py +1 -1
- memvcs/core/privacy_validator.py +187 -0
- memvcs/core/private_search.py +327 -0
- memvcs/core/protocol_builder.py +198 -0
- memvcs/core/search_index.py +538 -0
- memvcs/core/semantic_graph.py +388 -0
- memvcs/core/session.py +520 -0
- memvcs/core/timetravel.py +430 -0
- memvcs/integrations/mcp_server.py +775 -4
- memvcs/integrations/web_ui/server.py +424 -0
- memvcs/integrations/web_ui/websocket.py +223 -0
- {agmem-0.2.0.dist-info → agmem-0.3.0.dist-info}/WHEEL +0 -0
- {agmem-0.2.0.dist-info → agmem-0.3.0.dist-info}/entry_points.txt +0 -0
- {agmem-0.2.0.dist-info → agmem-0.3.0.dist-info}/licenses/LICENSE +0 -0
- {agmem-0.2.0.dist-info → agmem-0.3.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Privacy field validation and auditing.
|
|
3
|
+
|
|
4
|
+
Ensures differential privacy noise is only applied to fact data, not metadata.
|
|
5
|
+
Prevents accidental privacy overhead on metadata fields and provides audit trail.
|
|
6
|
+
|
|
7
|
+
Provides:
|
|
8
|
+
- @privacy_exempt: Decorator to mark metadata fields as privacy-exempt
|
|
9
|
+
- PrivacyFieldValidator: Runtime validation that noise is applied correctly
|
|
10
|
+
- PrivacyAuditReport: Audit trail of which fields received noise
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from typing import Any, Callable, Dict, List, Optional, Set
|
|
14
|
+
from functools import wraps
|
|
15
|
+
from dataclasses import dataclass, field
|
|
16
|
+
from datetime import datetime, timezone
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
class PrivacyAuditReport:
    """Audit trail of which fields received DP noise and which were exempt."""

    # ISO-8601 timestamp of when the audit began.
    timestamp: str
    # field name -> value, for fields that received noise.
    noised_fields: Dict[str, Any] = field(default_factory=dict)
    # field name -> value, for fields exempt from noise.
    exempt_fields: Dict[str, Any] = field(default_factory=dict)
    # Human-readable descriptions of any validation failures.
    validation_errors: List[str] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dict for logging/serialization."""
        summary = {
            "total_noised": len(self.noised_fields),
            "total_exempt": len(self.exempt_fields),
            "validation_passed": not self.validation_errors,
        }
        payload: Dict[str, Any] = {
            "timestamp": self.timestamp,
            "noised_fields": self.noised_fields,
            "exempt_fields": self.exempt_fields,
            "validation_errors": self.validation_errors,
        }
        payload["summary"] = summary
        return payload
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class PrivacyFieldValidator:
    """Validates that privacy noise is applied correctly.

    Tracks which fields receive noise vs. are exempt from noise.
    Fails loudly if noise is applied to exempt fields.
    """

    # Metadata fields that must NEVER receive noise: they describe the
    # processing run itself, not any individual fact.
    EXEMPT_FIELDS = {
        "clusters_found",  # Metadata: count of clusters, not individual facts
        "insights_generated",  # Metadata: count of insights generated
        "episodes_archived",  # Metadata: count of archived episodes
        "confidence_score",  # Metadata: overall quality metric, not a fact
        "summary_version",  # Metadata: schema version
        "created_at",  # Metadata: timestamp
        "updated_at",  # Metadata: timestamp
        "agent_version",  # Metadata: software version
    }

    # Fact-bearing fields that SHOULD receive noise before release.
    FACT_FIELDS = {
        "facts",  # List of actual facts
        "memories",  # Memory content
        "semantic_content",  # Semantic memory content
        "episodic_content",  # Episodic memory content
        "procedural_content",  # Procedural memory content
        "embeddings",  # Vector representations of facts
        "fact_count",  # Count of individual facts (not metadata)
        "memory_count",  # Count of individual memories
    }

    def __init__(self):
        # One report per validator instance, stamped at creation time.
        self.audit_report = PrivacyAuditReport(timestamp=datetime.now(timezone.utc).isoformat())

    def validate_noised_field(
        self, field_name: str, field_value: Any, is_noised: bool = True
    ) -> None:
        """Validate that noise application is correct for a field.

        Args:
            field_name: Name of the field
            field_value: Value of the field
            is_noised: Whether noise was applied to this field

        Raises:
            RuntimeError: If noise is applied to an exempt field
        """
        if is_noised and field_name in self.EXEMPT_FIELDS:
            error = (
                f"ERROR: Noise applied to exempt metadata field '{field_name}'. "
                f"Metadata fields do not reveal individual facts and should not receive noise. "
                f"Remove noise from: {field_name}"
            )
            # Record the violation before failing, so the report is complete.
            self.audit_report.validation_errors.append(error)
            raise RuntimeError(error)

        bucket = (
            self.audit_report.noised_fields
            if is_noised
            else self.audit_report.exempt_fields
        )
        bucket[field_name] = field_value

    def validate_result_dict(self, result: Dict[str, Any]) -> None:
        """Validate a result dict (e.g., DistillerResult or GardenerResult).

        Records every exempt field present in ``result`` in the audit report.

        Args:
            result: The result dict to validate
        """
        for name in self.EXEMPT_FIELDS & set(result):
            # These fields should not have been noised; log their values.
            self.audit_report.exempt_fields[name] = result[name]

    def get_report(self) -> PrivacyAuditReport:
        """Get the audit report, printing any accumulated validation errors."""
        errors = self.audit_report.validation_errors
        if errors:
            body = "\n".join(f"  {e}" for e in errors)
            print("Privacy Validation Report:\n" + body)
        return self.audit_report
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def privacy_exempt(func: Callable) -> Callable:
    """Decorator to mark a function as privacy-exempt.

    The decorated function should not apply DP noise to its result.
    Used to document which functions are exempt from privacy operations.

    Example:
        @privacy_exempt
        def get_metadata() -> Dict[str, Any]:
            return {"clusters_found": 42, "created_at": "2024-01-01T00:00:00Z"}
    """

    @wraps(func)
    def wrapped(*args, **kwargs):
        outcome = func(*args, **kwargs)
        # Tag dict results in-band so downstream consumers can detect exemption.
        if isinstance(outcome, dict):
            outcome["_privacy_exempt"] = True
        return outcome

    # Out-of-band marker on the wrapper object itself.
    wrapped._privacy_exempt_function = True
    return wrapped
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
class PrivacyGuard:
    """Context manager and decorator for privacy-aware code blocks.

    Usage:
        with PrivacyGuard() as pg:
            result = process_facts(data)
            pg.mark_noised("fact_count")
    """

    def __init__(self, strict: bool = True):
        # strict=True routes mark_noised() through full validation, which
        # raises RuntimeError when noise hits an exempt metadata field.
        self.strict = strict
        self.validator = PrivacyFieldValidator()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Never suppress exceptions raised inside the guarded block.
        return exc_type is None

    def mark_noised(self, field_name: str, value: Any = None) -> None:
        """Mark a field as having received DP noise."""
        if not self.strict:
            # Lenient mode: record without exempt-field enforcement.
            self.validator.audit_report.noised_fields[field_name] = value
            return
        self.validator.validate_noised_field(field_name, value, is_noised=True)

    def mark_exempt(self, field_name: str, value: Any = None) -> None:
        """Mark a field as exempt from DP noise."""
        self.validator.audit_report.exempt_fields[field_name] = value

    def get_report(self) -> PrivacyAuditReport:
        """Get the privacy audit report."""
        return self.validator.get_report()
|
|
@@ -0,0 +1,327 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Privacy-Preserving Search - Secure search with encryption and differential privacy.
|
|
3
|
+
|
|
4
|
+
This module provides:
|
|
5
|
+
- Encrypted search indices
|
|
6
|
+
- Differential privacy for queries
|
|
7
|
+
- Access control integration
|
|
8
|
+
- Secure search result handling
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import hashlib
import hmac
import json
import os
import re
import secrets
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional, Set, Tuple
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass
class SearchQuery:
    """A search query with privacy metadata."""

    query: str
    requester_id: str
    privacy_level: str = "normal"  # "public", "normal", "sensitive", "secret"
    max_results: int = 10
    include_content: bool = False

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the query fields to a plain dict."""
        names = ("query", "requester_id", "privacy_level", "max_results", "include_content")
        return {name: getattr(self, name) for name in names}
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@dataclass
class SecureSearchResult:
    """A search result with privacy handling."""

    path: str
    score: float
    snippet: Optional[str] = None
    accessed_at: Optional[str] = None
    privacy_level: str = "normal"
    redacted: bool = False

    def to_dict(self) -> Dict[str, Any]:
        """Serialize for clients.

        NOTE: ``accessed_at`` is deliberately not included in the
        serialized form (kept internal to the result object).
        """
        serialized: Dict[str, Any] = {"path": self.path, "score": self.score}
        serialized["snippet"] = self.snippet
        serialized["privacy_level"] = self.privacy_level
        serialized["redacted"] = self.redacted
        return serialized
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class SearchTokenizer:
    """Tokenizes and hashes search terms for privacy.

    Tokens are lowercased word sequences of length >= 3; hashing uses keyed
    HMAC-SHA256 so indices never store plaintext terms ("blind" search).
    """

    # Compiled once at class load. The original re-imported ``re`` and
    # re-looked-up the pattern on every call, inside the per-file search loop.
    _WORD_RE = re.compile(r"\b\w+\b")

    def __init__(self, secret_key: Optional[bytes] = None):
        # A fresh random key is generated when none is supplied, so hashes
        # from different instances are not comparable by default.
        self.secret_key = secret_key or secrets.token_bytes(32)

    def tokenize(self, text: str) -> List[str]:
        """Tokenize text into lowercase search terms of length >= 3."""
        words = self._WORD_RE.findall(text.lower())
        return [w for w in words if len(w) >= 3]

    def hash_token(self, token: str) -> str:
        """Create a keyed hash of a token for blind search.

        Returns the first 16 hex chars of HMAC-SHA256(secret_key, token).
        """
        return hmac.new(self.secret_key, token.encode(), hashlib.sha256).hexdigest()[:16]

    def tokenize_and_hash(self, text: str) -> List[str]:
        """Tokenize and hash all terms."""
        return [self.hash_token(t) for t in self.tokenize(text)]
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class AccessControl:
    """Controls access to search results based on permissions.

    ACL entries are persisted as JSON in ``<mem_dir>/search_acl.json``,
    mapping path -> {"allowed_users": [...], "privacy_level": ..., "updated_at": ...}.
    """

    def __init__(self, mem_dir: Path):
        self.mem_dir = Path(mem_dir)
        self.acl_file = self.mem_dir / "search_acl.json"
        self._acl: Dict[str, Dict[str, Any]] = {}
        self._load()

    def _load(self) -> None:
        """Load ACL from disk (best-effort: an unreadable file yields an empty ACL)."""
        if self.acl_file.exists():
            try:
                self._acl = json.loads(self.acl_file.read_text())
            except (OSError, json.JSONDecodeError):
                # Narrowed from a bare ``except Exception`` so programming
                # errors surface instead of being swallowed.
                # NOTE(review): a corrupt ACL file still fails OPEN (empty ACL
                # means public access in can_access) — confirm this is intended.
                pass

    def _save(self) -> None:
        """Save ACL to disk, creating the directory if needed."""
        self.mem_dir.mkdir(parents=True, exist_ok=True)
        self.acl_file.write_text(json.dumps(self._acl, indent=2))

    def set_file_access(
        self,
        path: str,
        allowed_users: List[str],
        privacy_level: str = "normal",
    ) -> None:
        """Set access control for a file and persist it immediately."""
        self._acl[path] = {
            "allowed_users": allowed_users,
            "privacy_level": privacy_level,
            "updated_at": datetime.now(timezone.utc).isoformat(),
        }
        self._save()

    def can_access(self, path: str, user_id: str, user_level: str = "normal") -> bool:
        """Check if a user can access a file.

        Files with no ACL entry are public. Otherwise the user must appear in
        ``allowed_users`` (when that list is non-empty) AND hold a privacy
        level at least as high as the file's (public < normal < sensitive < secret).
        Unknown levels on either side deny access.
        """
        acl = self._acl.get(path)
        if not acl:
            return True  # No ACL = public access

        # Check explicit user list (empty list = no user restriction).
        if acl.get("allowed_users") and user_id not in acl["allowed_users"]:
            return False

        # Check privacy level ordering.
        level_order = ["public", "normal", "sensitive", "secret"]
        file_level = acl.get("privacy_level", "normal")
        try:
            return level_order.index(user_level) >= level_order.index(file_level)
        except ValueError:
            # Unrecognized level string -> fail closed.
            return False

    def get_file_acl(self, path: str) -> Optional[Dict[str, Any]]:
        """Get the raw ACL entry for a file, or None if unset."""
        return self._acl.get(path)
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
class DifferentialPrivacyNoise:
    """Adds differential privacy noise to search results."""

    def __init__(self, epsilon: float = 0.1):
        # Smaller epsilon = stronger privacy = larger noise scale.
        self.epsilon = epsilon

    def add_laplace_noise(self, value: float, sensitivity: float = 1.0) -> float:
        """Add Laplace(0, sensitivity/epsilon) noise for differential privacy.

        BUGFIX: the previous formula omitted the logarithm of the Laplace
        inverse CDF, so the "noise" was uniformly bounded in [-scale, scale]
        rather than Laplace-distributed — which voids the epsilon-DP
        guarantee of the Laplace mechanism. A Laplace(0, b) sample is
        generated here as the difference of two i.i.d. Exponential(1/b)
        samples.
        """
        import random

        scale = sensitivity / self.epsilon
        noise = random.expovariate(1.0 / scale) - random.expovariate(1.0 / scale)
        return value + noise

    def randomize_order(self, results: List[Any], threshold: float = 0.8) -> List[Any]:
        """Randomly reorder similar results to add privacy.

        Consecutive results whose scores differ by less than ``threshold``
        form a group; each group is shuffled in place, preserving the
        relative order of distinct groups.
        """
        import random

        # Group consecutive results by similar scores.
        groups: List[List[Any]] = []
        current_group: List[Any] = []
        prev_score = None

        for r in results:
            # Results may be objects with a .score attribute or dicts.
            score = getattr(r, "score", 0) if hasattr(r, "score") else r.get("score", 0)
            if prev_score is None or abs(score - prev_score) < threshold:
                current_group.append(r)
            else:
                if current_group:
                    groups.append(current_group)
                current_group = [r]
            prev_score = score

        if current_group:
            groups.append(current_group)

        # Shuffle within groups only, so rough ranking is preserved.
        reordered = []
        for group in groups:
            random.shuffle(group)
            reordered.extend(group)

        return reordered

    def truncate_snippets(self, snippet: str, max_len: int = 100) -> str:
        """Truncate snippets to limit information leakage.

        Prefers breaking at a space within the last 20 chars before max_len.
        """
        if len(snippet) <= max_len:
            return snippet

        break_point = snippet.rfind(" ", max_len - 20, max_len)
        if break_point == -1:
            break_point = max_len

        return snippet[:break_point] + "..."
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
class PrivateSearchEngine:
    """Privacy-preserving search engine.

    Combines token-based scoring, per-file ACL checks, and differential
    privacy measures (result reordering, snippet truncation) over the files
    under ``current_dir``.
    """

    def __init__(self, mem_dir: Path, current_dir: Path):
        self.mem_dir = Path(mem_dir)
        self.current_dir = Path(current_dir)
        self.tokenizer = SearchTokenizer()
        self.access_control = AccessControl(mem_dir)
        self.dp_noise = DifferentialPrivacyNoise(epsilon=0.1)
        # In-memory audit log of hashed queries (see _log_query).
        self.query_log: List[Dict[str, Any]] = []

    def search(self, query: SearchQuery) -> List[SecureSearchResult]:
        """Perform a privacy-preserving search.

        Scores each readable file by the fraction of query tokens it
        contains, emits redacted results for files the requester cannot
        access, randomizes the order of similarly-scored results, logs a
        hash of the query, and returns at most ``query.max_results``.
        """
        results: List[SecureSearchResult] = []
        query_tokens = self.tokenizer.tokenize(query.query)

        for filepath in self.current_dir.rglob("*"):
            if not filepath.is_file():
                continue

            rel_path = str(filepath.relative_to(self.current_dir))

            try:
                content = filepath.read_text(encoding="utf-8", errors="replace")
                # PERF: membership tests against a set are O(1); the original
                # tested each query token against a token *list*, making
                # scoring O(len(query_tokens) * len(content_tokens)) per file.
                content_tokens = set(self.tokenizer.tokenize(content))

                matches = sum(1 for t in query_tokens if t in content_tokens)
                if matches == 0:
                    continue

                score = matches / len(query_tokens)

                can_access = self.access_control.can_access(
                    rel_path, query.requester_id, query.privacy_level
                )

                if not can_access:
                    # Reveal only that a match exists — never its content.
                    results.append(
                        SecureSearchResult(
                            path=rel_path,
                            score=score,
                            snippet=None,
                            privacy_level=query.privacy_level,
                            redacted=True,
                        )
                    )
                else:
                    snippet = (
                        self._extract_snippet(content, query.query)
                        if query.include_content
                        else None
                    )
                    results.append(
                        SecureSearchResult(
                            path=rel_path,
                            score=score,
                            snippet=snippet,
                            privacy_level=query.privacy_level,
                            redacted=False,
                            accessed_at=datetime.now(timezone.utc).isoformat(),
                        )
                    )
            except Exception:
                # Best-effort per file: unreadable files are silently skipped
                # so a single bad file cannot break the whole search.
                pass

        # Rank by score, then perturb the order of near-ties so exact
        # rankings leak less information.
        results.sort(key=lambda r: r.score, reverse=True)
        results = self.dp_noise.randomize_order(results[: query.max_results * 2])

        self._log_query(query, len(results))

        return results[: query.max_results]

    def _extract_snippet(self, content: str, raw_query: str) -> Optional[str]:
        """Return a truncated snippet around the first literal query match, or None.

        NOTE: matching here is on the raw query string, not tokens, so a file
        can score on tokens yet yield no snippet.
        """
        idx = content.lower().find(raw_query.lower())
        if idx < 0:
            return None
        start = max(0, idx - 50)
        end = min(len(content), idx + len(raw_query) + 50)
        return self.dp_noise.truncate_snippets(content[start:end])

    def _log_query(self, query: SearchQuery, result_count: int) -> None:
        """Log query for auditing (stores a hash prefix, not the full query)."""
        self.query_log.append(
            {
                "query_hash": hashlib.sha256(query.query.encode()).hexdigest()[:8],
                "requester": query.requester_id,
                "result_count": result_count,
                "timestamp": datetime.now(timezone.utc).isoformat(),
            }
        )

    def get_query_stats(self) -> Dict[str, Any]:
        """Get query statistics: total count plus the ten most recent entries."""
        return {
            "total_queries": len(self.query_log),
            "recent_queries": self.query_log[-10:],
        }
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
# --- Dashboard Helper ---
|
|
317
|
+
|
|
318
|
+
|
|
319
|
+
def get_private_search_stats(mem_dir: Path, current_dir: Path) -> Dict[str, Any]:
    """Get private search statistics for the dashboard.

    Args:
        mem_dir: Directory holding the search ACL file.
        current_dir: Directory of searchable content.

    Returns:
        Dict with the engine's query stats and the number of ACL entries.
    """
    engine = PrivateSearchEngine(mem_dir, current_dir)
    # Reuse the engine's AccessControl instead of constructing a second one,
    # which re-read the same search_acl.json from disk for no benefit.
    # NOTE(review): this still reaches into the private ``_acl`` mapping;
    # consider exposing a public count on AccessControl.
    return {
        "query_stats": engine.get_query_stats(),
        "acl_count": len(engine.access_control._acl),
    }
|