agmem 0.2.1__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {agmem-0.2.1.dist-info → agmem-0.3.0.dist-info}/METADATA +338 -27
- {agmem-0.2.1.dist-info → agmem-0.3.0.dist-info}/RECORD +21 -9
- memvcs/core/agents.py +411 -0
- memvcs/core/archaeology.py +410 -0
- memvcs/core/collaboration.py +435 -0
- memvcs/core/compliance.py +427 -0
- memvcs/core/confidence.py +379 -0
- memvcs/core/daemon.py +735 -0
- memvcs/core/delta.py +45 -23
- memvcs/core/private_search.py +327 -0
- memvcs/core/search_index.py +538 -0
- memvcs/core/semantic_graph.py +388 -0
- memvcs/core/session.py +520 -0
- memvcs/core/timetravel.py +430 -0
- memvcs/integrations/mcp_server.py +775 -4
- memvcs/integrations/web_ui/server.py +424 -0
- memvcs/integrations/web_ui/websocket.py +223 -0
- {agmem-0.2.1.dist-info → agmem-0.3.0.dist-info}/WHEEL +0 -0
- {agmem-0.2.1.dist-info → agmem-0.3.0.dist-info}/entry_points.txt +0 -0
- {agmem-0.2.1.dist-info → agmem-0.3.0.dist-info}/licenses/LICENSE +0 -0
- {agmem-0.2.1.dist-info → agmem-0.3.0.dist-info}/top_level.txt +0 -0
memvcs/core/delta.py
CHANGED
|
@@ -9,8 +9,11 @@ This can achieve 5-10x compression improvement for highly similar content
|
|
|
9
9
|
"""
|
|
10
10
|
|
|
11
11
|
import hashlib
|
|
12
|
+
from collections import defaultdict
|
|
12
13
|
from typing import List, Tuple, Dict, Optional
|
|
13
14
|
|
|
15
|
+
from memvcs.core.fast_similarity import FastSimilarityMatcher
|
|
16
|
+
|
|
14
17
|
|
|
15
18
|
def levenshtein_distance(s1: bytes, s2: bytes) -> int:
|
|
16
19
|
"""
|
|
@@ -75,34 +78,53 @@ def find_similar_objects(
|
|
|
75
78
|
"""
|
|
76
79
|
candidates = {h: content for h, content in objects.items() if len(content) >= min_size}
|
|
77
80
|
|
|
78
|
-
if
|
|
81
|
+
if len(candidates) < 2:
|
|
79
82
|
return []
|
|
80
83
|
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
84
|
+
use_parallel = len(candidates) > 10
|
|
85
|
+
max_len = max(len(content) for content in candidates.values())
|
|
86
|
+
simhash_threshold = 64 if max_len < 256 else 15
|
|
87
|
+
matcher = FastSimilarityMatcher(
|
|
88
|
+
length_ratio_threshold=0.5,
|
|
89
|
+
simhash_threshold=simhash_threshold,
|
|
90
|
+
min_similarity=similarity_threshold,
|
|
91
|
+
use_parallel=use_parallel,
|
|
92
|
+
max_workers=None,
|
|
93
|
+
)
|
|
94
|
+
|
|
95
|
+
similar_pairs = matcher.find_similar_pairs(candidates)
|
|
96
|
+
if not similar_pairs:
|
|
97
|
+
return []
|
|
94
98
|
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
+
graph: Dict[str, set] = defaultdict(set)
|
|
100
|
+
for id1, id2, _score in similar_pairs:
|
|
101
|
+
graph[id1].add(id2)
|
|
102
|
+
graph[id2].add(id1)
|
|
99
103
|
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
group.sort(key=lambda h: len(candidates[h]))
|
|
103
|
-
grouped[group[0]] = group
|
|
104
|
+
groups: List[List[str]] = []
|
|
105
|
+
visited = set()
|
|
104
106
|
|
|
105
|
-
|
|
107
|
+
for node in graph:
|
|
108
|
+
if node in visited:
|
|
109
|
+
continue
|
|
110
|
+
stack = [node]
|
|
111
|
+
component = []
|
|
112
|
+
visited.add(node)
|
|
113
|
+
|
|
114
|
+
while stack:
|
|
115
|
+
current = stack.pop()
|
|
116
|
+
component.append(current)
|
|
117
|
+
for neighbor in graph[current]:
|
|
118
|
+
if neighbor in visited:
|
|
119
|
+
continue
|
|
120
|
+
visited.add(neighbor)
|
|
121
|
+
stack.append(neighbor)
|
|
122
|
+
|
|
123
|
+
if len(component) > 1:
|
|
124
|
+
component.sort(key=lambda h: len(candidates[h]))
|
|
125
|
+
groups.append(component)
|
|
126
|
+
|
|
127
|
+
return groups
|
|
106
128
|
|
|
107
129
|
|
|
108
130
|
def compute_delta(base: bytes, target: bytes) -> bytes:
|
|
@@ -0,0 +1,327 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Privacy-Preserving Search - Secure search with encryption and differential privacy.
|
|
3
|
+
|
|
4
|
+
This module provides:
|
|
5
|
+
- Encrypted search indices
|
|
6
|
+
- Differential privacy for queries
|
|
7
|
+
- Access control integration
|
|
8
|
+
- Secure search result handling
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import hashlib
|
|
12
|
+
import hmac
|
|
13
|
+
import json
|
|
14
|
+
import os
|
|
15
|
+
import secrets
|
|
16
|
+
from dataclasses import dataclass, field
|
|
17
|
+
from datetime import datetime, timezone
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
from typing import Any, Dict, List, Optional, Set, Tuple
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass
class SearchQuery:
    """A search query together with the privacy metadata needed to evaluate it.

    ``privacy_level`` is the requester's clearance, one of
    "public", "normal", "sensitive" or "secret".
    """

    query: str
    requester_id: str
    privacy_level: str = "normal"  # "public", "normal", "sensitive", "secret"
    max_results: int = 10
    include_content: bool = False

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the query to a plain dict (one key per field)."""
        exported = (
            "query",
            "requester_id",
            "privacy_level",
            "max_results",
            "include_content",
        )
        return {name: getattr(self, name) for name in exported}
+
|
|
42
|
+
@dataclass
class SecureSearchResult:
    """A single search hit with privacy handling applied.

    When ``redacted`` is True the requester was denied access: the hit is
    still reported, but ``snippet`` is withheld.
    """

    path: str
    score: float
    snippet: Optional[str] = None
    accessed_at: Optional[str] = None
    privacy_level: str = "normal"
    redacted: bool = False

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the result to a plain dict.

        NOTE(review): ``accessed_at`` is not included in the output —
        presumably intentional (audit-only field); confirm with callers.
        """
        out: Dict[str, Any] = {}
        out["path"] = self.path
        out["score"] = self.score
        out["snippet"] = self.snippet
        out["privacy_level"] = self.privacy_level
        out["redacted"] = self.redacted
        return out
+
|
|
63
|
+
class SearchTokenizer:
    """Tokenizes search terms and hashes them with a secret key.

    The keyed hashes allow an index to be queried without exposing the raw
    terms ("blind" search tokens).
    """

    def __init__(self, secret_key: Optional[bytes] = None):
        # A fresh random 32-byte key is generated when none is supplied.
        self.secret_key = secret_key if secret_key else secrets.token_bytes(32)

    def tokenize(self, text: str) -> List[str]:
        """Split *text* into lowercase word tokens at least 3 chars long."""
        import re

        return [word for word in re.findall(r"\b\w+\b", text.lower()) if len(word) >= 3]

    def hash_token(self, token: str) -> str:
        """Return a 16-hex-char keyed digest (HMAC-SHA256) of *token*."""
        digest = hmac.new(self.secret_key, token.encode(), hashlib.sha256)
        return digest.hexdigest()[:16]

    def tokenize_and_hash(self, text: str) -> List[str]:
        """Tokenize *text* and return the keyed hash of every token."""
        return [self.hash_token(token) for token in self.tokenize(text)]
|
|
86
|
+
|
|
87
|
+
class AccessControl:
    """Per-file access control for search results.

    ACL entries are stored as JSON in ``<mem_dir>/search_acl.json``.  A file
    with no entry is treated as publicly accessible.
    """

    # Privacy levels ordered from least to most restricted.
    _LEVEL_ORDER = ["public", "normal", "sensitive", "secret"]

    def __init__(self, mem_dir: Path):
        self.mem_dir = Path(mem_dir)
        self.acl_file = self.mem_dir / "search_acl.json"
        # Maps relative file path -> {"allowed_users", "privacy_level", "updated_at"}.
        self._acl: Dict[str, Dict[str, Any]] = {}
        self._load()

    def _load(self) -> None:
        """Load the ACL from disk; a missing or corrupt file yields an empty ACL."""
        if self.acl_file.exists():
            try:
                self._acl = json.loads(self.acl_file.read_text())
            except (OSError, ValueError):
                # Best-effort: an unreadable or corrupt ACL file falls back to
                # "no ACL" (everything public) rather than blocking searches.
                # Narrowed from a bare `except Exception: pass`; JSONDecodeError
                # is a ValueError subclass.
                self._acl = {}

    def _save(self) -> None:
        """Persist the ACL to disk, creating the memory directory if needed."""
        self.mem_dir.mkdir(parents=True, exist_ok=True)
        self.acl_file.write_text(json.dumps(self._acl, indent=2))

    def set_file_access(
        self,
        path: str,
        allowed_users: List[str],
        privacy_level: str = "normal",
    ) -> None:
        """Restrict *path* to *allowed_users* at the given privacy level."""
        self._acl[path] = {
            # Defensive copy so later caller-side mutation can't alter the ACL.
            "allowed_users": list(allowed_users),
            "privacy_level": privacy_level,
            "updated_at": datetime.now(timezone.utc).isoformat(),
        }
        self._save()

    def can_access(self, path: str, user_id: str, user_level: str = "normal") -> bool:
        """Return True if *user_id* (with clearance *user_level*) may see *path*.

        A file without an ACL entry is public.  Otherwise the user must be in
        the explicit allow-list (when one is set) AND hold a clearance at
        least as high as the file's privacy level.  Unknown levels fail closed.
        """
        acl = self._acl.get(path)
        if not acl:
            return True  # no ACL entry = public access

        allowed = acl.get("allowed_users")
        if allowed and user_id not in allowed:
            return False

        file_level = acl.get("privacy_level", "normal")
        try:
            return self._LEVEL_ORDER.index(user_level) >= self._LEVEL_ORDER.index(file_level)
        except ValueError:
            # Unrecognized level on either side: deny.
            return False

    def get_file_acl(self, path: str) -> Optional[Dict[str, Any]]:
        """Return the raw ACL entry for *path*, or None if unrestricted."""
        return self._acl.get(path)
|
|
149
|
+
|
|
150
|
+
class DifferentialPrivacyNoise:
    """Adds differential-privacy-style noise to search results.

    Smaller ``epsilon`` means more noise (stronger privacy).
    """

    def __init__(self, epsilon: float = 0.1):
        self.epsilon = epsilon

    def add_laplace_noise(self, value: float, sensitivity: float = 1.0) -> float:
        """Return *value* plus Laplace(0, sensitivity/epsilon) noise.

        Uses inverse-CDF sampling: for u ~ Uniform(-0.5, 0.5),
        noise = -scale * sign(u) * ln(1 - 2|u|).

        Fix: the previous version omitted the logarithm, which yields
        bounded noise in [-scale, scale] rather than a Laplace distribution,
        undermining the differential-privacy guarantee.
        """
        import math
        import random

        scale = sensitivity / self.epsilon
        u = random.random() - 0.5
        # Guard the (measure-zero) u == -0.5 case, where log(0) would blow up.
        magnitude = max(1e-12, 1.0 - 2.0 * abs(u))
        noise = -scale * (1 if u >= 0 else -1) * math.log(magnitude)
        return value + noise

    def randomize_order(self, results: List[Any], threshold: float = 0.8) -> List[Any]:
        """Shuffle runs of adjacent results whose scores are within *threshold*.

        Near-tied results are shuffled as a group so the exact ranking
        within a tie is not leaked.
        """
        import random

        def _score(r: Any) -> float:
            # Results may be objects with a .score attribute or plain dicts.
            return r.score if hasattr(r, "score") else r.get("score", 0)

        groups: List[List[Any]] = []
        current_group: List[Any] = []
        prev_score = None

        for r in results:
            score = _score(r)
            if prev_score is None or abs(score - prev_score) < threshold:
                current_group.append(r)
            else:
                if current_group:
                    groups.append(current_group)
                current_group = [r]
            prev_score = score

        if current_group:
            groups.append(current_group)

        # Shuffle within each near-tied group, preserving group order.
        reordered: List[Any] = []
        for group in groups:
            random.shuffle(group)
            reordered.extend(group)
        return reordered

    def truncate_snippets(self, snippet: str, max_len: int = 100) -> str:
        """Truncate *snippet* to roughly *max_len* chars to limit leakage.

        Prefers breaking on a space within the last 20 characters before the
        limit; otherwise cuts hard at *max_len*.
        """
        if len(snippet) <= max_len:
            return snippet

        break_point = snippet.rfind(" ", max_len - 20, max_len)
        if break_point == -1:
            break_point = max_len
        return snippet[:break_point] + "..."
|
|
207
|
+
|
|
208
|
+
class PrivateSearchEngine:
    """Privacy-preserving search over the files in ``current_dir``.

    Combines keyed tokenization, per-file access control, and
    differential-privacy post-processing of the ranked results.
    """

    def __init__(self, mem_dir: Path, current_dir: Path):
        self.mem_dir = Path(mem_dir)
        self.current_dir = Path(current_dir)
        self.tokenizer = SearchTokenizer()
        self.access_control = AccessControl(mem_dir)
        self.dp_noise = DifferentialPrivacyNoise(epsilon=0.1)
        # In-memory audit log; only a hash of each query text is retained.
        self.query_log: List[Dict[str, Any]] = []

    def search(self, query: SearchQuery) -> List[SecureSearchResult]:
        """Run *query* and return privacy-filtered, noise-ordered results.

        Files the requester may not access are still reported (so hit counts
        stay honest) but with ``redacted=True`` and no snippet.  Every query
        is audit-logged via :meth:`_log_query`.
        """
        query_tokens = self.tokenizer.tokenize(query.query)
        results: List[SecureSearchResult] = []

        if query_tokens:  # an empty token list can never match anything
            for filepath in self.current_dir.rglob("*"):
                if not filepath.is_file():
                    continue

                rel_path = str(filepath.relative_to(self.current_dir))

                try:
                    content = filepath.read_text(encoding="utf-8", errors="replace")
                    # Build a set once per file: O(1) membership per query
                    # token instead of scanning the full token list each time.
                    content_tokens = set(self.tokenizer.tokenize(content))

                    matches = sum(1 for t in query_tokens if t in content_tokens)
                    if matches == 0:
                        continue

                    score = matches / len(query_tokens)

                    can_access = self.access_control.can_access(
                        rel_path, query.requester_id, query.privacy_level
                    )

                    if can_access:
                        snippet = (
                            self._extract_snippet(content, query)
                            if query.include_content
                            else None
                        )
                        results.append(
                            SecureSearchResult(
                                path=rel_path,
                                score=score,
                                snippet=snippet,
                                privacy_level=query.privacy_level,
                                redacted=False,
                                accessed_at=datetime.now(timezone.utc).isoformat(),
                            )
                        )
                    else:
                        # Access denied: report the hit but redact everything
                        # beyond path and score.
                        results.append(
                            SecureSearchResult(
                                path=rel_path,
                                score=score,
                                snippet=None,
                                privacy_level=query.privacy_level,
                                redacted=True,
                            )
                        )
                except Exception:
                    # Best-effort: an unreadable file is simply skipped.
                    pass

        results.sort(key=lambda r: r.score, reverse=True)

        # Differential privacy: shuffle near-tied results before truncating
        # to the requested count.
        results = self.dp_noise.randomize_order(results[: query.max_results * 2])

        self._log_query(query, len(results))

        return results[: query.max_results]

    def _extract_snippet(self, content: str, query: SearchQuery) -> Optional[str]:
        """Return a truncated snippet around the first literal query match, if any."""
        idx = content.lower().find(query.query.lower())
        if idx < 0:
            return None
        start = max(0, idx - 50)
        end = min(len(content), idx + len(query.query) + 50)
        return self.dp_noise.truncate_snippets(content[start:end])

    def _log_query(self, query: SearchQuery, result_count: int) -> None:
        """Log query for auditing (only a short hash of the query text is kept)."""
        self.query_log.append(
            {
                "query_hash": hashlib.sha256(query.query.encode()).hexdigest()[:8],
                "requester": query.requester_id,
                "result_count": result_count,
                "timestamp": datetime.now(timezone.utc).isoformat(),
            }
        )

    def get_query_stats(self) -> Dict[str, Any]:
        """Return total query count and the ten most recent audit entries."""
        return {
            "total_queries": len(self.query_log),
            "recent_queries": self.query_log[-10:],
        }
|
|
315
|
+
|
|
316
|
+
# --- Dashboard Helper ---
|
|
317
|
+
|
|
318
|
+
|
|
319
|
+
def get_private_search_stats(mem_dir: Path, current_dir: Path) -> Dict[str, Any]:
    """Return dashboard statistics for the private search subsystem.

    Reuses the engine's own AccessControl instance rather than constructing
    a second one (the original loaded the ACL file from disk twice).
    """
    engine = PrivateSearchEngine(mem_dir, current_dir)
    return {
        "query_stats": engine.get_query_stats(),
        # NOTE: reaches into the private _acl mapping; there is no public
        # "entry count" accessor on AccessControl.
        "acl_count": len(engine.access_control._acl),
    }
|