agmem 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {agmem-0.1.2.dist-info → agmem-0.1.4.dist-info}/METADATA +144 -14
- {agmem-0.1.2.dist-info → agmem-0.1.4.dist-info}/RECORD +48 -28
- memvcs/cli.py +10 -0
- memvcs/commands/add.py +6 -0
- memvcs/commands/audit.py +59 -0
- memvcs/commands/clone.py +7 -0
- memvcs/commands/daemon.py +45 -0
- memvcs/commands/distill.py +24 -0
- memvcs/commands/federated.py +59 -0
- memvcs/commands/fsck.py +31 -0
- memvcs/commands/garden.py +22 -0
- memvcs/commands/gc.py +66 -0
- memvcs/commands/merge.py +55 -1
- memvcs/commands/prove.py +66 -0
- memvcs/commands/pull.py +27 -0
- memvcs/commands/resolve.py +130 -0
- memvcs/commands/timeline.py +27 -0
- memvcs/commands/verify.py +74 -23
- memvcs/commands/when.py +27 -0
- memvcs/core/audit.py +124 -0
- memvcs/core/compression_pipeline.py +157 -0
- memvcs/core/consistency.py +9 -9
- memvcs/core/crypto_verify.py +291 -0
- memvcs/core/distiller.py +47 -29
- memvcs/core/encryption.py +169 -0
- memvcs/core/federated.py +147 -0
- memvcs/core/gardener.py +47 -29
- memvcs/core/ipfs_remote.py +200 -0
- memvcs/core/knowledge_graph.py +77 -5
- memvcs/core/llm/__init__.py +10 -0
- memvcs/core/llm/anthropic_provider.py +50 -0
- memvcs/core/llm/base.py +27 -0
- memvcs/core/llm/factory.py +30 -0
- memvcs/core/llm/openai_provider.py +36 -0
- memvcs/core/merge.py +36 -23
- memvcs/core/objects.py +39 -19
- memvcs/core/pack.py +278 -0
- memvcs/core/privacy_budget.py +63 -0
- memvcs/core/remote.py +229 -3
- memvcs/core/repository.py +82 -2
- memvcs/core/temporal_index.py +9 -0
- memvcs/core/trust.py +103 -0
- memvcs/core/vector_store.py +15 -1
- memvcs/core/zk_proofs.py +158 -0
- {agmem-0.1.2.dist-info → agmem-0.1.4.dist-info}/WHEEL +0 -0
- {agmem-0.1.2.dist-info → agmem-0.1.4.dist-info}/entry_points.txt +0 -0
- {agmem-0.1.2.dist-info → agmem-0.1.4.dist-info}/licenses/LICENSE +0 -0
- {agmem-0.1.2.dist-info → agmem-0.1.4.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,291 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Cryptographic commit verification for agmem.
|
|
3
|
+
|
|
4
|
+
Merkle tree over commit blobs, optional Ed25519 signing of Merkle root.
|
|
5
|
+
Verification on checkout, pull, and via verify/fsck.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import hashlib
|
|
9
|
+
import hmac
|
|
10
|
+
import json
|
|
11
|
+
import os
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Optional, List, Tuple, Any, Dict
|
|
14
|
+
|
|
15
|
+
from .objects import ObjectStore, Tree, Commit, Blob
|
|
16
|
+
|
|
17
|
+
# Ed25519 via cryptography (optional)
|
|
18
|
+
try:
|
|
19
|
+
from cryptography.hazmat.primitives.asymmetric.ed25519 import (
|
|
20
|
+
Ed25519PrivateKey,
|
|
21
|
+
Ed25519PublicKey,
|
|
22
|
+
)
|
|
23
|
+
from cryptography.exceptions import InvalidSignature
|
|
24
|
+
from cryptography.hazmat.primitives import serialization
|
|
25
|
+
|
|
26
|
+
ED25519_AVAILABLE = True
|
|
27
|
+
except ImportError:
|
|
28
|
+
ED25519_AVAILABLE = False
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _collect_blob_hashes_from_tree(store: ObjectStore, tree_hash: str) -> List[str]:
    """Gather the hashes of all blobs reachable from ``tree_hash``.

    Nested trees are walked recursively. A tree that cannot be loaded
    contributes no hashes. The result is sorted so that the Merkle root
    built from it is deterministic.
    """
    root = Tree.load(store, tree_hash)
    if not root:
        return []
    found: List[str] = []
    for item in root.entries:
        if item.obj_type == "tree":
            found += _collect_blob_hashes_from_tree(store, item.hash)
        elif item.obj_type == "blob":
            found.append(item.hash)
    return sorted(found)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _merkle_hash(data: bytes) -> str:
|
|
46
|
+
"""SHA-256 hash for Merkle tree nodes."""
|
|
47
|
+
return hashlib.sha256(data).hexdigest()
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def build_merkle_tree(blob_hashes: List[str]) -> str:
    """
    Compute the root of a binary Merkle tree over ``blob_hashes``.

    Each leaf is the hash of one blob-hash string; each parent is the hash of
    the concatenated hex strings of its two children. A level with an odd
    number of nodes pairs its last node with itself. The input order is used
    as given (it is not sorted here). An empty input yields the hash of
    b"empty". Returns the root as a hex string.
    """
    if not blob_hashes:
        return _merkle_hash(b"empty")
    level = [_merkle_hash(item.encode()) for item in blob_hashes]
    while len(level) > 1:
        if len(level) % 2:
            # Odd node count: the trailing node is combined with a copy of itself.
            level.append(level[-1])
        level = [
            _merkle_hash((level[k] + level[k + 1]).encode())
            for k in range(0, len(level), 2)
        ]
    return level[0]
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def build_merkle_root_for_commit(store: ObjectStore, commit_hash: str) -> Optional[str]:
    """Return the Merkle root over the blobs of a commit's tree, or None if the commit cannot be loaded."""
    loaded = Commit.load(store, commit_hash)
    if loaded:
        return build_merkle_tree(_collect_blob_hashes_from_tree(store, loaded.tree))
    return None
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def merkle_proof(blob_hashes: List[str], target_blob_hash: str) -> Optional[List[Tuple[str, str]]]:
    """
    Build the inclusion proof for ``target_blob_hash``.

    The hashes are sorted before the tree is built. The proof lists, from the
    leaf level up to the root, one ``(sibling_hash, side)`` pair per level,
    where ``side`` is "L" when the sibling is the left child and "R" when it
    is the right child. Returns None when the target is not in ``blob_hashes``;
    a single-leaf tree yields an empty proof.
    """
    if target_blob_hash not in blob_hashes:
        return None
    ordered = sorted(blob_hashes)
    level = [_merkle_hash(item.encode()) for item in ordered]
    position = ordered.index(target_blob_hash)
    path: List[Tuple[str, str]] = []
    while len(level) > 1:
        if position % 2 == 0:
            # Left child: sibling is to the right, or the node itself on an odd tail.
            has_right = position + 1 < len(level)
            path.append((level[position + 1] if has_right else level[position], "R"))
        else:
            path.append((level[position - 1], "L"))
        level = [
            _merkle_hash((level[k] + (level[k + 1] if k + 1 < len(level) else level[k])).encode())
            for k in range(0, len(level), 2)
        ]
        position //= 2
    return path
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def verify_merkle_proof(blob_hash: str, proof: List[Tuple[str, str]], expected_root: str) -> bool:
    """Recompute the root from ``blob_hash`` and its proof path; True when it equals ``expected_root``.

    Each proof step is ``(sibling_hash, side)``: side "L" places the sibling
    on the left of the running hash, any other value places it on the right.
    """
    node = _merkle_hash(blob_hash.encode())
    for neighbour, side in proof:
        joined = neighbour + node if side == "L" else node + neighbour
        node = _merkle_hash(joined.encode())
    return node == expected_root
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
# --- Signing (Ed25519) ---
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def _keys_dir(mem_dir: Path) -> Path:
|
|
126
|
+
return mem_dir / "keys"
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def get_signing_key_paths(mem_dir: Path) -> Tuple[Path, Path]:
    """Return ``(private_key_path, public_key_path)`` inside the keys directory.

    Only the paths are computed; neither file is checked for existence.
    """
    base = _keys_dir(mem_dir)
    private_path = base / "private.pem"
    public_path = base / "public.pem"
    return (private_path, public_path)
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def ensure_keys_dir(mem_dir: Path) -> Path:
    """Create the keys directory (and any missing parents) if needed and return its path."""
    target = _keys_dir(mem_dir)
    # exist_ok makes repeated calls harmless.
    target.mkdir(parents=True, exist_ok=True)
    return target
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def generate_keypair(mem_dir: Path) -> Tuple[bytes, bytes]:
    """Create a fresh Ed25519 key pair and return ``(private_pem, public_pem)``.

    The private key is serialized as unencrypted PKCS8 PEM and the public key
    as SubjectPublicKeyInfo PEM. Nothing is written to disk, and ``mem_dir``
    is not used by this function.

    Raises:
        RuntimeError: if the optional ``cryptography`` package is not installed.
    """
    if not ED25519_AVAILABLE:
        raise RuntimeError(
            "Signing requires 'cryptography'; install with: pip install cryptography"
        )
    signer = Ed25519PrivateKey.generate()
    private_pem = signer.private_bytes(
        encoding=serialization.Encoding.PEM,
        format=serialization.PrivateFormat.PKCS8,
        encryption_algorithm=serialization.NoEncryption(),
    )
    public_pem = signer.public_key().public_bytes(
        encoding=serialization.Encoding.PEM,
        format=serialization.PublicFormat.SubjectPublicKeyInfo,
    )
    return (private_pem, public_pem)
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def save_public_key(mem_dir: Path, public_pem: bytes) -> Path:
    """Write ``public_pem`` to ``public.pem`` in the keys directory and return that path.

    The keys directory is created first if it does not exist.
    """
    destination = ensure_keys_dir(mem_dir) / "public.pem"
    destination.write_bytes(public_pem)
    return destination
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def load_public_key(mem_dir: Path) -> Optional[bytes]:
    """Load the public key PEM for a repository.

    Lookup order:
      1. ``public.pem`` in the keys directory (returned as raw bytes);
      2. ``signing.public_key_pem`` in ``config.json`` under ``mem_dir``.

    The config value is a JSON string; it is encoded to bytes so that both
    sources return the same type, matching the annotation.

    Returns:
        The PEM as bytes, or None when no key is configured or the config
        file is unreadable or malformed.
    """
    key_file = _keys_dir(mem_dir) / "public.pem"
    if key_file.exists():
        return key_file.read_bytes()

    config_file = mem_dir / "config.json"
    if not config_file.exists():
        return None
    try:
        config = json.loads(config_file.read_text())
    except (OSError, ValueError):
        # Unreadable file or invalid JSON/encoding: treat as "no key configured".
        return None

    signing = config.get("signing") if isinstance(config, dict) else None
    pem = signing.get("public_key_pem") if isinstance(signing, dict) else None
    if isinstance(pem, str):
        return pem.encode()
    return None
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
def load_private_key_from_env() -> Optional[bytes]:
    """Fetch the signing private key PEM from the environment.

    ``AGMEM_SIGNING_PRIVATE_KEY`` (the PEM text itself) takes precedence.
    Otherwise ``AGMEM_SIGNING_PRIVATE_KEY_FILE`` is read if it names an
    existing file. Returns None when neither source provides a key.
    """
    inline_pem = os.environ.get("AGMEM_SIGNING_PRIVATE_KEY")
    if inline_pem:
        if isinstance(inline_pem, str):
            return inline_pem.encode()
        return inline_pem
    key_file = os.environ.get("AGMEM_SIGNING_PRIVATE_KEY_FILE")
    if not key_file or not os.path.isfile(key_file):
        return None
    return Path(key_file).read_bytes()
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def sign_merkle_root(root_hex: str, private_key_pem: bytes) -> str:
    """Sign the Merkle root with an Ed25519 private key and return the signature as hex.

    The signed message is the encoded hex string of the root, not the raw
    digest bytes.

    Raises:
        RuntimeError: if ``cryptography`` is not installed.
        TypeError: if the PEM does not hold an Ed25519 private key.
    """
    if not ED25519_AVAILABLE:
        raise RuntimeError("Signing requires 'cryptography'")
    signer = serialization.load_pem_private_key(private_key_pem, password=None)
    if isinstance(signer, Ed25519PrivateKey):
        return signer.sign(root_hex.encode()).hex()
    raise TypeError("Ed25519 private key required")
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def verify_signature(root_hex: str, signature_hex: str, public_key_pem: bytes) -> bool:
    """Check an Ed25519 signature over the Merkle root hex string.

    Returns True only for a valid signature. Every failure is reported as
    False: ``cryptography`` missing, a key that is not Ed25519, a bad
    signature, or any error while parsing the key or the hex signature.
    """
    if not ED25519_AVAILABLE:
        return False
    try:
        verifier = serialization.load_pem_public_key(public_key_pem)
        if isinstance(verifier, Ed25519PublicKey):
            verifier.verify(bytes.fromhex(signature_hex), root_hex.encode())
            return True
        return False
    except Exception:
        # Covers InvalidSignature as well as malformed key or signature input.
        return False
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def verify_commit(
    store: ObjectStore,
    commit_hash: str,
    public_key_pem: Optional[bytes] = None,
    *,
    mem_dir: Optional[Path] = None,
) -> Tuple[bool, Optional[str]]:
    """
    Verify a commit against its stored Merkle root and optional signature.

    Steps:
      1. Load the commit and read ``merkle_root`` / ``signature`` from its metadata.
      2. Load every blob referenced by the commit tree, so corrupted or
         missing objects are detected.
      3. Rebuild the Merkle root from the blob hashes and compare it with the
         stored root.
      4. If a signature is stored, verify it with ``public_key_pem``, or with
         the key loaded from ``mem_dir`` when no key is passed.

    Returns:
        ``(True, None)`` when the root matches and the signature, if present,
        is valid; otherwise ``(False, message)`` describing why the commit is
        tampered or unverified.
    """
    commit = Commit.load(store, commit_hash)
    if not commit:
        return (False, "commit not found")
    metadata = commit.metadata or {}
    stored_root = metadata.get("merkle_root")
    stored_sig = metadata.get("signature")
    if not stored_root:
        return (False, "commit has no merkle_root (unverified)")

    # Loading each blob detects tampering in compressed/encrypted content.
    blob_hashes = _collect_blob_hashes_from_tree(store, commit.tree)
    for blob_hash in blob_hashes:
        try:
            blob = Blob.load(store, blob_hash)
        except Exception:
            return (False, "merkle_root mismatch (commit tampered)")
        if blob is None:
            return (False, f"blob {blob_hash[:8]} corrupted or missing")

    # Reuse the hashes collected above instead of reloading the commit and
    # walking the tree a second time.
    computed_root = build_merkle_tree(blob_hashes)
    # Metadata comes from the object store and may itself be tampered with:
    # compare_digest raises TypeError on non-str or non-ASCII str input, so
    # compare encoded bytes and treat a non-string root as a mismatch.
    if not isinstance(stored_root, str) or not hmac.compare_digest(
        computed_root.encode(), stored_root.encode("utf-8", "replace")
    ):
        return (False, "merkle_root mismatch (commit tampered)")
    if not stored_sig:
        return (True, None)  # Root matches; no signature (legacy)

    pub = public_key_pem
    if not pub and mem_dir:
        pub = load_public_key(mem_dir)
    if not pub:
        return (False, "signature present but no public key configured")
    if isinstance(pub, str):
        pub = pub.encode()
    if not verify_signature(stored_root, stored_sig, pub):
        return (False, "signature verification failed")
    return (True, None)
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
def verify_commit_optional(
    store: ObjectStore,
    commit_hash: str,
    mem_dir: Optional[Path] = None,
    *,
    strict: bool = False,
) -> None:
    """
    Run verify_commit and raise ValueError when the result must not be ignored.

    A failure whose message contains "tampered", "mismatch" or
    "signature verification failed" always raises. Any other failure (for
    example a commit without a merkle_root) raises only when ``strict`` is True.
    """
    ok, err = verify_commit(store, commit_hash, None, mem_dir=mem_dir)
    if ok or not err:
        return
    tamper_markers = ("tampered", "mismatch", "signature verification failed")
    looks_tampered = any(marker in err for marker in tamper_markers)
    if looks_tampered or strict:
        raise ValueError(f"Commit verification failed: {err}")
|
memvcs/core/distiller.py
CHANGED
|
@@ -35,6 +35,9 @@ class DistillerConfig:
|
|
|
35
35
|
llm_provider: Optional[str] = None
|
|
36
36
|
llm_model: Optional[str] = None
|
|
37
37
|
create_safety_branch: bool = True
|
|
38
|
+
use_dp: bool = False
|
|
39
|
+
dp_epsilon: Optional[float] = None
|
|
40
|
+
dp_delta: Optional[float] = None
|
|
38
41
|
|
|
39
42
|
|
|
40
43
|
@dataclass
|
|
@@ -110,9 +113,32 @@ class Distiller:
|
|
|
110
113
|
continue
|
|
111
114
|
combined = "\n---\n".join(contents)
|
|
112
115
|
|
|
113
|
-
if self.config.llm_provider
|
|
116
|
+
if self.config.llm_provider and self.config.llm_model:
|
|
114
117
|
try:
|
|
115
|
-
|
|
118
|
+
from .llm import get_provider
|
|
119
|
+
|
|
120
|
+
config = {
|
|
121
|
+
"llm_provider": self.config.llm_provider,
|
|
122
|
+
"llm_model": self.config.llm_model,
|
|
123
|
+
}
|
|
124
|
+
provider = get_provider(config=config)
|
|
125
|
+
if provider:
|
|
126
|
+
text = provider.complete(
|
|
127
|
+
[
|
|
128
|
+
{
|
|
129
|
+
"role": "system",
|
|
130
|
+
"content": "Extract factual statements from the text. Output as bullet points (one fact per line). Focus on: user preferences, learned facts, key decisions.",
|
|
131
|
+
},
|
|
132
|
+
{
|
|
133
|
+
"role": "user",
|
|
134
|
+
"content": f"Topic: {cluster.topic}\n\n{combined[:4000]}",
|
|
135
|
+
},
|
|
136
|
+
],
|
|
137
|
+
max_tokens=500,
|
|
138
|
+
)
|
|
139
|
+
return [
|
|
140
|
+
line.strip() for line in text.splitlines() if line.strip().startswith("-")
|
|
141
|
+
][:15]
|
|
116
142
|
except Exception:
|
|
117
143
|
pass
|
|
118
144
|
|
|
@@ -125,29 +151,6 @@ class Distiller:
|
|
|
125
151
|
facts.append(f"- {line[:200]}")
|
|
126
152
|
return facts[:10] if facts else [f"- Learned about {cluster.topic}"]
|
|
127
153
|
|
|
128
|
-
def _extract_with_openai(self, content: str, topic: str) -> List[str]:
|
|
129
|
-
"""Extract facts using OpenAI API."""
|
|
130
|
-
import openai
|
|
131
|
-
|
|
132
|
-
response = openai.chat.completions.create(
|
|
133
|
-
model=self.config.llm_model or "gpt-3.5-turbo",
|
|
134
|
-
messages=[
|
|
135
|
-
{
|
|
136
|
-
"role": "system",
|
|
137
|
-
"content": "Extract factual statements from the text. "
|
|
138
|
-
"Output as bullet points (one fact per line). "
|
|
139
|
-
"Focus on: user preferences, learned facts, key decisions.",
|
|
140
|
-
},
|
|
141
|
-
{
|
|
142
|
-
"role": "user",
|
|
143
|
-
"content": f"Topic: {topic}\n\n{content[:4000]}",
|
|
144
|
-
},
|
|
145
|
-
],
|
|
146
|
-
max_tokens=500,
|
|
147
|
-
)
|
|
148
|
-
text = response.choices[0].message.content
|
|
149
|
-
return [line.strip() for line in text.splitlines() if line.strip().startswith("-")][:15]
|
|
150
|
-
|
|
151
154
|
def write_consolidated(self, cluster: EpisodeCluster, facts: List[str]) -> Path:
|
|
152
155
|
"""Write consolidated semantic file."""
|
|
153
156
|
self.target_dir.mkdir(parents=True, exist_ok=True)
|
|
@@ -160,13 +163,18 @@ class Distiller:
|
|
|
160
163
|
except ValueError:
|
|
161
164
|
out_path = self.target_dir / f"consolidated-{ts}.md"
|
|
162
165
|
|
|
166
|
+
confidence_score = self.config.extraction_confidence_threshold
|
|
167
|
+
if self.config.use_dp and self.config.dp_epsilon is not None and self.config.dp_delta is not None:
|
|
168
|
+
from .privacy_budget import add_noise
|
|
169
|
+
confidence_score = add_noise(confidence_score, 0.1, self.config.dp_epsilon, self.config.dp_delta)
|
|
170
|
+
confidence_score = max(0.0, min(1.0, confidence_score))
|
|
163
171
|
frontmatter = {
|
|
164
172
|
"schema_version": "1.0",
|
|
165
173
|
"last_updated": datetime.utcnow().isoformat() + "Z",
|
|
166
174
|
"source_agent_id": "distiller",
|
|
167
175
|
"memory_type": "semantic",
|
|
168
176
|
"tags": cluster.tags + ["auto-generated", "consolidated"],
|
|
169
|
-
"confidence_score":
|
|
177
|
+
"confidence_score": confidence_score,
|
|
170
178
|
}
|
|
171
179
|
body = f"# Consolidated: {cluster.topic}\n\n" + "\n".join(facts)
|
|
172
180
|
if YAML_AVAILABLE:
|
|
@@ -266,11 +274,21 @@ class Distiller:
|
|
|
266
274
|
except Exception:
|
|
267
275
|
pass
|
|
268
276
|
|
|
277
|
+
clusters_processed = len(clusters)
|
|
278
|
+
facts_extracted = facts_count
|
|
279
|
+
episodes_archived = archived
|
|
280
|
+
if self.config.use_dp and self.config.dp_epsilon is not None and self.config.dp_delta is not None:
|
|
281
|
+
from .privacy_budget import add_noise
|
|
282
|
+
sensitivity = 1.0
|
|
283
|
+
clusters_processed = max(0, int(round(add_noise(float(clusters_processed), sensitivity, self.config.dp_epsilon, self.config.dp_delta))))
|
|
284
|
+
facts_extracted = max(0, int(round(add_noise(float(facts_extracted), sensitivity, self.config.dp_epsilon, self.config.dp_delta))))
|
|
285
|
+
episodes_archived = max(0, int(round(add_noise(float(episodes_archived), sensitivity, self.config.dp_epsilon, self.config.dp_delta))))
|
|
286
|
+
|
|
269
287
|
return DistillerResult(
|
|
270
288
|
success=True,
|
|
271
|
-
clusters_processed=
|
|
272
|
-
facts_extracted=
|
|
273
|
-
episodes_archived=
|
|
289
|
+
clusters_processed=clusters_processed,
|
|
290
|
+
facts_extracted=facts_extracted,
|
|
291
|
+
episodes_archived=episodes_archived,
|
|
274
292
|
branch_created=branch_name,
|
|
275
293
|
commit_hash=commit_hash,
|
|
276
294
|
message=f"Processed {len(clusters)} clusters, extracted {facts_count} facts",
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Encryption at rest for agmem object store.
|
|
3
|
+
|
|
4
|
+
AES-256-GCM for object payloads; key derived from passphrase via Argon2id.
|
|
5
|
+
Hash-then-encrypt so content-addressable paths stay based on plaintext hash.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
import os
|
|
10
|
+
import secrets
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Optional, Tuple, Dict, Any, Callable
|
|
13
|
+
|
|
14
|
+
# AES-GCM and Argon2id via cryptography
|
|
15
|
+
try:
|
|
16
|
+
from cryptography.hazmat.primitives.ciphers.aead import AESGCM
|
|
17
|
+
from cryptography.hazmat.primitives.kdf.argon2 import Argon2id
|
|
18
|
+
|
|
19
|
+
ENCRYPTION_AVAILABLE = True
|
|
20
|
+
except ImportError:
|
|
21
|
+
ENCRYPTION_AVAILABLE = False
|
|
22
|
+
|
|
23
|
+
IV_LEN = 12
|
|
24
|
+
TAG_LEN = 16
|
|
25
|
+
KEY_LEN = 32
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _encryption_config_path(mem_dir: Path) -> Path:
|
|
29
|
+
return mem_dir / "encryption.json"
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def load_encryption_config(mem_dir: Path) -> Optional[Dict[str, Any]]:
    """Read and parse ``encryption.json`` under ``mem_dir``.

    Returns the parsed JSON, or None when the file does not exist or cannot
    be read or parsed.
    """
    config_path = _encryption_config_path(mem_dir)
    if config_path.exists():
        try:
            return json.loads(config_path.read_text())
        except Exception:
            return None
    return None
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def save_encryption_config(
    mem_dir: Path,
    salt: bytes,
    time_cost: int = 3,
    memory_cost: int = 65536,
    parallelism: int = 4,
) -> Path:
    """Persist the key-derivation parameters as JSON and return the config path.

    ``mem_dir`` is created if missing. The salt is stored hex-encoded under
    ``salt_hex``, next to ``time_cost``, ``memory_cost`` and ``parallelism``.
    """
    mem_dir.mkdir(parents=True, exist_ok=True)
    settings = {
        "salt_hex": salt.hex(),
        "time_cost": time_cost,
        "memory_cost": memory_cost,
        "parallelism": parallelism,
    }
    config_path = _encryption_config_path(mem_dir)
    config_path.write_text(json.dumps(settings, indent=2))
    return config_path
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def derive_key(
    passphrase: bytes,
    salt: bytes,
    time_cost: int = 3,
    memory_cost: int = 65536,
    parallelism: int = 4,
) -> bytes:
    """Derive a KEY_LEN-byte key from ``passphrase`` using Argon2id.

    Args:
        passphrase: Secret to stretch.
        salt: Per-repository random salt.
        time_cost: Number of Argon2 passes.
        memory_cost: Memory cost parameter, passed through to Argon2id unchanged.
        parallelism: Number of Argon2 lanes.

    Returns:
        The derived key bytes.

    Raises:
        RuntimeError: if the ``cryptography`` package (with Argon2id) is not importable.
    """
    if not ENCRYPTION_AVAILABLE:
        raise RuntimeError("Encryption requires 'cryptography'")
    # cryptography's Argon2id names these parameters `iterations` and `lanes`;
    # passing `time_cost` / `parallelism` raises TypeError (unexpected keyword).
    kdf = Argon2id(
        salt=salt,
        length=KEY_LEN,
        iterations=time_cost,
        lanes=parallelism,
        memory_cost=memory_cost,
    )
    return kdf.derive(passphrase)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def encrypt(plaintext: bytes, key: bytes) -> Tuple[bytes, bytes]:
    """Encrypt ``plaintext`` with AES-GCM under ``key``.

    A fresh random IV of IV_LEN bytes is generated for each call and no
    associated data is used. Returns ``(iv, ciphertext_with_tag)``.

    Raises:
        RuntimeError: if ``cryptography`` is not installed.
    """
    if not ENCRYPTION_AVAILABLE:
        raise RuntimeError("Encryption requires 'cryptography'")
    nonce = secrets.token_bytes(IV_LEN)
    # AESGCM.encrypt appends the authentication tag to the ciphertext.
    sealed = AESGCM(key).encrypt(nonce, plaintext, None)
    return (nonce, sealed)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def decrypt(iv: bytes, ciphertext_with_tag: bytes, key: bytes) -> bytes:
    """Decrypt and authenticate an AES-GCM payload; return the plaintext.

    No associated data is used. The underlying AESGCM call raises when
    authentication fails.

    Raises:
        RuntimeError: if ``cryptography`` is not installed.
    """
    if not ENCRYPTION_AVAILABLE:
        raise RuntimeError("Encryption requires 'cryptography'")
    cipher = AESGCM(key)
    plaintext = cipher.decrypt(iv, ciphertext_with_tag, None)
    return plaintext
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def init_encryption(mem_dir: Path, time_cost: int = 3, memory_cost: int = 65536) -> bytes:
    """Generate a random 16-byte salt, save it with the KDF parameters, and return it.

    The caller derives the actual key from a passphrase and this salt.
    ``parallelism`` is left at save_encryption_config's default.
    """
    fresh_salt = secrets.token_bytes(16)
    save_encryption_config(
        mem_dir,
        fresh_salt,
        time_cost=time_cost,
        memory_cost=memory_cost,
    )
    return fresh_salt
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
class ObjectStoreEncryptor:
    """
    Encrypts and decrypts object-store payloads.

    The key is obtained on every call from the ``get_key`` callable supplied
    at construction. Stored form is the IV (IV_LEN bytes) immediately followed
    by the ciphertext with its authentication tag.
    """

    def __init__(self, get_key: Callable[[], Optional[bytes]]):
        # Callable returning the current key, or None when no key is available.
        self._get_key = get_key

    def encrypt_payload(self, plaintext: bytes) -> bytes:
        """Encrypt ``plaintext`` and return ``iv + ciphertext_with_tag``.

        Raises ValueError when no key is available.
        """
        secret = self._get_key()
        if not secret:
            raise ValueError("Encryption key not available (passphrase required)")
        nonce, sealed = encrypt(plaintext, secret)
        return nonce + sealed

    def decrypt_payload(self, raw: bytes) -> bytes:
        """Split ``raw`` into IV and ciphertext, decrypt, and return the plaintext.

        Raises ValueError when no key is available or when ``raw`` is shorter
        than an IV plus a tag.
        """
        secret = self._get_key()
        if not secret:
            raise ValueError("Encryption key not available (passphrase required)")
        if len(raw) < IV_LEN + TAG_LEN:
            raise ValueError("Payload too short for encrypted object")
        return decrypt(raw[:IV_LEN], raw[IV_LEN:], secret)
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def get_key_from_env_or_cache(
    mem_dir: Path,
    env_var: str = "AGMEM_ENCRYPTION_PASSPHRASE",
    cache_var: str = "_agmem_encryption_key_cache",
) -> Optional[bytes]:
    """Return the encryption key for ``mem_dir`` from the process cache or the environment.

    The derived key is cached on this module under ``cache_var`` so the
    key derivation runs once per process. The cache holds a single key, so a
    companion attribute ``<cache_var>_owner`` records which ``mem_dir`` it
    was derived for: a cached key is never reused for a different repository,
    whose salt would differ. A cached key with no recorded owner (for example
    one set directly by other code) is still honoured for any ``mem_dir``.

    Returns:
        The key, or None when the passphrase is not in ``env_var`` or the
        repository has no encryption config.
    """
    import sys

    # Look the module up by its real import name so caching works however it was imported.
    mod = sys.modules.get(__name__)
    owner_var = f"{cache_var}_owner"
    repo_id = os.path.abspath(mem_dir)

    if mod is not None:
        cached = getattr(mod, cache_var, None)
        owner = getattr(mod, owner_var, None)
        if cached is not None and owner in (None, repo_id):
            return cached

    passphrase = os.environ.get(env_var)
    if not passphrase:
        return None
    cfg = load_encryption_config(mem_dir)
    if not cfg:
        return None
    salt = bytes.fromhex(cfg["salt_hex"])
    key = derive_key(
        passphrase.encode() if isinstance(passphrase, str) else passphrase,
        salt,
        time_cost=cfg.get("time_cost", 3),
        memory_cost=cfg.get("memory_cost", 65536),
        parallelism=cfg.get("parallelism", 4),
    )
    if mod is not None:
        setattr(mod, cache_var, key)
        setattr(mod, owner_var, repo_id)
    return key
|