agmem 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {agmem-0.1.1.dist-info → agmem-0.1.3.dist-info}/METADATA +157 -16
- agmem-0.1.3.dist-info/RECORD +105 -0
- memvcs/__init__.py +1 -1
- memvcs/cli.py +45 -31
- memvcs/commands/__init__.py +9 -9
- memvcs/commands/add.py +83 -76
- memvcs/commands/audit.py +59 -0
- memvcs/commands/blame.py +46 -53
- memvcs/commands/branch.py +13 -33
- memvcs/commands/checkout.py +27 -32
- memvcs/commands/clean.py +18 -23
- memvcs/commands/clone.py +11 -1
- memvcs/commands/commit.py +40 -39
- memvcs/commands/daemon.py +109 -76
- memvcs/commands/decay.py +77 -0
- memvcs/commands/diff.py +56 -57
- memvcs/commands/distill.py +90 -0
- memvcs/commands/federated.py +53 -0
- memvcs/commands/fsck.py +86 -61
- memvcs/commands/garden.py +40 -35
- memvcs/commands/gc.py +51 -0
- memvcs/commands/graph.py +41 -48
- memvcs/commands/init.py +16 -24
- memvcs/commands/log.py +25 -40
- memvcs/commands/merge.py +69 -27
- memvcs/commands/pack.py +129 -0
- memvcs/commands/prove.py +66 -0
- memvcs/commands/pull.py +31 -1
- memvcs/commands/push.py +4 -2
- memvcs/commands/recall.py +145 -0
- memvcs/commands/reflog.py +13 -22
- memvcs/commands/remote.py +1 -0
- memvcs/commands/repair.py +66 -0
- memvcs/commands/reset.py +23 -33
- memvcs/commands/resolve.py +130 -0
- memvcs/commands/resurrect.py +82 -0
- memvcs/commands/search.py +3 -4
- memvcs/commands/serve.py +2 -1
- memvcs/commands/show.py +66 -36
- memvcs/commands/stash.py +34 -34
- memvcs/commands/status.py +27 -35
- memvcs/commands/tag.py +23 -47
- memvcs/commands/test.py +30 -44
- memvcs/commands/timeline.py +111 -0
- memvcs/commands/tree.py +26 -27
- memvcs/commands/verify.py +110 -0
- memvcs/commands/when.py +115 -0
- memvcs/core/access_index.py +167 -0
- memvcs/core/audit.py +124 -0
- memvcs/core/config_loader.py +3 -1
- memvcs/core/consistency.py +214 -0
- memvcs/core/crypto_verify.py +280 -0
- memvcs/core/decay.py +185 -0
- memvcs/core/diff.py +158 -143
- memvcs/core/distiller.py +277 -0
- memvcs/core/encryption.py +169 -0
- memvcs/core/federated.py +86 -0
- memvcs/core/gardener.py +176 -145
- memvcs/core/hooks.py +48 -14
- memvcs/core/ipfs_remote.py +39 -0
- memvcs/core/knowledge_graph.py +135 -138
- memvcs/core/llm/__init__.py +10 -0
- memvcs/core/llm/anthropic_provider.py +50 -0
- memvcs/core/llm/base.py +27 -0
- memvcs/core/llm/factory.py +30 -0
- memvcs/core/llm/openai_provider.py +36 -0
- memvcs/core/merge.py +260 -170
- memvcs/core/objects.py +110 -101
- memvcs/core/pack.py +92 -0
- memvcs/core/pii_scanner.py +147 -146
- memvcs/core/privacy_budget.py +63 -0
- memvcs/core/refs.py +132 -115
- memvcs/core/remote.py +38 -0
- memvcs/core/repository.py +254 -164
- memvcs/core/schema.py +155 -113
- memvcs/core/staging.py +60 -65
- memvcs/core/storage/__init__.py +20 -18
- memvcs/core/storage/base.py +74 -70
- memvcs/core/storage/gcs.py +70 -68
- memvcs/core/storage/local.py +42 -40
- memvcs/core/storage/s3.py +105 -110
- memvcs/core/temporal_index.py +121 -0
- memvcs/core/test_runner.py +101 -93
- memvcs/core/trust.py +103 -0
- memvcs/core/vector_store.py +56 -36
- memvcs/core/zk_proofs.py +26 -0
- memvcs/integrations/mcp_server.py +1 -3
- memvcs/integrations/web_ui/server.py +25 -26
- memvcs/retrieval/__init__.py +22 -0
- memvcs/retrieval/base.py +54 -0
- memvcs/retrieval/pack.py +128 -0
- memvcs/retrieval/recaller.py +105 -0
- memvcs/retrieval/strategies.py +314 -0
- memvcs/utils/__init__.py +3 -3
- memvcs/utils/helpers.py +52 -52
- agmem-0.1.1.dist-info/RECORD +0 -67
- {agmem-0.1.1.dist-info → agmem-0.1.3.dist-info}/WHEEL +0 -0
- {agmem-0.1.1.dist-info → agmem-0.1.3.dist-info}/entry_points.txt +0 -0
- {agmem-0.1.1.dist-info → agmem-0.1.3.dist-info}/licenses/LICENSE +0 -0
- {agmem-0.1.1.dist-info → agmem-0.1.3.dist-info}/top_level.txt +0 -0
memvcs/core/distiller.py
ADDED
@@ -0,0 +1,277 @@
"""
Distiller - Episodic-to-semantic distillation pipeline for agmem.

Converts session logs into compact facts (like memory consolidation during sleep).
Extends Gardener with factual extraction and safety branches.
"""

import shutil
from pathlib import Path
from typing import List, Dict, Any, Optional, Tuple
from dataclasses import dataclass, field
from datetime import datetime
from collections import defaultdict

try:
    import yaml

    YAML_AVAILABLE = True
except ImportError:
    YAML_AVAILABLE = False

from .gardener import Gardener, GardenerConfig, EpisodeCluster


@dataclass
class DistillerConfig:
    """Configuration for the Distiller."""

    source_dir: str = "episodic"
    target_dir: str = "semantic/consolidated"
    archive_dir: str = "archive"
    min_cluster_size: int = 3
    extraction_confidence_threshold: float = 0.7
    safety_branch_prefix: str = "auto-distill/"
    llm_provider: Optional[str] = None
    llm_model: Optional[str] = None
    create_safety_branch: bool = True


@dataclass
class DistillerResult:
    """Result of a distillation run."""

    success: bool
    clusters_processed: int
    facts_extracted: int
    episodes_archived: int
    branch_created: Optional[str] = None
    commit_hash: Optional[str] = None
    message: str = ""


class Distiller:
    """
    Distills episodic memory into semantic facts.

    Pipeline: cluster episodes -> extract facts via LLM -> merge with semantic -> archive.
    Creates safety branch for human review before merging to main.
    """

    def __init__(self, repo: Any, config: Optional[DistillerConfig] = None):
        self.repo = repo
        self.config = config or DistillerConfig()
        self.source_dir = repo.root / "current" / self.config.source_dir
        self.target_dir = repo.root / "current" / self.config.target_dir.rstrip("/")
        archive_candidate = repo.current_dir / self.config.archive_dir
        try:
            archive_candidate.resolve().relative_to(repo.current_dir.resolve())
            self.archive_dir = archive_candidate
        except (ValueError, RuntimeError):
            self.archive_dir = repo.current_dir / "archive"
        self.gardener = Gardener(
            repo,
            GardenerConfig(
                threshold=1,
                archive_dir=self.config.archive_dir,
                min_cluster_size=self.config.min_cluster_size,
                llm_provider=self.config.llm_provider,
                llm_model=self.config.llm_model,
            ),
        )

    def load_episodes_from(self, source_path: Path) -> List[Tuple[Path, str]]:
        """Load episodes from source directory."""
        episodes = []
        if not source_path.exists():
            return episodes
        for f in source_path.glob("**/*.md"):
            if f.is_file():
                try:
                    episodes.append((f, f.read_text(encoding="utf-8", errors="replace")))
                except Exception:
                    continue
        return episodes

    def cluster_episodes(self, episodes: List[Tuple[Path, str]]) -> List[EpisodeCluster]:
        """Cluster episodes using Gardener's logic."""
        try:
            return self.gardener.cluster_episodes_with_embeddings(episodes)
        except Exception:
            return self.gardener.cluster_episodes(episodes)

    def extract_facts(self, cluster: EpisodeCluster) -> List[str]:
        """Extract factual statements from cluster via LLM or heuristics."""
        contents = []
        for ep_path in cluster.episodes[:10]:
            try:
                contents.append(ep_path.read_text()[:1000])
            except Exception:
                continue
        combined = "\n---\n".join(contents)

        if self.config.llm_provider and self.config.llm_model:
            try:
                from .llm import get_provider

                config = {
                    "llm_provider": self.config.llm_provider,
                    "llm_model": self.config.llm_model,
                }
                provider = get_provider(config=config)
                if provider:
                    text = provider.complete(
                        [
                            {
                                "role": "system",
                                "content": "Extract factual statements from the text. Output as bullet points (one fact per line). Focus on: user preferences, learned facts, key decisions.",
                            },
                            {
                                "role": "user",
                                "content": f"Topic: {cluster.topic}\n\n{combined[:4000]}",
                            },
                        ],
                        max_tokens=500,
                    )
                    return [
                        line.strip() for line in text.splitlines() if line.strip().startswith("-")
                    ][:15]
            except Exception:
                pass

        # Fallback: simple extraction
        facts = []
        for line in combined.splitlines():
            line = line.strip()
            if len(line) > 20 and not line.startswith("#") and not line.startswith("-"):
                if any(w in line.lower() for w in ["prefers", "likes", "uses", "learned", "user"]):
                    facts.append(f"- {line[:200]}")
        return facts[:10] if facts else [f"- Learned about {cluster.topic}"]

    def write_consolidated(self, cluster: EpisodeCluster, facts: List[str]) -> Path:
        """Write consolidated semantic file."""
        self.target_dir.mkdir(parents=True, exist_ok=True)
        safe_topic = cluster.topic.replace(" ", "-").lower().replace("/", "_")[:30]
        ts = datetime.utcnow().strftime("%Y%m%d")
        filename = f"consolidated-{safe_topic}-{ts}.md"
        out_path = (self.target_dir / filename).resolve()
        try:
            out_path.relative_to(self.repo.current_dir.resolve())
        except ValueError:
            out_path = self.target_dir / f"consolidated-{ts}.md"

        frontmatter = {
            "schema_version": "1.0",
            "last_updated": datetime.utcnow().isoformat() + "Z",
            "source_agent_id": "distiller",
            "memory_type": "semantic",
            "tags": cluster.tags + ["auto-generated", "consolidated"],
            "confidence_score": self.config.extraction_confidence_threshold,
        }
        body = f"# Consolidated: {cluster.topic}\n\n" + "\n".join(facts)
        if YAML_AVAILABLE:
            import yaml

            content = f"---\n{yaml.dump(frontmatter, default_flow_style=False)}---\n\n{body}"
        else:
            content = body
        out_path.write_text(content)
        return out_path

    def archive_episodes(self, episodes: List[Path]) -> int:
        """Archive processed episodes to .mem/archive/."""
        archive_base = self.repo.mem_dir / "archive"
        archive_base.mkdir(parents=True, exist_ok=True)
        ts = datetime.utcnow().strftime("%Y%m%d-%H%M%S")
        archive_sub = archive_base / ts
        archive_sub.mkdir(exist_ok=True)
        count = 0
        for ep in episodes:
            try:
                safe_name = ep.name.replace("..", "_").replace("/", "_")
                dest = (archive_sub / safe_name).resolve()
                dest.relative_to(archive_base.resolve())
                shutil.move(str(ep), str(dest))
                count += 1
            except (ValueError, Exception):
                continue
        return count

    def run(
        self,
        source: Optional[str] = None,
        target: Optional[str] = None,
        model: Optional[str] = None,
    ) -> DistillerResult:
        """Run distillation pipeline."""
        source_path = Path(source) if source else self.source_dir
        if not source_path.is_absolute():
            source_path = self.repo.root / "current" / source_path
        target_path = Path(target) if target else self.target_dir
        if not target_path.is_absolute():
            target_path = self.repo.root / "current" / target_path
        self.target_dir = target_path
        if model:
            self.config.llm_model = model

        episodes = self.load_episodes_from(source_path)
        if not episodes:
            return DistillerResult(
                success=True,
                clusters_processed=0,
                facts_extracted=0,
                episodes_archived=0,
                message="No episodes to process",
            )

        clusters = self.cluster_episodes(episodes)
        if not clusters:
            return DistillerResult(
                success=True,
                clusters_processed=0,
                facts_extracted=0,
                episodes_archived=0,
                message="No clusters formed",
            )

        # Create safety branch if configured
        branch_name = None
        if self.config.create_safety_branch:
            ts = datetime.utcnow().strftime("%Y-%m-%d")
            branch_name = f"{self.config.safety_branch_prefix}{ts}"
            if not self.repo.refs.branch_exists(branch_name):
                self.repo.refs.create_branch(branch_name)
            self.repo.checkout(branch_name, force=True)

        facts_count = 0
        all_archived = []
        for cluster in clusters:
            facts = self.extract_facts(cluster)
            self.write_consolidated(cluster, facts)
            facts_count += len(facts)
            all_archived.extend(cluster.episodes)

        archived = self.archive_episodes(all_archived)

        commit_hash = None
        if facts_count > 0:
            try:
                for f in self.target_dir.glob("consolidated-*.md"):
                    rel = str(f.relative_to(self.repo.root / "current"))
                    self.repo.stage_file(rel)
                commit_hash = self.repo.commit(
                    f"distiller: consolidated {facts_count} facts from {len(episodes)} episodes",
                    {"distiller": True, "clusters": len(clusters)},
                )
            except Exception:
                pass

        return DistillerResult(
            success=True,
            clusters_processed=len(clusters),
            facts_extracted=facts_count,
            episodes_archived=archived,
            branch_created=branch_name,
            commit_hash=commit_hash,
            message=f"Processed {len(clusters)} clusters, extracted {facts_count} facts",
        )
memvcs/core/encryption.py
ADDED
@@ -0,0 +1,169 @@
"""
Encryption at rest for agmem object store.

AES-256-GCM for object payloads; key derived from passphrase via Argon2id.
Hash-then-encrypt so content-addressable paths stay based on plaintext hash.
"""

import json
import os
import secrets
from pathlib import Path
from typing import Optional, Tuple, Dict, Any, Callable

# AES-GCM and Argon2id via cryptography
try:
    from cryptography.hazmat.primitives.ciphers.aead import AESGCM
    from cryptography.hazmat.primitives.kdf.argon2 import Argon2id

    ENCRYPTION_AVAILABLE = True
except ImportError:
    ENCRYPTION_AVAILABLE = False

IV_LEN = 12
TAG_LEN = 16
KEY_LEN = 32


def _encryption_config_path(mem_dir: Path) -> Path:
    return mem_dir / "encryption.json"


def load_encryption_config(mem_dir: Path) -> Optional[Dict[str, Any]]:
    """Load encryption config (salt, time_cost, memory_cost) from .mem/encryption.json."""
    path = _encryption_config_path(mem_dir)
    if not path.exists():
        return None
    try:
        return json.loads(path.read_text())
    except Exception:
        return None


def save_encryption_config(
    mem_dir: Path,
    salt: bytes,
    time_cost: int = 3,
    memory_cost: int = 65536,
    parallelism: int = 4,
) -> Path:
    """Save encryption config; salt stored as hex. Returns config path."""
    mem_dir.mkdir(parents=True, exist_ok=True)
    path = _encryption_config_path(mem_dir)
    path.write_text(
        json.dumps(
            {
                "salt_hex": salt.hex(),
                "time_cost": time_cost,
                "memory_cost": memory_cost,
                "parallelism": parallelism,
            },
            indent=2,
        )
    )
    return path


def derive_key(
    passphrase: bytes,
    salt: bytes,
    time_cost: int = 3,
    memory_cost: int = 65536,
    parallelism: int = 4,
) -> bytes:
    """Derive 32-byte key from passphrase using Argon2id."""
    if not ENCRYPTION_AVAILABLE:
        raise RuntimeError("Encryption requires 'cryptography'")
    kdf = Argon2id(
        salt=salt,
        length=KEY_LEN,
        time_cost=time_cost,
        memory_cost=memory_cost,
        parallelism=parallelism,
    )
    return kdf.derive(passphrase)


def encrypt(plaintext: bytes, key: bytes) -> Tuple[bytes, bytes]:
    """Encrypt with AES-256-GCM. Returns (iv, ciphertext_with_tag)."""
    if not ENCRYPTION_AVAILABLE:
        raise RuntimeError("Encryption requires 'cryptography'")
    aes = AESGCM(key)
    iv = secrets.token_bytes(IV_LEN)
    ct = aes.encrypt(iv, plaintext, None)  # ct includes 16-byte tag
    return (iv, ct)


def decrypt(iv: bytes, ciphertext_with_tag: bytes, key: bytes) -> bytes:
    """Decrypt AES-256-GCM. Raises on auth failure."""
    if not ENCRYPTION_AVAILABLE:
        raise RuntimeError("Encryption requires 'cryptography'")
    aes = AESGCM(key)
    return aes.decrypt(iv, ciphertext_with_tag, None)


def init_encryption(mem_dir: Path, time_cost: int = 3, memory_cost: int = 65536) -> bytes:
    """Create new encryption config with random salt. Returns salt (caller derives key from passphrase)."""
    salt = secrets.token_bytes(16)
    save_encryption_config(mem_dir, salt, time_cost=time_cost, memory_cost=memory_cost)
    return salt


class ObjectStoreEncryptor:
    """
    Encryptor for object store payloads (compressed bytes).
    Uses AES-256-GCM; IV and tag stored with ciphertext.
    """

    def __init__(self, get_key: Callable[[], Optional[bytes]]):
        self._get_key = get_key

    def encrypt_payload(self, plaintext: bytes) -> bytes:
        """Encrypt payload. Returns iv (12) + ciphertext_with_tag."""
        key = self._get_key()
        if not key:
            raise ValueError("Encryption key not available (passphrase required)")
        iv, ct = encrypt(plaintext, key)
        return iv + ct

    def decrypt_payload(self, raw: bytes) -> bytes:
        """Decrypt payload. raw = iv (12) + ciphertext_with_tag."""
        key = self._get_key()
        if not key:
            raise ValueError("Encryption key not available (passphrase required)")
        if len(raw) < IV_LEN + TAG_LEN:
            raise ValueError("Payload too short for encrypted object")
        iv = raw[:IV_LEN]
        ct = raw[IV_LEN:]
        return decrypt(iv, ct, key)


def get_key_from_env_or_cache(
    mem_dir: Path,
    env_var: str = "AGMEM_ENCRYPTION_PASSPHRASE",
    cache_var: str = "_agmem_encryption_key_cache",
) -> Optional[bytes]:
    """Get key from env or process cache. Derives key if passphrase in env and config exists."""
    # Module-level cache for session (same process)
    import sys

    mod = sys.modules.get("memvcs.core.encryption")
    if mod and getattr(mod, cache_var, None) is not None:
        return getattr(mod, cache_var)
    passphrase = os.environ.get(env_var)
    if not passphrase:
        return None
    cfg = load_encryption_config(mem_dir)
    if not cfg:
        return None
    salt = bytes.fromhex(cfg["salt_hex"])
    key = derive_key(
        passphrase.encode() if isinstance(passphrase, str) else passphrase,
        salt,
        time_cost=cfg.get("time_cost", 3),
        memory_cost=cfg.get("memory_cost", 65536),
        parallelism=cfg.get("parallelism", 4),
    )
    if mod is not None:
        setattr(mod, cache_var, key)
    return key
memvcs/core/federated.py
ADDED
@@ -0,0 +1,86 @@
"""
Federated memory collaboration for agmem.

Agents share model updates or aggregated summaries instead of raw episodic logs.
Optional coordinator URL; optional differential privacy (Tier 3).
"""

import json
from pathlib import Path
from typing import Optional, List, Dict, Any

from .config_loader import load_agmem_config


def get_federated_config(repo_root: Path) -> Optional[Dict[str, Any]]:
    """Get federated config from repo/user config. Returns None if disabled."""
    config = load_agmem_config(repo_root)
    fed = config.get("federated") or {}
    if not fed.get("enabled"):
        return None
    url = fed.get("coordinator_url")
    if not url:
        return None
    return {
        "coordinator_url": url.rstrip("/"),
        "memory_types": fed.get("memory_types", ["episodic", "semantic"]),
    }


def produce_local_summary(repo_root: Path, memory_types: List[str]) -> Dict[str, Any]:
    """
    Produce a local summary from episodic/semantic data (no raw content).
    Returns dict suitable for sending to coordinator (e.g. topic counts, fact hashes).
    """
    current_dir = repo_root / "current"
    summary = {"memory_types": memory_types, "topics": {}, "fact_count": 0}
    for mtype in memory_types:
        d = current_dir / mtype
        if not d.exists():
            continue
        count = 0
        for f in d.rglob("*.md"):
            if f.is_file():
                count += 1
        summary["topics"][mtype] = count
        if mtype == "semantic":
            summary["fact_count"] = count
    return summary


def push_updates(repo_root: Path, summary: Dict[str, Any]) -> str:
    """Send local summary to coordinator. Returns status message."""
    cfg = get_federated_config(repo_root)
    if not cfg:
        return "Federated collaboration not configured"
    url = cfg["coordinator_url"] + "/push"
    try:
        import urllib.request

        req = urllib.request.Request(
            url,
            data=json.dumps(summary).encode(),
            headers={"Content-Type": "application/json"},
            method="POST",
        )
        with urllib.request.urlopen(req, timeout=30) as resp:
            if resp.status in (200, 201):
                return "Pushed updates to coordinator"
            return f"Coordinator returned {resp.status}"
    except Exception as e:
        return f"Push failed: {e}"


def pull_merged(repo_root: Path) -> Optional[Dict[str, Any]]:
    """Pull merged summaries from coordinator. Returns merged data or None."""
    cfg = get_federated_config(repo_root)
    if not cfg:
        return None
    url = cfg["coordinator_url"] + "/pull"
    try:
        import urllib.request

        with urllib.request.urlopen(url, timeout=30) as resp:
            return json.loads(resp.read().decode())
    except Exception:
        return None
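
A round-trip sketch under the module's own assumptions: an agmem config with `federated.enabled` and `federated.coordinator_url` set, and a coordinator exposing the /push and /pull endpoints used above. The repo path is a placeholder.

from pathlib import Path

from memvcs.core.federated import (
    get_federated_config,
    produce_local_summary,
    pull_merged,
    push_updates,
)

repo_root = Path("~/.mem-repo").expanduser()
cfg = get_federated_config(repo_root)  # None unless enabled + URL configured
if cfg:
    # Only counts leave the machine -- no raw episodic content is shared.
    summary = produce_local_summary(repo_root, cfg["memory_types"])
    # e.g. {"memory_types": [...], "topics": {"episodic": 12, "semantic": 5}, "fact_count": 5}
    print(push_updates(repo_root, summary))
    merged = pull_merged(repo_root)  # coordinator's aggregated view, or None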