agmem 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {agmem-0.1.2.dist-info → agmem-0.1.3.dist-info}/METADATA +138 -14
- {agmem-0.1.2.dist-info → agmem-0.1.3.dist-info}/RECORD +45 -26
- memvcs/cli.py +10 -0
- memvcs/commands/add.py +6 -0
- memvcs/commands/audit.py +59 -0
- memvcs/commands/clone.py +7 -0
- memvcs/commands/daemon.py +28 -0
- memvcs/commands/distill.py +16 -0
- memvcs/commands/federated.py +53 -0
- memvcs/commands/fsck.py +31 -0
- memvcs/commands/garden.py +14 -0
- memvcs/commands/gc.py +51 -0
- memvcs/commands/merge.py +55 -1
- memvcs/commands/prove.py +66 -0
- memvcs/commands/pull.py +27 -0
- memvcs/commands/resolve.py +130 -0
- memvcs/commands/verify.py +74 -23
- memvcs/core/audit.py +124 -0
- memvcs/core/consistency.py +9 -9
- memvcs/core/crypto_verify.py +280 -0
- memvcs/core/distiller.py +25 -25
- memvcs/core/encryption.py +169 -0
- memvcs/core/federated.py +86 -0
- memvcs/core/gardener.py +23 -24
- memvcs/core/ipfs_remote.py +39 -0
- memvcs/core/knowledge_graph.py +1 -0
- memvcs/core/llm/__init__.py +10 -0
- memvcs/core/llm/anthropic_provider.py +50 -0
- memvcs/core/llm/base.py +27 -0
- memvcs/core/llm/factory.py +30 -0
- memvcs/core/llm/openai_provider.py +36 -0
- memvcs/core/merge.py +36 -23
- memvcs/core/objects.py +16 -6
- memvcs/core/pack.py +92 -0
- memvcs/core/privacy_budget.py +63 -0
- memvcs/core/remote.py +38 -0
- memvcs/core/repository.py +82 -2
- memvcs/core/temporal_index.py +9 -0
- memvcs/core/trust.py +103 -0
- memvcs/core/vector_store.py +15 -1
- memvcs/core/zk_proofs.py +26 -0
- {agmem-0.1.2.dist-info → agmem-0.1.3.dist-info}/WHEEL +0 -0
- {agmem-0.1.2.dist-info → agmem-0.1.3.dist-info}/entry_points.txt +0 -0
- {agmem-0.1.2.dist-info → agmem-0.1.3.dist-info}/licenses/LICENSE +0 -0
- {agmem-0.1.2.dist-info → agmem-0.1.3.dist-info}/top_level.txt +0 -0
memvcs/core/federated.py
ADDED
@@ -0,0 +1,86 @@
+"""
+Federated memory collaboration for agmem.
+
+Agents share model updates or aggregated summaries instead of raw episodic logs.
+Optional coordinator URL; optional differential privacy (Tier 3).
+"""
+
+import json
+from pathlib import Path
+from typing import Optional, List, Dict, Any
+
+from .config_loader import load_agmem_config
+
+
+def get_federated_config(repo_root: Path) -> Optional[Dict[str, Any]]:
+    """Get federated config from repo/user config. Returns None if disabled."""
+    config = load_agmem_config(repo_root)
+    fed = config.get("federated") or {}
+    if not fed.get("enabled"):
+        return None
+    url = fed.get("coordinator_url")
+    if not url:
+        return None
+    return {
+        "coordinator_url": url.rstrip("/"),
+        "memory_types": fed.get("memory_types", ["episodic", "semantic"]),
+    }
+
+
+def produce_local_summary(repo_root: Path, memory_types: List[str]) -> Dict[str, Any]:
+    """
+    Produce a local summary from episodic/semantic data (no raw content).
+    Returns dict suitable for sending to coordinator (e.g. topic counts, fact hashes).
+    """
+    current_dir = repo_root / "current"
+    summary = {"memory_types": memory_types, "topics": {}, "fact_count": 0}
+    for mtype in memory_types:
+        d = current_dir / mtype
+        if not d.exists():
+            continue
+        count = 0
+        for f in d.rglob("*.md"):
+            if f.is_file():
+                count += 1
+        summary["topics"][mtype] = count
+        if mtype == "semantic":
+            summary["fact_count"] = count
+    return summary
+
+
+def push_updates(repo_root: Path, summary: Dict[str, Any]) -> str:
+    """Send local summary to coordinator. Returns status message."""
+    cfg = get_federated_config(repo_root)
+    if not cfg:
+        return "Federated collaboration not configured"
+    url = cfg["coordinator_url"] + "/push"
+    try:
+        import urllib.request
+
+        req = urllib.request.Request(
+            url,
+            data=json.dumps(summary).encode(),
+            headers={"Content-Type": "application/json"},
+            method="POST",
+        )
+        with urllib.request.urlopen(req, timeout=30) as resp:
+            if resp.status in (200, 201):
+                return "Pushed updates to coordinator"
+            return f"Coordinator returned {resp.status}"
+    except Exception as e:
+        return f"Push failed: {e}"
+
+
+def pull_merged(repo_root: Path) -> Optional[Dict[str, Any]]:
+    """Pull merged summaries from coordinator. Returns merged data or None."""
+    cfg = get_federated_config(repo_root)
+    if not cfg:
+        return None
+    url = cfg["coordinator_url"] + "/pull"
+    try:
+        import urllib.request
+
+        with urllib.request.urlopen(url, timeout=30) as resp:
+            return json.loads(resp.read().decode())
+    except Exception:
+        return None
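
For orientation, a minimal usage sketch of the module above; the ./memory repo root is a hypothetical path, and push_updates/pull_merged degrade to a status message or None when federation is not configured:

    from pathlib import Path
    from memvcs.core.federated import produce_local_summary, push_updates, pull_merged

    repo_root = Path("./memory")  # hypothetical repo root
    summary = produce_local_summary(repo_root, ["episodic", "semantic"])
    print(push_updates(repo_root, summary))  # "Pushed updates to coordinator" on HTTP 200/201
    merged = pull_merged(repo_root)          # dict of merged summaries, or None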
memvcs/core/gardener.py
CHANGED
@@ -284,37 +284,36 @@ class Gardener:
 
         combined = "\n---\n".join(contents)
 
-        # Try LLM summarization
-        if self.config.llm_provider
+        # Try LLM summarization (multi-provider)
+        if self.config.llm_provider and self.config.llm_model:
             try:
-
+                from .llm import get_provider
+
+                config = {
+                    "llm_provider": self.config.llm_provider,
+                    "llm_model": self.config.llm_model,
+                }
+                provider = get_provider(config=config)
+                if provider:
+                    return provider.complete(
+                        [
+                            {
+                                "role": "system",
+                                "content": "You are a helpful assistant that summarizes conversation logs into actionable insights.",
+                            },
+                            {
+                                "role": "user",
+                                "content": f"Summarize these conversation logs about '{cluster.topic}' into 2-3 key insights:\n\n{combined[:4000]}",
+                            },
+                        ],
+                        max_tokens=500,
+                    )
             except Exception:
                 pass
 
         # Fall back to simple summary
         return self._simple_summary(cluster, contents)
 
-    def _summarize_with_openai(self, content: str, topic: str) -> str:
-        """Summarize using OpenAI API."""
-        import openai
-
-        response = openai.chat.completions.create(
-            model=self.config.llm_model or "gpt-3.5-turbo",
-            messages=[
-                {
-                    "role": "system",
-                    "content": "You are a helpful assistant that summarizes conversation logs into actionable insights.",
-                },
-                {
-                    "role": "user",
-                    "content": f"Summarize these conversation logs about '{topic}' into 2-3 key insights:\n\n{content[:4000]}",
-                },
-            ],
-            max_tokens=500,
-        )
-
-        return response.choices[0].message.content
-
     def _simple_summary(self, cluster: EpisodeCluster, contents: List[str]) -> str:
         """Generate a simple summary without LLM."""
         return f"""# Insights: {cluster.topic.title()}
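
The Gardener's summarization path can be exercised directly through the same factory; a minimal sketch, assuming the optional openai package is installed, OPENAI_API_KEY is set, and get_provider is re-exported from memvcs.core.llm by the new llm/__init__.py:

    from memvcs.core.llm import get_provider

    provider = get_provider(config={"llm_provider": "openai", "llm_model": "gpt-3.5-turbo"})
    text = provider.complete(
        [{"role": "user", "content": "Summarize: the agent pinned numpy to fix a flaky test."}],
        max_tokens=100,
    )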
memvcs/core/ipfs_remote.py
ADDED
@@ -0,0 +1,39 @@
+"""
+IPFS remote for agmem (stub).
+
+Push/pull via CIDs; pinning; gateway fallback when daemon unavailable.
+Requires optional ipfs extra (ipfshttpclient or gateway requests).
+"""
+
+from pathlib import Path
+from typing import Optional, Set
+
+from .objects import ObjectStore
+from .remote import _collect_objects_from_commit
+
+
+def push_to_ipfs(
+    objects_dir: Path,
+    branch: str,
+    commit_hash: str,
+    gateway_url: str = "https://ipfs.io",
+) -> Optional[str]:
+    """Push branch objects to IPFS and return root CID. Stub: returns None until IPFS client added."""
+    return None
+
+
+def pull_from_ipfs(
+    objects_dir: Path,
+    cid: str,
+    gateway_url: str = "https://ipfs.io",
+) -> bool:
+    """Pull objects by CID from IPFS into objects_dir. Stub: returns False until IPFS client added."""
+    return False
+
+
+def parse_ipfs_url(url: str) -> Optional[str]:
+    """Parse ipfs://<cid> or ipfs://<cid>/path. Returns CID or None."""
+    if not url.startswith("ipfs://"):
+        return None
+    rest = url[7:].lstrip("/")
+    return rest.split("/")[0] or None
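
Only parse_ipfs_url has real behavior so far; for example (the CID is illustrative):

    parse_ipfs_url("ipfs://QmYwAPJzv5CZsnA625s3Xf2nemtYgPpHdWEz79ojWnPbdG/notes.md")
    # -> "QmYwAPJzv5CZsnA625s3Xf2nemtYgPpHdWEz79ojWnPbdG"
    parse_ipfs_url("https://example.com/x")  # -> None (not an ipfs:// URL)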
memvcs/core/knowledge_graph.py
CHANGED
@@ -84,6 +84,7 @@ class KnowledgeGraphBuilder:
         1. Wikilinks: [[filename]] references
         2. Semantic similarity: Using embeddings
         3. Shared tags: Files with common tags
+        4. Co-occurrence: Facts in same episodic session (optional)
         """
 
         # Pattern for wikilinks: [[target]] or [[target|display text]]
memvcs/core/llm/anthropic_provider.py
ADDED
@@ -0,0 +1,50 @@
+"""Anthropic (Claude) LLM provider."""
+
+import os
+from typing import Optional, List, Dict, Any
+
+from .base import LLMProvider
+
+
+class AnthropicProvider(LLMProvider):
+    """Anthropic Claude provider. API key from ANTHROPIC_API_KEY."""
+
+    def __init__(self, model: Optional[str] = None):
+        self._model = model or os.environ.get("ANTHROPIC_MODEL", "claude-3-haiku-20240307")
+
+    @property
+    def name(self) -> str:
+        return "anthropic"
+
+    def complete(
+        self,
+        messages: List[Dict[str, str]],
+        *,
+        model: Optional[str] = None,
+        max_tokens: int = 1024,
+        **kwargs: Any,
+    ) -> str:
+        try:
+            import anthropic
+        except ImportError:
+            raise RuntimeError("Anthropic provider requires: pip install anthropic")
+        m = model or self._model
+        client = anthropic.Anthropic()
+        # Convert OpenAI-style messages to Anthropic (system + user/assistant)
+        system = ""
+        anthropic_messages = []
+        for msg in messages:
+            role = msg.get("role", "user")
+            content = msg.get("content", "")
+            if role == "system":
+                system = content
+            else:
+                anthropic_messages.append({"role": role, "content": content})
+        resp = client.messages.create(
+            model=m,
+            max_tokens=max_tokens,
+            system=system or None,
+            messages=anthropic_messages,
+            **kwargs,
+        )
+        return resp.content[0].text if resp.content else ""
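
For illustration, the conversion loop above turns an OpenAI-style history (hypothetical input) into Anthropic's shape, where the system prompt is a top-level parameter rather than a message:

    messages = [
        {"role": "system", "content": "Be terse."},
        {"role": "user", "content": "Summarize the session log."},
    ]
    # yields: system = "Be terse."
    # anthropic_messages = [{"role": "user", "content": "Summarize the session log."}]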
memvcs/core/llm/base.py
ADDED
@@ -0,0 +1,27 @@
+"""
+LLM provider interface for agmem.
+"""
+
+from abc import ABC, abstractmethod
+from typing import Optional, List, Dict, Any
+
+
+class LLMProvider(ABC):
+    """Abstract LLM provider (complete(messages) -> text)."""
+
+    @abstractmethod
+    def complete(
+        self,
+        messages: List[Dict[str, str]],
+        *,
+        model: Optional[str] = None,
+        max_tokens: int = 1024,
+        **kwargs: Any,
+    ) -> str:
+        """Return completion text for messages. Raises on failure."""
+        pass
+
+    @property
+    def name(self) -> str:
+        """Provider name (e.g. openai, anthropic)."""
+        return "base"
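
Any backend can plug in by subclassing this interface; a minimal sketch (EchoProvider is a hypothetical test double, not part of the package):

    from typing import Any, Dict, List, Optional

    from memvcs.core.llm.base import LLMProvider


    class EchoProvider(LLMProvider):
        """Test double: echoes the last user message instead of calling an API."""

        @property
        def name(self) -> str:
            return "echo"

        def complete(
            self,
            messages: List[Dict[str, str]],
            *,
            model: Optional[str] = None,
            max_tokens: int = 1024,
            **kwargs: Any,
        ) -> str:
            users = [m["content"] for m in messages if m.get("role") == "user"]
            return users[-1] if users else ""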
memvcs/core/llm/factory.py
ADDED
@@ -0,0 +1,30 @@
+"""LLM provider factory: select by config or env."""
+
+import os
+from typing import Optional, Dict, Any
+
+from .base import LLMProvider
+from .openai_provider import OpenAIProvider
+from .anthropic_provider import AnthropicProvider
+
+
+def get_provider(
+    provider_name: Optional[str] = None,
+    model: Optional[str] = None,
+    config: Optional[Dict[str, Any]] = None,
+) -> Optional[LLMProvider]:
+    """
+    Return LLM provider by name. Config may have llm_provider, llm_model.
+    Env: AGMEM_LLM_PROVIDER, OPENAI_API_KEY, ANTHROPIC_API_KEY.
+    """
+    name = (
+        provider_name
+        or (config or {}).get("llm_provider")
+        or os.environ.get("AGMEM_LLM_PROVIDER", "openai")
+    )
+    m = model or (config or {}).get("llm_model")
+    if name == "openai":
+        return OpenAIProvider(model=m)
+    if name == "anthropic":
+        return AnthropicProvider(model=m)
+    return OpenAIProvider(model=m)
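
Selection precedence follows directly from the code above: explicit argument, then config, then the AGMEM_LLM_PROVIDER environment variable, with OpenAI as the fallback (including for unknown names):

    import os
    from memvcs.core.llm.factory import get_provider

    os.environ["AGMEM_LLM_PROVIDER"] = "anthropic"
    get_provider().name                                    # "anthropic" (from env)
    get_provider(provider_name="openai").name              # "openai" (argument wins)
    get_provider(config={"llm_provider": "openai"}).name   # "openai" (config beats env)
    get_provider(provider_name="unknown").name             # "openai" (fallback)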
memvcs/core/llm/openai_provider.py
ADDED
@@ -0,0 +1,36 @@
+"""OpenAI LLM provider."""
+
+import os
+from typing import Optional, List, Dict, Any
+
+from .base import LLMProvider
+
+
+class OpenAIProvider(LLMProvider):
+    """OpenAI (GPT) provider. API key from OPENAI_API_KEY."""
+
+    def __init__(self, model: Optional[str] = None):
+        self._model = model or os.environ.get("OPENAI_MODEL", "gpt-3.5-turbo")
+
+    @property
+    def name(self) -> str:
+        return "openai"
+
+    def complete(
+        self,
+        messages: List[Dict[str, str]],
+        *,
+        model: Optional[str] = None,
+        max_tokens: int = 1024,
+        **kwargs: Any,
+    ) -> str:
+        import openai
+
+        m = model or self._model
+        response = openai.chat.completions.create(
+            model=m,
+            messages=messages,
+            max_tokens=max_tokens,
+            **kwargs,
+        )
+        return response.choices[0].message.content or ""
memvcs/core/merge.py
CHANGED
@@ -33,6 +33,8 @@ class Conflict:
     ours_content: Optional[str]
     theirs_content: Optional[str]
     message: str
+    memory_type: Optional[str] = None  # episodic, semantic, procedural
+    payload: Optional[Dict[str, Any]] = None  # type-specific (e.g. fact strings, step diffs)
 
 
 @dataclass
@@ -256,31 +258,31 @@ class MergeEngine:
         ours_content: Optional[str],
         theirs_content: Optional[str],
     ) -> Tuple[str, bool]:
-        """LLM arbitration: call LLM to resolve contradiction."""
+        """LLM arbitration: call LLM to resolve contradiction (multi-provider)."""
         try:
-            import
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            return merged, False
+            from .llm import get_provider
+
+            provider = get_provider()
+            if provider:
+                merged = provider.complete(
+                    [
+                        {
+                            "role": "system",
+                            "content": "Resolve the contradiction between two memory versions. Output the merged content that best reflects the combined truth.",
+                        },
+                        {
+                            "role": "user",
+                            "content": f"OURS:\n{ours_content}\n\nTHEIRS:\n{theirs_content}",
+                        },
+                    ],
+                    max_tokens=1000,
+                )
+                return (merged or "").strip(), False
         except Exception:
-
-
-
+            pass
+        # Fallback to conflict markers
+        merged = f"<<<<<<< OURS\n{ours_content}\n=======\n{theirs_content}\n>>>>>>> THEIRS"
+        return merged, True
 
     def merge_procedural(
         self,
@@ -398,6 +400,15 @@ class MergeEngine:
 
         # Record conflict if any
        if had_conflict:
+            payload = {}
+            if ours_content:
+                payload["ours_preview"] = (
+                    ours_content[:300] if len(ours_content) > 300 else ours_content
+                )
+            if theirs_content:
+                payload["theirs_preview"] = (
+                    theirs_content[:300] if len(theirs_content) > 300 else theirs_content
+                )
             conflicts.append(
                 Conflict(
                     path=path,
@@ -405,6 +416,8 @@ class MergeEngine:
                     ours_content=ours_content,
                     theirs_content=theirs_content,
                     message=f"{strategy.value} merge conflict in {path}",
+                    memory_type=strategy.value,
+                    payload=payload or None,
                 )
             )
 
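
When no provider is available (or the call fails), arbitration degrades to Git-style markers and flags the conflict. Per the f-string above, with the illustrative values ours_content = "Deploys go out Mondays" and theirs_content = "Deploys go out Fridays", the returned (merged, True) text is:

    <<<<<<< OURS
    Deploys go out Mondays
    =======
    Deploys go out Fridays
    >>>>>>> THEIRS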
memvcs/core/objects.py
CHANGED
@@ -24,8 +24,9 @@ def _valid_object_hash(hash_id: str) -> bool:
 class ObjectStore:
     """Content-addressable object storage system."""
 
-    def __init__(self, objects_dir: Path):
+    def __init__(self, objects_dir: Path, encryptor: Optional[Any] = None):
         self.objects_dir = Path(objects_dir)
+        self._encryptor = encryptor
         self._ensure_directories()
 
     def _ensure_directories(self):
@@ -68,11 +69,15 @@ class ObjectStore:
         # Create directory if needed
         obj_path.parent.mkdir(parents=True, exist_ok=True)
 
-        # Compress and
+        # Compress and optionally encrypt
         header = f"{obj_type} {len(content)}\0".encode()
         full_content = header + content
         compressed = zlib.compress(full_content)
-
+        if self._encryptor:
+            try:
+                compressed = self._encryptor.encrypt_payload(compressed)
+            except ValueError:
+                pass  # no key; store plain compressed (legacy behavior)
         obj_path.write_bytes(compressed)
         return hash_id
 
@@ -92,9 +97,14 @@ class ObjectStore:
         if not obj_path.exists():
             return None
 
-
-
-
+        raw = obj_path.read_bytes()
+        # Optionally decrypt (iv+tag minimum 12+16 bytes)
+        if self._encryptor and len(raw) >= 12 + 16:
+            try:
+                raw = self._encryptor.decrypt_payload(raw)
+            except Exception:
+                pass  # legacy plain compressed
+        full_content = zlib.decompress(raw)
 
         # Parse header
         null_idx = full_content.index(b"\0")
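
The encryptor's contract is only implied here: encrypt_payload/decrypt_payload over the compressed bytes, output carrying at least a 12-byte IV plus a 16-byte tag, and ValueError when no key is loaded. A minimal AES-GCM sketch that satisfies that shape, using the cryptography package (an illustration, not the shipped memvcs/core/encryption.py):

    import os
    from typing import Optional

    from cryptography.hazmat.primitives.ciphers.aead import AESGCM


    class GCMEncryptor:
        """Sketch: payload layout is 12-byte nonce || ciphertext || 16-byte GCM tag."""

        def __init__(self, key: Optional[bytes]):
            self._key = key  # None models the "no key loaded" case

        def encrypt_payload(self, data: bytes) -> bytes:
            if self._key is None:
                raise ValueError("no encryption key loaded")  # caller stores plain bytes
            nonce = os.urandom(12)
            return nonce + AESGCM(self._key).encrypt(nonce, data, None)

        def decrypt_payload(self, blob: bytes) -> bytes:
            if self._key is None:
                raise ValueError("no encryption key loaded")
            return AESGCM(self._key).decrypt(blob[:12], blob[12:], None)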
memvcs/core/pack.py
ADDED
@@ -0,0 +1,92 @@
+"""
+Pack files and garbage collection for agmem.
+
+Pack: collect loose objects into single file + index. GC: delete unreachable objects, repack.
+"""
+
+import json
+import zlib
+from pathlib import Path
+from typing import Set, Dict, List, Optional, Tuple
+
+from .objects import ObjectStore
+from .refs import RefsManager
+
+
+def _pack_dir(objects_dir: Path) -> Path:
+    return objects_dir / "pack"
+
+
+def list_loose_objects(objects_dir: Path) -> Set[str]:
+    """List all loose object hashes (blob, tree, commit, tag)."""
+    hashes = set()
+    for obj_type in ["blob", "tree", "commit", "tag"]:
+        type_dir = objects_dir / obj_type
+        if not type_dir.exists():
+            continue
+        for prefix_dir in type_dir.iterdir():
+            if not prefix_dir.is_dir():
+                continue
+            for f in prefix_dir.iterdir():
+                hash_id = prefix_dir.name + f.name
+                hashes.add(hash_id)
+    return hashes
+
+
+def reachable_from_refs(mem_dir: Path, store: ObjectStore, gc_prune_days: int = 90) -> Set[str]:
+    """Collect all object hashes reachable from branches, tags, and reflog (within prune window)."""
+    refs = RefsManager(mem_dir)
+    reachable = set()
+    # Branch tips
+    for b in refs.list_branches():
+        ch = refs.get_branch_commit(b)
+        if ch:
+            reachable.update(_collect_from_commit(store, ch))
+    # Tags
+    for t in refs.list_tags():
+        ch = refs.get_tag_commit(t)
+        if ch:
+            reachable.update(_collect_from_commit(store, ch))
+    # Reflog (simplified: just HEAD recent)
+    try:
+        log = refs.get_reflog("HEAD", max_count=1000)
+        for e in log:
+            h = e.get("hash")
+            if h:
+                reachable.update(_collect_from_commit(store, h))
+    except Exception:
+        pass
+    return reachable
+
+
+def _collect_from_commit(store: ObjectStore, commit_hash: str) -> Set[str]:
+    """Collect all object hashes reachable from a commit."""
+    from .remote import _collect_objects_from_commit
+
+    return _collect_objects_from_commit(store, commit_hash)
+
+
+def run_gc(
+    mem_dir: Path, store: ObjectStore, gc_prune_days: int = 90, dry_run: bool = False
+) -> Tuple[int, int]:
+    """
+    Garbage collect: delete unreachable loose objects.
+    Returns (deleted_count, bytes_freed). dry_run: only report, do not delete.
+    """
+    loose = list_loose_objects(mem_dir / "objects")
+    reachable = reachable_from_refs(mem_dir, store, gc_prune_days)
+    to_delete = loose - reachable
+    freed = 0
+    for hash_id in to_delete:
+        # Resolve type from path
+        for obj_type in ["blob", "tree", "commit", "tag"]:
+            p = store.objects_dir / obj_type / hash_id[:2] / hash_id[2:]
+            if p.exists():
+                if not dry_run:
+                    size = p.stat().st_size
+                    p.unlink()
+                    freed += size
+                else:
+                    freed += p.stat().st_size
+                break
+    return (len(to_delete), freed)
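
A dry-run sketch of the GC entry point; the ./memory/.mem metadata path is an assumption about the repo layout:

    from pathlib import Path
    from memvcs.core.objects import ObjectStore
    from memvcs.core.pack import run_gc

    mem_dir = Path("./memory/.mem")  # hypothetical metadata dir
    store = ObjectStore(mem_dir / "objects")
    count, freed = run_gc(mem_dir, store, dry_run=True)  # report only, delete nothing
    print(f"gc would delete {count} objects ({freed} bytes)")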
memvcs/core/privacy_budget.py
ADDED
@@ -0,0 +1,63 @@
+"""
+Differential privacy budget tracking for agmem.
+
+Per-repo epsilon spent; block when budget exceeded.
+"""
+
+import json
+import math
+from pathlib import Path
+from typing import Optional, Tuple
+
+
+def _budget_path(mem_dir: Path) -> Path:
+    return mem_dir / "privacy_budget.json"
+
+
+def load_budget(mem_dir: Path) -> Tuple[float, float, float]:
+    """Load (epsilon_spent, max_epsilon, delta). Returns (0, max, delta) if no file."""
+    path = _budget_path(mem_dir)
+    if not path.exists():
+        config = mem_dir / "config.json"
+        max_eps = 1.0
+        delta = 1e-5
+        if config.exists():
+            try:
+                c = json.loads(config.read_text())
+                dp = c.get("differential_privacy", {})
+                max_eps = float(dp.get("max_epsilon", 1.0))
+                delta = float(dp.get("delta", 1e-5))
+            except Exception:
+                pass
+        return (0.0, max_eps, delta)
+    try:
+        data = json.loads(path.read_text())
+        return (
+            float(data.get("epsilon_spent", 0)),
+            float(data.get("max_epsilon", 1.0)),
+            float(data.get("delta", 1e-5)),
+        )
+    except Exception:
+        return (0.0, 1.0, 1e-5)
+
+
+def spend_epsilon(mem_dir: Path, epsilon: float, max_epsilon: Optional[float] = None) -> bool:
+    """Record epsilon spent. Returns False if budget would be exceeded."""
+    spent, max_eps, delta = load_budget(mem_dir)
+    if max_epsilon is not None:
+        max_eps = max_epsilon
+    if spent + epsilon > max_eps:
+        return False
+    mem_dir.mkdir(parents=True, exist_ok=True)
+    path = _budget_path(mem_dir)
+    data = {"epsilon_spent": spent + epsilon, "max_epsilon": max_eps, "delta": delta}
+    path.write_text(json.dumps(data, indent=2))
+    return True
+
+
+def add_noise(value: float, sensitivity: float, epsilon: float, delta: float = 1e-5) -> float:
+    """Add Gaussian noise for (epsilon, delta)-DP. sigma = sensitivity * sqrt(2*ln(1.25/delta)) / epsilon."""
+    import random
+
+    sigma = sensitivity * math.sqrt(2 * math.log(1.25 / delta)) / epsilon
+    return value + random.gauss(0, sigma)
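
As a worked example of the Gaussian mechanism above: with sensitivity 1, epsilon 0.5, and delta 1e-5, sigma = 1 * sqrt(2 * ln(1.25 / 1e-5)) / 0.5 ≈ 9.7, so a true count of 42 is released as roughly 42 ± 10. A budget-guarded release sketch (mem_dir is a hypothetical metadata path):

    from pathlib import Path
    from memvcs.core.privacy_budget import add_noise, spend_epsilon

    mem_dir = Path("./memory/.mem")  # hypothetical metadata dir
    eps = 0.5
    if spend_epsilon(mem_dir, eps):  # returns False once the per-repo budget is exhausted
        noisy = add_noise(42.0, sensitivity=1.0, epsilon=eps)
    else:
        raise RuntimeError("privacy budget exceeded; refusing to release")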