agmem 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. {agmem-0.1.2.dist-info → agmem-0.1.3.dist-info}/METADATA +138 -14
  2. {agmem-0.1.2.dist-info → agmem-0.1.3.dist-info}/RECORD +45 -26
  3. memvcs/cli.py +10 -0
  4. memvcs/commands/add.py +6 -0
  5. memvcs/commands/audit.py +59 -0
  6. memvcs/commands/clone.py +7 -0
  7. memvcs/commands/daemon.py +28 -0
  8. memvcs/commands/distill.py +16 -0
  9. memvcs/commands/federated.py +53 -0
  10. memvcs/commands/fsck.py +31 -0
  11. memvcs/commands/garden.py +14 -0
  12. memvcs/commands/gc.py +51 -0
  13. memvcs/commands/merge.py +55 -1
  14. memvcs/commands/prove.py +66 -0
  15. memvcs/commands/pull.py +27 -0
  16. memvcs/commands/resolve.py +130 -0
  17. memvcs/commands/verify.py +74 -23
  18. memvcs/core/audit.py +124 -0
  19. memvcs/core/consistency.py +9 -9
  20. memvcs/core/crypto_verify.py +280 -0
  21. memvcs/core/distiller.py +25 -25
  22. memvcs/core/encryption.py +169 -0
  23. memvcs/core/federated.py +86 -0
  24. memvcs/core/gardener.py +23 -24
  25. memvcs/core/ipfs_remote.py +39 -0
  26. memvcs/core/knowledge_graph.py +1 -0
  27. memvcs/core/llm/__init__.py +10 -0
  28. memvcs/core/llm/anthropic_provider.py +50 -0
  29. memvcs/core/llm/base.py +27 -0
  30. memvcs/core/llm/factory.py +30 -0
  31. memvcs/core/llm/openai_provider.py +36 -0
  32. memvcs/core/merge.py +36 -23
  33. memvcs/core/objects.py +16 -6
  34. memvcs/core/pack.py +92 -0
  35. memvcs/core/privacy_budget.py +63 -0
  36. memvcs/core/remote.py +38 -0
  37. memvcs/core/repository.py +82 -2
  38. memvcs/core/temporal_index.py +9 -0
  39. memvcs/core/trust.py +103 -0
  40. memvcs/core/vector_store.py +15 -1
  41. memvcs/core/zk_proofs.py +26 -0
  42. {agmem-0.1.2.dist-info → agmem-0.1.3.dist-info}/WHEEL +0 -0
  43. {agmem-0.1.2.dist-info → agmem-0.1.3.dist-info}/entry_points.txt +0 -0
  44. {agmem-0.1.2.dist-info → agmem-0.1.3.dist-info}/licenses/LICENSE +0 -0
  45. {agmem-0.1.2.dist-info → agmem-0.1.3.dist-info}/top_level.txt +0 -0
memvcs/core/federated.py ADDED
@@ -0,0 +1,86 @@
+"""
+Federated memory collaboration for agmem.
+
+Agents share model updates or aggregated summaries instead of raw episodic logs.
+Optional coordinator URL; optional differential privacy (Tier 3).
+"""
+
+import json
+from pathlib import Path
+from typing import Optional, List, Dict, Any
+
+from .config_loader import load_agmem_config
+
+
+def get_federated_config(repo_root: Path) -> Optional[Dict[str, Any]]:
+    """Get federated config from repo/user config. Returns None if disabled."""
+    config = load_agmem_config(repo_root)
+    fed = config.get("federated") or {}
+    if not fed.get("enabled"):
+        return None
+    url = fed.get("coordinator_url")
+    if not url:
+        return None
+    return {
+        "coordinator_url": url.rstrip("/"),
+        "memory_types": fed.get("memory_types", ["episodic", "semantic"]),
+    }
+
+
+def produce_local_summary(repo_root: Path, memory_types: List[str]) -> Dict[str, Any]:
+    """
+    Produce a local summary from episodic/semantic data (no raw content).
+    Returns dict suitable for sending to coordinator (e.g. topic counts, fact hashes).
+    """
+    current_dir = repo_root / "current"
+    summary = {"memory_types": memory_types, "topics": {}, "fact_count": 0}
+    for mtype in memory_types:
+        d = current_dir / mtype
+        if not d.exists():
+            continue
+        count = 0
+        for f in d.rglob("*.md"):
+            if f.is_file():
+                count += 1
+        summary["topics"][mtype] = count
+        if mtype == "semantic":
+            summary["fact_count"] = count
+    return summary
+
+
+def push_updates(repo_root: Path, summary: Dict[str, Any]) -> str:
+    """Send local summary to coordinator. Returns status message."""
+    cfg = get_federated_config(repo_root)
+    if not cfg:
+        return "Federated collaboration not configured"
+    url = cfg["coordinator_url"] + "/push"
+    try:
+        import urllib.request
+
+        req = urllib.request.Request(
+            url,
+            data=json.dumps(summary).encode(),
+            headers={"Content-Type": "application/json"},
+            method="POST",
+        )
+        with urllib.request.urlopen(req, timeout=30) as resp:
+            if resp.status in (200, 201):
+                return "Pushed updates to coordinator"
+            return f"Coordinator returned {resp.status}"
+    except Exception as e:
+        return f"Push failed: {e}"
+
+
+def pull_merged(repo_root: Path) -> Optional[Dict[str, Any]]:
+    """Pull merged summaries from coordinator. Returns merged data or None."""
+    cfg = get_federated_config(repo_root)
+    if not cfg:
+        return None
+    url = cfg["coordinator_url"] + "/pull"
+    try:
+        import urllib.request
+
+        with urllib.request.urlopen(url, timeout=30) as resp:
+            return json.loads(resp.read().decode())
+    except Exception:
+        return None
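Nothing in this module ships raw memory content: the coordinator only ever sees per-type file counts. A minimal usage sketch, assuming a repo rooted at ./agent-repo with federated.enabled and federated.coordinator_url set in its agmem config (the path and config values are hypothetical):

from pathlib import Path
from memvcs.core import federated

repo_root = Path("./agent-repo")  # hypothetical repo location
cfg = federated.get_federated_config(repo_root)
if cfg:
    summary = federated.produce_local_summary(repo_root, cfg["memory_types"])
    print(federated.push_updates(repo_root, summary))  # "Pushed updates to coordinator" on 200/201
    merged = federated.pull_merged(repo_root)          # dict from coordinator, or None on any failure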
memvcs/core/gardener.py CHANGED
@@ -284,37 +284,36 @@ class Gardener:
 
         combined = "\n---\n".join(contents)
 
-        # Try LLM summarization
-        if self.config.llm_provider == "openai" and self.config.llm_model:
+        # Try LLM summarization (multi-provider)
+        if self.config.llm_provider and self.config.llm_model:
             try:
-                return self._summarize_with_openai(combined, cluster.topic)
+                from .llm import get_provider
+
+                config = {
+                    "llm_provider": self.config.llm_provider,
+                    "llm_model": self.config.llm_model,
+                }
+                provider = get_provider(config=config)
+                if provider:
+                    return provider.complete(
+                        [
+                            {
+                                "role": "system",
+                                "content": "You are a helpful assistant that summarizes conversation logs into actionable insights.",
+                            },
+                            {
+                                "role": "user",
+                                "content": f"Summarize these conversation logs about '{cluster.topic}' into 2-3 key insights:\n\n{combined[:4000]}",
+                            },
+                        ],
+                        max_tokens=500,
+                    )
             except Exception:
                 pass
 
         # Fall back to simple summary
         return self._simple_summary(cluster, contents)
 
-    def _summarize_with_openai(self, content: str, topic: str) -> str:
-        """Summarize using OpenAI API."""
-        import openai
-
-        response = openai.chat.completions.create(
-            model=self.config.llm_model or "gpt-3.5-turbo",
-            messages=[
-                {
-                    "role": "system",
-                    "content": "You are a helpful assistant that summarizes conversation logs into actionable insights.",
-                },
-                {
-                    "role": "user",
-                    "content": f"Summarize these conversation logs about '{topic}' into 2-3 key insights:\n\n{content[:4000]}",
-                },
-            ],
-            max_tokens=500,
-        )
-
-        return response.choices[0].message.content
-
     def _simple_summary(self, cluster: EpisodeCluster, contents: List[str]) -> str:
         """Generate a simple summary without LLM."""
         return f"""# Insights: {cluster.topic.title()}
memvcs/core/ipfs_remote.py ADDED
@@ -0,0 +1,39 @@
+"""
+IPFS remote for agmem (stub).
+
+Push/pull via CIDs; pinning; gateway fallback when daemon unavailable.
+Requires optional ipfs extra (ipfshttpclient or gateway requests).
+"""
+
+from pathlib import Path
+from typing import Optional, Set
+
+from .objects import ObjectStore
+from .remote import _collect_objects_from_commit
+
+
+def push_to_ipfs(
+    objects_dir: Path,
+    branch: str,
+    commit_hash: str,
+    gateway_url: str = "https://ipfs.io",
+) -> Optional[str]:
+    """Push branch objects to IPFS and return root CID. Stub: returns None until IPFS client added."""
+    return None
+
+
+def pull_from_ipfs(
+    objects_dir: Path,
+    cid: str,
+    gateway_url: str = "https://ipfs.io",
+) -> bool:
+    """Pull objects by CID from IPFS into objects_dir. Stub: returns False until IPFS client added."""
+    return False
+
+
+def parse_ipfs_url(url: str) -> Optional[str]:
+    """Parse ipfs://<cid> or ipfs://<cid>/path. Returns CID or None."""
+    if not url.startswith("ipfs://"):
+        return None
+    rest = url[7:].lstrip("/")
+    return rest.split("/")[0] or None
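Only the URL parser does real work so far; push and pull are placeholders pending an IPFS client. Expected parser behavior (the CID below is a made-up placeholder):

parse_ipfs_url("ipfs://QmExampleCid/notes/today.md")  # -> "QmExampleCid"
parse_ipfs_url("ipfs://")                             # -> None (empty CID)
parse_ipfs_url("https://ipfs.io/ipfs/QmExampleCid")   # -> None (not an ipfs:// URL)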
memvcs/core/knowledge_graph.py CHANGED
@@ -84,6 +84,7 @@ class KnowledgeGraphBuilder:
         1. Wikilinks: [[filename]] references
         2. Semantic similarity: Using embeddings
         3. Shared tags: Files with common tags
+        4. Co-occurrence: Facts in same episodic session (optional)
         """
 
         # Pattern for wikilinks: [[target]] or [[target|display text]]
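The context lines reference the wikilink pattern [[target]] / [[target|display text]]; a regex along these lines would match it (a sketch only, the module's actual pattern is not shown in this hunk):

import re

WIKILINK = re.compile(r"\[\[([^\]|]+)(?:\|[^\]]+)?\]\]")
WIKILINK.findall("See [[notes]] and [[plan|the plan]]")  # -> ['notes', 'plan']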
memvcs/core/llm/__init__.py ADDED
@@ -0,0 +1,10 @@
+"""
+Multi-provider LLM integration for agmem.
+
+Abstract interface; implementations: OpenAI, Anthropic, Ollama, custom HTTP.
+"""
+
+from .base import LLMProvider
+from .factory import get_provider
+
+__all__ = ["LLMProvider", "get_provider"]
memvcs/core/llm/anthropic_provider.py ADDED
@@ -0,0 +1,50 @@
+"""Anthropic (Claude) LLM provider."""
+
+import os
+from typing import Optional, List, Dict, Any
+
+from .base import LLMProvider
+
+
+class AnthropicProvider(LLMProvider):
+    """Anthropic Claude provider. API key from ANTHROPIC_API_KEY."""
+
+    def __init__(self, model: Optional[str] = None):
+        self._model = model or os.environ.get("ANTHROPIC_MODEL", "claude-3-haiku-20240307")
+
+    @property
+    def name(self) -> str:
+        return "anthropic"
+
+    def complete(
+        self,
+        messages: List[Dict[str, str]],
+        *,
+        model: Optional[str] = None,
+        max_tokens: int = 1024,
+        **kwargs: Any,
+    ) -> str:
+        try:
+            import anthropic
+        except ImportError:
+            raise RuntimeError("Anthropic provider requires: pip install anthropic")
+        m = model or self._model
+        client = anthropic.Anthropic()
+        # Convert OpenAI-style messages to Anthropic (system + user/assistant)
+        system = ""
+        anthropic_messages = []
+        for msg in messages:
+            role = msg.get("role", "user")
+            content = msg.get("content", "")
+            if role == "system":
+                system = content
+            else:
+                anthropic_messages.append({"role": role, "content": content})
+        resp = client.messages.create(
+            model=m,
+            max_tokens=max_tokens,
+            system=system or None,
+            messages=anthropic_messages,
+            **kwargs,
+        )
+        return resp.content[0].text if resp.content else ""
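The conversion loop is the key difference from the OpenAI provider: Anthropic takes the system prompt as a top-level parameter rather than as a message. Given the input below, the loop above yields:

msgs = [
    {"role": "system", "content": "Be terse."},
    {"role": "user", "content": "hi"},
]
# after the loop: system == "Be terse."
#                 anthropic_messages == [{"role": "user", "content": "hi"}]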
memvcs/core/llm/base.py ADDED
@@ -0,0 +1,27 @@
+"""
+LLM provider interface for agmem.
+"""
+
+from abc import ABC, abstractmethod
+from typing import Optional, List, Dict, Any
+
+
+class LLMProvider(ABC):
+    """Abstract LLM provider (complete(messages) -> text)."""
+
+    @abstractmethod
+    def complete(
+        self,
+        messages: List[Dict[str, str]],
+        *,
+        model: Optional[str] = None,
+        max_tokens: int = 1024,
+        **kwargs: Any,
+    ) -> str:
+        """Return completion text for messages. Raises on failure."""
+        pass
+
+    @property
+    def name(self) -> str:
+        """Provider name (e.g. openai, anthropic)."""
+        return "base"
memvcs/core/llm/factory.py ADDED
@@ -0,0 +1,30 @@
+"""LLM provider factory: select by config or env."""
+
+import os
+from typing import Optional, Dict, Any
+
+from .base import LLMProvider
+from .openai_provider import OpenAIProvider
+from .anthropic_provider import AnthropicProvider
+
+
+def get_provider(
+    provider_name: Optional[str] = None,
+    model: Optional[str] = None,
+    config: Optional[Dict[str, Any]] = None,
+) -> Optional[LLMProvider]:
+    """
+    Return LLM provider by name. Config may have llm_provider, llm_model.
+    Env: AGMEM_LLM_PROVIDER, OPENAI_API_KEY, ANTHROPIC_API_KEY.
+    """
+    name = (
+        provider_name
+        or (config or {}).get("llm_provider")
+        or os.environ.get("AGMEM_LLM_PROVIDER", "openai")
+    )
+    m = model or (config or {}).get("llm_model")
+    if name == "openai":
+        return OpenAIProvider(model=m)
+    if name == "anthropic":
+        return AnthropicProvider(model=m)
+    return OpenAIProvider(model=m)
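Selection precedence is explicit argument, then config, then the AGMEM_LLM_PROVIDER environment variable, with OpenAI as the final default; note that unrecognized names also fall back to OpenAI rather than raising:

import os
from memvcs.core.llm import get_provider

os.environ["AGMEM_LLM_PROVIDER"] = "anthropic"
get_provider().name                                  # -> "anthropic" (env var)
get_provider("openai").name                          # -> "openai" (argument wins)
get_provider(config={"llm_provider": "other"}).name  # -> "openai" (unknown name falls back)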
memvcs/core/llm/openai_provider.py ADDED
@@ -0,0 +1,36 @@
+"""OpenAI LLM provider."""
+
+import os
+from typing import Optional, List, Dict, Any
+
+from .base import LLMProvider
+
+
+class OpenAIProvider(LLMProvider):
+    """OpenAI (GPT) provider. API key from OPENAI_API_KEY."""
+
+    def __init__(self, model: Optional[str] = None):
+        self._model = model or os.environ.get("OPENAI_MODEL", "gpt-3.5-turbo")
+
+    @property
+    def name(self) -> str:
+        return "openai"
+
+    def complete(
+        self,
+        messages: List[Dict[str, str]],
+        *,
+        model: Optional[str] = None,
+        max_tokens: int = 1024,
+        **kwargs: Any,
+    ) -> str:
+        import openai
+
+        m = model or self._model
+        response = openai.chat.completions.create(
+            model=m,
+            messages=messages,
+            max_tokens=max_tokens,
+            **kwargs,
+        )
+        return response.choices[0].message.content or ""
memvcs/core/merge.py CHANGED
@@ -33,6 +33,8 @@ class Conflict:
     ours_content: Optional[str]
     theirs_content: Optional[str]
     message: str
+    memory_type: Optional[str] = None  # episodic, semantic, procedural
+    payload: Optional[Dict[str, Any]] = None  # type-specific (e.g. fact strings, step diffs)
 
 
 @dataclass
@@ -256,31 +258,31 @@ class MergeEngine:
         ours_content: Optional[str],
         theirs_content: Optional[str],
     ) -> Tuple[str, bool]:
-        """LLM arbitration: call LLM to resolve contradiction."""
+        """LLM arbitration: call LLM to resolve contradiction (multi-provider)."""
         try:
-            import openai
-
-            response = openai.chat.completions.create(
-                model="gpt-3.5-turbo",
-                messages=[
-                    {
-                        "role": "system",
-                        "content": "Resolve the contradiction between two memory versions. "
-                        "Output the merged content that best reflects the combined truth.",
-                    },
-                    {
-                        "role": "user",
-                        "content": f"OURS:\n{ours_content}\n\nTHEIRS:\n{theirs_content}",
-                    },
-                ],
-                max_tokens=1000,
-            )
-            merged = response.choices[0].message.content.strip()
-            return merged, False
+            from .llm import get_provider
+
+            provider = get_provider()
+            if provider:
+                merged = provider.complete(
+                    [
+                        {
+                            "role": "system",
+                            "content": "Resolve the contradiction between two memory versions. Output the merged content that best reflects the combined truth.",
+                        },
+                        {
+                            "role": "user",
+                            "content": f"OURS:\n{ours_content}\n\nTHEIRS:\n{theirs_content}",
+                        },
+                    ],
+                    max_tokens=1000,
+                )
+                return (merged or "").strip(), False
         except Exception:
-            # Fallback to conflict markers
-            merged = f"<<<<<<< OURS\n{ours_content}\n=======\n{theirs_content}\n>>>>>>> THEIRS"
-            return merged, True
+            pass
+        # Fallback to conflict markers
+        merged = f"<<<<<<< OURS\n{ours_content}\n=======\n{theirs_content}\n>>>>>>> THEIRS"
+        return merged, True
 
     def merge_procedural(
         self,
@@ -398,6 +400,15 @@ class MergeEngine:
 
         # Record conflict if any
        if had_conflict:
+            payload = {}
+            if ours_content:
+                payload["ours_preview"] = (
+                    ours_content[:300] if len(ours_content) > 300 else ours_content
+                )
+            if theirs_content:
+                payload["theirs_preview"] = (
+                    theirs_content[:300] if len(theirs_content) > 300 else theirs_content
+                )
             conflicts.append(
                 Conflict(
                     path=path,
@@ -405,6 +416,8 @@ class MergeEngine:
                     ours_content=ours_content,
                     theirs_content=theirs_content,
                     message=f"{strategy.value} merge conflict in {path}",
+                    memory_type=strategy.value,
+                    payload=payload or None,
                 )
             )
 
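One small note on the preview logic above: the conditional slice is equivalent to a plain s[:300], since slicing already returns the string unchanged when it is shorter than the bound:

s = "short"
assert (s[:300] if len(s) > 300 else s) == s[:300]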
memvcs/core/objects.py CHANGED
@@ -24,8 +24,9 @@ def _valid_object_hash(hash_id: str) -> bool:
 class ObjectStore:
     """Content-addressable object storage system."""
 
-    def __init__(self, objects_dir: Path):
+    def __init__(self, objects_dir: Path, encryptor: Optional[Any] = None):
         self.objects_dir = Path(objects_dir)
+        self._encryptor = encryptor
         self._ensure_directories()
 
     def _ensure_directories(self):
@@ -68,11 +69,15 @@ class ObjectStore:
         # Create directory if needed
         obj_path.parent.mkdir(parents=True, exist_ok=True)
 
-        # Compress and store
+        # Compress and optionally encrypt
         header = f"{obj_type} {len(content)}\0".encode()
         full_content = header + content
         compressed = zlib.compress(full_content)
-
+        if self._encryptor:
+            try:
+                compressed = self._encryptor.encrypt_payload(compressed)
+            except ValueError:
+                pass  # no key; store plain compressed (legacy behavior)
         obj_path.write_bytes(compressed)
         return hash_id
 
@@ -92,9 +97,14 @@ class ObjectStore:
         if not obj_path.exists():
             return None
 
-        # Decompress and extract content
-        compressed = obj_path.read_bytes()
-        full_content = zlib.decompress(compressed)
+        raw = obj_path.read_bytes()
+        # Optionally decrypt (iv+tag minimum 12+16 bytes)
+        if self._encryptor and len(raw) >= 12 + 16:
+            try:
+                raw = self._encryptor.decrypt_payload(raw)
+            except Exception:
+                pass  # legacy plain compressed
+        full_content = zlib.decompress(raw)
 
         # Parse header
         null_idx = full_content.index(b"\0")
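The layering is compress first, then encrypt, which is what lets both paths fall back to treating old objects as plain zlib data. A sketch of the unencrypted round trip (the encryptor, with its encrypt_payload/decrypt_payload pair, lives in the new encryption.py and is not shown here):

import zlib

content = b"semantic fact"
header = b"blob %d\0" % len(content)
stored = zlib.compress(header + content)  # with a key: encryptor.encrypt_payload(stored)
full = zlib.decompress(stored)            # with a key: decrypt_payload first
assert full.split(b"\0", 1)[1] == content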
memvcs/core/pack.py ADDED
@@ -0,0 +1,92 @@
+"""
+Pack files and garbage collection for agmem.
+
+Pack: collect loose objects into single file + index. GC: delete unreachable objects, repack.
+"""
+
+import json
+import zlib
+from pathlib import Path
+from typing import Set, Dict, List, Optional, Tuple
+
+from .objects import ObjectStore
+from .refs import RefsManager
+
+
+def _pack_dir(objects_dir: Path) -> Path:
+    return objects_dir / "pack"
+
+
+def list_loose_objects(objects_dir: Path) -> Set[str]:
+    """List all loose object hashes (blob, tree, commit, tag)."""
+    hashes = set()
+    for obj_type in ["blob", "tree", "commit", "tag"]:
+        type_dir = objects_dir / obj_type
+        if not type_dir.exists():
+            continue
+        for prefix_dir in type_dir.iterdir():
+            if not prefix_dir.is_dir():
+                continue
+            for f in prefix_dir.iterdir():
+                hash_id = prefix_dir.name + f.name
+                hashes.add(hash_id)
+    return hashes
+
+
+def reachable_from_refs(mem_dir: Path, store: ObjectStore, gc_prune_days: int = 90) -> Set[str]:
+    """Collect all object hashes reachable from branches, tags, and reflog (within prune window)."""
+    refs = RefsManager(mem_dir)
+    reachable = set()
+    # Branch tips
+    for b in refs.list_branches():
+        ch = refs.get_branch_commit(b)
+        if ch:
+            reachable.update(_collect_from_commit(store, ch))
+    # Tags
+    for t in refs.list_tags():
+        ch = refs.get_tag_commit(t)
+        if ch:
+            reachable.update(_collect_from_commit(store, ch))
+    # Reflog (simplified: just HEAD recent)
+    try:
+        log = refs.get_reflog("HEAD", max_count=1000)
+        for e in log:
+            h = e.get("hash")
+            if h:
+                reachable.update(_collect_from_commit(store, h))
+    except Exception:
+        pass
+    return reachable
+
+
+def _collect_from_commit(store: ObjectStore, commit_hash: str) -> Set[str]:
+    """Collect all object hashes reachable from a commit."""
+    from .remote import _collect_objects_from_commit
+
+    return _collect_objects_from_commit(store, commit_hash)
+
+
+def run_gc(
+    mem_dir: Path, store: ObjectStore, gc_prune_days: int = 90, dry_run: bool = False
+) -> Tuple[int, int]:
+    """
+    Garbage collect: delete unreachable loose objects.
+    Returns (deleted_count, bytes_freed). dry_run: only report, do not delete.
+    """
+    loose = list_loose_objects(mem_dir / "objects")
+    reachable = reachable_from_refs(mem_dir, store, gc_prune_days)
+    to_delete = loose - reachable
+    freed = 0
+    for hash_id in to_delete:
+        # Resolve type from path
+        for obj_type in ["blob", "tree", "commit", "tag"]:
+            p = store.objects_dir / obj_type / hash_id[:2] / hash_id[2:]
+            if p.exists():
+                if not dry_run:
+                    size = p.stat().st_size
+                    p.unlink()
+                    freed += size
+                else:
+                    freed += p.stat().st_size
+                break
+    return (len(to_delete), freed)
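A dry run reports reclaimable space without deleting anything; a sketch below, where the .mem metadata directory name is an assumption for illustration:

from pathlib import Path
from memvcs.core.objects import ObjectStore
from memvcs.core.pack import run_gc

mem_dir = Path(".mem")  # hypothetical repo metadata directory
store = ObjectStore(mem_dir / "objects")
deleted, freed = run_gc(mem_dir, store, dry_run=True)
print(f"{deleted} unreachable objects, {freed} bytes reclaimable")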
memvcs/core/privacy_budget.py ADDED
@@ -0,0 +1,63 @@
+"""
+Differential privacy budget tracking for agmem.
+
+Per-repo epsilon spent; block when budget exceeded.
+"""
+
+import json
+import math
+from pathlib import Path
+from typing import Optional, Tuple
+
+
+def _budget_path(mem_dir: Path) -> Path:
+    return mem_dir / "privacy_budget.json"
+
+
+def load_budget(mem_dir: Path) -> Tuple[float, float, float]:
+    """Load (epsilon_spent, max_epsilon, delta). Returns (0, max, delta) if no file."""
+    path = _budget_path(mem_dir)
+    if not path.exists():
+        config = mem_dir / "config.json"
+        max_eps = 1.0
+        delta = 1e-5
+        if config.exists():
+            try:
+                c = json.loads(config.read_text())
+                dp = c.get("differential_privacy", {})
+                max_eps = float(dp.get("max_epsilon", 1.0))
+                delta = float(dp.get("delta", 1e-5))
+            except Exception:
+                pass
+        return (0.0, max_eps, delta)
+    try:
+        data = json.loads(path.read_text())
+        return (
+            float(data.get("epsilon_spent", 0)),
+            float(data.get("max_epsilon", 1.0)),
+            float(data.get("delta", 1e-5)),
+        )
+    except Exception:
+        return (0.0, 1.0, 1e-5)
+
+
+def spend_epsilon(mem_dir: Path, epsilon: float, max_epsilon: Optional[float] = None) -> bool:
+    """Record epsilon spent. Returns False if budget would be exceeded."""
+    spent, max_eps, delta = load_budget(mem_dir)
+    if max_epsilon is not None:
+        max_eps = max_epsilon
+    if spent + epsilon > max_eps:
+        return False
+    mem_dir.mkdir(parents=True, exist_ok=True)
+    path = _budget_path(mem_dir)
+    data = {"epsilon_spent": spent + epsilon, "max_epsilon": max_eps, "delta": delta}
+    path.write_text(json.dumps(data, indent=2))
+    return True
+
+
+def add_noise(value: float, sensitivity: float, epsilon: float, delta: float = 1e-5) -> float:
+    """Add Gaussian noise for (epsilon, delta)-DP. sigma = sensitivity * sqrt(2*ln(1.25/delta)) / epsilon."""
+    import random
+
+    sigma = sensitivity * math.sqrt(2 * math.log(1.25 / delta)) / epsilon
+    return value + random.gauss(0, sigma)
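For a concrete feel of the noise scale: with sensitivity 1, epsilon 0.5, and the default delta of 1e-5, sigma = sqrt(2 * ln(125000)) / 0.5 ≈ 9.69:

import math

sigma = 1.0 * math.sqrt(2 * math.log(1.25 / 1e-5)) / 0.5
print(round(sigma, 2))  # 9.69

Each such release would first be gated through spend_epsilon(mem_dir, 0.5), which refuses once the cumulative spend would pass max_epsilon.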