agmem 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. {agmem-0.1.2.dist-info → agmem-0.1.4.dist-info}/METADATA +144 -14
  2. {agmem-0.1.2.dist-info → agmem-0.1.4.dist-info}/RECORD +48 -28
  3. memvcs/cli.py +10 -0
  4. memvcs/commands/add.py +6 -0
  5. memvcs/commands/audit.py +59 -0
  6. memvcs/commands/clone.py +7 -0
  7. memvcs/commands/daemon.py +45 -0
  8. memvcs/commands/distill.py +24 -0
  9. memvcs/commands/federated.py +59 -0
  10. memvcs/commands/fsck.py +31 -0
  11. memvcs/commands/garden.py +22 -0
  12. memvcs/commands/gc.py +66 -0
  13. memvcs/commands/merge.py +55 -1
  14. memvcs/commands/prove.py +66 -0
  15. memvcs/commands/pull.py +27 -0
  16. memvcs/commands/resolve.py +130 -0
  17. memvcs/commands/timeline.py +27 -0
  18. memvcs/commands/verify.py +74 -23
  19. memvcs/commands/when.py +27 -0
  20. memvcs/core/audit.py +124 -0
  21. memvcs/core/compression_pipeline.py +157 -0
  22. memvcs/core/consistency.py +9 -9
  23. memvcs/core/crypto_verify.py +291 -0
  24. memvcs/core/distiller.py +47 -29
  25. memvcs/core/encryption.py +169 -0
  26. memvcs/core/federated.py +147 -0
  27. memvcs/core/gardener.py +47 -29
  28. memvcs/core/ipfs_remote.py +200 -0
  29. memvcs/core/knowledge_graph.py +77 -5
  30. memvcs/core/llm/__init__.py +10 -0
  31. memvcs/core/llm/anthropic_provider.py +50 -0
  32. memvcs/core/llm/base.py +27 -0
  33. memvcs/core/llm/factory.py +30 -0
  34. memvcs/core/llm/openai_provider.py +36 -0
  35. memvcs/core/merge.py +36 -23
  36. memvcs/core/objects.py +39 -19
  37. memvcs/core/pack.py +278 -0
  38. memvcs/core/privacy_budget.py +63 -0
  39. memvcs/core/remote.py +229 -3
  40. memvcs/core/repository.py +82 -2
  41. memvcs/core/temporal_index.py +9 -0
  42. memvcs/core/trust.py +103 -0
  43. memvcs/core/vector_store.py +15 -1
  44. memvcs/core/zk_proofs.py +158 -0
  45. {agmem-0.1.2.dist-info → agmem-0.1.4.dist-info}/WHEEL +0 -0
  46. {agmem-0.1.2.dist-info → agmem-0.1.4.dist-info}/entry_points.txt +0 -0
  47. {agmem-0.1.2.dist-info → agmem-0.1.4.dist-info}/licenses/LICENSE +0 -0
  48. {agmem-0.1.2.dist-info → agmem-0.1.4.dist-info}/top_level.txt +0 -0
memvcs/core/llm/factory.py ADDED
@@ -0,0 +1,30 @@
+"""LLM provider factory: select by config or env."""
+
+import os
+from typing import Optional, Dict, Any
+
+from .base import LLMProvider
+from .openai_provider import OpenAIProvider
+from .anthropic_provider import AnthropicProvider
+
+
+def get_provider(
+    provider_name: Optional[str] = None,
+    model: Optional[str] = None,
+    config: Optional[Dict[str, Any]] = None,
+) -> Optional[LLMProvider]:
+    """
+    Return LLM provider by name. Config may have llm_provider, llm_model.
+    Env: AGMEM_LLM_PROVIDER, OPENAI_API_KEY, ANTHROPIC_API_KEY.
+    """
+    name = (
+        provider_name
+        or (config or {}).get("llm_provider")
+        or os.environ.get("AGMEM_LLM_PROVIDER", "openai")
+    )
+    m = model or (config or {}).get("llm_model")
+    if name == "openai":
+        return OpenAIProvider(model=m)
+    if name == "anthropic":
+        return AnthropicProvider(model=m)
+    return OpenAIProvider(model=m)
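
A minimal usage sketch for the factory above, assuming the package's llm/__init__.py re-exports get_provider (merge.py below imports it as `from .llm import get_provider`); the provider and model names are illustrative:

import os

from memvcs.core.llm import get_provider

# Selection falls back to "openai" when AGMEM_LLM_PROVIDER is unset.
os.environ.setdefault("AGMEM_LLM_PROVIDER", "anthropic")
provider = get_provider(config={"llm_model": "claude-3-5-haiku-latest"})  # illustrative model name
print(provider.name if provider else "no provider configured")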
memvcs/core/llm/openai_provider.py ADDED
@@ -0,0 +1,36 @@
+"""OpenAI LLM provider."""
+
+import os
+from typing import Optional, List, Dict, Any
+
+from .base import LLMProvider
+
+
+class OpenAIProvider(LLMProvider):
+    """OpenAI (GPT) provider. API key from OPENAI_API_KEY."""
+
+    def __init__(self, model: Optional[str] = None):
+        self._model = model or os.environ.get("OPENAI_MODEL", "gpt-3.5-turbo")
+
+    @property
+    def name(self) -> str:
+        return "openai"
+
+    def complete(
+        self,
+        messages: List[Dict[str, str]],
+        *,
+        model: Optional[str] = None,
+        max_tokens: int = 1024,
+        **kwargs: Any,
+    ) -> str:
+        import openai
+
+        m = model or self._model
+        response = openai.chat.completions.create(
+            model=m,
+            messages=messages,
+            max_tokens=max_tokens,
+            **kwargs,
+        )
+        return response.choices[0].message.content or ""
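
A hedged call-site sketch for the provider above; it requires OPENAI_API_KEY in the environment and the openai package installed, and the prompt text is illustrative:

from memvcs.core.llm.openai_provider import OpenAIProvider

provider = OpenAIProvider()  # defaults to OPENAI_MODEL or "gpt-3.5-turbo"
answer = provider.complete(
    [{"role": "user", "content": "Summarize: the agent prefers concise replies."}],
    max_tokens=64,
)
print(answer)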
memvcs/core/merge.py CHANGED
@@ -33,6 +33,8 @@ class Conflict:
     ours_content: Optional[str]
     theirs_content: Optional[str]
     message: str
+    memory_type: Optional[str] = None  # episodic, semantic, procedural
+    payload: Optional[Dict[str, Any]] = None  # type-specific (e.g. fact strings, step diffs)
 
 
 @dataclass
@@ -256,31 +258,31 @@ class MergeEngine:
         ours_content: Optional[str],
         theirs_content: Optional[str],
     ) -> Tuple[str, bool]:
-        """LLM arbitration: call LLM to resolve contradiction."""
+        """LLM arbitration: call LLM to resolve contradiction (multi-provider)."""
         try:
-            import openai
-
-            response = openai.chat.completions.create(
-                model="gpt-3.5-turbo",
-                messages=[
-                    {
-                        "role": "system",
-                        "content": "Resolve the contradiction between two memory versions. "
-                        "Output the merged content that best reflects the combined truth.",
-                    },
-                    {
-                        "role": "user",
-                        "content": f"OURS:\n{ours_content}\n\nTHEIRS:\n{theirs_content}",
-                    },
-                ],
-                max_tokens=1000,
-            )
-            merged = response.choices[0].message.content.strip()
-            return merged, False
+            from .llm import get_provider
+
+            provider = get_provider()
+            if provider:
+                merged = provider.complete(
+                    [
+                        {
+                            "role": "system",
+                            "content": "Resolve the contradiction between two memory versions. Output the merged content that best reflects the combined truth.",
+                        },
+                        {
+                            "role": "user",
+                            "content": f"OURS:\n{ours_content}\n\nTHEIRS:\n{theirs_content}",
+                        },
+                    ],
+                    max_tokens=1000,
+                )
+                return (merged or "").strip(), False
         except Exception:
-            # Fallback to conflict markers
-            merged = f"<<<<<<< OURS\n{ours_content}\n=======\n{theirs_content}\n>>>>>>> THEIRS"
-            return merged, True
+            pass
+        # Fallback to conflict markers
+        merged = f"<<<<<<< OURS\n{ours_content}\n=======\n{theirs_content}\n>>>>>>> THEIRS"
+        return merged, True
 
     def merge_procedural(
         self,
@@ -398,6 +400,15 @@ class MergeEngine:
 
         # Record conflict if any
        if had_conflict:
+            payload = {}
+            if ours_content:
+                payload["ours_preview"] = (
+                    ours_content[:300] if len(ours_content) > 300 else ours_content
+                )
+            if theirs_content:
+                payload["theirs_preview"] = (
+                    theirs_content[:300] if len(theirs_content) > 300 else theirs_content
+                )
             conflicts.append(
                 Conflict(
                     path=path,
@@ -405,6 +416,8 @@ class MergeEngine:
                     ours_content=ours_content,
                     theirs_content=theirs_content,
                     message=f"{strategy.value} merge conflict in {path}",
+                    memory_type=strategy.value,
+                    payload=payload or None,
                 )
             )
 
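
For context, what the fallback path in the arbitration change above produces when no provider is configured or the call raises: both versions are kept behind git-style conflict markers and 300-character previews of each side end up in the recorded Conflict payload. A small illustration with made-up content:

ours_content = "User prefers dark mode."
theirs_content = "User prefers light mode."

# Fallback result, returned with had_conflict=True
merged = f"<<<<<<< OURS\n{ours_content}\n=======\n{theirs_content}\n>>>>>>> THEIRS"

# Previews recorded on the Conflict (mirrors the payload-building code above)
payload = {
    "ours_preview": ours_content[:300],
    "theirs_preview": theirs_content[:300],
}
print(merged)
print(payload)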
memvcs/core/objects.py CHANGED
@@ -24,8 +24,9 @@ def _valid_object_hash(hash_id: str) -> bool:
 class ObjectStore:
     """Content-addressable object storage system."""
 
-    def __init__(self, objects_dir: Path):
+    def __init__(self, objects_dir: Path, encryptor: Optional[Any] = None):
         self.objects_dir = Path(objects_dir)
+        self._encryptor = encryptor
         self._ensure_directories()
 
     def _ensure_directories(self):
@@ -68,17 +69,21 @@ class ObjectStore:
         # Create directory if needed
         obj_path.parent.mkdir(parents=True, exist_ok=True)
 
-        # Compress and store
+        # Compress and optionally encrypt
         header = f"{obj_type} {len(content)}\0".encode()
         full_content = header + content
         compressed = zlib.compress(full_content)
-
+        if self._encryptor:
+            try:
+                compressed = self._encryptor.encrypt_payload(compressed)
+            except ValueError:
+                pass  # no key; store plain compressed (legacy behavior)
         obj_path.write_bytes(compressed)
         return hash_id
 
     def retrieve(self, hash_id: str, obj_type: str) -> Optional[bytes]:
         """
-        Retrieve content by hash ID.
+        Retrieve content by hash ID (loose object or pack).
 
         Args:
             hash_id: SHA-256 hash of the object
@@ -89,26 +94,41 @@ class ObjectStore:
         """
         obj_path = self._get_object_path(hash_id, obj_type)
 
-        if not obj_path.exists():
-            return None
-
-        # Decompress and extract content
-        compressed = obj_path.read_bytes()
-        full_content = zlib.decompress(compressed)
-
-        # Parse header
-        null_idx = full_content.index(b"\0")
-        header = full_content[:null_idx].decode()
-        content = full_content[null_idx + 1 :]
-
-        return content
+        if obj_path.exists():
+            raw = obj_path.read_bytes()
+            # Optionally decrypt (iv+tag minimum 12+16 bytes)
+            if self._encryptor and len(raw) >= 12 + 16:
+                try:
+                    raw = self._encryptor.decrypt_payload(raw)
+                except Exception:
+                    pass  # legacy plain compressed
+            full_content = zlib.decompress(raw)
+            null_idx = full_content.index(b"\0")
+            content = full_content[null_idx + 1 :]
+            return content
+
+        # Try pack file when loose object missing
+        try:
+            from .pack import retrieve_from_pack
+            result = retrieve_from_pack(self.objects_dir, hash_id, expected_type=obj_type)
+            if result is not None:
+                return result[1]
+        except Exception:
+            pass
+        return None
 
     def exists(self, hash_id: str, obj_type: str) -> bool:
-        """Check if an object exists. Returns False for invalid hash (no raise)."""
+        """Check if an object exists (loose or pack). Returns False for invalid hash (no raise)."""
         if not _valid_object_hash(hash_id):
             return False
         obj_path = self._get_object_path(hash_id, obj_type)
-        return obj_path.exists()
+        if obj_path.exists():
+            return True
+        try:
+            from .pack import retrieve_from_pack
+            return retrieve_from_pack(self.objects_dir, hash_id, expected_type=obj_type) is not None
+        except Exception:
+            return False
 
     def delete(self, hash_id: str, obj_type: str) -> bool:
         """Delete an object. Returns True if deleted, False if not found."""
memvcs/core/pack.py ADDED
@@ -0,0 +1,278 @@
+"""
+Pack files and garbage collection for agmem.
+
+Pack: collect loose objects into single file + index. GC: delete unreachable objects, repack.
+"""
+
+import hashlib
+import struct
+import zlib
+from pathlib import Path
+from typing import Set, Dict, List, Optional, Tuple
+
+from .objects import ObjectStore
+from .refs import RefsManager
+
+PACK_MAGIC = b"PACK"
+PACK_VERSION = 2
+IDX_MAGIC = b"agidx"
+IDX_VERSION = 2
+OBJ_TYPE_BLOB = 1
+OBJ_TYPE_TREE = 2
+OBJ_TYPE_COMMIT = 3
+OBJ_TYPE_TAG = 4
+TYPE_TO_BYTE = {"blob": OBJ_TYPE_BLOB, "tree": OBJ_TYPE_TREE, "commit": OBJ_TYPE_COMMIT, "tag": OBJ_TYPE_TAG}
+BYTE_TO_TYPE = {v: k for k, v in TYPE_TO_BYTE.items()}
+
+
+def _pack_dir(objects_dir: Path) -> Path:
+    return objects_dir / "pack"
+
+
+def _get_loose_object_type(objects_dir: Path, hash_id: str) -> Optional[str]:
+    """Return obj_type for a loose object, or None if not found."""
+    if len(hash_id) < 4:
+        return None
+    prefix, suffix = hash_id[:2], hash_id[2:]
+    for obj_type in ["blob", "tree", "commit", "tag"]:
+        p = objects_dir / obj_type / prefix / suffix
+        if p.exists():
+            return obj_type
+    return None
+
+
+def list_loose_objects(objects_dir: Path) -> Set[str]:
+    """List all loose object hashes (blob, tree, commit, tag)."""
+    hashes = set()
+    for obj_type in ["blob", "tree", "commit", "tag"]:
+        type_dir = objects_dir / obj_type
+        if not type_dir.exists():
+            continue
+        for prefix_dir in type_dir.iterdir():
+            if not prefix_dir.is_dir():
+                continue
+            for f in prefix_dir.iterdir():
+                hash_id = prefix_dir.name + f.name
+                hashes.add(hash_id)
+    return hashes
+
+
+def reachable_from_refs(mem_dir: Path, store: ObjectStore, gc_prune_days: int = 90) -> Set[str]:
+    """Collect all object hashes reachable from branches, tags, and reflog (within prune window)."""
+    refs = RefsManager(mem_dir)
+    reachable = set()
+    # Branch tips
+    for b in refs.list_branches():
+        ch = refs.get_branch_commit(b)
+        if ch:
+            reachable.update(_collect_from_commit(store, ch))
+    # Tags
+    for t in refs.list_tags():
+        ch = refs.get_tag_commit(t)
+        if ch:
+            reachable.update(_collect_from_commit(store, ch))
+    # Reflog (simplified: just HEAD recent)
+    try:
+        log = refs.get_reflog("HEAD", max_count=1000)
+        for e in log:
+            h = e.get("hash")
+            if h:
+                reachable.update(_collect_from_commit(store, h))
+    except Exception:
+        pass
+    return reachable
+
+
+def _collect_from_commit(store: ObjectStore, commit_hash: str) -> Set[str]:
+    """Collect all object hashes reachable from a commit."""
+    from .remote import _collect_objects_from_commit
+
+    return _collect_objects_from_commit(store, commit_hash)
+
+
+def run_gc(
+    mem_dir: Path, store: ObjectStore, gc_prune_days: int = 90, dry_run: bool = False
+) -> Tuple[int, int]:
+    """
+    Garbage collect: delete unreachable loose objects.
+    Returns (deleted_count, bytes_freed). dry_run: only report, do not delete.
+    """
+    loose = list_loose_objects(mem_dir / "objects")
+    reachable = reachable_from_refs(mem_dir, store, gc_prune_days)
+    to_delete = loose - reachable
+    freed = 0
+    for hash_id in to_delete:
+        # Resolve type from path
+        for obj_type in ["blob", "tree", "commit", "tag"]:
+            p = store.objects_dir / obj_type / hash_id[:2] / hash_id[2:]
+            if p.exists():
+                if not dry_run:
+                    size = p.stat().st_size
+                    p.unlink()
+                    freed += size
+                else:
+                    freed += p.stat().st_size
+                break
+    return (len(to_delete), freed)
+
+
+def write_pack(
+    objects_dir: Path, store: ObjectStore, hash_to_type: Dict[str, str]
+) -> Tuple[Path, Path]:
+    """
+    Pack loose objects into a single pack file and index.
+    hash_to_type: map hash_id -> obj_type for objects to include.
+    Returns (pack_path, index_path). Does not delete loose objects.
+    """
+    if not hash_to_type:
+        raise ValueError("Cannot write empty pack")
+    pack_d = _pack_dir(objects_dir)
+    pack_d.mkdir(parents=True, exist_ok=True)
+
+    pack_header_len = len(PACK_MAGIC) + 4 + 4
+    pack_body = bytearray()
+    index_entries: List[Tuple[str, str, int]] = []  # (hash_id, obj_type, offset_in_file)
+    offset_in_file = pack_header_len
+
+    for hash_id in sorted(hash_to_type.keys()):
+        obj_type = hash_to_type[hash_id]
+        content = store.retrieve(hash_id, obj_type)
+        if content is None:
+            continue
+        header = f"{obj_type} {len(content)}\0".encode()
+        full = header + content
+        compressed = zlib.compress(full)
+        type_byte = TYPE_TO_BYTE.get(obj_type, OBJ_TYPE_BLOB)
+        size_bytes = struct.pack(">I", len(compressed))
+        chunk = bytes([type_byte]) + size_bytes + compressed
+        pack_body.extend(chunk)
+        index_entries.append((hash_id, obj_type, offset_in_file))
+        offset_in_file += len(chunk)
+
+    if not index_entries:
+        raise ValueError("No objects to pack")
+
+    pack_content = PACK_MAGIC + struct.pack(">I", PACK_VERSION) + struct.pack(">I", len(index_entries)) + bytes(pack_body)
+    pack_hash = hashlib.sha256(pack_content).digest()
+    pack_content += pack_hash
+
+    pack_name = f"pack-{pack_hash[:16].hex()}.pack"
+    pack_path = pack_d / pack_name
+    pack_path.write_bytes(pack_content)
+
+    index_content = bytearray(IDX_MAGIC + struct.pack(">I", IDX_VERSION) + struct.pack(">I", len(index_entries)))
+    for hash_id, obj_type, off in index_entries:
+        index_content.extend(bytes.fromhex(hash_id))
+        index_content.append(TYPE_TO_BYTE[obj_type])
+        index_content.extend(struct.pack(">I", off))
+    idx_hash = hashlib.sha256(index_content).digest()
+    index_content.extend(idx_hash)
+    idx_path = pack_path.with_suffix(".idx")
+    idx_path.write_bytes(index_content)
+
+    return (pack_path, idx_path)
+
+
+def _find_pack_index(objects_dir: Path) -> Optional[Path]:
+    """Return path to first .idx file in objects/pack, or None."""
+    pack_d = _pack_dir(objects_dir)
+    if not pack_d.exists():
+        return None
+    for p in pack_d.iterdir():
+        if p.suffix == ".idx":
+            return p
+    return None
+
+
+def retrieve_from_pack(objects_dir: Path, hash_id: str, expected_type: Optional[str] = None) -> Optional[Tuple[str, bytes]]:
+    """
+    Retrieve object from pack by hash. Returns (obj_type, content) or None.
+    If expected_type is set, only return if pack type matches.
+    """
+    idx_path = _find_pack_index(objects_dir)
+    if idx_path is None:
+        return None
+    pack_path = idx_path.with_suffix(".pack")
+    if not pack_path.exists():
+        return None
+
+    raw_idx = idx_path.read_bytes()
+    if len(raw_idx) < len(IDX_MAGIC) + 4 + 4 + 32 + 1 + 4 + 32:
+        return None
+    if raw_idx[: len(IDX_MAGIC)] != IDX_MAGIC:
+        return None
+    version = struct.unpack(">I", raw_idx[len(IDX_MAGIC) : len(IDX_MAGIC) + 4])[0]
+    if version != IDX_VERSION:
+        return None
+    count = struct.unpack(">I", raw_idx[len(IDX_MAGIC) + 4 : len(IDX_MAGIC) + 8])[0]
+    entry_size = 32 + 1 + 4
+    entries_start = len(IDX_MAGIC) + 8
+    entries_end = entries_start + count * entry_size
+    if entries_end + 32 > len(raw_idx):
+        return None
+    hash_hex = hash_id
+    if len(hash_hex) != 64:
+        return None
+    hash_bin = bytes.fromhex(hash_hex)
+    for i in range(count):
+        base = entries_start + i * entry_size
+        entry_hash = raw_idx[base : base + 32]
+        if entry_hash != hash_bin:
+            continue
+        type_byte = raw_idx[base + 32]
+        offset = struct.unpack(">I", raw_idx[base + 33 : base + 37])[0]
+        obj_type = BYTE_TO_TYPE.get(type_byte)
+        if obj_type is None:
+            continue
+        if expected_type is not None and obj_type != expected_type:
+            return None
+        pack_raw = pack_path.read_bytes()
+        header_size = len(PACK_MAGIC) + 4 + 4
+        if offset + 1 + 4 > len(pack_raw) - 32:
+            return None
+        size = struct.unpack(">I", pack_raw[offset + 1 : offset + 5])[0]
+        payload_start = offset + 5
+        payload_end = payload_start + size
+        if payload_end > len(pack_raw) - 32:
+            return None
+        compressed = pack_raw[payload_start:payload_end]
+        try:
+            full = zlib.decompress(compressed)
+        except Exception:
+            return None
+        null_idx = full.index(b"\0")
+        content = full[null_idx + 1 :]
+        return (obj_type, content)
+    return None
+
+
+def run_repack(
+    mem_dir: Path, store: ObjectStore, gc_prune_days: int = 90, dry_run: bool = False
+) -> Tuple[int, int]:
+    """
+    After GC: pack all reachable loose objects into a pack file, then delete those loose objects.
+    Returns (objects_packed, bytes_freed_from_loose).
+    """
+    objects_dir = mem_dir / "objects"
+    reachable = reachable_from_refs(mem_dir, store, gc_prune_days)
+    loose = list_loose_objects(objects_dir)
+    to_pack = reachable & loose
+    if not to_pack:
+        return (0, 0)
+    hash_to_type: Dict[str, str] = {}
+    for hash_id in to_pack:
+        obj_type = _get_loose_object_type(objects_dir, hash_id)
+        if obj_type:
+            hash_to_type[hash_id] = obj_type
+    if not hash_to_type:
+        return (0, 0)
+    if dry_run:
+        return (len(hash_to_type), 0)
+    write_pack(objects_dir, store, hash_to_type)
+    freed = 0
+    for hash_id, obj_type in hash_to_type.items():
+        p = store.objects_dir / obj_type / hash_id[:2] / hash_id[2:]
+        if p.exists():
+            freed += p.stat().st_size
+            p.unlink()
+    return (len(hash_to_type), freed)
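
The binary layout implied by write_pack() and retrieve_from_pack() above, recapped as comments, plus a small sketch that walks the entries recorded in a pack index (derived from the code above, nothing new):

# Pack file:  b"PACK" | uint32 version | uint32 entry count
#             | per entry: 1 type byte | uint32 compressed size | zlib(b"<type> <len>\0" + content)
#             | trailing 32-byte SHA-256 of everything before it
# Index file: b"agidx" | uint32 version | uint32 entry count
#             | per entry: 32-byte object hash | 1 type byte | uint32 offset into the pack file
#             | trailing 32-byte SHA-256 of everything before it
import struct
from pathlib import Path

IDX_MAGIC = b"agidx"
ENTRY_SIZE = 32 + 1 + 4


def list_pack_entries(idx_path: Path):
    """Yield (hash_hex, type_byte, offset) for each entry in a pack index."""
    raw = idx_path.read_bytes()
    if raw[: len(IDX_MAGIC)] != IDX_MAGIC:
        return
    count = struct.unpack(">I", raw[len(IDX_MAGIC) + 4 : len(IDX_MAGIC) + 8])[0]
    start = len(IDX_MAGIC) + 8
    for i in range(count):
        base = start + i * ENTRY_SIZE
        hash_hex = raw[base : base + 32].hex()
        type_byte = raw[base + 32]
        offset = struct.unpack(">I", raw[base + 33 : base + 37])[0]
        yield hash_hex, type_byte, offset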
memvcs/core/privacy_budget.py ADDED
@@ -0,0 +1,63 @@
+"""
+Differential privacy budget tracking for agmem.
+
+Per-repo epsilon spent; block when budget exceeded.
+"""
+
+import json
+import math
+from pathlib import Path
+from typing import Optional, Tuple
+
+
+def _budget_path(mem_dir: Path) -> Path:
+    return mem_dir / "privacy_budget.json"
+
+
+def load_budget(mem_dir: Path) -> Tuple[float, float, float]:
+    """Load (epsilon_spent, max_epsilon, delta). Returns (0, max, delta) if no file."""
+    path = _budget_path(mem_dir)
+    if not path.exists():
+        config = mem_dir / "config.json"
+        max_eps = 1.0
+        delta = 1e-5
+        if config.exists():
+            try:
+                c = json.loads(config.read_text())
+                dp = c.get("differential_privacy", {})
+                max_eps = float(dp.get("max_epsilon", 1.0))
+                delta = float(dp.get("delta", 1e-5))
+            except Exception:
+                pass
+        return (0.0, max_eps, delta)
+    try:
+        data = json.loads(path.read_text())
+        return (
+            float(data.get("epsilon_spent", 0)),
+            float(data.get("max_epsilon", 1.0)),
+            float(data.get("delta", 1e-5)),
+        )
+    except Exception:
+        return (0.0, 1.0, 1e-5)
+
+
+def spend_epsilon(mem_dir: Path, epsilon: float, max_epsilon: Optional[float] = None) -> bool:
+    """Record epsilon spent. Returns False if budget would be exceeded."""
+    spent, max_eps, delta = load_budget(mem_dir)
+    if max_epsilon is not None:
+        max_eps = max_epsilon
+    if spent + epsilon > max_eps:
+        return False
+    mem_dir.mkdir(parents=True, exist_ok=True)
+    path = _budget_path(mem_dir)
+    data = {"epsilon_spent": spent + epsilon, "max_epsilon": max_eps, "delta": delta}
+    path.write_text(json.dumps(data, indent=2))
+    return True
+
+
+def add_noise(value: float, sensitivity: float, epsilon: float, delta: float = 1e-5) -> float:
+    """Add Gaussian noise for (epsilon, delta)-DP. sigma = sensitivity * sqrt(2*ln(1.25/delta)) / epsilon."""
+    import random
+
+    sigma = sensitivity * math.sqrt(2 * math.log(1.25 / delta)) / epsilon
+    return value + random.gauss(0, sigma)
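
A usage sketch for the helpers above: spend epsilon before each noisy release and stop once the per-repo budget is gone. The .mem directory and the statistic are illustrative; with the module's default delta of 1e-5, sensitivity 1.0 and epsilon 0.1 give sigma ≈ 48.5.

from pathlib import Path

from memvcs.core.privacy_budget import add_noise, spend_epsilon

mem_dir = Path(".mem")  # illustrative repo metadata directory
epsilon = 0.1

if spend_epsilon(mem_dir, epsilon):
    # sigma = 1.0 * sqrt(2 * ln(1.25 / 1e-5)) / 0.1 ≈ 48.5
    noisy_count = add_noise(42.0, sensitivity=1.0, epsilon=epsilon)
    print(f"released noisy count: {noisy_count:.1f}")
else:
    print("privacy budget exhausted; refusing to release the statistic")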