agmem 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {agmem-0.1.2.dist-info → agmem-0.1.4.dist-info}/METADATA +144 -14
- {agmem-0.1.2.dist-info → agmem-0.1.4.dist-info}/RECORD +48 -28
- memvcs/cli.py +10 -0
- memvcs/commands/add.py +6 -0
- memvcs/commands/audit.py +59 -0
- memvcs/commands/clone.py +7 -0
- memvcs/commands/daemon.py +45 -0
- memvcs/commands/distill.py +24 -0
- memvcs/commands/federated.py +59 -0
- memvcs/commands/fsck.py +31 -0
- memvcs/commands/garden.py +22 -0
- memvcs/commands/gc.py +66 -0
- memvcs/commands/merge.py +55 -1
- memvcs/commands/prove.py +66 -0
- memvcs/commands/pull.py +27 -0
- memvcs/commands/resolve.py +130 -0
- memvcs/commands/timeline.py +27 -0
- memvcs/commands/verify.py +74 -23
- memvcs/commands/when.py +27 -0
- memvcs/core/audit.py +124 -0
- memvcs/core/compression_pipeline.py +157 -0
- memvcs/core/consistency.py +9 -9
- memvcs/core/crypto_verify.py +291 -0
- memvcs/core/distiller.py +47 -29
- memvcs/core/encryption.py +169 -0
- memvcs/core/federated.py +147 -0
- memvcs/core/gardener.py +47 -29
- memvcs/core/ipfs_remote.py +200 -0
- memvcs/core/knowledge_graph.py +77 -5
- memvcs/core/llm/__init__.py +10 -0
- memvcs/core/llm/anthropic_provider.py +50 -0
- memvcs/core/llm/base.py +27 -0
- memvcs/core/llm/factory.py +30 -0
- memvcs/core/llm/openai_provider.py +36 -0
- memvcs/core/merge.py +36 -23
- memvcs/core/objects.py +39 -19
- memvcs/core/pack.py +278 -0
- memvcs/core/privacy_budget.py +63 -0
- memvcs/core/remote.py +229 -3
- memvcs/core/repository.py +82 -2
- memvcs/core/temporal_index.py +9 -0
- memvcs/core/trust.py +103 -0
- memvcs/core/vector_store.py +15 -1
- memvcs/core/zk_proofs.py +158 -0
- {agmem-0.1.2.dist-info → agmem-0.1.4.dist-info}/WHEEL +0 -0
- {agmem-0.1.2.dist-info → agmem-0.1.4.dist-info}/entry_points.txt +0 -0
- {agmem-0.1.2.dist-info → agmem-0.1.4.dist-info}/licenses/LICENSE +0 -0
- {agmem-0.1.2.dist-info → agmem-0.1.4.dist-info}/top_level.txt +0 -0
memvcs/core/llm/factory.py
ADDED
@@ -0,0 +1,30 @@
+"""LLM provider factory: select by config or env."""
+
+import os
+from typing import Optional, Dict, Any
+
+from .base import LLMProvider
+from .openai_provider import OpenAIProvider
+from .anthropic_provider import AnthropicProvider
+
+
+def get_provider(
+    provider_name: Optional[str] = None,
+    model: Optional[str] = None,
+    config: Optional[Dict[str, Any]] = None,
+) -> Optional[LLMProvider]:
+    """
+    Return LLM provider by name. Config may have llm_provider, llm_model.
+    Env: AGMEM_LLM_PROVIDER, OPENAI_API_KEY, ANTHROPIC_API_KEY.
+    """
+    name = (
+        provider_name
+        or (config or {}).get("llm_provider")
+        or os.environ.get("AGMEM_LLM_PROVIDER", "openai")
+    )
+    m = model or (config or {}).get("llm_model")
+    if name == "openai":
+        return OpenAIProvider(model=m)
+    if name == "anthropic":
+        return AnthropicProvider(model=m)
+    return OpenAIProvider(model=m)
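For orientation, a minimal usage sketch of the new factory (illustrative only, not part of the released files above). It assumes the package is installed, that memvcs.core.llm re-exports get_provider (as the merge.py change below suggests), and that an API key such as OPENAI_API_KEY or ANTHROPIC_API_KEY is set:

    # Illustrative sketch. Selection order: explicit arg > config["llm_provider"]
    # > AGMEM_LLM_PROVIDER env var > "openai" default.
    from memvcs.core.llm import get_provider

    provider = get_provider(config={"llm_provider": "anthropic"})
    if provider is not None:
        reply = provider.complete(
            [{"role": "user", "content": "Summarize: the agent prefers concise replies."}],
            max_tokens=256,
        )
        print(provider.name, reply)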
memvcs/core/llm/openai_provider.py
ADDED
@@ -0,0 +1,36 @@
+"""OpenAI LLM provider."""
+
+import os
+from typing import Optional, List, Dict, Any
+
+from .base import LLMProvider
+
+
+class OpenAIProvider(LLMProvider):
+    """OpenAI (GPT) provider. API key from OPENAI_API_KEY."""
+
+    def __init__(self, model: Optional[str] = None):
+        self._model = model or os.environ.get("OPENAI_MODEL", "gpt-3.5-turbo")
+
+    @property
+    def name(self) -> str:
+        return "openai"
+
+    def complete(
+        self,
+        messages: List[Dict[str, str]],
+        *,
+        model: Optional[str] = None,
+        max_tokens: int = 1024,
+        **kwargs: Any,
+    ) -> str:
+        import openai
+
+        m = model or self._model
+        response = openai.chat.completions.create(
+            model=m,
+            messages=messages,
+            max_tokens=max_tokens,
+            **kwargs,
+        )
+        return response.choices[0].message.content or ""
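The provider imports openai lazily inside complete(), so the dependency is only needed when arbitration actually runs. A hedged sketch of a direct call, assuming the openai 1.x client package and OPENAI_API_KEY are available:

    # Sketch: direct provider call; the model falls back to OPENAI_MODEL or "gpt-3.5-turbo".
    from memvcs.core.llm.openai_provider import OpenAIProvider

    llm = OpenAIProvider()
    text = llm.complete(
        [
            {"role": "system", "content": "You merge conflicting memories."},
            {"role": "user", "content": "OURS: likes tea\nTHEIRS: likes coffee"},
        ],
        max_tokens=128,
    )
    print(text)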
memvcs/core/merge.py
CHANGED
@@ -33,6 +33,8 @@ class Conflict:
     ours_content: Optional[str]
     theirs_content: Optional[str]
     message: str
+    memory_type: Optional[str] = None  # episodic, semantic, procedural
+    payload: Optional[Dict[str, Any]] = None  # type-specific (e.g. fact strings, step diffs)
 
 
 @dataclass
@@ -256,31 +258,31 @@ class MergeEngine:
         ours_content: Optional[str],
         theirs_content: Optional[str],
     ) -> Tuple[str, bool]:
-        """LLM arbitration: call LLM to resolve contradiction."""
+        """LLM arbitration: call LLM to resolve contradiction (multi-provider)."""
         try:
-            import
-            …
-            return merged, False
+            from .llm import get_provider
+
+            provider = get_provider()
+            if provider:
+                merged = provider.complete(
+                    [
+                        {
+                            "role": "system",
+                            "content": "Resolve the contradiction between two memory versions. Output the merged content that best reflects the combined truth.",
+                        },
+                        {
+                            "role": "user",
+                            "content": f"OURS:\n{ours_content}\n\nTHEIRS:\n{theirs_content}",
+                        },
+                    ],
+                    max_tokens=1000,
+                )
+                return (merged or "").strip(), False
         except Exception:
-            …
+            pass
+        # Fallback to conflict markers
+        merged = f"<<<<<<< OURS\n{ours_content}\n=======\n{theirs_content}\n>>>>>>> THEIRS"
+        return merged, True
 
     def merge_procedural(
         self,
@@ -398,6 +400,15 @@ class MergeEngine:
 
         # Record conflict if any
         if had_conflict:
+            payload = {}
+            if ours_content:
+                payload["ours_preview"] = (
+                    ours_content[:300] if len(ours_content) > 300 else ours_content
+                )
+            if theirs_content:
+                payload["theirs_preview"] = (
+                    theirs_content[:300] if len(theirs_content) > 300 else theirs_content
+                )
             conflicts.append(
                 Conflict(
                     path=path,
@@ -405,6 +416,8 @@ class MergeEngine:
                     ours_content=ours_content,
                     theirs_content=theirs_content,
                     message=f"{strategy.value} merge conflict in {path}",
+                    memory_type=strategy.value,
+                    payload=payload or None,
                 )
             )
 
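To make the new fallback concrete: when no provider is configured or the LLM call raises, arbitration falls through to plain conflict markers, and the caller records a Conflict carrying the new memory_type and payload fields. A sketch with made-up memory contents:

    # Hypothetical inputs illustrating the fallback branch and the recorded payload previews.
    ours_content = "User prefers dark mode."
    theirs_content = "User prefers light mode."
    merged = f"<<<<<<< OURS\n{ours_content}\n=======\n{theirs_content}\n>>>>>>> THEIRS"
    # had_conflict is True on this path, so the Conflict gets memory_type=strategy.value
    # (e.g. "semantic") and payload previews truncated to 300 characters:
    payload = {"ours_preview": ours_content[:300], "theirs_preview": theirs_content[:300]}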
memvcs/core/objects.py
CHANGED
@@ -24,8 +24,9 @@ def _valid_object_hash(hash_id: str) -> bool:
 class ObjectStore:
     """Content-addressable object storage system."""
 
-    def __init__(self, objects_dir: Path):
+    def __init__(self, objects_dir: Path, encryptor: Optional[Any] = None):
         self.objects_dir = Path(objects_dir)
+        self._encryptor = encryptor
         self._ensure_directories()
 
     def _ensure_directories(self):
@@ -68,17 +69,21 @@ class ObjectStore:
         # Create directory if needed
         obj_path.parent.mkdir(parents=True, exist_ok=True)
 
-        # Compress and
+        # Compress and optionally encrypt
         header = f"{obj_type} {len(content)}\0".encode()
         full_content = header + content
         compressed = zlib.compress(full_content)
-
+        if self._encryptor:
+            try:
+                compressed = self._encryptor.encrypt_payload(compressed)
+            except ValueError:
+                pass  # no key; store plain compressed (legacy behavior)
         obj_path.write_bytes(compressed)
         return hash_id
 
     def retrieve(self, hash_id: str, obj_type: str) -> Optional[bytes]:
         """
-        Retrieve content by hash ID.
+        Retrieve content by hash ID (loose object or pack).
 
         Args:
             hash_id: SHA-256 hash of the object
@@ -89,26 +94,41 @@ class ObjectStore:
         """
         obj_path = self._get_object_path(hash_id, obj_type)
 
-        if
-        …
+        if obj_path.exists():
+            raw = obj_path.read_bytes()
+            # Optionally decrypt (iv+tag minimum 12+16 bytes)
+            if self._encryptor and len(raw) >= 12 + 16:
+                try:
+                    raw = self._encryptor.decrypt_payload(raw)
+                except Exception:
+                    pass  # legacy plain compressed
+            full_content = zlib.decompress(raw)
+            null_idx = full_content.index(b"\0")
+            content = full_content[null_idx + 1 :]
+            return content
+
+        # Try pack file when loose object missing
+        try:
+            from .pack import retrieve_from_pack
+            result = retrieve_from_pack(self.objects_dir, hash_id, expected_type=obj_type)
+            if result is not None:
+                return result[1]
+        except Exception:
+            pass
+        return None
 
     def exists(self, hash_id: str, obj_type: str) -> bool:
-        """Check if an object exists. Returns False for invalid hash (no raise)."""
+        """Check if an object exists (loose or pack). Returns False for invalid hash (no raise)."""
         if not _valid_object_hash(hash_id):
             return False
         obj_path = self._get_object_path(hash_id, obj_type)
-        …
+        if obj_path.exists():
+            return True
+        try:
+            from .pack import retrieve_from_pack
+            return retrieve_from_pack(self.objects_dir, hash_id, expected_type=obj_type) is not None
+        except Exception:
+            return False
 
     def delete(self, hash_id: str, obj_type: str) -> bool:
         """Delete an object. Returns True if deleted, False if not found."""
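The encryptor passed to ObjectStore is duck-typed: only encrypt_payload / decrypt_payload are called, a ValueError during encryption means "no key, store plain", and any decryption failure falls back to treating the bytes as legacy plain zlib data. A sketch with a stand-in encryptor; the real one presumably lives in the new memvcs/core/encryption.py (not shown in this diff), and the ".mem/objects" path is only illustrative:

    # Stand-in encryptor for illustration; it mimics the "no key configured" case.
    from pathlib import Path
    from memvcs.core.objects import ObjectStore

    class NoKeyEncryptor:
        def encrypt_payload(self, data: bytes) -> bytes:
            raise ValueError("no key configured")  # ObjectStore then stores plain compressed bytes

        def decrypt_payload(self, data: bytes) -> bytes:
            return data

    store = ObjectStore(Path(".mem/objects"), encryptor=NoKeyEncryptor())
    # retrieve()/exists() now consult loose objects first, then any pack file:
    print(store.exists("ab" * 32, "blob"))  # False for an unknown hash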
memvcs/core/pack.py
ADDED
@@ -0,0 +1,278 @@
+"""
+Pack files and garbage collection for agmem.
+
+Pack: collect loose objects into single file + index. GC: delete unreachable objects, repack.
+"""
+
+import hashlib
+import struct
+import zlib
+from pathlib import Path
+from typing import Set, Dict, List, Optional, Tuple
+
+from .objects import ObjectStore
+from .refs import RefsManager
+
+PACK_MAGIC = b"PACK"
+PACK_VERSION = 2
+IDX_MAGIC = b"agidx"
+IDX_VERSION = 2
+OBJ_TYPE_BLOB = 1
+OBJ_TYPE_TREE = 2
+OBJ_TYPE_COMMIT = 3
+OBJ_TYPE_TAG = 4
+TYPE_TO_BYTE = {"blob": OBJ_TYPE_BLOB, "tree": OBJ_TYPE_TREE, "commit": OBJ_TYPE_COMMIT, "tag": OBJ_TYPE_TAG}
+BYTE_TO_TYPE = {v: k for k, v in TYPE_TO_BYTE.items()}
+
+
+def _pack_dir(objects_dir: Path) -> Path:
+    return objects_dir / "pack"
+
+
+def _get_loose_object_type(objects_dir: Path, hash_id: str) -> Optional[str]:
+    """Return obj_type for a loose object, or None if not found."""
+    if len(hash_id) < 4:
+        return None
+    prefix, suffix = hash_id[:2], hash_id[2:]
+    for obj_type in ["blob", "tree", "commit", "tag"]:
+        p = objects_dir / obj_type / prefix / suffix
+        if p.exists():
+            return obj_type
+    return None
+
+
+def list_loose_objects(objects_dir: Path) -> Set[str]:
+    """List all loose object hashes (blob, tree, commit, tag)."""
+    hashes = set()
+    for obj_type in ["blob", "tree", "commit", "tag"]:
+        type_dir = objects_dir / obj_type
+        if not type_dir.exists():
+            continue
+        for prefix_dir in type_dir.iterdir():
+            if not prefix_dir.is_dir():
+                continue
+            for f in prefix_dir.iterdir():
+                hash_id = prefix_dir.name + f.name
+                hashes.add(hash_id)
+    return hashes
+
+
+def reachable_from_refs(mem_dir: Path, store: ObjectStore, gc_prune_days: int = 90) -> Set[str]:
+    """Collect all object hashes reachable from branches, tags, and reflog (within prune window)."""
+    refs = RefsManager(mem_dir)
+    reachable = set()
+    # Branch tips
+    for b in refs.list_branches():
+        ch = refs.get_branch_commit(b)
+        if ch:
+            reachable.update(_collect_from_commit(store, ch))
+    # Tags
+    for t in refs.list_tags():
+        ch = refs.get_tag_commit(t)
+        if ch:
+            reachable.update(_collect_from_commit(store, ch))
+    # Reflog (simplified: just HEAD recent)
+    try:
+        log = refs.get_reflog("HEAD", max_count=1000)
+        for e in log:
+            h = e.get("hash")
+            if h:
+                reachable.update(_collect_from_commit(store, h))
+    except Exception:
+        pass
+    return reachable
+
+
+def _collect_from_commit(store: ObjectStore, commit_hash: str) -> Set[str]:
+    """Collect all object hashes reachable from a commit."""
+    from .remote import _collect_objects_from_commit
+
+    return _collect_objects_from_commit(store, commit_hash)
+
+
+def run_gc(
+    mem_dir: Path, store: ObjectStore, gc_prune_days: int = 90, dry_run: bool = False
+) -> Tuple[int, int]:
+    """
+    Garbage collect: delete unreachable loose objects.
+    Returns (deleted_count, bytes_freed). dry_run: only report, do not delete.
+    """
+    loose = list_loose_objects(mem_dir / "objects")
+    reachable = reachable_from_refs(mem_dir, store, gc_prune_days)
+    to_delete = loose - reachable
+    freed = 0
+    for hash_id in to_delete:
+        # Resolve type from path
+        for obj_type in ["blob", "tree", "commit", "tag"]:
+            p = store.objects_dir / obj_type / hash_id[:2] / hash_id[2:]
+            if p.exists():
+                if not dry_run:
+                    size = p.stat().st_size
+                    p.unlink()
+                    freed += size
+                else:
+                    freed += p.stat().st_size
+                break
+    return (len(to_delete), freed)
+
+
+def write_pack(
+    objects_dir: Path, store: ObjectStore, hash_to_type: Dict[str, str]
+) -> Tuple[Path, Path]:
+    """
+    Pack loose objects into a single pack file and index.
+    hash_to_type: map hash_id -> obj_type for objects to include.
+    Returns (pack_path, index_path). Does not delete loose objects.
+    """
+    if not hash_to_type:
+        raise ValueError("Cannot write empty pack")
+    pack_d = _pack_dir(objects_dir)
+    pack_d.mkdir(parents=True, exist_ok=True)
+
+    pack_header_len = len(PACK_MAGIC) + 4 + 4
+    pack_body = bytearray()
+    index_entries: List[Tuple[str, str, int]] = []  # (hash_id, obj_type, offset_in_file)
+    offset_in_file = pack_header_len
+
+    for hash_id in sorted(hash_to_type.keys()):
+        obj_type = hash_to_type[hash_id]
+        content = store.retrieve(hash_id, obj_type)
+        if content is None:
+            continue
+        header = f"{obj_type} {len(content)}\0".encode()
+        full = header + content
+        compressed = zlib.compress(full)
+        type_byte = TYPE_TO_BYTE.get(obj_type, OBJ_TYPE_BLOB)
+        size_bytes = struct.pack(">I", len(compressed))
+        chunk = bytes([type_byte]) + size_bytes + compressed
+        pack_body.extend(chunk)
+        index_entries.append((hash_id, obj_type, offset_in_file))
+        offset_in_file += len(chunk)
+
+    if not index_entries:
+        raise ValueError("No objects to pack")
+
+    pack_content = PACK_MAGIC + struct.pack(">I", PACK_VERSION) + struct.pack(">I", len(index_entries)) + bytes(pack_body)
+    pack_hash = hashlib.sha256(pack_content).digest()
+    pack_content += pack_hash
+
+    pack_name = f"pack-{pack_hash[:16].hex()}.pack"
+    pack_path = pack_d / pack_name
+    pack_path.write_bytes(pack_content)
+
+    index_content = bytearray(IDX_MAGIC + struct.pack(">I", IDX_VERSION) + struct.pack(">I", len(index_entries)))
+    for hash_id, obj_type, off in index_entries:
+        index_content.extend(bytes.fromhex(hash_id))
+        index_content.append(TYPE_TO_BYTE[obj_type])
+        index_content.extend(struct.pack(">I", off))
+    idx_hash = hashlib.sha256(index_content).digest()
+    index_content.extend(idx_hash)
+    idx_path = pack_path.with_suffix(".idx")
+    idx_path.write_bytes(index_content)
+
+    return (pack_path, idx_path)
+
+
+def _find_pack_index(objects_dir: Path) -> Optional[Path]:
+    """Return path to first .idx file in objects/pack, or None."""
+    pack_d = _pack_dir(objects_dir)
+    if not pack_d.exists():
+        return None
+    for p in pack_d.iterdir():
+        if p.suffix == ".idx":
+            return p
+    return None
+
+
+def retrieve_from_pack(objects_dir: Path, hash_id: str, expected_type: Optional[str] = None) -> Optional[Tuple[str, bytes]]:
+    """
+    Retrieve object from pack by hash. Returns (obj_type, content) or None.
+    If expected_type is set, only return if pack type matches.
+    """
+    idx_path = _find_pack_index(objects_dir)
+    if idx_path is None:
+        return None
+    pack_path = idx_path.with_suffix(".pack")
+    if not pack_path.exists():
+        return None
+
+    raw_idx = idx_path.read_bytes()
+    if len(raw_idx) < len(IDX_MAGIC) + 4 + 4 + 32 + 1 + 4 + 32:
+        return None
+    if raw_idx[: len(IDX_MAGIC)] != IDX_MAGIC:
+        return None
+    version = struct.unpack(">I", raw_idx[len(IDX_MAGIC) : len(IDX_MAGIC) + 4])[0]
+    if version != IDX_VERSION:
+        return None
+    count = struct.unpack(">I", raw_idx[len(IDX_MAGIC) + 4 : len(IDX_MAGIC) + 8])[0]
+    entry_size = 32 + 1 + 4
+    entries_start = len(IDX_MAGIC) + 8
+    entries_end = entries_start + count * entry_size
+    if entries_end + 32 > len(raw_idx):
+        return None
+    hash_hex = hash_id
+    if len(hash_hex) != 64:
+        return None
+    hash_bin = bytes.fromhex(hash_hex)
+    for i in range(count):
+        base = entries_start + i * entry_size
+        entry_hash = raw_idx[base : base + 32]
+        if entry_hash != hash_bin:
+            continue
+        type_byte = raw_idx[base + 32]
+        offset = struct.unpack(">I", raw_idx[base + 33 : base + 37])[0]
+        obj_type = BYTE_TO_TYPE.get(type_byte)
+        if obj_type is None:
+            continue
+        if expected_type is not None and obj_type != expected_type:
+            return None
+        pack_raw = pack_path.read_bytes()
+        header_size = len(PACK_MAGIC) + 4 + 4
+        if offset + 1 + 4 > len(pack_raw) - 32:
+            return None
+        size = struct.unpack(">I", pack_raw[offset + 1 : offset + 5])[0]
+        payload_start = offset + 5
+        payload_end = payload_start + size
+        if payload_end > len(pack_raw) - 32:
+            return None
+        compressed = pack_raw[payload_start:payload_end]
+        try:
+            full = zlib.decompress(compressed)
+        except Exception:
+            return None
+        null_idx = full.index(b"\0")
+        content = full[null_idx + 1 :]
+        return (obj_type, content)
+    return None
+
+
+def run_repack(
+    mem_dir: Path, store: ObjectStore, gc_prune_days: int = 90, dry_run: bool = False
+) -> Tuple[int, int]:
+    """
+    After GC: pack all reachable loose objects into a pack file, then delete those loose objects.
+    Returns (objects_packed, bytes_freed_from_loose).
+    """
+    objects_dir = mem_dir / "objects"
+    reachable = reachable_from_refs(mem_dir, store, gc_prune_days)
+    loose = list_loose_objects(objects_dir)
+    to_pack = reachable & loose
+    if not to_pack:
+        return (0, 0)
+    hash_to_type: Dict[str, str] = {}
+    for hash_id in to_pack:
+        obj_type = _get_loose_object_type(objects_dir, hash_id)
+        if obj_type:
+            hash_to_type[hash_id] = obj_type
+    if not hash_to_type:
+        return (0, 0)
+    if dry_run:
+        return (len(hash_to_type), 0)
+    write_pack(objects_dir, store, hash_to_type)
+    freed = 0
+    for hash_id, obj_type in hash_to_type.items():
+        p = store.objects_dir / obj_type / hash_id[:2] / hash_id[2:]
+        if p.exists():
+            freed += p.stat().st_size
+            p.unlink()
+    return (len(hash_to_type), freed)
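In this format, each pack entry is a one-byte type tag, a 4-byte big-endian compressed length, and the zlib stream; the pack and its .idx both end with their own SHA-256. A sketch of the maintenance flow the new gc command presumably wraps (the ".mem" layout is only assumed from the mem_dir / "objects" usage above):

    # Illustrative maintenance pass: report unreachable objects, then repack reachable ones.
    from pathlib import Path
    from memvcs.core.objects import ObjectStore
    from memvcs.core.pack import run_gc, run_repack

    mem_dir = Path(".mem")                  # assumed repo metadata directory
    store = ObjectStore(mem_dir / "objects")

    deleted, freed = run_gc(mem_dir, store, dry_run=True)
    packed, loose_freed = run_repack(mem_dir, store)
    print(f"unreachable: {deleted} ({freed} bytes); packed {packed}, freed {loose_freed} bytes of loose objects")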
memvcs/core/privacy_budget.py
ADDED
@@ -0,0 +1,63 @@
+"""
+Differential privacy budget tracking for agmem.
+
+Per-repo epsilon spent; block when budget exceeded.
+"""
+
+import json
+import math
+from pathlib import Path
+from typing import Optional, Tuple
+
+
+def _budget_path(mem_dir: Path) -> Path:
+    return mem_dir / "privacy_budget.json"
+
+
+def load_budget(mem_dir: Path) -> Tuple[float, float, float]:
+    """Load (epsilon_spent, max_epsilon, delta). Returns (0, max, delta) if no file."""
+    path = _budget_path(mem_dir)
+    if not path.exists():
+        config = mem_dir / "config.json"
+        max_eps = 1.0
+        delta = 1e-5
+        if config.exists():
+            try:
+                c = json.loads(config.read_text())
+                dp = c.get("differential_privacy", {})
+                max_eps = float(dp.get("max_epsilon", 1.0))
+                delta = float(dp.get("delta", 1e-5))
+            except Exception:
+                pass
+        return (0.0, max_eps, delta)
+    try:
+        data = json.loads(path.read_text())
+        return (
+            float(data.get("epsilon_spent", 0)),
+            float(data.get("max_epsilon", 1.0)),
+            float(data.get("delta", 1e-5)),
+        )
+    except Exception:
+        return (0.0, 1.0, 1e-5)
+
+
+def spend_epsilon(mem_dir: Path, epsilon: float, max_epsilon: Optional[float] = None) -> bool:
+    """Record epsilon spent. Returns False if budget would be exceeded."""
+    spent, max_eps, delta = load_budget(mem_dir)
+    if max_epsilon is not None:
+        max_eps = max_epsilon
+    if spent + epsilon > max_eps:
+        return False
+    mem_dir.mkdir(parents=True, exist_ok=True)
+    path = _budget_path(mem_dir)
+    data = {"epsilon_spent": spent + epsilon, "max_epsilon": max_eps, "delta": delta}
+    path.write_text(json.dumps(data, indent=2))
+    return True
+
+
+def add_noise(value: float, sensitivity: float, epsilon: float, delta: float = 1e-5) -> float:
+    """Add Gaussian noise for (epsilon, delta)-DP. sigma = sensitivity * sqrt(2*ln(1.25/delta)) / epsilon."""
+    import random
+
+    sigma = sensitivity * math.sqrt(2 * math.log(1.25 / delta)) / epsilon
+    return value + random.gauss(0, sigma)