agmem 0.1.3-py3-none-any.whl → 0.1.4-py3-none-any.whl
This diff shows the changes between publicly released package versions as they appear in their public registry; it is provided for informational purposes only.
- {agmem-0.1.3.dist-info → agmem-0.1.4.dist-info}/METADATA +24 -18
- {agmem-0.1.3.dist-info → agmem-0.1.4.dist-info}/RECORD +25 -24
- memvcs/commands/daemon.py +20 -3
- memvcs/commands/distill.py +10 -2
- memvcs/commands/federated.py +7 -1
- memvcs/commands/garden.py +10 -2
- memvcs/commands/gc.py +16 -1
- memvcs/commands/prove.py +2 -2
- memvcs/commands/timeline.py +27 -0
- memvcs/commands/when.py +27 -0
- memvcs/core/compression_pipeline.py +157 -0
- memvcs/core/crypto_verify.py +12 -1
- memvcs/core/distiller.py +22 -4
- memvcs/core/federated.py +70 -9
- memvcs/core/gardener.py +24 -5
- memvcs/core/ipfs_remote.py +169 -8
- memvcs/core/knowledge_graph.py +77 -6
- memvcs/core/objects.py +31 -21
- memvcs/core/pack.py +187 -1
- memvcs/core/remote.py +191 -3
- memvcs/core/zk_proofs.py +143 -11
- {agmem-0.1.3.dist-info → agmem-0.1.4.dist-info}/WHEEL +0 -0
- {agmem-0.1.3.dist-info → agmem-0.1.4.dist-info}/entry_points.txt +0 -0
- {agmem-0.1.3.dist-info → agmem-0.1.4.dist-info}/licenses/LICENSE +0 -0
- {agmem-0.1.3.dist-info → agmem-0.1.4.dist-info}/top_level.txt +0 -0
memvcs/core/pack.py
CHANGED
```diff
@@ -4,7 +4,8 @@ Pack files and garbage collection for agmem.
 Pack: collect loose objects into single file + index. GC: delete unreachable objects, repack.
 """
 
-import
+import hashlib
+import struct
 import zlib
 from pathlib import Path
 from typing import Set, Dict, List, Optional, Tuple
@@ -12,11 +13,34 @@ from typing import Set, Dict, List, Optional, Tuple
 from .objects import ObjectStore
 from .refs import RefsManager
 
+PACK_MAGIC = b"PACK"
+PACK_VERSION = 2
+IDX_MAGIC = b"agidx"
+IDX_VERSION = 2
+OBJ_TYPE_BLOB = 1
+OBJ_TYPE_TREE = 2
+OBJ_TYPE_COMMIT = 3
+OBJ_TYPE_TAG = 4
+TYPE_TO_BYTE = {"blob": OBJ_TYPE_BLOB, "tree": OBJ_TYPE_TREE, "commit": OBJ_TYPE_COMMIT, "tag": OBJ_TYPE_TAG}
+BYTE_TO_TYPE = {v: k for k, v in TYPE_TO_BYTE.items()}
+
 
 def _pack_dir(objects_dir: Path) -> Path:
     return objects_dir / "pack"
 
 
+def _get_loose_object_type(objects_dir: Path, hash_id: str) -> Optional[str]:
+    """Return obj_type for a loose object, or None if not found."""
+    if len(hash_id) < 4:
+        return None
+    prefix, suffix = hash_id[:2], hash_id[2:]
+    for obj_type in ["blob", "tree", "commit", "tag"]:
+        p = objects_dir / obj_type / prefix / suffix
+        if p.exists():
+            return obj_type
+    return None
+
+
 def list_loose_objects(objects_dir: Path) -> Set[str]:
     """List all loose object hashes (blob, tree, commit, tag)."""
     hashes = set()
@@ -90,3 +114,165 @@ def run_gc(
                 freed += p.stat().st_size
                 break
     return (len(to_delete), freed)
+
+
+def write_pack(
+    objects_dir: Path, store: ObjectStore, hash_to_type: Dict[str, str]
+) -> Tuple[Path, Path]:
+    """
+    Pack loose objects into a single pack file and index.
+    hash_to_type: map hash_id -> obj_type for objects to include.
+    Returns (pack_path, index_path). Does not delete loose objects.
+    """
+    if not hash_to_type:
+        raise ValueError("Cannot write empty pack")
+    pack_d = _pack_dir(objects_dir)
+    pack_d.mkdir(parents=True, exist_ok=True)
+
+    pack_header_len = len(PACK_MAGIC) + 4 + 4
+    pack_body = bytearray()
+    index_entries: List[Tuple[str, str, int]] = []  # (hash_id, obj_type, offset_in_file)
+    offset_in_file = pack_header_len
+
+    for hash_id in sorted(hash_to_type.keys()):
+        obj_type = hash_to_type[hash_id]
+        content = store.retrieve(hash_id, obj_type)
+        if content is None:
+            continue
+        header = f"{obj_type} {len(content)}\0".encode()
+        full = header + content
+        compressed = zlib.compress(full)
+        type_byte = TYPE_TO_BYTE.get(obj_type, OBJ_TYPE_BLOB)
+        size_bytes = struct.pack(">I", len(compressed))
+        chunk = bytes([type_byte]) + size_bytes + compressed
+        pack_body.extend(chunk)
+        index_entries.append((hash_id, obj_type, offset_in_file))
+        offset_in_file += len(chunk)
+
+    if not index_entries:
+        raise ValueError("No objects to pack")
+
+    pack_content = PACK_MAGIC + struct.pack(">I", PACK_VERSION) + struct.pack(">I", len(index_entries)) + bytes(pack_body)
+    pack_hash = hashlib.sha256(pack_content).digest()
+    pack_content += pack_hash
+
+    pack_name = f"pack-{pack_hash[:16].hex()}.pack"
+    pack_path = pack_d / pack_name
+    pack_path.write_bytes(pack_content)
+
+    index_content = bytearray(IDX_MAGIC + struct.pack(">I", IDX_VERSION) + struct.pack(">I", len(index_entries)))
+    for hash_id, obj_type, off in index_entries:
+        index_content.extend(bytes.fromhex(hash_id))
+        index_content.append(TYPE_TO_BYTE[obj_type])
+        index_content.extend(struct.pack(">I", off))
+    idx_hash = hashlib.sha256(index_content).digest()
+    index_content.extend(idx_hash)
+    idx_path = pack_path.with_suffix(".idx")
+    idx_path.write_bytes(index_content)
+
+    return (pack_path, idx_path)
+
+
+def _find_pack_index(objects_dir: Path) -> Optional[Path]:
+    """Return path to first .idx file in objects/pack, or None."""
+    pack_d = _pack_dir(objects_dir)
+    if not pack_d.exists():
+        return None
+    for p in pack_d.iterdir():
+        if p.suffix == ".idx":
+            return p
+    return None
+
+
+def retrieve_from_pack(objects_dir: Path, hash_id: str, expected_type: Optional[str] = None) -> Optional[Tuple[str, bytes]]:
+    """
+    Retrieve object from pack by hash. Returns (obj_type, content) or None.
+    If expected_type is set, only return if pack type matches.
+    """
+    idx_path = _find_pack_index(objects_dir)
+    if idx_path is None:
+        return None
+    pack_path = idx_path.with_suffix(".pack")
+    if not pack_path.exists():
+        return None
+
+    raw_idx = idx_path.read_bytes()
+    if len(raw_idx) < len(IDX_MAGIC) + 4 + 4 + 32 + 1 + 4 + 32:
+        return None
+    if raw_idx[: len(IDX_MAGIC)] != IDX_MAGIC:
+        return None
+    version = struct.unpack(">I", raw_idx[len(IDX_MAGIC) : len(IDX_MAGIC) + 4])[0]
+    if version != IDX_VERSION:
+        return None
+    count = struct.unpack(">I", raw_idx[len(IDX_MAGIC) + 4 : len(IDX_MAGIC) + 8])[0]
+    entry_size = 32 + 1 + 4
+    entries_start = len(IDX_MAGIC) + 8
+    entries_end = entries_start + count * entry_size
+    if entries_end + 32 > len(raw_idx):
+        return None
+    hash_hex = hash_id
+    if len(hash_hex) != 64:
+        return None
+    hash_bin = bytes.fromhex(hash_hex)
+    for i in range(count):
+        base = entries_start + i * entry_size
+        entry_hash = raw_idx[base : base + 32]
+        if entry_hash != hash_bin:
+            continue
+        type_byte = raw_idx[base + 32]
+        offset = struct.unpack(">I", raw_idx[base + 33 : base + 37])[0]
+        obj_type = BYTE_TO_TYPE.get(type_byte)
+        if obj_type is None:
+            continue
+        if expected_type is not None and obj_type != expected_type:
+            return None
+        pack_raw = pack_path.read_bytes()
+        header_size = len(PACK_MAGIC) + 4 + 4
+        if offset + 1 + 4 > len(pack_raw) - 32:
+            return None
+        size = struct.unpack(">I", pack_raw[offset + 1 : offset + 5])[0]
+        payload_start = offset + 5
+        payload_end = payload_start + size
+        if payload_end > len(pack_raw) - 32:
+            return None
+        compressed = pack_raw[payload_start:payload_end]
+        try:
+            full = zlib.decompress(compressed)
+        except Exception:
+            return None
+        null_idx = full.index(b"\0")
+        content = full[null_idx + 1 :]
+        return (obj_type, content)
+    return None
+
+
+def run_repack(
+    mem_dir: Path, store: ObjectStore, gc_prune_days: int = 90, dry_run: bool = False
+) -> Tuple[int, int]:
+    """
+    After GC: pack all reachable loose objects into a pack file, then delete those loose objects.
+    Returns (objects_packed, bytes_freed_from_loose).
+    """
+    objects_dir = mem_dir / "objects"
+    reachable = reachable_from_refs(mem_dir, store, gc_prune_days)
+    loose = list_loose_objects(objects_dir)
+    to_pack = reachable & loose
+    if not to_pack:
+        return (0, 0)
+    hash_to_type: Dict[str, str] = {}
+    for hash_id in to_pack:
+        obj_type = _get_loose_object_type(objects_dir, hash_id)
+        if obj_type:
+            hash_to_type[hash_id] = obj_type
+    if not hash_to_type:
+        return (0, 0)
+    if dry_run:
+        return (len(hash_to_type), 0)
+    write_pack(objects_dir, store, hash_to_type)
+    freed = 0
+    for hash_id, obj_type in hash_to_type.items():
+        p = store.objects_dir / obj_type / hash_id[:2] / hash_id[2:]
+        if p.exists():
+            freed += p.stat().st_size
+            p.unlink()
+    return (len(hash_to_type), freed)
```
memvcs/core/remote.py
CHANGED
```diff
@@ -1,19 +1,24 @@
 """
-Remote sync for agmem - file-based push/pull/clone.
+Remote sync for agmem - file-based and cloud (S3/GCS) push/pull/clone.
 
-Supports file:// URLs
+Supports file:// URLs and s3:///gs:// with optional distributed locking.
 """
 
 import json
 import shutil
 from pathlib import Path
-from typing import Optional, Set
+from typing import Optional, Set, Any
 from urllib.parse import urlparse
 
 from .objects import ObjectStore, Commit, Tree, Blob, _valid_object_hash
 from .refs import RefsManager, _ref_path_under_root
 
 
+def _is_cloud_remote(url: str) -> bool:
+    """Return True if URL is S3 or GCS (use storage adapter + optional lock)."""
+    return url.startswith("s3://") or url.startswith("gs://")
+
+
 def parse_remote_url(url: str) -> Path:
     """Parse remote URL to local path. Supports file:// only. Rejects path traversal."""
     parsed = urlparse(url)
@@ -62,6 +67,50 @@ def _collect_objects_from_commit(store: ObjectStore, commit_hash: str) -> Set[st
     return seen
 
 
+def _read_object_from_adapter(adapter: Any, hash_id: str) -> Optional[tuple]:
+    """Read object from storage adapter. Returns (obj_type, content_bytes) or None."""
+    import zlib
+    for obj_type in ["commit", "tree", "blob", "tag"]:
+        rel = f".mem/objects/{obj_type}/{hash_id[:2]}/{hash_id[2:]}"
+        if not adapter.exists(rel):
+            continue
+        try:
+            raw = adapter.read_file(rel)
+            full = zlib.decompress(raw)
+            null_idx = full.index(b"\0")
+            content = full[null_idx + 1:]
+            return (obj_type, content)
+        except Exception:
+            continue
+    return None
+
+
+def _collect_objects_from_commit_remote(adapter: Any, commit_hash: str) -> Set[str]:
+    """Collect object hashes reachable from a commit when reading from storage adapter."""
+    seen = set()
+    todo = [commit_hash]
+    while todo:
+        h = todo.pop()
+        if h in seen:
+            continue
+        seen.add(h)
+        pair = _read_object_from_adapter(adapter, h)
+        if pair is None:
+            continue
+        obj_type, content = pair
+        if obj_type == "commit":
+            data = json.loads(content)
+            todo.extend(data.get("parents", []))
+            if "tree" in data:
+                todo.append(data["tree"])
+        elif obj_type == "tree":
+            data = json.loads(content)
+            for e in data.get("entries", []):
+                if "hash" in e:
+                    todo.append(e["hash"])
+    return seen
+
+
 def _list_local_objects(objects_dir: Path) -> Set[str]:
     """List all object hashes in a .mem/objects directory."""
     hashes = set()
@@ -139,6 +188,113 @@ class Remote:
         self._config["remotes"][self.name]["url"] = url
         self._save_config(self._config)
 
+    def _push_via_storage(self, adapter: Any, branch: Optional[str] = None) -> str:
+        """Push objects and refs via storage adapter. Caller must hold lock if needed."""
+        refs = RefsManager(self.mem_dir)
+        store = ObjectStore(self.objects_dir)
+        to_push = set()
+        for b in refs.list_branches():
+            if branch and b != branch:
+                continue
+            ch = refs.get_branch_commit(b)
+            if ch:
+                to_push.update(_collect_objects_from_commit(store, ch))
+        for t in refs.list_tags():
+            ch = refs.get_tag_commit(t)
+            if ch:
+                to_push.update(_collect_objects_from_commit(store, ch))
+        copied = 0
+        for h in to_push:
+            obj_type = None
+            for otype in ["blob", "tree", "commit", "tag"]:
+                p = self.objects_dir / otype / h[:2] / h[2:]
+                if p.exists():
+                    obj_type = otype
+                    break
+            if not obj_type:
+                continue
+            rel = f".mem/objects/{obj_type}/{h[:2]}/{h[2:]}"
+            if not adapter.exists(rel):
+                try:
+                    data = p.read_bytes()
+                    adapter.makedirs(f".mem/objects/{obj_type}/{h[:2]}")
+                    adapter.write_file(rel, data)
+                    copied += 1
+                except Exception:
+                    pass
+        for b in refs.list_branches():
+            if branch and b != branch:
+                continue
+            ch = refs.get_branch_commit(b)
+            if ch and _ref_path_under_root(b, refs.heads_dir):
+                parent = str(Path(b).parent)
+                if parent != ".":
+                    adapter.makedirs(f".mem/refs/heads/{parent}")
+                adapter.write_file(f".mem/refs/heads/{b}", (ch + "\n").encode())
+        for t in refs.list_tags():
+            ch = refs.get_tag_commit(t)
+            if ch and _ref_path_under_root(t, refs.tags_dir):
+                parent = str(Path(t).parent)
+                if parent != ".":
+                    adapter.makedirs(f".mem/refs/tags/{parent}")
+                adapter.write_file(f".mem/refs/tags/{t}", (ch + "\n").encode())
+        try:
+            from .audit import append_audit
+            append_audit(self.mem_dir, "push", {"remote": self.name, "branch": branch, "copied": copied})
+        except Exception:
+            pass
+        return f"Pushed {copied} object(s) to {self.name}"
+
+    def _fetch_via_storage(self, adapter: Any, branch: Optional[str] = None) -> str:
+        """Fetch objects and refs via storage adapter. Caller must hold lock if needed."""
+        to_fetch = set()
+        try:
+            heads = adapter.list_dir(".mem/refs/heads")
+            for fi in heads:
+                if fi.is_dir:
+                    continue
+                branch_name = fi.path.replace(".mem/refs/heads/", "").replace("\\", "/").strip("/")
+                if branch and branch_name != branch:
+                    continue
+                data = adapter.read_file(fi.path)
+                ch = data.decode().strip()
+                if ch and _valid_object_hash(ch):
+                    to_fetch.update(_collect_objects_from_commit_remote(adapter, ch))
+            tags = adapter.list_dir(".mem/refs/tags")
+            for fi in tags:
+                if fi.is_dir:
+                    continue
+                data = adapter.read_file(fi.path)
+                ch = data.decode().strip()
+                if ch and _valid_object_hash(ch):
+                    to_fetch.update(_collect_objects_from_commit_remote(adapter, ch))
+        except Exception:
+            pass
+        if not to_fetch:
+            return f"Fetched 0 object(s) from {self.name}"
+        local_has = _list_local_objects(self.objects_dir)
+        missing = to_fetch - local_has
+        copied = 0
+        for h in missing:
+            for otype in ["blob", "tree", "commit", "tag"]:
+                rel = f".mem/objects/{otype}/{h[:2]}/{h[2:]}"
+                if adapter.exists(rel):
+                    try:
+                        data = adapter.read_file(rel)
+                        p = self.objects_dir / otype / h[:2] / h[2:]
+                        p.parent.mkdir(parents=True, exist_ok=True)
+                        p.write_bytes(data)
+                        copied += 1
+                    except Exception:
+                        pass
+                    break
+        try:
+            from .audit import append_audit
+            append_audit(self.mem_dir, "fetch", {"remote": self.name, "branch": branch, "copied": copied})
+        except Exception:
+            pass
+        return f"Fetched {copied} object(s) from {self.name}"
+
     def push(self, branch: Optional[str] = None) -> str:
         """
         Push objects and refs to remote.
@@ -148,6 +304,22 @@ class Remote:
         if not url:
             raise ValueError(f"Remote '{self.name}' has no URL configured")
 
+        if _is_cloud_remote(url):
+            try:
+                from .storage import get_adapter
+                from .storage.base import LockError
+                adapter = get_adapter(url, self._config)
+                lock_name = "agmem-push"
+                adapter.acquire_lock(lock_name, 30)
+                try:
+                    return self._push_via_storage(adapter, branch)
+                finally:
+                    adapter.release_lock(lock_name)
+            except LockError as e:
+                raise ValueError(f"Could not acquire remote lock: {e}") from e
+            except Exception as e:
+                raise ValueError(f"Push to cloud failed: {e}") from e
+
         remote_path = parse_remote_url(url)
         remote_mem = remote_path / ".mem"
         remote_objects = remote_mem / "objects"
@@ -247,6 +419,22 @@ class Remote:
         if not url:
             raise ValueError(f"Remote '{self.name}' has no URL configured")
 
+        if _is_cloud_remote(url):
+            try:
+                from .storage import get_adapter
+                from .storage.base import LockError
+                adapter = get_adapter(url, self._config)
+                lock_name = "agmem-fetch"
+                adapter.acquire_lock(lock_name, 30)
+                try:
+                    return self._fetch_via_storage(adapter, branch)
+                finally:
+                    adapter.release_lock(lock_name)
+            except LockError as e:
+                raise ValueError(f"Could not acquire remote lock: {e}") from e
+            except Exception as e:
+                raise ValueError(f"Fetch from cloud failed: {e}") from e
+
         remote_path = parse_remote_url(url)
         remote_objects = remote_path / ".mem" / "objects"
         remote_refs = remote_path / ".mem" / "refs"
```
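The cloud paths in `push()` and `fetch()` only touch the backend through a storage adapter returned by `get_adapter(url, config)`. The adapter module itself is not part of this diff, so the `Protocol` below is merely an inventory of the calls that `_push_via_storage` and `_fetch_via_storage` make; it is an assumption for illustration, not agmem's actual base class, and the meaning of the second argument to `acquire_lock` (named `timeout_seconds` here) is a guess from the call `acquire_lock(lock_name, 30)`.

```python
# Sketch of the adapter surface exercised by the cloud push/fetch code above.
# The real interface lives in memvcs.core.storage.base and may differ.
from typing import Iterable, Protocol


class RemoteFileInfo(Protocol):
    path: str      # entry path, e.g. ".mem/refs/heads/main"
    is_dir: bool   # directories are skipped by the ref-listing loops


class StorageAdapter(Protocol):
    def exists(self, rel_path: str) -> bool: ...
    def read_file(self, rel_path: str) -> bytes: ...
    def write_file(self, rel_path: str, data: bytes) -> None: ...
    def makedirs(self, rel_path: str) -> None: ...
    def list_dir(self, rel_path: str) -> Iterable[RemoteFileInfo]: ...
    def acquire_lock(self, name: str, timeout_seconds: int) -> None: ...  # second arg is a guess
    def release_lock(self, name: str) -> None: ...
```

Note that `list_dir` entries need both a `path` and an `is_dir` flag, since the ref-listing loops skip directory entries before decoding commit hashes.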
memvcs/core/zk_proofs.py
CHANGED
```diff
@@ -1,26 +1,158 @@
 """
-Zero-knowledge proof system for agmem
+Zero-knowledge proof system for agmem.
 
-
-
+Hash/signature-based proofs: keyword containment (Merkle set membership),
+memory freshness (signed timestamp). Full zk-SNARK backend can be added later.
 """
 
+import base64
+import hashlib
+import json
+import os
 from pathlib import Path
-from typing import Optional, Tuple
+from typing import Optional, List, Tuple, Any, Dict
+
+from .crypto_verify import (
+    build_merkle_tree,
+    merkle_proof,
+    verify_merkle_proof,
+    load_public_key,
+    load_private_key_from_env,
+    sign_merkle_root,
+    verify_signature,
+    ED25519_AVAILABLE,
+)
+
+
+def _word_hashes(content: str) -> List[str]:
+    """Extract words and return sorted list of SHA-256 hashes (hex)."""
+    words = set()
+    for word in content.split():
+        w = word.strip().lower()
+        if len(w) >= 1:
+            words.add(w)
+    return sorted(hashlib.sha256(w.encode()).hexdigest() for w in words)
 
 
 def prove_keyword_containment(memory_path: Path, keyword: str, output_proof_path: Path) -> bool:
-    """
-
+    """
+    Prove memory file contains keyword without revealing content.
+    Proof: Merkle set membership of H(keyword) over word hashes in file.
+    """
+    if not memory_path.exists() or not memory_path.is_file():
+        return False
+    try:
+        content = memory_path.read_text(encoding="utf-8", errors="replace")
+    except Exception:
+        return False
+    word_hashes_list = _word_hashes(content)
+    keyword_hash = hashlib.sha256(keyword.strip().lower().encode()).hexdigest()
+    if keyword_hash not in word_hashes_list:
+        return False
+    root = build_merkle_tree(word_hashes_list)
+    proof_path_list = merkle_proof(word_hashes_list, keyword_hash)
+    if proof_path_list is None:
+        return False
+    proof_data = {
+        "statement_type": "keyword",
+        "keyword_hash": keyword_hash,
+        "root": root,
+        "path": proof_path_list,
+    }
+    output_proof_path.parent.mkdir(parents=True, exist_ok=True)
+    output_proof_path.write_text(json.dumps(proof_data, indent=2))
+    return True
 
 
 def prove_memory_freshness(
-    memory_path: Path, after_timestamp: str, output_proof_path: Path
+    memory_path: Path, after_timestamp: str, output_proof_path: Path, mem_dir: Optional[Path] = None
 ) -> bool:
-    """
-
+    """
+    Prove memory was updated after date without revealing content.
+    Proof: signed file mtime (or current time) and optional public key.
+    """
+    if not memory_path.exists() or not memory_path.is_file():
+        return False
+    if not ED25519_AVAILABLE:
+        return False
+    try:
+        stat = memory_path.stat()
+        ts = stat.st_mtime
+        from datetime import datetime, timezone
+        iso_ts = datetime.fromtimestamp(ts, tz=timezone.utc).isoformat()
+    except Exception:
+        return False
+    private_pem = load_private_key_from_env() if mem_dir is not None else None
+    if private_pem is None:
+        return False
+    try:
+        sig_hex = sign_merkle_root(iso_ts, private_pem)
+    except Exception:
+        return False
+    proof_data = {"statement_type": "freshness", "timestamp": iso_ts, "signature": sig_hex}
+    if mem_dir is not None:
+        pub_pem = load_public_key(mem_dir)
+        if pub_pem is not None:
+            proof_data["public_key_pem_b64"] = base64.b64encode(pub_pem).decode()
+    output_proof_path.parent.mkdir(parents=True, exist_ok=True)
+    output_proof_path.write_text(json.dumps(proof_data, indent=2))
+    return True
 
 
-def verify_proof(proof_path: Path, statement_type: str, **kwargs) -> bool:
-    """
+def verify_proof(proof_path: Path, statement_type: str, **kwargs: Any) -> bool:
+    """
+    Verify a proof. statement_type in ("keyword", "freshness").
+    For keyword: pass keyword=... (the keyword string).
+    For freshness: pass after_timestamp=... (ISO date string). Optional mem_dir=... for public key.
+    """
+    if not proof_path.exists() or not proof_path.is_file():
+        return False
+    try:
+        data = json.loads(proof_path.read_text())
+    except Exception:
+        return False
+    if data.get("statement_type") != statement_type:
+        return False
+    if statement_type == "keyword":
+        keyword = kwargs.get("keyword")
+        if keyword is None:
+            return False
+        keyword_hash = hashlib.sha256(keyword.strip().lower().encode()).hexdigest()
+        if data.get("keyword_hash") != keyword_hash:
+            return False
+        root = data.get("root")
+        path_list = data.get("path")
+        if not root or path_list is None:
+            return False
+        return verify_merkle_proof(keyword_hash, path_list, root)
+    if statement_type == "freshness":
+        after_ts = kwargs.get("after_timestamp")
+        if after_ts is None:
+            return False
+        ts_str = data.get("timestamp")
+        sig_hex = data.get("signature")
+        if not ts_str or not sig_hex:
+            return False
+        pub_pem_b64 = data.get("public_key_pem_b64")
+        if pub_pem_b64:
+            try:
+                pub_pem = base64.b64decode(pub_pem_b64)
+            except Exception:
+                return False
+        else:
+            mem_dir = kwargs.get("mem_dir")
+            if mem_dir is None:
+                return False
+            pub_pem = load_public_key(Path(mem_dir))
+            if pub_pem is None:
+                return False
+        if not verify_signature(ts_str, sig_hex, pub_pem):
+            return False
+        try:
+            from datetime import datetime
+            after_dt = datetime.fromisoformat(after_ts.replace("Z", "+00:00"))
+            ts_dt = datetime.fromisoformat(ts_str.replace("Z", "+00:00"))
+            return ts_dt >= after_dt
+        except Exception:
+            return False
     return False
```
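Since `prove_keyword_containment` and `verify_proof` are plain functions with the signatures shown above, a keyword-proof round trip can be sketched as below. The file paths and content are made up for illustration; the proof JSON carries only the keyword hash, the Merkle root, and the sibling path, so the verifier never sees the memory text.

```python
# Minimal round-trip sketch (not from the package's docs); assumes agmem 0.1.4
# is installed and writable temp paths like the ones below.
from pathlib import Path

from memvcs.core.zk_proofs import prove_keyword_containment, verify_proof

memory = Path("/tmp/agent_memory.md")            # hypothetical memory file
memory.write_text("the deploy key rotates every friday")

proof = Path("/tmp/keyword.proof.json")
if prove_keyword_containment(memory, "friday", proof):
    # The verifier needs only the proof file and the claimed keyword: it
    # recomputes H("friday") and checks the Merkle path against the root.
    print(verify_proof(proof, "keyword", keyword="friday"))
```

The freshness proof follows the same pattern but needs an Ed25519 key pair (the signing key comes from `load_private_key_from_env`), so it fails closed with `False` when `ED25519_AVAILABLE` is not set.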
Files without changes: {agmem-0.1.3.dist-info → agmem-0.1.4.dist-info}/WHEEL, entry_points.txt, licenses/LICENSE, top_level.txt