agmem 0.1.5__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {agmem-0.1.5.dist-info → agmem-0.2.0.dist-info}/METADATA +231 -54
- {agmem-0.1.5.dist-info → agmem-0.2.0.dist-info}/RECORD +18 -13
- memvcs/__init__.py +1 -1
- memvcs/commands/daemon.py +37 -1
- memvcs/commands/distill.py +6 -0
- memvcs/coordinator/__init__.py +5 -0
- memvcs/coordinator/server.py +223 -0
- memvcs/core/delta.py +258 -0
- memvcs/core/distiller.py +74 -50
- memvcs/core/pack.py +191 -33
- memvcs/core/remote.py +82 -2
- memvcs/core/zk_proofs.py +62 -5
- memvcs/health/__init__.py +25 -0
- memvcs/health/monitor.py +452 -0
- {agmem-0.1.5.dist-info → agmem-0.2.0.dist-info}/WHEEL +0 -0
- {agmem-0.1.5.dist-info → agmem-0.2.0.dist-info}/entry_points.txt +0 -0
- {agmem-0.1.5.dist-info → agmem-0.2.0.dist-info}/licenses/LICENSE +0 -0
- {agmem-0.1.5.dist-info → agmem-0.2.0.dist-info}/top_level.txt +0 -0
memvcs/core/pack.py
CHANGED

@@ -2,8 +2,10 @@
 Pack files and garbage collection for agmem.
 
 Pack: collect loose objects into single file + index. GC: delete unreachable objects, repack.
+Includes delta encoding for similar objects (5-10x compression for similar content).
 """
 
+import bisect
 import hashlib
 import struct
 import zlib
@@ -12,20 +14,23 @@ from typing import Set, Dict, List, Optional, Tuple
 
 from .objects import ObjectStore
 from .refs import RefsManager
+from .delta import find_similar_objects, compute_delta, DeltaCache
 
 PACK_MAGIC = b"PACK"
-PACK_VERSION = 2
+PACK_VERSION = 2  # Maintain v2 for backward compatibility
 IDX_MAGIC = b"agidx"
-IDX_VERSION = 2
+IDX_VERSION = 2  # Maintain v2 for backward compatibility
 OBJ_TYPE_BLOB = 1
 OBJ_TYPE_TREE = 2
 OBJ_TYPE_COMMIT = 3
 OBJ_TYPE_TAG = 4
+OBJ_TYPE_DELTA = 5  # Delta object type (for future v3)
 TYPE_TO_BYTE = {
     "blob": OBJ_TYPE_BLOB,
     "tree": OBJ_TYPE_TREE,
     "commit": OBJ_TYPE_COMMIT,
     "tag": OBJ_TYPE_TAG,
+    "delta": OBJ_TYPE_DELTA,
 }
 BYTE_TO_TYPE = {v: k for k, v in TYPE_TO_BYTE.items()}
 
@@ -121,6 +126,142 @@ def run_gc(
     return (len(to_delete), freed)
 
 
+def write_pack_with_delta(
+    objects_dir: Path,
+    store: ObjectStore,
+    hash_to_type: Dict[str, str],
+    use_delta: bool = True,
+    similarity_threshold: float = 0.7,
+) -> Tuple[Path, Path, Optional[Dict[str, Tuple[int, int]]]]:
+    """
+    Pack loose objects with optional delta encoding.
+
+    Args:
+        objects_dir: Path to objects directory
+        store: ObjectStore instance
+        hash_to_type: map hash_id -> obj_type
+        use_delta: whether to compute deltas for similar objects
+        similarity_threshold: minimum similarity (0.0-1.0) for delta encoding
+
+    Returns:
+        (pack_path, index_path, delta_stats)
+        delta_stats: dict of {target_hash: (original_size, delta_size)} for deltas used
+    """
+    if not hash_to_type:
+        raise ValueError("Cannot write empty pack")
+
+    pack_d = _pack_dir(objects_dir)
+    pack_d.mkdir(parents=True, exist_ok=True)
+
+    # Load all objects
+    objects_data: Dict[str, bytes] = {}
+    for hash_id in hash_to_type.keys():
+        obj_type = hash_to_type[hash_id]
+        content = store.retrieve(hash_id, obj_type)
+        if content:
+            header = f"{obj_type} {len(content)}\0".encode()
+            objects_data[hash_id] = header + content
+
+    # Find similar objects for delta encoding
+    delta_cache = DeltaCache() if use_delta else None
+    if use_delta and len(objects_data) > 1:
+        similarity_groups = find_similar_objects(
+            objects_data,
+            similarity_threshold=similarity_threshold,
+            min_size=100,
+        )
+        for group in similarity_groups:
+            if len(group) < 2:
+                continue
+            base_hash = group[0]  # Smallest object is base
+            base_content = objects_data[base_hash]
+            for target_hash in group[1:]:
+                target_content = objects_data[target_hash]
+                delta = compute_delta(base_content, target_content)
+                # Only use delta if it saves space
+                if len(delta) < len(target_content) * 0.8:
+                    delta_cache.add_delta(base_hash, target_hash, delta)
+
+    pack_header_len = len(PACK_MAGIC) + 4 + 4
+    pack_body = bytearray()
+    index_entries: List[Tuple[str, str, int, Optional[str]]] = (
+        []
+    )  # (hash_id, obj_type, offset, base_hash or None)
+    offset_in_file = pack_header_len
+
+    for hash_id in sorted(hash_to_type.keys()):
+        obj_type = hash_to_type[hash_id]
+        full_data = objects_data.get(hash_id)
+        if not full_data:
+            continue
+
+        # Check if this object has a delta
+        base_hash = delta_cache.get_base(hash_id) if delta_cache else None
+        if base_hash and delta_cache:
+            # Store as delta
+            delta = delta_cache.get_delta(base_hash, hash_id)
+            compressed = zlib.compress(delta)
+            type_byte = OBJ_TYPE_DELTA
+            size_bytes = struct.pack(">I", len(compressed))
+            base_hash_bytes = bytes.fromhex(base_hash)
+            chunk = bytes([type_byte]) + size_bytes + base_hash_bytes[:16] + compressed
+            index_entries.append((hash_id, obj_type, offset_in_file, base_hash))
+        else:
+            # Store full object
+            compressed = zlib.compress(full_data)
+            type_byte = TYPE_TO_BYTE.get(obj_type, OBJ_TYPE_BLOB)
+            size_bytes = struct.pack(">I", len(compressed))
+            chunk = bytes([type_byte]) + size_bytes + compressed
+            index_entries.append((hash_id, obj_type, offset_in_file, None))
+
+        pack_body.extend(chunk)
+        offset_in_file += len(chunk)
+
+    if not index_entries:
+        raise ValueError("No objects to pack")
+
+    pack_content = (
+        PACK_MAGIC
+        + struct.pack(">I", PACK_VERSION)
+        + struct.pack(">I", len(index_entries))
+        + bytes(pack_body)
+    )
+    pack_hash = hashlib.sha256(pack_content).digest()
+    pack_content += pack_hash
+
+    pack_name = f"pack-{pack_hash[:16].hex()}.pack"
+    pack_path = pack_d / pack_name
+    pack_path.write_bytes(pack_content)
+
+    # Write index with delta references (keeping v2 format for now)
+    index_content = bytearray(
+        IDX_MAGIC + struct.pack(">I", IDX_VERSION) + struct.pack(">I", len(index_entries))
+    )
+    delta_stats = {}
+    for hash_id, obj_type, off, base_hash in index_entries:
+        index_content.extend(bytes.fromhex(hash_id))
+        index_content.append(TYPE_TO_BYTE[obj_type])
+        index_content.extend(struct.pack(">I", off))
+        # Note: delta base hash stored after offset but not read by v2 retrieve_from_pack
+        # This is forward-compatible: v3 readers will use base_hash, v2 readers ignore it
+        if base_hash:
+            original_size = len(objects_data[hash_id])
+            delta_size = len(delta_cache.get_delta(base_hash, hash_id))
+            delta_stats[hash_id] = (original_size, delta_size)
+            # Store delta base info (v3 format, but after v2 format fields)
+            index_content.extend(bytes.fromhex(base_hash))
+        else:
+            # Padding for v3 format
+            index_content.extend(b"\x00" * 32)
+
+    idx_hash = hashlib.sha256(index_content).digest()
+    index_content.extend(idx_hash)
+    idx_path = pack_path.with_suffix(".idx")
+    idx_path.write_bytes(index_content)
+
+    return (pack_path, idx_path, delta_stats if use_delta else None)
+
+
 def write_pack(
     objects_dir: Path, store: ObjectStore, hash_to_type: Dict[str, str]
 ) -> Tuple[Path, Path]:
@@ -128,6 +269,9 @@ def write_pack(
     Pack loose objects into a single pack file and index.
     hash_to_type: map hash_id -> obj_type for objects to include.
     Returns (pack_path, index_path). Does not delete loose objects.
+
+    Standard pack format (v2) without delta encoding for backward compatibility.
+    Use write_pack_with_delta() with use_delta=True for delta encoding.
     """
     if not hash_to_type:
         raise ValueError("Cannot write empty pack")
@@ -200,7 +344,7 @@ def retrieve_from_pack(
     objects_dir: Path, hash_id: str, expected_type: Optional[str] = None
 ) -> Optional[Tuple[str, bytes]]:
     """
-    Retrieve object from pack by hash. Returns (obj_type, content) or None.
+    Retrieve object from pack by hash using binary search. Returns (obj_type, content) or None.
     If expected_type is set, only return if pack type matches.
     """
     idx_path = _find_pack_index(objects_dir)
@@ -228,36 +372,50 @@ def retrieve_from_pack(
     if len(hash_hex) != 64:
         return None
     hash_bin = bytes.fromhex(hash_hex)
-    (30 lines removed: previous linear scan over index entries, not rendered in this diff view)
+
+    # Binary search over sorted hash entries (O(log n) instead of O(n))
+    class HashComparator:
+        """Helper for binary search over packed hash entries."""
+
+        def __getitem__(self, idx: int) -> bytes:
+            base = entries_start + idx * entry_size
+            return raw_idx[base : base + 32]
+
+        def __len__(self) -> int:
+            return count
+
+    hashes = HashComparator()
+    idx = bisect.bisect_left(hashes, hash_bin)
+
+    if idx >= count or hashes[idx] != hash_bin:
+        return None
+
+    base = entries_start + idx * entry_size
+    type_byte = raw_idx[base + 32]
+    offset = struct.unpack(">I", raw_idx[base + 33 : base + 37])[0]
+    obj_type = BYTE_TO_TYPE.get(type_byte)
+    if obj_type is None:
+        return None
+    if expected_type is not None and obj_type != expected_type:
+        return None
+
+    pack_raw = pack_path.read_bytes()
+    header_size = len(PACK_MAGIC) + 4 + 4
+    if offset + 1 + 4 > len(pack_raw) - 32:
+        return None
+    size = struct.unpack(">I", pack_raw[offset + 1 : offset + 5])[0]
+    payload_start = offset + 5
+    payload_end = payload_start + size
+    if payload_end > len(pack_raw) - 32:
+        return None
+    compressed = pack_raw[payload_start:payload_end]
+    try:
+        full = zlib.decompress(compressed)
+    except Exception:
+        return None
+    null_idx = full.index(b"\0")
+    content = full[null_idx + 1 :]
+    return (obj_type, content)
 
 
 def run_repack(
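The new packing path is easiest to see end to end. Below is a minimal sketch of how `write_pack_with_delta()` and the binary-search `retrieve_from_pack()` fit together; the `.mem/objects` layout and the placeholder hash are assumptions for illustration, and only the signatures and return shapes come from this diff.

```python
# Sketch only: exercises write_pack_with_delta() + retrieve_from_pack() as diffed above.
# ASSUMPTIONS: the objects directory lives at .mem/objects and hash_to_type has been
# collected elsewhere (e.g. by the gc/repack code); the hash below is a placeholder.
from pathlib import Path

from memvcs.core.objects import ObjectStore
from memvcs.core.pack import retrieve_from_pack, write_pack_with_delta

objects_dir = Path(".mem/objects")          # assumed repository layout
store = ObjectStore(objects_dir)

hash_to_type = {"<64-hex-sha256>": "blob"}  # hash_id -> obj_type, gathered by the caller

pack_path, idx_path, delta_stats = write_pack_with_delta(
    objects_dir,
    store,
    hash_to_type,
    use_delta=True,
    similarity_threshold=0.7,
)

# delta_stats maps target_hash -> (original_size, delta_size) for objects stored as deltas
if delta_stats:
    for target_hash, (original_size, delta_size) in delta_stats.items():
        print(f"{target_hash[:8]}: {original_size} B full -> {delta_size} B delta")

# Reads go through the .idx file; lookup is now a bisect over the sorted hash column
hit = retrieve_from_pack(objects_dir, next(iter(hash_to_type)), expected_type="blob")
if hit is not None:
    obj_type, content = hit
```

As the index-writing comments note, the delta base hash is appended after the v2 fields, so existing v2 readers ignore it while a future v3 reader can follow it.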
memvcs/core/remote.py
CHANGED

@@ -1,7 +1,7 @@
 """
-Remote sync for agmem - file-based
+Remote sync for agmem - file-based, cloud (S3/GCS), and IPFS push/pull/clone.
 
-Supports file
+Supports file://, s3://, gs://, and ipfs:// URLs with optional distributed locking.
 """
 
 import json
@@ -19,6 +19,11 @@ def _is_cloud_remote(url: str) -> bool:
     return url.startswith("s3://") or url.startswith("gs://")
 
 
+def _is_ipfs_remote(url: str) -> bool:
+    """Return True if URL is IPFS (ipfs://<cid>)."""
+    return url.startswith("ipfs://")
+
+
 def parse_remote_url(url: str) -> Path:
     """Parse remote URL to local path. Supports file:// only. Rejects path traversal."""
     parsed = urlparse(url)
@@ -302,6 +307,75 @@ class Remote:
             pass
         return f"Fetched {copied} object(s) from {self.name}"
 
+    def _push_to_ipfs(self, branch: Optional[str] = None) -> str:
+        """Push objects to IPFS and update remote URL with CID."""
+        from .ipfs_remote import push_to_ipfs
+
+        refs = RefsManager(self.mem_dir)
+        store = ObjectStore(self.objects_dir)
+
+        # Determine which branch to push
+        target_branch = branch if branch else refs.get_current_branch() or "main"
+        commit_hash = refs.get_branch_commit(target_branch)
+
+        if not commit_hash:
+            raise ValueError(f"Branch '{target_branch}' has no commit")
+
+        # Get gateway URL from config or use default
+        gateway_url = self._config.get("ipfs", {}).get("gateway", "https://ipfs.io")
+
+        # Push to IPFS
+        cid = push_to_ipfs(self.objects_dir, target_branch, commit_hash, gateway_url, store)
+
+        if not cid:
+            raise ValueError("Failed to push to IPFS gateway")
+
+        # Update remote URL to new CID for future pulls
+        self.set_remote_url(f"ipfs://{cid}")
+
+        # TODO: Pin CID to prevent garbage collection
+        # Options: local IPFS daemon (ipfshttpclient), pinning service (Pinata/Infura)
+        # For now, user must manually pin or use a pinning service
+
+        try:
+            from .audit import append_audit
+
+            append_audit(
+                self.mem_dir,
+                "push",
+                {"remote": self.name, "branch": target_branch, "ipfs_cid": cid},
+            )
+        except Exception:
+            pass
+
+        return f"Pushed to IPFS: {cid} (WARNING: Not pinned - will be garbage collected unless pinned separately)"
+
+    def _pull_from_ipfs(self, url: str) -> str:
+        """Pull objects from IPFS by CID."""
+        from .ipfs_remote import pull_from_ipfs, parse_ipfs_url
+
+        cid = parse_ipfs_url(url)
+        if not cid:
+            raise ValueError(f"Invalid IPFS URL: {url}")
+
+        # Get gateway URL from config or use default
+        gateway_url = self._config.get("ipfs", {}).get("gateway", "https://ipfs.io")
+
+        # Pull from IPFS
+        success = pull_from_ipfs(self.objects_dir, cid, gateway_url)
+
+        if not success:
+            raise ValueError(f"Failed to pull from IPFS: {cid}")
+
+        try:
+            from .audit import append_audit
+
+            append_audit(self.mem_dir, "fetch", {"remote": self.name, "ipfs_cid": cid})
+        except Exception:
+            pass
+
+        return f"Fetched from IPFS: {cid}"
+
     def push(self, branch: Optional[str] = None) -> str:
         """
         Push objects and refs to remote.
@@ -311,6 +385,9 @@ class Remote:
         if not url:
             raise ValueError(f"Remote '{self.name}' has no URL configured")
 
+        if _is_ipfs_remote(url):
+            return self._push_to_ipfs(branch)
+
         if _is_cloud_remote(url):
             try:
                 from .storage import get_adapter
@@ -427,6 +504,9 @@ class Remote:
         if not url:
             raise ValueError(f"Remote '{self.name}' has no URL configured")
 
+        if _is_ipfs_remote(url):
+            return self._pull_from_ipfs(url)
+
         if _is_cloud_remote(url):
             try:
                 from .storage import get_adapter
memvcs/core/zk_proofs.py
CHANGED

@@ -1,5 +1,17 @@
 """
-
+Cryptographic proof system for agmem.
+
+IMPORTANT: Current implementation provides PROOF-OF-KNOWLEDGE, not true zero-knowledge proofs.
+
+Limitations:
+- Keyword proof leaks: word count in file, allows verifier to test other words
+- Freshness proof: relies on forgeable filesystem mtime
+- Both proofs reveal deterministic information about file content
+
+For true zero-knowledge proofs, consider integrating zk-SNARK libraries like:
+- py-ecc (Ethereum cryptography)
+- circom (circuit compiler)
+- libsnark bindings
 
 Hash/signature-based proofs: keyword containment (Merkle set membership),
 memory freshness (signed timestamp). Full zk-SNARK backend can be added later.
@@ -36,8 +48,30 @@ def _word_hashes(content: str) -> List[str]:
 
 def prove_keyword_containment(memory_path: Path, keyword: str, output_proof_path: Path) -> bool:
     """
-    Prove memory file contains keyword
-
+    Prove memory file contains keyword using Merkle set membership.
+
+    WARNING: This is PROOF-OF-KNOWLEDGE, not zero-knowledge:
+    - Leaks exact count of unique words in file (via Merkle root)
+    - Verifier can test if OTHER words exist by hashing and checking against same root
+    - Root is deterministic over full word set
+
+    For true zero-knowledge, would need:
+    - Commitment scheme that hides set size
+    - zk-SNARK proof that keyword ∈ committed set
+    - No ability for verifier to test other words
+
+    Current implementation is useful for:
+    - Proving you possess a file containing specific keywords
+    - Auditing that memories contain required terms
+    - Not suitable for privacy-preserving keyword proofs
+
+    Args:
+        memory_path: Path to memory file
+        keyword: Keyword to prove containment of
+        output_proof_path: Where to write proof JSON
+
+    Returns:
+        True if proof created successfully
     """
     if not memory_path.exists() or not memory_path.is_file():
         return False
@@ -68,8 +102,31 @@ def prove_memory_freshness(
     memory_path: Path, after_timestamp: str, output_proof_path: Path, mem_dir: Optional[Path] = None
 ) -> bool:
     """
-    Prove memory was updated after date
-
+    Prove memory was updated after date using signed timestamp.
+
+    WARNING: Security limitations:
+    - Relies on filesystem mtime which is TRIVIALLY FORGEABLE (touch command)
+    - Only proves key holder signed *some* timestamp, not actual freshness
+    - No protection against backdating files
+
+    Improvements needed:
+    - Sign content hash + timestamp (not just timestamp)
+    - Use trusted timestamping service (RFC 3161)
+    - Blockchain-based timestamp anchoring
+
+    Current implementation is useful for:
+    - Proving you signed a file at some claimed time
+    - Creating audit trails with signature verification
+    - Not suitable for proving actual file recency
+
+    Args:
+        memory_path: Path to memory file
+        after_timestamp: Timestamp to prove freshness after (not currently enforced)
+        output_proof_path: Where to write proof JSON
+        mem_dir: Memory directory for key loading
+
+    Returns:
+        True if proof created successfully
     """
     if not memory_path.exists() or not memory_path.is_file():
         return False
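A short sketch of calling the two proof entry points, given the caveats documented above. The file paths and timestamp format are assumptions; only the two signatures and their boolean returns come from this diff.

```python
# Sketch only: calling the proof-of-knowledge helpers documented above.
# ASSUMPTIONS: memories/notes.md exists, .mem holds the signing keys, and the
# timestamp format is ISO 8601 (the docstring notes it is not currently enforced).
from pathlib import Path

from memvcs.core.zk_proofs import prove_keyword_containment, prove_memory_freshness

memory = Path("memories/notes.md")

ok_keyword = prove_keyword_containment(memory, "deadline", Path("keyword-proof.json"))

ok_fresh = prove_memory_freshness(
    memory,
    "2024-01-01T00:00:00Z",
    Path("freshness-proof.json"),
    mem_dir=Path(".mem"),
)

# Both return False if the file is missing; neither proof is zero-knowledge,
# so treat the proof JSON as revealing information about the file's word set.
print(ok_keyword, ok_fresh)
```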
memvcs/health/__init__.py
ADDED

@@ -0,0 +1,25 @@
+"""Health monitoring module for agmem daemon."""
+
+from .monitor import (
+    HealthMonitor,
+    StorageMonitor,
+    SemanticRedundancyChecker,
+    StaleMemoryDetector,
+    GraphConsistencyValidator,
+    StorageMetrics,
+    RedundancyReport,
+    StaleMemoryReport,
+    GraphConsistencyReport,
+)
+
+__all__ = [
+    "HealthMonitor",
+    "StorageMonitor",
+    "SemanticRedundancyChecker",
+    "StaleMemoryDetector",
+    "GraphConsistencyValidator",
+    "StorageMetrics",
+    "RedundancyReport",
+    "StaleMemoryReport",
+    "GraphConsistencyReport",
+]