agmem-0.1.3-py3-none-any.whl → agmem-0.1.5-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
memvcs/core/objects.py CHANGED
@@ -83,7 +83,7 @@ class ObjectStore:
 
     def retrieve(self, hash_id: str, obj_type: str) -> Optional[bytes]:
         """
-        Retrieve content by hash ID.
+        Retrieve content by hash ID (loose object or pack).
 
         Args:
             hash_id: SHA-256 hash of the object
@@ -94,31 +94,43 @@ class ObjectStore:
         """
         obj_path = self._get_object_path(hash_id, obj_type)
 
-        if not obj_path.exists():
-            return None
-
-        raw = obj_path.read_bytes()
-        # Optionally decrypt (iv+tag minimum 12+16 bytes)
-        if self._encryptor and len(raw) >= 12 + 16:
-            try:
-                raw = self._encryptor.decrypt_payload(raw)
-            except Exception:
-                pass  # legacy plain compressed
-        full_content = zlib.decompress(raw)
-
-        # Parse header
-        null_idx = full_content.index(b"\0")
-        header = full_content[:null_idx].decode()
-        content = full_content[null_idx + 1 :]
-
-        return content
+        if obj_path.exists():
+            raw = obj_path.read_bytes()
+            # Optionally decrypt (iv+tag minimum 12+16 bytes)
+            if self._encryptor and len(raw) >= 12 + 16:
+                try:
+                    raw = self._encryptor.decrypt_payload(raw)
+                except Exception:
+                    pass  # legacy plain compressed
+            full_content = zlib.decompress(raw)
+            null_idx = full_content.index(b"\0")
+            content = full_content[null_idx + 1 :]
+            return content
+
+        # Try pack file when loose object missing
+        try:
+            from .pack import retrieve_from_pack
+
+            result = retrieve_from_pack(self.objects_dir, hash_id, expected_type=obj_type)
+            if result is not None:
+                return result[1]
+        except Exception:
+            pass
+        return None
 
     def exists(self, hash_id: str, obj_type: str) -> bool:
-        """Check if an object exists. Returns False for invalid hash (no raise)."""
+        """Check if an object exists (loose or pack). Returns False for invalid hash (no raise)."""
         if not _valid_object_hash(hash_id):
             return False
         obj_path = self._get_object_path(hash_id, obj_type)
-        return obj_path.exists()
+        if obj_path.exists():
+            return True
+        try:
+            from .pack import retrieve_from_pack
+
+            return retrieve_from_pack(self.objects_dir, hash_id, expected_type=obj_type) is not None
+        except Exception:
+            return False
 
     def delete(self, hash_id: str, obj_type: str) -> bool:
         """Delete an object. Returns True if deleted, False if not found."""
memvcs/core/pack.py CHANGED
@@ -4,7 +4,8 @@ Pack files and garbage collection for agmem.
 Pack: collect loose objects into single file + index. GC: delete unreachable objects, repack.
 """
 
-import json
+import hashlib
+import struct
 import zlib
 from pathlib import Path
 from typing import Set, Dict, List, Optional, Tuple
@@ -12,11 +13,39 @@ from typing import Set, Dict, List, Optional, Tuple
 from .objects import ObjectStore
 from .refs import RefsManager
 
+PACK_MAGIC = b"PACK"
+PACK_VERSION = 2
+IDX_MAGIC = b"agidx"
+IDX_VERSION = 2
+OBJ_TYPE_BLOB = 1
+OBJ_TYPE_TREE = 2
+OBJ_TYPE_COMMIT = 3
+OBJ_TYPE_TAG = 4
+TYPE_TO_BYTE = {
+    "blob": OBJ_TYPE_BLOB,
+    "tree": OBJ_TYPE_TREE,
+    "commit": OBJ_TYPE_COMMIT,
+    "tag": OBJ_TYPE_TAG,
+}
+BYTE_TO_TYPE = {v: k for k, v in TYPE_TO_BYTE.items()}
+
 
 def _pack_dir(objects_dir: Path) -> Path:
     return objects_dir / "pack"
 
 
+def _get_loose_object_type(objects_dir: Path, hash_id: str) -> Optional[str]:
+    """Return obj_type for a loose object, or None if not found."""
+    if len(hash_id) < 4:
+        return None
+    prefix, suffix = hash_id[:2], hash_id[2:]
+    for obj_type in ["blob", "tree", "commit", "tag"]:
+        p = objects_dir / obj_type / prefix / suffix
+        if p.exists():
+            return obj_type
+    return None
+
+
 def list_loose_objects(objects_dir: Path) -> Set[str]:
     """List all loose object hashes (blob, tree, commit, tag)."""
     hashes = set()
@@ -90,3 +119,174 @@ def run_gc(
             freed += p.stat().st_size
             break
     return (len(to_delete), freed)
+
+
+def write_pack(
+    objects_dir: Path, store: ObjectStore, hash_to_type: Dict[str, str]
+) -> Tuple[Path, Path]:
+    """
+    Pack loose objects into a single pack file and index.
+    hash_to_type: map hash_id -> obj_type for objects to include.
+    Returns (pack_path, index_path). Does not delete loose objects.
+    """
+    if not hash_to_type:
+        raise ValueError("Cannot write empty pack")
+    pack_d = _pack_dir(objects_dir)
+    pack_d.mkdir(parents=True, exist_ok=True)
+
+    pack_header_len = len(PACK_MAGIC) + 4 + 4
+    pack_body = bytearray()
+    index_entries: List[Tuple[str, str, int]] = []  # (hash_id, obj_type, offset_in_file)
+    offset_in_file = pack_header_len
+
+    for hash_id in sorted(hash_to_type.keys()):
+        obj_type = hash_to_type[hash_id]
+        content = store.retrieve(hash_id, obj_type)
+        if content is None:
+            continue
+        header = f"{obj_type} {len(content)}\0".encode()
+        full = header + content
+        compressed = zlib.compress(full)
+        type_byte = TYPE_TO_BYTE.get(obj_type, OBJ_TYPE_BLOB)
+        size_bytes = struct.pack(">I", len(compressed))
+        chunk = bytes([type_byte]) + size_bytes + compressed
+        pack_body.extend(chunk)
+        index_entries.append((hash_id, obj_type, offset_in_file))
+        offset_in_file += len(chunk)
+
+    if not index_entries:
+        raise ValueError("No objects to pack")
+
+    pack_content = (
+        PACK_MAGIC
+        + struct.pack(">I", PACK_VERSION)
+        + struct.pack(">I", len(index_entries))
+        + bytes(pack_body)
+    )
+    pack_hash = hashlib.sha256(pack_content).digest()
+    pack_content += pack_hash
+
+    pack_name = f"pack-{pack_hash[:16].hex()}.pack"
+    pack_path = pack_d / pack_name
+    pack_path.write_bytes(pack_content)
+
+    index_content = bytearray(
+        IDX_MAGIC + struct.pack(">I", IDX_VERSION) + struct.pack(">I", len(index_entries))
+    )
+    for hash_id, obj_type, off in index_entries:
+        index_content.extend(bytes.fromhex(hash_id))
+        index_content.append(TYPE_TO_BYTE[obj_type])
+        index_content.extend(struct.pack(">I", off))
+    idx_hash = hashlib.sha256(index_content).digest()
+    index_content.extend(idx_hash)
+    idx_path = pack_path.with_suffix(".idx")
+    idx_path.write_bytes(index_content)
+
+    return (pack_path, idx_path)
+
+
+def _find_pack_index(objects_dir: Path) -> Optional[Path]:
+    """Return path to first .idx file in objects/pack, or None."""
+    pack_d = _pack_dir(objects_dir)
+    if not pack_d.exists():
+        return None
+    for p in pack_d.iterdir():
+        if p.suffix == ".idx":
+            return p
+    return None
+
+
+def retrieve_from_pack(
+    objects_dir: Path, hash_id: str, expected_type: Optional[str] = None
+) -> Optional[Tuple[str, bytes]]:
+    """
+    Retrieve object from pack by hash. Returns (obj_type, content) or None.
+    If expected_type is set, only return if pack type matches.
+    """
+    idx_path = _find_pack_index(objects_dir)
+    if idx_path is None:
+        return None
+    pack_path = idx_path.with_suffix(".pack")
+    if not pack_path.exists():
+        return None
+
+    raw_idx = idx_path.read_bytes()
+    if len(raw_idx) < len(IDX_MAGIC) + 4 + 4 + 32 + 1 + 4 + 32:
+        return None
+    if raw_idx[: len(IDX_MAGIC)] != IDX_MAGIC:
+        return None
+    version = struct.unpack(">I", raw_idx[len(IDX_MAGIC) : len(IDX_MAGIC) + 4])[0]
+    if version != IDX_VERSION:
+        return None
+    count = struct.unpack(">I", raw_idx[len(IDX_MAGIC) + 4 : len(IDX_MAGIC) + 8])[0]
+    entry_size = 32 + 1 + 4
+    entries_start = len(IDX_MAGIC) + 8
+    entries_end = entries_start + count * entry_size
+    if entries_end + 32 > len(raw_idx):
+        return None
+    hash_hex = hash_id
+    if len(hash_hex) != 64:
+        return None
+    hash_bin = bytes.fromhex(hash_hex)
+    for i in range(count):
+        base = entries_start + i * entry_size
+        entry_hash = raw_idx[base : base + 32]
+        if entry_hash != hash_bin:
+            continue
+        type_byte = raw_idx[base + 32]
+        offset = struct.unpack(">I", raw_idx[base + 33 : base + 37])[0]
+        obj_type = BYTE_TO_TYPE.get(type_byte)
+        if obj_type is None:
+            continue
+        if expected_type is not None and obj_type != expected_type:
+            return None
+        pack_raw = pack_path.read_bytes()
+        header_size = len(PACK_MAGIC) + 4 + 4
+        if offset + 1 + 4 > len(pack_raw) - 32:
+            return None
+        size = struct.unpack(">I", pack_raw[offset + 1 : offset + 5])[0]
+        payload_start = offset + 5
+        payload_end = payload_start + size
+        if payload_end > len(pack_raw) - 32:
+            return None
+        compressed = pack_raw[payload_start:payload_end]
+        try:
+            full = zlib.decompress(compressed)
+        except Exception:
+            return None
+        null_idx = full.index(b"\0")
+        content = full[null_idx + 1 :]
+        return (obj_type, content)
+    return None
+
+
+def run_repack(
+    mem_dir: Path, store: ObjectStore, gc_prune_days: int = 90, dry_run: bool = False
+) -> Tuple[int, int]:
+    """
+    After GC: pack all reachable loose objects into a pack file, then delete those loose objects.
+    Returns (objects_packed, bytes_freed_from_loose).
+    """
+    objects_dir = mem_dir / "objects"
+    reachable = reachable_from_refs(mem_dir, store, gc_prune_days)
+    loose = list_loose_objects(objects_dir)
+    to_pack = reachable & loose
+    if not to_pack:
+        return (0, 0)
+    hash_to_type: Dict[str, str] = {}
+    for hash_id in to_pack:
+        obj_type = _get_loose_object_type(objects_dir, hash_id)
+        if obj_type:
+            hash_to_type[hash_id] = obj_type
+    if not hash_to_type:
+        return (0, 0)
+    if dry_run:
+        return (len(hash_to_type), 0)
+    write_pack(objects_dir, store, hash_to_type)
+    freed = 0
+    for hash_id, obj_type in hash_to_type.items():
+        p = store.objects_dir / obj_type / hash_id[:2] / hash_id[2:]
+        if p.exists():
+            freed += p.stat().st_size
+            p.unlink()
+    return (len(hash_to_type), freed)
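
For reference, the on-disk layout these functions define: a .pack file is `PACK` + big-endian uint32 version + uint32 object count, followed by one chunk per object (1 type byte, uint32 compressed size, then the zlib-compressed `"{type} {size}\0" + content`), and ends with a 32-byte SHA-256 of everything before it. The .idx file is `agidx` + uint32 version + uint32 count, then fixed 37-byte entries (32-byte binary hash, 1 type byte, uint32 file offset) and a SHA-256 trailer. A standalone sketch that walks an index, mirroring the parsing in retrieve_from_pack(); the function name and path are illustrative:

# Sketch: enumerate (hash, type, offset) entries from an agmem pack index.
import struct
from pathlib import Path

IDX_MAGIC = b"agidx"
ENTRY_SIZE = 32 + 1 + 4  # sha256 digest + type byte + big-endian offset
BYTE_TO_TYPE = {1: "blob", 2: "tree", 3: "commit", 4: "tag"}

def list_index_entries(idx_path: Path):
    raw = idx_path.read_bytes()
    if raw[: len(IDX_MAGIC)] != IDX_MAGIC:
        raise ValueError("not an agmem pack index")
    version, count = struct.unpack(">II", raw[5:13])  # header after 5-byte magic
    for i in range(count):
        base = 13 + i * ENTRY_SIZE                    # entries start right after header
        hash_hex = raw[base : base + 32].hex()
        obj_type = BYTE_TO_TYPE.get(raw[base + 32], "unknown")
        (offset,) = struct.unpack(">I", raw[base + 33 : base + 37])
        yield hash_hex, obj_type, offset
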
memvcs/core/remote.py CHANGED
@@ -1,19 +1,24 @@
 """
-Remote sync for agmem - file-based push/pull/clone.
+Remote sync for agmem - file-based and cloud (S3/GCS) push/pull/clone.
 
-Supports file:// URLs for local or mounted directories.
+Supports file:// URLs and s3:///gs:// with optional distributed locking.
 """
 
 import json
 import shutil
 from pathlib import Path
-from typing import Optional, Set
+from typing import Optional, Set, Any
 from urllib.parse import urlparse
 
 from .objects import ObjectStore, Commit, Tree, Blob, _valid_object_hash
 from .refs import RefsManager, _ref_path_under_root
 
 
+def _is_cloud_remote(url: str) -> bool:
+    """Return True if URL is S3 or GCS (use storage adapter + optional lock)."""
+    return url.startswith("s3://") or url.startswith("gs://")
+
+
 def parse_remote_url(url: str) -> Path:
     """Parse remote URL to local path. Supports file:// only. Rejects path traversal."""
     parsed = urlparse(url)
@@ -62,6 +67,51 @@ def _collect_objects_from_commit(store: ObjectStore, commit_hash: str) -> Set[str]:
     return seen
 
 
+def _read_object_from_adapter(adapter: Any, hash_id: str) -> Optional[tuple]:
+    """Read object from storage adapter. Returns (obj_type, content_bytes) or None."""
+    import zlib
+
+    for obj_type in ["commit", "tree", "blob", "tag"]:
+        rel = f".mem/objects/{obj_type}/{hash_id[:2]}/{hash_id[2:]}"
+        if not adapter.exists(rel):
+            continue
+        try:
+            raw = adapter.read_file(rel)
+            full = zlib.decompress(raw)
+            null_idx = full.index(b"\0")
+            content = full[null_idx + 1 :]
+            return (obj_type, content)
+        except Exception:
+            continue
+    return None
+
+
+def _collect_objects_from_commit_remote(adapter: Any, commit_hash: str) -> Set[str]:
+    """Collect object hashes reachable from a commit when reading from storage adapter."""
+    seen = set()
+    todo = [commit_hash]
+    while todo:
+        h = todo.pop()
+        if h in seen:
+            continue
+        seen.add(h)
+        pair = _read_object_from_adapter(adapter, h)
+        if pair is None:
+            continue
+        obj_type, content = pair
+        if obj_type == "commit":
+            data = json.loads(content)
+            todo.extend(data.get("parents", []))
+            if "tree" in data:
+                todo.append(data["tree"])
+        elif obj_type == "tree":
+            data = json.loads(content)
+            for e in data.get("entries", []):
+                if "hash" in e:
+                    todo.append(e["hash"])
+    return seen
+
+
 def _list_local_objects(objects_dir: Path) -> Set[str]:
     """List all object hashes in a .mem/objects directory."""
     hashes = set()
@@ -139,6 +189,119 @@ class Remote:
         self._config["remotes"][self.name]["url"] = url
         self._save_config(self._config)
 
+    def _push_via_storage(self, adapter: Any, branch: Optional[str] = None) -> str:
+        """Push objects and refs via storage adapter. Caller must hold lock if needed."""
+        refs = RefsManager(self.mem_dir)
+        store = ObjectStore(self.objects_dir)
+        to_push = set()
+        for b in refs.list_branches():
+            if branch and b != branch:
+                continue
+            ch = refs.get_branch_commit(b)
+            if ch:
+                to_push.update(_collect_objects_from_commit(store, ch))
+        for t in refs.list_tags():
+            ch = refs.get_tag_commit(t)
+            if ch:
+                to_push.update(_collect_objects_from_commit(store, ch))
+        copied = 0
+        for h in to_push:
+            obj_type = None
+            for otype in ["blob", "tree", "commit", "tag"]:
+                p = self.objects_dir / otype / h[:2] / h[2:]
+                if p.exists():
+                    obj_type = otype
+                    break
+            if not obj_type:
+                continue
+            rel = f".mem/objects/{obj_type}/{h[:2]}/{h[2:]}"
+            if not adapter.exists(rel):
+                try:
+                    data = p.read_bytes()
+                    adapter.makedirs(f".mem/objects/{obj_type}/{h[:2]}")
+                    adapter.write_file(rel, data)
+                    copied += 1
+                except Exception:
+                    pass
+        for b in refs.list_branches():
+            if branch and b != branch:
+                continue
+            ch = refs.get_branch_commit(b)
+            if ch and _ref_path_under_root(b, refs.heads_dir):
+                parent = str(Path(b).parent)
+                if parent != ".":
+                    adapter.makedirs(f".mem/refs/heads/{parent}")
+                adapter.write_file(f".mem/refs/heads/{b}", (ch + "\n").encode())
+        for t in refs.list_tags():
+            ch = refs.get_tag_commit(t)
+            if ch and _ref_path_under_root(t, refs.tags_dir):
+                parent = str(Path(t).parent)
+                if parent != ".":
+                    adapter.makedirs(f".mem/refs/tags/{parent}")
+                adapter.write_file(f".mem/refs/tags/{t}", (ch + "\n").encode())
+        try:
+            from .audit import append_audit
+
+            append_audit(
+                self.mem_dir, "push", {"remote": self.name, "branch": branch, "copied": copied}
+            )
+        except Exception:
+            pass
+        return f"Pushed {copied} object(s) to {self.name}"
+
+    def _fetch_via_storage(self, adapter: Any, branch: Optional[str] = None) -> str:
+        """Fetch objects and refs via storage adapter. Caller must hold lock if needed."""
+        to_fetch = set()
+        try:
+            heads = adapter.list_dir(".mem/refs/heads")
+            for fi in heads:
+                if fi.is_dir:
+                    continue
+                branch_name = fi.path.replace(".mem/refs/heads/", "").replace("\\", "/").strip("/")
+                if branch and branch_name != branch:
+                    continue
+                data = adapter.read_file(fi.path)
+                ch = data.decode().strip()
+                if ch and _valid_object_hash(ch):
+                    to_fetch.update(_collect_objects_from_commit_remote(adapter, ch))
+            tags = adapter.list_dir(".mem/refs/tags")
+            for fi in tags:
+                if fi.is_dir:
+                    continue
+                data = adapter.read_file(fi.path)
+                ch = data.decode().strip()
+                if ch and _valid_object_hash(ch):
+                    to_fetch.update(_collect_objects_from_commit_remote(adapter, ch))
+        except Exception:
+            pass
+        if not to_fetch:
+            return f"Fetched 0 object(s) from {self.name}"
+        local_has = _list_local_objects(self.objects_dir)
+        missing = to_fetch - local_has
+        copied = 0
+        for h in missing:
+            for otype in ["blob", "tree", "commit", "tag"]:
+                rel = f".mem/objects/{otype}/{h[:2]}/{h[2:]}"
+                if adapter.exists(rel):
+                    try:
+                        data = adapter.read_file(rel)
+                        p = self.objects_dir / otype / h[:2] / h[2:]
+                        p.parent.mkdir(parents=True, exist_ok=True)
+                        p.write_bytes(data)
+                        copied += 1
+                    except Exception:
+                        pass
+                    break
+        try:
+            from .audit import append_audit
+
+            append_audit(
+                self.mem_dir, "fetch", {"remote": self.name, "branch": branch, "copied": copied}
+            )
+        except Exception:
+            pass
+        return f"Fetched {copied} object(s) from {self.name}"
+
     def push(self, branch: Optional[str] = None) -> str:
         """
         Push objects and refs to remote.
@@ -148,6 +311,23 @@ class Remote:
         if not url:
             raise ValueError(f"Remote '{self.name}' has no URL configured")
 
+        if _is_cloud_remote(url):
+            try:
+                from .storage import get_adapter
+                from .storage.base import LockError
+
+                adapter = get_adapter(url, self._config)
+                lock_name = "agmem-push"
+                adapter.acquire_lock(lock_name, 30)
+                try:
+                    return self._push_via_storage(adapter, branch)
+                finally:
+                    adapter.release_lock(lock_name)
+            except LockError as e:
+                raise ValueError(f"Could not acquire remote lock: {e}") from e
+            except Exception as e:
+                raise ValueError(f"Push to cloud failed: {e}") from e
+
         remote_path = parse_remote_url(url)
         remote_mem = remote_path / ".mem"
         remote_objects = remote_mem / "objects"
@@ -247,6 +427,23 @@ class Remote:
         if not url:
             raise ValueError(f"Remote '{self.name}' has no URL configured")
 
+        if _is_cloud_remote(url):
+            try:
+                from .storage import get_adapter
+                from .storage.base import LockError
+
+                adapter = get_adapter(url, self._config)
+                lock_name = "agmem-fetch"
+                adapter.acquire_lock(lock_name, 30)
+                try:
+                    return self._fetch_via_storage(adapter, branch)
+                finally:
+                    adapter.release_lock(lock_name)
+            except LockError as e:
+                raise ValueError(f"Could not acquire remote lock: {e}") from e
+            except Exception as e:
+                raise ValueError(f"Fetch from cloud failed: {e}") from e
+
         remote_path = parse_remote_url(url)
         remote_objects = remote_path / ".mem" / "objects"
         remote_refs = remote_path / ".mem" / "refs"
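
Taken together, push() and fetch() now branch on the URL scheme: s3:// and gs:// remotes go through a storage adapter (get_adapter) under a best-effort remote lock, while file:// remotes keep the original copy path. A hedged usage sketch; the Remote constructor arguments are an assumption, everything else follows the code above:

# Hedged sketch: pushing to a cloud remote. The Remote constructor arguments
# shown here are assumed (see the class in memvcs/core/remote.py); the
# locked-adapter path for s3://... / gs://... URLs matches push() above.
from pathlib import Path
from memvcs.core.remote import Remote

remote = Remote("origin", Path(".mem"))  # hypothetical instantiation
try:
    print(remote.push(branch="main"))    # cloud URL: lock, _push_via_storage, unlock
except ValueError as e:
    # push() wraps LockError and adapter failures in ValueError
    print(f"push failed: {e}")
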