agmem-0.1.3-py3-none-any.whl → agmem-0.1.5-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
memvcs/core/objects.py CHANGED
@@ -83,7 +83,7 @@ class ObjectStore:
 
     def retrieve(self, hash_id: str, obj_type: str) -> Optional[bytes]:
         """
-        Retrieve content by hash ID.
+        Retrieve content by hash ID (loose object or pack).
 
         Args:
             hash_id: SHA-256 hash of the object
@@ -94,31 +94,43 @@ class ObjectStore:
         """
         obj_path = self._get_object_path(hash_id, obj_type)
 
-        if not obj_path.exists():
-            return None
-
-        raw = obj_path.read_bytes()
-        # Optionally decrypt (iv+tag minimum 12+16 bytes)
-        if self._encryptor and len(raw) >= 12 + 16:
-            try:
-                raw = self._encryptor.decrypt_payload(raw)
-            except Exception:
-                pass  # legacy plain compressed
-        full_content = zlib.decompress(raw)
-
-        # Parse header
-        null_idx = full_content.index(b"\0")
-        header = full_content[:null_idx].decode()
-        content = full_content[null_idx + 1 :]
-
-        return content
+        if obj_path.exists():
+            raw = obj_path.read_bytes()
+            # Optionally decrypt (iv+tag minimum 12+16 bytes)
+            if self._encryptor and len(raw) >= 12 + 16:
+                try:
+                    raw = self._encryptor.decrypt_payload(raw)
+                except Exception:
+                    pass  # legacy plain compressed
+            full_content = zlib.decompress(raw)
+            null_idx = full_content.index(b"\0")
+            content = full_content[null_idx + 1 :]
+            return content
+
+        # Try pack file when loose object missing
+        try:
+            from .pack import retrieve_from_pack
+
+            result = retrieve_from_pack(self.objects_dir, hash_id, expected_type=obj_type)
+            if result is not None:
+                return result[1]
+        except Exception:
+            pass
+        return None
 
     def exists(self, hash_id: str, obj_type: str) -> bool:
-        """Check if an object exists. Returns False for invalid hash (no raise)."""
+        """Check if an object exists (loose or pack). Returns False for invalid hash (no raise)."""
         if not _valid_object_hash(hash_id):
             return False
         obj_path = self._get_object_path(hash_id, obj_type)
-        return obj_path.exists()
+        if obj_path.exists():
+            return True
+        try:
+            from .pack import retrieve_from_pack
+
+            return retrieve_from_pack(self.objects_dir, hash_id, expected_type=obj_type) is not None
+        except Exception:
+            return False
 
     def delete(self, hash_id: str, obj_type: str) -> bool:
         """Delete an object. Returns True if deleted, False if not found."""
memvcs/core/pack.py CHANGED
@@ -4,7 +4,8 @@ Pack files and garbage collection for agmem.
 Pack: collect loose objects into single file + index. GC: delete unreachable objects, repack.
 """
 
-import json
+import hashlib
+import struct
 import zlib
 from pathlib import Path
 from typing import Set, Dict, List, Optional, Tuple
@@ -12,11 +13,39 @@ from typing import Set, Dict, List, Optional, Tuple
 from .objects import ObjectStore
 from .refs import RefsManager
 
+PACK_MAGIC = b"PACK"
+PACK_VERSION = 2
+IDX_MAGIC = b"agidx"
+IDX_VERSION = 2
+OBJ_TYPE_BLOB = 1
+OBJ_TYPE_TREE = 2
+OBJ_TYPE_COMMIT = 3
+OBJ_TYPE_TAG = 4
+TYPE_TO_BYTE = {
+    "blob": OBJ_TYPE_BLOB,
+    "tree": OBJ_TYPE_TREE,
+    "commit": OBJ_TYPE_COMMIT,
+    "tag": OBJ_TYPE_TAG,
+}
+BYTE_TO_TYPE = {v: k for k, v in TYPE_TO_BYTE.items()}
+
 
 def _pack_dir(objects_dir: Path) -> Path:
     return objects_dir / "pack"
 
 
+def _get_loose_object_type(objects_dir: Path, hash_id: str) -> Optional[str]:
+    """Return obj_type for a loose object, or None if not found."""
+    if len(hash_id) < 4:
+        return None
+    prefix, suffix = hash_id[:2], hash_id[2:]
+    for obj_type in ["blob", "tree", "commit", "tag"]:
+        p = objects_dir / obj_type / prefix / suffix
+        if p.exists():
+            return obj_type
+    return None
+
+
 def list_loose_objects(objects_dir: Path) -> Set[str]:
     """List all loose object hashes (blob, tree, commit, tag)."""
     hashes = set()
@@ -90,3 +119,174 @@ def run_gc(
             freed += p.stat().st_size
             break
     return (len(to_delete), freed)
+
+
+def write_pack(
+    objects_dir: Path, store: ObjectStore, hash_to_type: Dict[str, str]
+) -> Tuple[Path, Path]:
+    """
+    Pack loose objects into a single pack file and index.
+    hash_to_type: map hash_id -> obj_type for objects to include.
+    Returns (pack_path, index_path). Does not delete loose objects.
+    """
+    if not hash_to_type:
+        raise ValueError("Cannot write empty pack")
+    pack_d = _pack_dir(objects_dir)
+    pack_d.mkdir(parents=True, exist_ok=True)
+
+    pack_header_len = len(PACK_MAGIC) + 4 + 4
+    pack_body = bytearray()
+    index_entries: List[Tuple[str, str, int]] = []  # (hash_id, obj_type, offset_in_file)
+    offset_in_file = pack_header_len
+
+    for hash_id in sorted(hash_to_type.keys()):
+        obj_type = hash_to_type[hash_id]
+        content = store.retrieve(hash_id, obj_type)
+        if content is None:
+            continue
+        header = f"{obj_type} {len(content)}\0".encode()
+        full = header + content
+        compressed = zlib.compress(full)
+        type_byte = TYPE_TO_BYTE.get(obj_type, OBJ_TYPE_BLOB)
+        size_bytes = struct.pack(">I", len(compressed))
+        chunk = bytes([type_byte]) + size_bytes + compressed
+        pack_body.extend(chunk)
+        index_entries.append((hash_id, obj_type, offset_in_file))
+        offset_in_file += len(chunk)
+
+    if not index_entries:
+        raise ValueError("No objects to pack")
+
+    pack_content = (
+        PACK_MAGIC
+        + struct.pack(">I", PACK_VERSION)
+        + struct.pack(">I", len(index_entries))
+        + bytes(pack_body)
+    )
+    pack_hash = hashlib.sha256(pack_content).digest()
+    pack_content += pack_hash
+
+    pack_name = f"pack-{pack_hash[:16].hex()}.pack"
+    pack_path = pack_d / pack_name
+    pack_path.write_bytes(pack_content)
+
+    index_content = bytearray(
+        IDX_MAGIC + struct.pack(">I", IDX_VERSION) + struct.pack(">I", len(index_entries))
+    )
+    for hash_id, obj_type, off in index_entries:
+        index_content.extend(bytes.fromhex(hash_id))
+        index_content.append(TYPE_TO_BYTE[obj_type])
+        index_content.extend(struct.pack(">I", off))
+    idx_hash = hashlib.sha256(index_content).digest()
+    index_content.extend(idx_hash)
+    idx_path = pack_path.with_suffix(".idx")
+    idx_path.write_bytes(index_content)
+
+    return (pack_path, idx_path)
+
+
+def _find_pack_index(objects_dir: Path) -> Optional[Path]:
+    """Return path to first .idx file in objects/pack, or None."""
+    pack_d = _pack_dir(objects_dir)
+    if not pack_d.exists():
+        return None
+    for p in pack_d.iterdir():
+        if p.suffix == ".idx":
+            return p
+    return None
+
+
+def retrieve_from_pack(
+    objects_dir: Path, hash_id: str, expected_type: Optional[str] = None
+) -> Optional[Tuple[str, bytes]]:
+    """
+    Retrieve object from pack by hash. Returns (obj_type, content) or None.
+    If expected_type is set, only return if pack type matches.
+    """
+    idx_path = _find_pack_index(objects_dir)
+    if idx_path is None:
+        return None
+    pack_path = idx_path.with_suffix(".pack")
+    if not pack_path.exists():
+        return None
+
+    raw_idx = idx_path.read_bytes()
+    if len(raw_idx) < len(IDX_MAGIC) + 4 + 4 + 32 + 1 + 4 + 32:
+        return None
+    if raw_idx[: len(IDX_MAGIC)] != IDX_MAGIC:
+        return None
+    version = struct.unpack(">I", raw_idx[len(IDX_MAGIC) : len(IDX_MAGIC) + 4])[0]
+    if version != IDX_VERSION:
+        return None
+    count = struct.unpack(">I", raw_idx[len(IDX_MAGIC) + 4 : len(IDX_MAGIC) + 8])[0]
+    entry_size = 32 + 1 + 4
+    entries_start = len(IDX_MAGIC) + 8
+    entries_end = entries_start + count * entry_size
+    if entries_end + 32 > len(raw_idx):
+        return None
+    hash_hex = hash_id
+    if len(hash_hex) != 64:
+        return None
+    hash_bin = bytes.fromhex(hash_hex)
+    for i in range(count):
+        base = entries_start + i * entry_size
+        entry_hash = raw_idx[base : base + 32]
+        if entry_hash != hash_bin:
+            continue
+        type_byte = raw_idx[base + 32]
+        offset = struct.unpack(">I", raw_idx[base + 33 : base + 37])[0]
+        obj_type = BYTE_TO_TYPE.get(type_byte)
+        if obj_type is None:
+            continue
+        if expected_type is not None and obj_type != expected_type:
+            return None
+        pack_raw = pack_path.read_bytes()
+        header_size = len(PACK_MAGIC) + 4 + 4
+        if offset + 1 + 4 > len(pack_raw) - 32:
+            return None
+        size = struct.unpack(">I", pack_raw[offset + 1 : offset + 5])[0]
+        payload_start = offset + 5
+        payload_end = payload_start + size
+        if payload_end > len(pack_raw) - 32:
+            return None
+        compressed = pack_raw[payload_start:payload_end]
+        try:
+            full = zlib.decompress(compressed)
+        except Exception:
+            return None
+        null_idx = full.index(b"\0")
+        content = full[null_idx + 1 :]
+        return (obj_type, content)
+    return None
+
+
+def run_repack(
+    mem_dir: Path, store: ObjectStore, gc_prune_days: int = 90, dry_run: bool = False
+) -> Tuple[int, int]:
+    """
+    After GC: pack all reachable loose objects into a pack file, then delete those loose objects.
+    Returns (objects_packed, bytes_freed_from_loose).
+    """
+    objects_dir = mem_dir / "objects"
+    reachable = reachable_from_refs(mem_dir, store, gc_prune_days)
+    loose = list_loose_objects(objects_dir)
+    to_pack = reachable & loose
+    if not to_pack:
+        return (0, 0)
+    hash_to_type: Dict[str, str] = {}
+    for hash_id in to_pack:
+        obj_type = _get_loose_object_type(objects_dir, hash_id)
+        if obj_type:
+            hash_to_type[hash_id] = obj_type
+    if not hash_to_type:
+        return (0, 0)
+    if dry_run:
+        return (len(hash_to_type), 0)
+    write_pack(objects_dir, store, hash_to_type)
+    freed = 0
+    for hash_id, obj_type in hash_to_type.items():
+        p = store.objects_dir / obj_type / hash_id[:2] / hash_id[2:]
+        if p.exists():
+            freed += p.stat().st_size
+            p.unlink()
+    return (len(hash_to_type), freed)
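
For reference, the on-disk layout these functions define: a .pack file is `PACK` + big-endian uint32 version + uint32 object count, followed by one chunk per object (1 type byte, uint32 compressed size, then the zlib-compressed `"{type} {size}\0" + content`), and ends with a 32-byte SHA-256 of everything before it. The .idx file is `agidx` + uint32 version + uint32 count, then fixed 37-byte entries (32-byte binary hash, 1 type byte, uint32 file offset) and a SHA-256 trailer. A standalone sketch that walks an index, mirroring the parsing in retrieve_from_pack(); the function name and path are illustrative:

# Sketch: enumerate (hash, type, offset) entries from an agmem pack index.
import struct
from pathlib import Path

IDX_MAGIC = b"agidx"
ENTRY_SIZE = 32 + 1 + 4  # sha256 digest + type byte + big-endian offset
BYTE_TO_TYPE = {1: "blob", 2: "tree", 3: "commit", 4: "tag"}

def list_index_entries(idx_path: Path):
    raw = idx_path.read_bytes()
    if raw[: len(IDX_MAGIC)] != IDX_MAGIC:
        raise ValueError("not an agmem pack index")
    version, count = struct.unpack(">II", raw[5:13])  # header after 5-byte magic
    for i in range(count):
        base = 13 + i * ENTRY_SIZE                    # entries start right after header
        hash_hex = raw[base : base + 32].hex()
        obj_type = BYTE_TO_TYPE.get(raw[base + 32], "unknown")
        (offset,) = struct.unpack(">I", raw[base + 33 : base + 37])
        yield hash_hex, obj_type, offset
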
memvcs/core/remote.py CHANGED
@@ -1,19 +1,24 @@
 """
-Remote sync for agmem - file-based push/pull/clone.
+Remote sync for agmem - file-based and cloud (S3/GCS) push/pull/clone.
 
-Supports file:// URLs for local or mounted directories.
+Supports file:// URLs and s3:///gs:// with optional distributed locking.
 """
 
 import json
 import shutil
 from pathlib import Path
-from typing import Optional, Set
+from typing import Optional, Set, Any
 from urllib.parse import urlparse
 
 from .objects import ObjectStore, Commit, Tree, Blob, _valid_object_hash
 from .refs import RefsManager, _ref_path_under_root
 
 
+def _is_cloud_remote(url: str) -> bool:
+    """Return True if URL is S3 or GCS (use storage adapter + optional lock)."""
+    return url.startswith("s3://") or url.startswith("gs://")
+
+
 def parse_remote_url(url: str) -> Path:
     """Parse remote URL to local path. Supports file:// only. Rejects path traversal."""
     parsed = urlparse(url)
@@ -62,6 +67,51 @@ def _collect_objects_from_commit(store: ObjectStore, commit_hash: str) -> Set[str]:
     return seen
 
 
+def _read_object_from_adapter(adapter: Any, hash_id: str) -> Optional[tuple]:
+    """Read object from storage adapter. Returns (obj_type, content_bytes) or None."""
+    import zlib
+
+    for obj_type in ["commit", "tree", "blob", "tag"]:
+        rel = f".mem/objects/{obj_type}/{hash_id[:2]}/{hash_id[2:]}"
+        if not adapter.exists(rel):
+            continue
+        try:
+            raw = adapter.read_file(rel)
+            full = zlib.decompress(raw)
+            null_idx = full.index(b"\0")
+            content = full[null_idx + 1 :]
+            return (obj_type, content)
+        except Exception:
+            continue
+    return None
+
+
+def _collect_objects_from_commit_remote(adapter: Any, commit_hash: str) -> Set[str]:
+    """Collect object hashes reachable from a commit when reading from storage adapter."""
+    seen = set()
+    todo = [commit_hash]
+    while todo:
+        h = todo.pop()
+        if h in seen:
+            continue
+        seen.add(h)
+        pair = _read_object_from_adapter(adapter, h)
+        if pair is None:
+            continue
+        obj_type, content = pair
+        if obj_type == "commit":
+            data = json.loads(content)
+            todo.extend(data.get("parents", []))
+            if "tree" in data:
+                todo.append(data["tree"])
+        elif obj_type == "tree":
+            data = json.loads(content)
+            for e in data.get("entries", []):
+                if "hash" in e:
+                    todo.append(e["hash"])
+    return seen
+
+
 def _list_local_objects(objects_dir: Path) -> Set[str]:
     """List all object hashes in a .mem/objects directory."""
     hashes = set()
@@ -139,6 +189,119 @@ class Remote:
         self._config["remotes"][self.name]["url"] = url
         self._save_config(self._config)
 
+    def _push_via_storage(self, adapter: Any, branch: Optional[str] = None) -> str:
+        """Push objects and refs via storage adapter. Caller must hold lock if needed."""
+        refs = RefsManager(self.mem_dir)
+        store = ObjectStore(self.objects_dir)
+        to_push = set()
+        for b in refs.list_branches():
+            if branch and b != branch:
+                continue
+            ch = refs.get_branch_commit(b)
+            if ch:
+                to_push.update(_collect_objects_from_commit(store, ch))
+        for t in refs.list_tags():
+            ch = refs.get_tag_commit(t)
+            if ch:
+                to_push.update(_collect_objects_from_commit(store, ch))
+        copied = 0
+        for h in to_push:
+            obj_type = None
+            for otype in ["blob", "tree", "commit", "tag"]:
+                p = self.objects_dir / otype / h[:2] / h[2:]
+                if p.exists():
+                    obj_type = otype
+                    break
+            if not obj_type:
+                continue
+            rel = f".mem/objects/{obj_type}/{h[:2]}/{h[2:]}"
+            if not adapter.exists(rel):
+                try:
+                    data = p.read_bytes()
+                    adapter.makedirs(f".mem/objects/{obj_type}/{h[:2]}")
+                    adapter.write_file(rel, data)
+                    copied += 1
+                except Exception:
+                    pass
+        for b in refs.list_branches():
+            if branch and b != branch:
+                continue
+            ch = refs.get_branch_commit(b)
+            if ch and _ref_path_under_root(b, refs.heads_dir):
+                parent = str(Path(b).parent)
+                if parent != ".":
+                    adapter.makedirs(f".mem/refs/heads/{parent}")
+                adapter.write_file(f".mem/refs/heads/{b}", (ch + "\n").encode())
+        for t in refs.list_tags():
+            ch = refs.get_tag_commit(t)
+            if ch and _ref_path_under_root(t, refs.tags_dir):
+                parent = str(Path(t).parent)
+                if parent != ".":
+                    adapter.makedirs(f".mem/refs/tags/{parent}")
+                adapter.write_file(f".mem/refs/tags/{t}", (ch + "\n").encode())
+        try:
+            from .audit import append_audit
+
+            append_audit(
+                self.mem_dir, "push", {"remote": self.name, "branch": branch, "copied": copied}
+            )
+        except Exception:
+            pass
+        return f"Pushed {copied} object(s) to {self.name}"
+
+    def _fetch_via_storage(self, adapter: Any, branch: Optional[str] = None) -> str:
+        """Fetch objects and refs via storage adapter. Caller must hold lock if needed."""
+        to_fetch = set()
+        try:
+            heads = adapter.list_dir(".mem/refs/heads")
+            for fi in heads:
+                if fi.is_dir:
+                    continue
+                branch_name = fi.path.replace(".mem/refs/heads/", "").replace("\\", "/").strip("/")
+                if branch and branch_name != branch:
+                    continue
+                data = adapter.read_file(fi.path)
+                ch = data.decode().strip()
+                if ch and _valid_object_hash(ch):
+                    to_fetch.update(_collect_objects_from_commit_remote(adapter, ch))
+            tags = adapter.list_dir(".mem/refs/tags")
+            for fi in tags:
+                if fi.is_dir:
+                    continue
+                data = adapter.read_file(fi.path)
+                ch = data.decode().strip()
+                if ch and _valid_object_hash(ch):
+                    to_fetch.update(_collect_objects_from_commit_remote(adapter, ch))
+        except Exception:
+            pass
+        if not to_fetch:
+            return f"Fetched 0 object(s) from {self.name}"
+        local_has = _list_local_objects(self.objects_dir)
+        missing = to_fetch - local_has
+        copied = 0
+        for h in missing:
+            for otype in ["blob", "tree", "commit", "tag"]:
+                rel = f".mem/objects/{otype}/{h[:2]}/{h[2:]}"
+                if adapter.exists(rel):
+                    try:
+                        data = adapter.read_file(rel)
+                        p = self.objects_dir / otype / h[:2] / h[2:]
+                        p.parent.mkdir(parents=True, exist_ok=True)
+                        p.write_bytes(data)
+                        copied += 1
+                    except Exception:
+                        pass
+                    break
+        try:
+            from .audit import append_audit
+
+            append_audit(
+                self.mem_dir, "fetch", {"remote": self.name, "branch": branch, "copied": copied}
+            )
+        except Exception:
+            pass
+        return f"Fetched {copied} object(s) from {self.name}"
+
     def push(self, branch: Optional[str] = None) -> str:
         """
         Push objects and refs to remote.
@@ -148,6 +311,23 @@ class Remote:
         if not url:
             raise ValueError(f"Remote '{self.name}' has no URL configured")
 
+        if _is_cloud_remote(url):
+            try:
+                from .storage import get_adapter
+                from .storage.base import LockError
+
+                adapter = get_adapter(url, self._config)
+                lock_name = "agmem-push"
+                adapter.acquire_lock(lock_name, 30)
+                try:
+                    return self._push_via_storage(adapter, branch)
+                finally:
+                    adapter.release_lock(lock_name)
+            except LockError as e:
+                raise ValueError(f"Could not acquire remote lock: {e}") from e
+            except Exception as e:
+                raise ValueError(f"Push to cloud failed: {e}") from e
+
         remote_path = parse_remote_url(url)
         remote_mem = remote_path / ".mem"
         remote_objects = remote_mem / "objects"
@@ -247,6 +427,23 @@ class Remote:
         if not url:
             raise ValueError(f"Remote '{self.name}' has no URL configured")
 
+        if _is_cloud_remote(url):
+            try:
+                from .storage import get_adapter
+                from .storage.base import LockError
+
+                adapter = get_adapter(url, self._config)
+                lock_name = "agmem-fetch"
+                adapter.acquire_lock(lock_name, 30)
+                try:
+                    return self._fetch_via_storage(adapter, branch)
+                finally:
+                    adapter.release_lock(lock_name)
+            except LockError as e:
+                raise ValueError(f"Could not acquire remote lock: {e}") from e
+            except Exception as e:
+                raise ValueError(f"Fetch from cloud failed: {e}") from e
+
         remote_path = parse_remote_url(url)
         remote_objects = remote_path / ".mem" / "objects"
         remote_refs = remote_path / ".mem" / "refs"
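
Taken together, push() and fetch() now branch on the URL scheme: s3:// and gs:// remotes go through a storage adapter (get_adapter) under a best-effort remote lock, while file:// remotes keep the original copy path. A hedged usage sketch; the Remote constructor arguments are an assumption, everything else follows the code above:

# Hedged sketch: pushing to a cloud remote. The Remote constructor arguments
# shown here are assumed (see the class in memvcs/core/remote.py); the
# locked-adapter path for s3://... / gs://... URLs matches push() above.
from pathlib import Path
from memvcs.core.remote import Remote

remote = Remote("origin", Path(".mem"))  # hypothetical instantiation
try:
    print(remote.push(branch="main"))    # cloud URL: lock, _push_via_storage, unlock
except ValueError as e:
    # push() wraps LockError and adapter failures in ValueError
    print(f"push failed: {e}")
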