agmem 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {agmem-0.1.1.dist-info → agmem-0.1.3.dist-info}/METADATA +157 -16
- agmem-0.1.3.dist-info/RECORD +105 -0
- memvcs/__init__.py +1 -1
- memvcs/cli.py +45 -31
- memvcs/commands/__init__.py +9 -9
- memvcs/commands/add.py +83 -76
- memvcs/commands/audit.py +59 -0
- memvcs/commands/blame.py +46 -53
- memvcs/commands/branch.py +13 -33
- memvcs/commands/checkout.py +27 -32
- memvcs/commands/clean.py +18 -23
- memvcs/commands/clone.py +11 -1
- memvcs/commands/commit.py +40 -39
- memvcs/commands/daemon.py +109 -76
- memvcs/commands/decay.py +77 -0
- memvcs/commands/diff.py +56 -57
- memvcs/commands/distill.py +90 -0
- memvcs/commands/federated.py +53 -0
- memvcs/commands/fsck.py +86 -61
- memvcs/commands/garden.py +40 -35
- memvcs/commands/gc.py +51 -0
- memvcs/commands/graph.py +41 -48
- memvcs/commands/init.py +16 -24
- memvcs/commands/log.py +25 -40
- memvcs/commands/merge.py +69 -27
- memvcs/commands/pack.py +129 -0
- memvcs/commands/prove.py +66 -0
- memvcs/commands/pull.py +31 -1
- memvcs/commands/push.py +4 -2
- memvcs/commands/recall.py +145 -0
- memvcs/commands/reflog.py +13 -22
- memvcs/commands/remote.py +1 -0
- memvcs/commands/repair.py +66 -0
- memvcs/commands/reset.py +23 -33
- memvcs/commands/resolve.py +130 -0
- memvcs/commands/resurrect.py +82 -0
- memvcs/commands/search.py +3 -4
- memvcs/commands/serve.py +2 -1
- memvcs/commands/show.py +66 -36
- memvcs/commands/stash.py +34 -34
- memvcs/commands/status.py +27 -35
- memvcs/commands/tag.py +23 -47
- memvcs/commands/test.py +30 -44
- memvcs/commands/timeline.py +111 -0
- memvcs/commands/tree.py +26 -27
- memvcs/commands/verify.py +110 -0
- memvcs/commands/when.py +115 -0
- memvcs/core/access_index.py +167 -0
- memvcs/core/audit.py +124 -0
- memvcs/core/config_loader.py +3 -1
- memvcs/core/consistency.py +214 -0
- memvcs/core/crypto_verify.py +280 -0
- memvcs/core/decay.py +185 -0
- memvcs/core/diff.py +158 -143
- memvcs/core/distiller.py +277 -0
- memvcs/core/encryption.py +169 -0
- memvcs/core/federated.py +86 -0
- memvcs/core/gardener.py +176 -145
- memvcs/core/hooks.py +48 -14
- memvcs/core/ipfs_remote.py +39 -0
- memvcs/core/knowledge_graph.py +135 -138
- memvcs/core/llm/__init__.py +10 -0
- memvcs/core/llm/anthropic_provider.py +50 -0
- memvcs/core/llm/base.py +27 -0
- memvcs/core/llm/factory.py +30 -0
- memvcs/core/llm/openai_provider.py +36 -0
- memvcs/core/merge.py +260 -170
- memvcs/core/objects.py +110 -101
- memvcs/core/pack.py +92 -0
- memvcs/core/pii_scanner.py +147 -146
- memvcs/core/privacy_budget.py +63 -0
- memvcs/core/refs.py +132 -115
- memvcs/core/remote.py +38 -0
- memvcs/core/repository.py +254 -164
- memvcs/core/schema.py +155 -113
- memvcs/core/staging.py +60 -65
- memvcs/core/storage/__init__.py +20 -18
- memvcs/core/storage/base.py +74 -70
- memvcs/core/storage/gcs.py +70 -68
- memvcs/core/storage/local.py +42 -40
- memvcs/core/storage/s3.py +105 -110
- memvcs/core/temporal_index.py +121 -0
- memvcs/core/test_runner.py +101 -93
- memvcs/core/trust.py +103 -0
- memvcs/core/vector_store.py +56 -36
- memvcs/core/zk_proofs.py +26 -0
- memvcs/integrations/mcp_server.py +1 -3
- memvcs/integrations/web_ui/server.py +25 -26
- memvcs/retrieval/__init__.py +22 -0
- memvcs/retrieval/base.py +54 -0
- memvcs/retrieval/pack.py +128 -0
- memvcs/retrieval/recaller.py +105 -0
- memvcs/retrieval/strategies.py +314 -0
- memvcs/utils/__init__.py +3 -3
- memvcs/utils/helpers.py +52 -52
- agmem-0.1.1.dist-info/RECORD +0 -67
- {agmem-0.1.1.dist-info → agmem-0.1.3.dist-info}/WHEEL +0 -0
- {agmem-0.1.1.dist-info → agmem-0.1.3.dist-info}/entry_points.txt +0 -0
- {agmem-0.1.1.dist-info → agmem-0.1.3.dist-info}/licenses/LICENSE +0 -0
- {agmem-0.1.1.dist-info → agmem-0.1.3.dist-info}/top_level.txt +0 -0
memvcs/core/objects.py
CHANGED
@@ -18,21 +18,22 @@ def _valid_object_hash(hash_id: str) -> bool:
     """Return True if hash_id is safe for object paths (hex, 4-64 chars)."""
     if not hash_id or len(hash_id) < 4 or len(hash_id) > 64:
         return False
-    return all(c in '0123456789abcdef' for c in hash_id.lower())
+    return all(c in "0123456789abcdef" for c in hash_id.lower())
 
 
 class ObjectStore:
     """Content-addressable object storage system."""
-
-    def __init__(self, objects_dir: Path):
+
+    def __init__(self, objects_dir: Path, encryptor: Optional[Any] = None):
         self.objects_dir = Path(objects_dir)
+        self._encryptor = encryptor
         self._ensure_directories()
-
+
     def _ensure_directories(self):
         """Create object storage directories."""
-        for obj_type in ['blob', 'tree', 'commit', 'tag']:
+        for obj_type in ["blob", "tree", "commit", "tag"]:
             (self.objects_dir / obj_type).mkdir(parents=True, exist_ok=True)
-
+
     def _get_object_path(self, hash_id: str, obj_type: str) -> Path:
         """Get storage path for an object. Validates hash_id to prevent path traversal."""
         if not _valid_object_hash(hash_id):
@@ -40,76 +41,85 @@ class ObjectStore:
         prefix = hash_id[:2]
         suffix = hash_id[2:]
         return self.objects_dir / obj_type / prefix / suffix
-
+
     def _compute_hash(self, content: bytes, obj_type: str) -> str:
         """Compute SHA-256 hash of content with type header."""
         header = f"{obj_type} {len(content)}\0".encode()
         full_content = header + content
         return hashlib.sha256(full_content).hexdigest()
-
+
     def store(self, content: bytes, obj_type: str) -> str:
         """
         Store content and return its hash ID.
-
+
         Args:
             content: Raw bytes to store
             obj_type: Type of object ('blob', 'tree', 'commit', 'tag')
-
+
         Returns:
             SHA-256 hash ID of stored object
         """
         hash_id = self._compute_hash(content, obj_type)
         obj_path = self._get_object_path(hash_id, obj_type)
-
+
         # Don't store if already exists (deduplication)
         if obj_path.exists():
             return hash_id
-
+
         # Create directory if needed
         obj_path.parent.mkdir(parents=True, exist_ok=True)
-
-        # Compress and store
+
+        # Compress and optionally encrypt
         header = f"{obj_type} {len(content)}\0".encode()
         full_content = header + content
         compressed = zlib.compress(full_content)
-
+        if self._encryptor:
+            try:
+                compressed = self._encryptor.encrypt_payload(compressed)
+            except ValueError:
+                pass  # no key; store plain compressed (legacy behavior)
         obj_path.write_bytes(compressed)
         return hash_id
-
+
     def retrieve(self, hash_id: str, obj_type: str) -> Optional[bytes]:
         """
         Retrieve content by hash ID.
-
+
         Args:
             hash_id: SHA-256 hash of the object
            obj_type: Type of object
-
+
         Returns:
             Raw bytes content or None if not found
         """
         obj_path = self._get_object_path(hash_id, obj_type)
-
+
         if not obj_path.exists():
             return None
-
-        # Read and decompress
-        compressed = obj_path.read_bytes()
-        full_content = zlib.decompress(compressed)
-
+
+        raw = obj_path.read_bytes()
+        # Optionally decrypt (iv+tag minimum 12+16 bytes)
+        if self._encryptor and len(raw) >= 12 + 16:
+            try:
+                raw = self._encryptor.decrypt_payload(raw)
+            except Exception:
+                pass  # legacy plain compressed
+        full_content = zlib.decompress(raw)
+
         # Parse header
-        null_idx = full_content.index(b'\0')
+        null_idx = full_content.index(b"\0")
         header = full_content[:null_idx].decode()
-        content = full_content[null_idx + 1:]
-
+        content = full_content[null_idx + 1 :]
+
         return content
-
+
     def exists(self, hash_id: str, obj_type: str) -> bool:
         """Check if an object exists. Returns False for invalid hash (no raise)."""
         if not _valid_object_hash(hash_id):
             return False
         obj_path = self._get_object_path(hash_id, obj_type)
         return obj_path.exists()
-
+
     def delete(self, hash_id: str, obj_type: str) -> bool:
         """Delete an object. Returns True if deleted, False if not found."""
         obj_path = self._get_object_path(hash_id, obj_type)
@@ -120,13 +130,13 @@ class ObjectStore:
                 obj_path.parent.rmdir()
             return True
         return False
-
+
     def list_objects(self, obj_type: str) -> List[str]:
         """List all objects of a given type."""
         obj_dir = self.objects_dir / obj_type
         if not obj_dir.exists():
             return []
-
+
         hashes = []
         for prefix_dir in obj_dir.iterdir():
             if prefix_dir.is_dir():
@@ -134,7 +144,7 @@ class ObjectStore:
                 hash_id = prefix_dir.name + suffix_file.name
                 hashes.append(hash_id)
         return hashes
-
+
     def get_size(self, hash_id: str, obj_type: str) -> int:
         """Get the compressed size of an object."""
         obj_path = self._get_object_path(hash_id, obj_type)
@@ -146,16 +156,17 @@ class ObjectStore:
 @dataclass
 class Blob:
     """Blob object for storing raw memory content."""
+
     content: bytes
-
+
     def store(self, store: ObjectStore) -> str:
         """Store this blob and return its hash."""
-        return store.store(self.content, 'blob')
-
+        return store.store(self.content, "blob")
+
     @staticmethod
-    def load(store: ObjectStore, hash_id: str) -> Optional['Blob']:
+    def load(store: ObjectStore, hash_id: str) -> Optional["Blob"]:
         """Load a blob from storage."""
-        content = store.retrieve(hash_id, 'blob')
+        content = store.retrieve(hash_id, "blob")
         if content is not None:
             return Blob(content=content)
         return None
@@ -164,6 +175,7 @@ class Blob:
 @dataclass
 class TreeEntry:
     """Entry in a tree object."""
+
     mode: str  # '100644' for file, '040000' for directory
     obj_type: str  # 'blob' or 'tree'
     hash: str
@@ -174,52 +186,47 @@ class TreeEntry:
 @dataclass
 class Tree:
     """Tree object for storing directory structure."""
+
     entries: List[TreeEntry]
-
+
     def to_dict(self) -> Dict[str, Any]:
         """Convert to dictionary for serialization."""
         return {
-            'type': 'tree',
-            'entries': [
-                {
-                    'mode': e.mode,
-                    'type': e.obj_type,
-                    'hash': e.hash,
-                    'name': e.name,
-                    'path': e.path
-                }
+            "type": "tree",
+            "entries": [
+                {"mode": e.mode, "type": e.obj_type, "hash": e.hash, "name": e.name, "path": e.path}
                 for e in self.entries
-            ]
+            ],
         }
-
+
     def to_bytes(self) -> bytes:
         """Serialize to bytes."""
         return json.dumps(self.to_dict(), sort_keys=True).encode()
-
+
     def store(self, store: ObjectStore) -> str:
         """Store this tree and return its hash."""
-        return store.store(self.to_bytes(), 'tree')
-
+        return store.store(self.to_bytes(), "tree")
+
     @staticmethod
-    def load(store: ObjectStore, hash_id: str) -> Optional['Tree']:
+    def load(store: ObjectStore, hash_id: str) -> Optional["Tree"]:
         """Load a tree from storage."""
-        content = store.retrieve(hash_id, 'tree')
+        content = store.retrieve(hash_id, "tree")
         if content is None:
             return None
-
+
         data = json.loads(content)
         entries = [
             TreeEntry(
-                mode=e['mode'],
-                obj_type=e['type'],
-                hash=e['hash'],
-                name=e['name'],
-                path=e.get('path', '')
+                mode=e["mode"],
+                obj_type=e["type"],
+                hash=e["hash"],
+                name=e["name"],
+                path=e.get("path", ""),
             )
-            for e in data.get('entries', [])
+            for e in data.get("entries", [])
         ]
         return Tree(entries=entries)
-
+
     def get_entry(self, name: str) -> Optional[TreeEntry]:
         """Get an entry by name."""
         for entry in self.entries:
@@ -231,50 +238,51 @@ class Tree:
 @dataclass
 class Commit:
     """Commit object for storing memory snapshots."""
+
     tree: str  # Hash of tree object
     parents: List[str]  # Hashes of parent commits
     author: str
     timestamp: str
     message: str
     metadata: Dict[str, Any]  # Additional metadata
-
+
     def to_dict(self) -> Dict[str, Any]:
         """Convert to dictionary for serialization."""
         return {
-            'type': 'commit',
-            'tree': self.tree,
-            'parents': self.parents,
-            'author': self.author,
-            'timestamp': self.timestamp,
-            'message': self.message,
-            'metadata': self.metadata
+            "type": "commit",
+            "tree": self.tree,
+            "parents": self.parents,
+            "author": self.author,
+            "timestamp": self.timestamp,
+            "message": self.message,
+            "metadata": self.metadata,
         }
-
+
     def to_bytes(self) -> bytes:
         """Serialize to bytes."""
         return json.dumps(self.to_dict(), sort_keys=True).encode()
-
+
     def store(self, store: ObjectStore) -> str:
         """Store this commit and return its hash."""
-        return store.store(self.to_bytes(), 'commit')
-
+        return store.store(self.to_bytes(), "commit")
+
     @staticmethod
-    def load(store: ObjectStore, hash_id: str) -> Optional['Commit']:
+    def load(store: ObjectStore, hash_id: str) -> Optional["Commit"]:
         """Load a commit from storage."""
-        content = store.retrieve(hash_id, 'commit')
+        content = store.retrieve(hash_id, "commit")
         if content is None:
             return None
-
+
         data = json.loads(content)
         return Commit(
-            tree=data['tree'],
-            parents=data.get('parents', []),
-            author=data['author'],
-            timestamp=data['timestamp'],
-            message=data['message'],
-            metadata=data.get('metadata', {})
+            tree=data["tree"],
+            parents=data.get("parents", []),
+            author=data["author"],
+            timestamp=data["timestamp"],
+            message=data["message"],
+            metadata=data.get("metadata", {}),
         )
-
+
     def short_hash(self, store: ObjectStore) -> str:
         """Get short hash for display."""
         full_hash = self.store(store)
@@ -284,40 +292,41 @@ class Commit:
 @dataclass
 class Tag:
     """Tag object for marking specific commits."""
+
     name: str
     commit_hash: str
     message: str
     timestamp: str
-
+
     def to_dict(self) -> Dict[str, Any]:
         """Convert to dictionary for serialization."""
         return {
-            'type': 'tag',
-            'name': self.name,
-            'commit_hash': self.commit_hash,
-            'message': self.message,
-            'timestamp': self.timestamp
+            "type": "tag",
+            "name": self.name,
+            "commit_hash": self.commit_hash,
+            "message": self.message,
+            "timestamp": self.timestamp,
        }
-
+
     def to_bytes(self) -> bytes:
         """Serialize to bytes."""
         return json.dumps(self.to_dict(), sort_keys=True).encode()
-
+
     def store(self, store: ObjectStore) -> str:
         """Store this tag and return its hash."""
-        return store.store(self.to_bytes(), 'tag')
-
+        return store.store(self.to_bytes(), "tag")
+
     @staticmethod
-    def load(store: ObjectStore, hash_id: str) -> Optional['Tag']:
+    def load(store: ObjectStore, hash_id: str) -> Optional["Tag"]:
         """Load a tag from storage."""
-        content = store.retrieve(hash_id, 'tag')
+        content = store.retrieve(hash_id, "tag")
        if content is None:
            return None
-
+
        data = json.loads(content)
        return Tag(
-            name=data['name'],
-            commit_hash=data['commit_hash'],
-            message=data['message'],
-            timestamp=data['timestamp']
+            name=data["name"],
+            commit_hash=data["commit_hash"],
+            message=data["message"],
+            timestamp=data["timestamp"],
        )
memvcs/core/pack.py
ADDED
@@ -0,0 +1,92 @@
+"""
+Pack files and garbage collection for agmem.
+
+Pack: collect loose objects into single file + index. GC: delete unreachable objects, repack.
+"""
+
+import json
+import zlib
+from pathlib import Path
+from typing import Set, Dict, List, Optional, Tuple
+
+from .objects import ObjectStore
+from .refs import RefsManager
+
+
+def _pack_dir(objects_dir: Path) -> Path:
+    return objects_dir / "pack"
+
+
+def list_loose_objects(objects_dir: Path) -> Set[str]:
+    """List all loose object hashes (blob, tree, commit, tag)."""
+    hashes = set()
+    for obj_type in ["blob", "tree", "commit", "tag"]:
+        type_dir = objects_dir / obj_type
+        if not type_dir.exists():
+            continue
+        for prefix_dir in type_dir.iterdir():
+            if not prefix_dir.is_dir():
+                continue
+            for f in prefix_dir.iterdir():
+                hash_id = prefix_dir.name + f.name
+                hashes.add(hash_id)
+    return hashes
+
+
+def reachable_from_refs(mem_dir: Path, store: ObjectStore, gc_prune_days: int = 90) -> Set[str]:
+    """Collect all object hashes reachable from branches, tags, and reflog (within prune window)."""
+    refs = RefsManager(mem_dir)
+    reachable = set()
+    # Branch tips
+    for b in refs.list_branches():
+        ch = refs.get_branch_commit(b)
+        if ch:
+            reachable.update(_collect_from_commit(store, ch))
+    # Tags
+    for t in refs.list_tags():
+        ch = refs.get_tag_commit(t)
+        if ch:
+            reachable.update(_collect_from_commit(store, ch))
+    # Reflog (simplified: just HEAD recent)
+    try:
+        log = refs.get_reflog("HEAD", max_count=1000)
+        for e in log:
+            h = e.get("hash")
+            if h:
+                reachable.update(_collect_from_commit(store, h))
+    except Exception:
+        pass
+    return reachable
+
+
+def _collect_from_commit(store: ObjectStore, commit_hash: str) -> Set[str]:
+    """Collect all object hashes reachable from a commit."""
+    from .remote import _collect_objects_from_commit
+
+    return _collect_objects_from_commit(store, commit_hash)
+
+
+def run_gc(
+    mem_dir: Path, store: ObjectStore, gc_prune_days: int = 90, dry_run: bool = False
+) -> Tuple[int, int]:
+    """
+    Garbage collect: delete unreachable loose objects.
+    Returns (deleted_count, bytes_freed). dry_run: only report, do not delete.
+    """
+    loose = list_loose_objects(mem_dir / "objects")
+    reachable = reachable_from_refs(mem_dir, store, gc_prune_days)
+    to_delete = loose - reachable
+    freed = 0
+    for hash_id in to_delete:
+        # Resolve type from path
+        for obj_type in ["blob", "tree", "commit", "tag"]:
+            p = store.objects_dir / obj_type / hash_id[:2] / hash_id[2:]
+            if p.exists():
+                if not dry_run:
+                    size = p.stat().st_size
+                    p.unlink()
+                    freed += size
+                else:
+                    freed += p.stat().st_size
+                break
+    return (len(to_delete), freed)