memgit 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- memgit/__init__.py +3 -0
- memgit/cli.py +1267 -0
- memgit/graph.py +486 -0
- memgit/http_server.py +231 -0
- memgit/importer.py +121 -0
- memgit/mcp_server.py +418 -0
- memgit/models.py +80 -0
- memgit/repo.py +714 -0
- memgit/scorer.py +123 -0
- memgit/store.py +176 -0
- memgit/tokens.py +48 -0
- memgit/toon.py +356 -0
- memgit-0.1.1.dist-info/METADATA +457 -0
- memgit-0.1.1.dist-info/RECORD +18 -0
- memgit-0.1.1.dist-info/WHEEL +5 -0
- memgit-0.1.1.dist-info/entry_points.txt +2 -0
- memgit-0.1.1.dist-info/licenses/LICENSE +21 -0
- memgit-0.1.1.dist-info/top_level.txt +1 -0
memgit/repo.py
ADDED
|
@@ -0,0 +1,714 @@
|
|
|
1
|
+
"""Repository operations — init, add, commit, log, diff, show, squash, flat-export."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
import os
|
|
5
|
+
import subprocess
|
|
6
|
+
import tomllib
|
|
7
|
+
import uuid
|
|
8
|
+
from datetime import datetime, timedelta, timezone
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Optional
|
|
11
|
+
|
|
12
|
+
from .models import (
|
|
13
|
+
Checkpoint, DiffSummary, MindState, MindStateEntry, Mnemonic, Thread,
|
|
14
|
+
)
|
|
15
|
+
from .store import ObjectStore
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class Repository:
|
|
19
|
+
"""A memgit repository rooted at `.memgit/`."""
|
|
20
|
+
|
|
21
|
+
def __init__(self, memgit_dir: Path):
    """Bind this repository to its `.memgit/` directory.

    The directory is kept on ``self.path`` and an ``ObjectStore`` for the
    same directory is kept on ``self.store``.
    """
    self.store = ObjectStore(memgit_dir)
    self.path = memgit_dir
|
|
24
|
+
|
|
25
|
+
# ── Discovery ─────────────────────────────────────────────────────────────
|
|
26
|
+
|
|
27
|
+
@classmethod
def find(cls, start: Path = None) -> Optional['Repository']:
    """Walk up from `start` looking for a `.memgit/` directory.

    Args:
        start: Directory to begin the search from. The current working
            directory is used when it is omitted (or otherwise falsy).

    Returns:
        A repository for the nearest `.memgit/` directory found in `start`
        or one of its ancestors, or None when there is none.
    """
    # Resolve first: a relative path such as Path('.') is its own parent, so
    # an unresolved walk would stop before visiting any real ancestor.
    origin = Path(start or Path.cwd()).resolve()
    for directory in (origin, *origin.parents):
        candidate = directory / '.memgit'
        if candidate.is_dir():
            return cls(candidate)
    return None
|
|
39
|
+
|
|
40
|
+
# ── Init ──────────────────────────────────────────────────────────────────
|
|
41
|
+
|
|
42
|
+
@classmethod
def init(cls, project_dir: Path) -> 'Repository':
    """Initialize a new repository in `project_dir`.

    Creates the `.memgit/` layout (objects, thread/tag refs, thread logs),
    points HEAD at the `main` thread, writes the config file, creates the
    flat `memories/` directory, and records a parentless root checkpoint
    with an empty MindState on `main`.

    Args:
        project_dir: Directory that will contain `.memgit/` and `memories/`.

    Returns:
        The repository rooted at the new `.memgit/` directory.
    """
    memgit = project_dir / '.memgit'
    for sub in ['objects', 'refs/threads', 'refs/tags', 'logs/threads']:
        (memgit / sub).mkdir(parents=True, exist_ok=True)

    (memgit / 'HEAD').write_text('threads/main\n')

    # One lookup feeds both the config and the root checkpoint. The helper
    # also consults USERNAME, so the configured author is not 'unknown' on
    # systems that only set that variable.
    author = _env_author()
    _write_config(memgit / 'config', {
        'core': {'author': author, 'version': 1},
        'thread': {'default': 'main'},
    })

    repo = cls(memgit)

    # Create flat memories directory for git-native sync
    (project_dir / 'memories').mkdir(exist_ok=True)

    # Root checkpoint with empty MindState
    now = datetime.now(timezone.utc)
    ms = MindState(timestamp=now, entries=[])
    ms_sha = repo.store.write_mindstate(ms)

    ck = Checkpoint(
        mindstate_sha=ms_sha,
        timestamp=now,
        trigger='explicit',
        message='Initial checkpoint',
        author=author,
        session_id=str(uuid.uuid4()),
        parent_sha=None,
        diff_summary=DiffSummary(),
    )
    ck_sha = repo.store.write_checkpoint(ck)
    repo._set_ref('main', ck_sha)
    # Written after the ref so the index header records the root checkpoint.
    repo._write_index({})
    return repo
|
|
79
|
+
|
|
80
|
+
# ── Thread / HEAD ─────────────────────────────────────────────────────────
|
|
81
|
+
|
|
82
|
+
def current_thread(self) -> str:
    """Return the thread named in HEAD, without its `threads/` prefix."""
    pointer = (self.path / 'HEAD').read_text().strip()
    prefix = 'threads/'
    if pointer.startswith(prefix):
        return pointer[len(prefix):]
    return pointer
|
|
85
|
+
|
|
86
|
+
def head_sha(self, thread: str = None) -> Optional[str]:
    """Return the checkpoint SHA stored in a thread's ref file.

    The current thread is used when `thread` is omitted or empty. Returns
    None when the ref file does not exist.
    """
    name = thread if thread else self.current_thread()
    ref_file = self.path / 'refs' / 'threads' / name
    if not ref_file.exists():
        return None
    return ref_file.read_text().strip()
|
|
89
|
+
|
|
90
|
+
def _set_ref(self, thread: str, sha: str):
    """Point a thread's ref at `sha` and append the move to the thread's log.

    The ref file is overwritten with the SHA; the log file gains one
    `<UTC ISO timestamp> <sha>` line. Missing parent directories are created.
    """
    ref_file = self.path / 'refs' / 'threads' / thread
    ref_file.parent.mkdir(parents=True, exist_ok=True)
    ref_file.write_text(f'{sha}\n')

    log_file = self.path / 'logs' / 'threads' / thread
    log_file.parent.mkdir(parents=True, exist_ok=True)
    stamp = datetime.now(timezone.utc).isoformat()
    with log_file.open('a') as handle:
        handle.write(f'{stamp} {sha}\n')
|
|
99
|
+
|
|
100
|
+
# ── TOON_INDEX (staging area) ─────────────────────────────────────────────
|
|
101
|
+
|
|
102
|
+
def get_index(self) -> dict[str, str]:
    """Load TOON_INDEX → {slug: mnem_sha}.

    Returns an empty dict when the file is absent. Blank lines, `#` comment
    lines, and lines that do not split into exactly two whitespace-separated
    fields are skipped.
    """
    index_file = self.path / 'TOON_INDEX'
    if not index_file.exists():
        return {}

    mapping: dict[str, str] = {}
    for raw in index_file.read_text().splitlines():
        entry = raw.strip()
        if entry == '' or entry.startswith('#'):
            continue
        fields = entry.split()
        if len(fields) != 2:
            continue
        slug, sha = fields
        mapping[slug] = sha
    return mapping
|
|
116
|
+
|
|
117
|
+
def _write_index(self, index: dict[str, str]):
    """Write `index` to TOON_INDEX, one `slug sha` line per entry.

    The file starts with three `#` header lines (format version, current
    thread, HEAD checkpoint or 'none'); entries follow sorted by slug.
    """
    thread = self.current_thread()
    head = self.head_sha() or 'none'
    header = [
        '# TOON_INDEX v1',
        f'# thread: {thread}',
        f'# checkpoint: {head}',
    ]
    entries = [f'{slug} {index[slug]}' for slug in sorted(index)]
    text = ''.join(f'{line}\n' for line in header + entries)
    (self.path / 'TOON_INDEX').write_text(text)
|
|
128
|
+
|
|
129
|
+
def _rebuild_index(self):
    """Rebuild TOON_INDEX from the HEAD checkpoint's MindState.

    Writes an empty index when the current thread has no HEAD checkpoint.
    """
    staged: dict[str, str] = {}
    head = self.head_sha()
    if head:
        checkpoint = self.store.read_checkpoint(head)
        mindstate = self.store.read_mindstate(checkpoint.mindstate_sha)
        for entry in mindstate.entries:
            staged[entry.slug] = entry.mnem_sha
    self._write_index(staged)
|
|
138
|
+
|
|
139
|
+
# ── Mnemonic operations ───────────────────────────────────────────────────
|
|
140
|
+
|
|
141
|
+
def add(self, m: Mnemonic) -> str:
    """Store `m` and stage it in the index under its slug. Returns the SHA."""
    sha = self.store.write_mnemonic(m)
    staged = self.get_index()
    staged.update({m.slug: sha})
    self._write_index(staged)
    return sha
|
|
148
|
+
|
|
149
|
+
def remove(self, slug: str) -> bool:
    """Unstage `slug` from the index; stored objects are left untouched.

    Returns True when the slug was staged, False otherwise (in which case
    the index file is not rewritten).
    """
    staged = self.get_index()
    try:
        staged.pop(slug)
    except KeyError:
        return False
    self._write_index(staged)
    return True
|
|
157
|
+
|
|
158
|
+
def get(self, slug: str) -> Optional[Mnemonic]:
    """Return the staged mnemonic for `slug`, or None when it is not indexed."""
    sha = self.get_index().get(slug)
    if not sha:
        return None
    return self.store.read_mnemonic(sha)
|
|
162
|
+
|
|
163
|
+
def list(self) -> list[Mnemonic]:
    """Return every staged mnemonic that can be read from the store.

    Best-effort: an index entry whose object fails to load is skipped
    without raising.
    """
    loaded = []
    for sha in self.get_index().values():
        try:
            mnemonic = self.store.read_mnemonic(sha)
        except Exception:
            continue
        loaded.append(mnemonic)
    return loaded
|
|
172
|
+
|
|
173
|
+
# ── Commit ────────────────────────────────────────────────────────────────
|
|
174
|
+
|
|
175
|
+
def commit(self, message: str = None, trigger: str = 'explicit') -> Optional[str]:
    """Checkpoint the current index. Returns checkpoint SHA or None if no changes.

    A MindState is built from the staged index. When HEAD's checkpoint
    already references a MindState with the same SHA, nothing is written and
    None is returned. Otherwise the MindState and a new checkpoint (parent:
    the previous HEAD) are stored, the current thread's ref is advanced, and
    the index is rebuilt from the new HEAD.

    When `message` is None, one is generated from the added / updated /
    removed slugs (up to three sample slugs per category).
    """
    now = datetime.now(timezone.utc)
    staged = self.get_index()

    new_ms = MindState(
        timestamp=now,
        entries=[MindStateEntry(slug=slug, mnem_sha=sha) for slug, sha in staged.items()],
    )
    new_ms_sha = self.store.mindstate_sha(new_ms)

    # Compare against HEAD
    head = self.head_sha()
    if not head:
        old_ms = MindState(timestamp=now, entries=[])
    else:
        previous = self.store.read_checkpoint(head)
        if previous.mindstate_sha == new_ms_sha:
            return None  # nothing changed
        old_ms = self.store.read_mindstate(previous.mindstate_sha)

    self.store.write_mindstate(new_ms)

    # Classify every slug; list order follows the map it is drawn from.
    before = {entry.slug: entry.mnem_sha for entry in old_ms.entries}
    after = {entry.slug: entry.mnem_sha for entry in new_ms.entries}
    added = [slug for slug in after if slug not in before]
    removed, modified, unchanged = [], [], []
    for slug, old_sha in before.items():
        if slug not in after:
            removed.append(slug)
        elif old_sha != after[slug]:
            modified.append(slug)
        else:
            unchanged.append(slug)
    diff = DiffSummary(
        added=added,
        removed=removed,
        modified=modified,
        unchanged=unchanged,
    )

    if message is None:
        def describe(label, slugs):
            preview = ', '.join(slugs[:3])
            ellipsis = '...' if len(slugs) > 3 else ''
            return f'{label} {len(slugs)}: {preview}{ellipsis}'

        categories = (
            ('Added', diff.added),
            ('Updated', diff.modified),
            ('Removed', diff.removed),
        )
        clauses = [describe(label, slugs) for label, slugs in categories if slugs]
        message = '; '.join(clauses) or 'No changes'

    checkpoint = Checkpoint(
        mindstate_sha=new_ms_sha,
        timestamp=now,
        trigger=trigger,
        message=message,
        author=self._author(),
        session_id=str(uuid.uuid4()),
        parent_sha=head,
        diff_summary=diff,
    )
    checkpoint_sha = self.store.write_checkpoint(checkpoint)
    self._set_ref(self.current_thread(), checkpoint_sha)
    self._rebuild_index()
    return checkpoint_sha
|
|
236
|
+
|
|
237
|
+
# ── History ───────────────────────────────────────────────────────────────
|
|
238
|
+
|
|
239
|
+
def log(self, limit: int = 10, thread: str = None) -> list[Checkpoint]:
    """Return checkpoint chain from HEAD, newest first.

    Parent links are followed from the thread's head until `limit`
    checkpoints are collected, a checkpoint has no parent, or a checkpoint
    cannot be read (the walk then stops without raising).
    """
    chain = []
    cursor = self.head_sha(thread)
    while len(chain) < limit and cursor:
        try:
            checkpoint = self.store.read_checkpoint(cursor)
        except Exception:
            break
        cursor = checkpoint.parent_sha
        chain.append(checkpoint)
    return chain
|
|
251
|
+
|
|
252
|
+
def diff(self, sha1: str = None, sha2: str = None) -> DiffSummary:
    """Diff two checkpoints. Defaults to HEAD^ → HEAD.

    Args:
        sha1: Base checkpoint. Defaults to the parent of `sha2`; when that
            checkpoint has no parent the base is the empty state.
        sha2: Target checkpoint. Defaults to the current thread's HEAD.

    Returns:
        Slugs classified as added / removed / modified / unchanged going
        from the base to the target. An empty summary is returned when
        there is no target checkpoint at all.
    """
    sha2 = sha2 or self.head_sha()
    if sha2 is None:
        return DiffSummary()

    # Read the target once: it supplies both the default base (its parent)
    # and the new MindState.
    ck2 = self.store.read_checkpoint(sha2)
    if sha1 is None:
        sha1 = ck2.parent_sha

    if sha1 is None:
        # Root checkpoint: everything in the target counts as added.
        old_map = {}
    else:
        ck1 = self.store.read_checkpoint(sha1)
        old_ms = self.store.read_mindstate(ck1.mindstate_sha)
        old_map = {e.slug: e.mnem_sha for e in old_ms.entries}

    new_ms = self.store.read_mindstate(ck2.mindstate_sha)
    new_map = {e.slug: e.mnem_sha for e in new_ms.entries}

    return DiffSummary(
        added=[s for s in new_map if s not in old_map],
        removed=[s for s in old_map if s not in new_map],
        modified=[s for s in old_map if s in new_map and old_map[s] != new_map[s]],
        unchanged=[s for s in old_map if s in new_map and old_map[s] == new_map[s]],
    )
|
|
280
|
+
|
|
281
|
+
def diff_full(self, sha1: str = None, sha2: str = None) -> list[tuple[str, str, Optional[Mnemonic], Optional[Mnemonic]]]:
    """Return detailed diff: list of (slug, status, old_mnem, new_mnem).

    status: 'added' | 'removed' | 'modified' | 'unchanged'

    Args:
        sha1: Base checkpoint. Defaults to the parent of `sha2`; when that
            checkpoint has no parent the base is the empty state.
        sha2: Target checkpoint. Defaults to the current thread's HEAD.

    Returns:
        One tuple per slug present on either side, sorted by slug. The
        mnemonic on a side where the slug is absent is None. An empty list
        is returned when there is no target checkpoint at all.
    """
    sha2 = sha2 or self.head_sha()
    if sha2 is None:
        return []

    # Read the target once: it supplies both the default base (its parent)
    # and the new MindState.
    ck2 = self.store.read_checkpoint(sha2)
    if sha1 is None:
        sha1 = ck2.parent_sha

    if sha1 is None:
        # Root checkpoint: everything in the target counts as added.
        old_map = {}
    else:
        ck1 = self.store.read_checkpoint(sha1)
        old_ms = self.store.read_mindstate(ck1.mindstate_sha)
        old_map = {e.slug: e.mnem_sha for e in old_ms.entries}

    new_ms = self.store.read_mindstate(ck2.mindstate_sha)
    new_map = {e.slug: e.mnem_sha for e in new_ms.entries}

    result = []
    for slug in sorted(set(old_map) | set(new_map)):
        old_sha = old_map.get(slug)
        new_sha = new_map.get(slug)
        old_m = self.store.read_mnemonic(old_sha) if old_sha else None
        new_m = self.store.read_mnemonic(new_sha) if new_sha else None

        if old_sha is None:
            status = 'added'
        elif new_sha is None:
            status = 'removed'
        elif old_sha != new_sha:
            status = 'modified'
        else:
            status = 'unchanged'
        result.append((slug, status, old_m, new_m))

    return result
|
|
325
|
+
|
|
326
|
+
# ── Thread management ─────────────────────────────────────────────────────
|
|
327
|
+
|
|
328
|
+
def thread_create(self, name: str, description: str = '') -> Thread:
    """Create thread `name` pointing at the current HEAD checkpoint.

    Args:
        name: Thread name; `/` separates namespace components
            (e.g. ``feature/x``).
        description: Free-text description stored on the returned Thread.

    Returns:
        The new Thread. Its `created_at` is the HEAD checkpoint's timestamp.

    Raises:
        ValueError: If there is no HEAD checkpoint, if `name` is not a
            usable ref name, or if a ref with that name already exists.
    """
    head = self.head_sha()
    if head is None:
        raise ValueError('No HEAD checkpoint to branch from')

    # The name becomes a path under refs/threads/, so reject anything that
    # would escape that directory ('..', absolute paths), target the
    # directory itself (''), or not survive the strip() applied when HEAD is
    # read back.
    components = name.split('/')
    if (
        any(part in ('', '.', '..') for part in components)
        or name != name.strip()
        or any(ch in name for ch in ('\\', '\n', '\r'))
    ):
        raise ValueError(f'Invalid thread name {name!r}')

    # Overwriting an existing ref would silently drop that thread's head.
    if (self.path / 'refs' / 'threads' / name).exists():
        raise ValueError(f'Thread {name!r} already exists')

    self._set_ref(name, head)
    ck = self.store.read_checkpoint(head)
    return Thread(name=name, head_sha=head, created_at=ck.timestamp, description=description)
|
|
335
|
+
|
|
336
|
+
def thread_list(self) -> list[Thread]:
    """Return one Thread per ref file under `refs/threads/`, sorted by path.

    Thread names are the ref paths relative to `refs/threads/`, always with
    `/` separators. `created_at` is the timestamp of the checkpoint the ref
    points at; when that checkpoint cannot be read, the current UTC time is
    used instead so the thread is still listed.
    """
    threads_dir = self.path / 'refs' / 'threads'
    result = []
    # Sorted so the listing does not depend on filesystem enumeration order.
    for p in sorted(threads_dir.rglob('*')):
        if not p.is_file():
            continue
        # as_posix(): str() would yield 'feature\\x' on Windows, which does
        # not match the 'feature/x' name the thread was created with.
        name = p.relative_to(threads_dir).as_posix()
        sha = p.read_text().strip()
        try:
            created_at = self.store.read_checkpoint(sha).timestamp
        except Exception:
            created_at = datetime.now(timezone.utc)
        result.append(Thread(name=name, head_sha=sha, created_at=created_at))
    return result
|
|
349
|
+
|
|
350
|
+
def thread_switch(self, name: str):
    """Make `name` the current thread and rebuild the index from its head.

    Raises:
        ValueError: If no ref file exists for `name`.
    """
    ref = self.path / 'refs' / 'threads' / name
    # is_file() rather than exists(): '' resolves to refs/threads/ itself and
    # a namespace such as 'feature' (parent of 'feature/x') is a directory;
    # neither is a thread, and pointing HEAD at one breaks later ref reads.
    if not name or not ref.is_file():
        raise ValueError(f'Thread {name!r} does not exist')
    (self.path / 'HEAD').write_text(f'threads/{name}\n')
    self._rebuild_index()
|
|
356
|
+
|
|
357
|
+
# ── Integrity ─────────────────────────────────────────────────────────────
|
|
358
|
+
|
|
359
|
+
def fsck(self, rebuild_index: bool = False) -> list[str]:
    """Verify repository integrity. Returns list of error messages.

    Reports every staged slug whose object is missing from the store, and
    the HEAD checkpoint when it is missing. With `rebuild_index` set, the
    index is rebuilt from HEAD after the checks.
    """
    store = self.store
    errors = [
        f'MISSING mnemonic object: {slug} → {sha[:8]}'
        for slug, sha in self.get_index().items()
        if not store.exists(sha)
    ]

    head = self.head_sha()
    head_missing = bool(head) and not store.exists(head)
    if head_missing:
        errors.append(f'MISSING HEAD checkpoint: {head[:8]}')

    if rebuild_index:
        self._rebuild_index()
    return errors
|
|
372
|
+
|
|
373
|
+
# ── Flat memories/ directory (git-native sync) ────────────────────────────
|
|
374
|
+
|
|
375
|
+
@property
def memories_dir(self) -> Path:
    """Path of the flat `memories/` directory, a sibling of `.memgit/`."""
    project_root = self.path.parent
    return project_root / 'memories'
|
|
378
|
+
|
|
379
|
+
def write_flat(self):
    """Write every indexed memory as a readable .toon file under memories/.

    This is the git sync surface — users can `git push` this directory to
    share memories across machines and teammates. Each memory is serialized
    to `memories/<slug>.toon`.

    Export is best-effort: a memory that cannot be read or serialized is
    skipped. `.toon` files whose slug is no longer in the index are deleted.
    """
    from .toon import serialize_mnemonic
    mdir = self.memories_dir
    mdir.mkdir(exist_ok=True)

    index = self.get_index()

    for slug, sha in index.items():
        try:
            m = self.store.read_mnemonic(sha)
            toon_text = serialize_mnemonic(m)
            (mdir / f'{slug}.toon').write_text(toon_text + '\n')
        except Exception:
            # Best-effort: leave any previously exported file in place.
            continue

    # Remove stale files for deleted memories. Staleness is decided by index
    # membership, not by whether this run's write succeeded; otherwise a
    # memory that is still indexed but failed to export would lose its last
    # good file.
    for f in mdir.glob('*.toon'):
        if f.stem not in index:
            f.unlink()
|
|
406
|
+
|
|
407
|
+
def import_flat(self) -> int:
    """Import memories from memories/ flat files into the object store.

    Used after a `git pull` to absorb teammate memory updates. Every
    `.toon` file is parsed (in sorted filename order) and each mnemonic it
    contains is written to the store and staged under its slug. Import is
    best-effort: a file that fails to read or parse is skipped, keeping
    whatever was staged from it before the failure.

    Returns the number of memories imported.
    """
    from .toon import parse_toon
    mdir = self.memories_dir
    if not mdir.exists():
        return 0

    # Stage into one in-memory index and write it back once at the end,
    # instead of re-reading and rewriting TOON_INDEX for every mnemonic.
    index = self.get_index()
    count = 0
    for f in sorted(mdir.glob('*.toon')):
        try:
            for obj in parse_toon(f.read_text()):
                if isinstance(obj, Mnemonic):
                    index[obj.slug] = self.store.write_mnemonic(obj)
                    count += 1
        except Exception:
            continue

    if count:
        self._write_index(index)
    return count
|
|
429
|
+
|
|
430
|
+
# ── Git integration ───────────────────────────────────────────────────────
|
|
431
|
+
|
|
432
|
+
def git_init(self) -> bool:
    """Run `git init` in the store root (parent of .memgit/).

    When the store root has no `.gitignore`, one is written first that
    ignores `.memgit/objects/`, `.memgit/logs/` and `*.pyc`. `git init` is
    only run when the store root has no `.git` entry yet.

    Returns True on success (or when `.git` already exists), False when
    running git fails for any reason.
    """
    root = self.path.parent
    ignore_file = root / '.gitignore'
    if not ignore_file.exists():
        ignore_rules = (
            '# memgit object blobs — large and redundant with memories/\n'
            '.memgit/objects/\n'
            '.memgit/logs/\n'
            '*.pyc\n'
        )
        ignore_file.write_text(ignore_rules)

    try:
        if (root / '.git').exists():
            return True
        subprocess.run(['git', 'init'], cwd=root, check=True, capture_output=True)
    except Exception:
        return False
    return True
|
|
453
|
+
|
|
454
|
+
def git_status(self) -> Optional[str]:
    """Return git status output for the store root, or None if not a git repo.

    The output is that of `git status --short`, stripped. None is also
    returned when running git fails.
    """
    root = self.path.parent
    if (root / '.git').exists():
        try:
            proc = subprocess.run(
                ['git', 'status', '--short'],
                cwd=root, capture_output=True, text=True, check=True,
            )
            return proc.stdout.strip()
        except Exception:
            return None
    return None
|
|
465
|
+
|
|
466
|
+
def git_push(self, remote: str = 'origin', branch: str = 'main',
             message: str = None) -> tuple[bool, str]:
    """Write flat files then `git add + commit + push`.

    Stages `memories/` and `.memgit/refs/`, commits when anything is staged,
    and pushes with upstream tracking (`git push -u`).

    Args:
        remote: Git remote to push to.
        branch: Branch name to push.
        message: Commit message; defaults to one naming the HEAD checkpoint.

    Returns (success, output_message). Failures to run git — a failing
    command or a missing `git` executable — are reported through the tuple
    rather than raised.
    """
    store_root = self.path.parent
    if not (store_root / '.git').exists():
        return False, 'Not a git repo — run `memgit git init` first'
    self.write_flat()
    head_sha = self.head_sha() or 'none'
    commit_msg = message or f'memgit: checkpoint {head_sha[:8]}'
    try:
        subprocess.run(['git', 'add', 'memories/', '.memgit/refs/'], cwd=store_root,
                       check=True, capture_output=True)
        # `git diff --cached --quiet` exits 0 when nothing is staged.
        staged = subprocess.run(
            ['git', 'diff', '--cached', '--quiet'],
            cwd=store_root, capture_output=True,
        )
        if staged.returncode == 0:
            # NOTE(review): this returns before `git push`, so a commit left
            # unpushed by an earlier failed push is not retried — confirm
            # whether that is intended.
            return True, 'Nothing to push (no changes since last git commit)'
        subprocess.run(['git', 'commit', '-m', commit_msg], cwd=store_root,
                       check=True, capture_output=True)
        subprocess.run(['git', 'push', '-u', remote, branch], cwd=store_root,
                       check=True, capture_output=True)
        return True, f'Pushed to {remote}/{branch}'
    except subprocess.CalledProcessError as e:
        # errors='replace': git output is not guaranteed to be valid UTF-8.
        return False, e.stderr.decode(errors='replace') if e.stderr else str(e)
    except OSError as e:
        # e.g. the git executable is not installed / not on PATH.
        return False, f'Could not run git: {e}'
|
|
494
|
+
|
|
495
|
+
def git_pull(self, remote: str = 'origin', branch: str = 'main') -> tuple[bool, str, int]:
    """Pull from git remote then import flat files.

    After a successful `git pull`, the flat files are imported and the
    result is committed as a new checkpoint.

    Args:
        remote: Git remote to pull from.
        branch: Branch name to pull.

    Returns (success, message, memories_imported). When the import leaves
    the memory state identical to HEAD, the result is
    `(True, 'Already up to date', 0)`. Failures to run git — a failing
    command or a missing `git` executable — are reported through the tuple
    rather than raised.
    """
    store_root = self.path.parent
    if not (store_root / '.git').exists():
        return False, 'Not a git repo', 0
    try:
        subprocess.run(['git', 'pull', remote, branch], cwd=store_root,
                       check=True, capture_output=True)
    except subprocess.CalledProcessError as e:
        # errors='replace': git output is not guaranteed to be valid UTF-8.
        return False, e.stderr.decode(errors='replace') if e.stderr else str(e), 0
    except OSError as e:
        # e.g. the git executable is not installed / not on PATH.
        return False, f'Could not run git: {e}', 0

    count = self.import_flat()
    if count > 0:
        committed = self.commit(f'pull: imported {count} memories from {remote}/{branch}')
        # commit() returns None when the staged state equals HEAD, i.e. the
        # flat files were re-read but nothing actually changed.
        if committed is not None:
            return True, f'Pulled {count} memories', count
    return True, 'Already up to date', 0
|
|
513
|
+
|
|
514
|
+
# ── Squash (scale to 10k+ commits) ───────────────────────────────────────
|
|
515
|
+
|
|
516
|
+
def squash(
    self,
    keep_last: int = None,
    older_than_days: int = None,
    dry_run: bool = False,
) -> dict:
    """Squash old checkpoints into a single baseline checkpoint.

    The newest checkpoints are kept (rewritten so their parent links form a
    new chain); everything older is replaced by one parentless "squash root"
    that carries the MindState of the most recent squashed checkpoint. The
    HEAD checkpoint is always kept, and only the current thread's ref is
    moved. Histories with fewer than three checkpoints are left untouched,
    and at most the 10,000 most recent checkpoints are considered.

    Args:
        keep_last: Keep this many recent checkpoints; squash the rest.
            Takes precedence over `older_than_days`.
        older_than_days: Squash all checkpoints older than N days.
        dry_run: Preview only, no changes.

    With neither `keep_last` nor `older_than_days`, the older half of the
    history is squashed.

    Returns dict with squash summary: always 'kept', 'squashed' and
    'dry_run'; 'baseline_sha' and 'baseline_ts' when there is something to
    squash; 'new_head' after a real (non-dry) run.

    Raises:
        ValueError: If `keep_last` is less than 1.
    """
    all_cks = self.log(limit=10_000)  # newest first
    if len(all_cks) < 3:
        return {'squashed': 0, 'kept': len(all_cks), 'dry_run': dry_run}

    # Determine the cut point: all_cks[:cut_idx] is kept, the rest squashed.
    now = datetime.now(timezone.utc)
    cut_idx = None

    if keep_last is not None:
        if keep_last < 1:
            raise ValueError('keep_last must be at least 1')
        cut_idx = keep_last
    elif older_than_days is not None:
        cutoff = now - timedelta(days=older_than_days)
        for i, ck in enumerate(all_cks):
            if ck.timestamp < cutoff:
                cut_idx = i
                break
    else:
        cut_idx = max(1, len(all_cks) // 2)  # default: halve history

    if cut_idx is None or cut_idx >= len(all_cks):
        return {'squashed': 0, 'kept': len(all_cks), 'dry_run': dry_run}

    # HEAD itself can be older than the cutoff (cut_idx == 0). It must still
    # be kept: the rewrite below needs at least one kept checkpoint to attach
    # to the squash root.
    cut_idx = max(1, cut_idx)

    kept_cks = all_cks[:cut_idx]      # newest N checkpoints
    squashed_cks = all_cks[cut_idx:]  # older ones, collapse to one baseline

    # Newest-first order: element 0 is the MOST RECENT squashed checkpoint,
    # i.e. the direct parent of the oldest kept one. Its MindState is the
    # state the kept chain starts from.
    baseline_ck = squashed_cks[0]

    summary = {
        'kept': len(kept_cks),
        'squashed': len(squashed_cks),
        'baseline_sha': baseline_ck.sha[:8] if baseline_ck.sha else '?',
        'baseline_ts': baseline_ck.timestamp.strftime('%Y-%m-%d'),
        'dry_run': dry_run,
    }

    if dry_run:
        return summary

    def reparent(ck, parent_sha, message):
        """Store a copy of `ck` with a new parent and message; return its SHA."""
        return self.store.write_checkpoint(Checkpoint(
            mindstate_sha=ck.mindstate_sha,
            timestamp=ck.timestamp,
            trigger=ck.trigger,
            message=message,
            author=ck.author,
            session_id=ck.session_id,
            parent_sha=parent_sha,
            diff_summary=ck.diff_summary,
        ))

    # Step 1: Write a new "squash root" checkpoint that has no parent —
    # this is the baseline. It carries the baseline checkpoint's MindState so
    # the memory content at that point in time is preserved.
    squash_root = Checkpoint(
        mindstate_sha=baseline_ck.mindstate_sha,
        timestamp=baseline_ck.timestamp,
        trigger='squash',
        message=f'squash root: {len(squashed_cks)} older checkpoints collapsed',
        author=baseline_ck.author,
        session_id=baseline_ck.session_id,
        parent_sha=None,  # no parent — this is the new root
        diff_summary=DiffSummary(),
    )
    squash_root_sha = self.store.write_checkpoint(squash_root)

    # Step 2: Rewrite kept chain so oldest_kept.parent → squash_root
    oldest_kept = kept_cks[-1]
    remap: dict[str, str] = {}  # old checkpoint SHA -> rewritten SHA
    remap[oldest_kept.sha] = reparent(
        oldest_kept,
        squash_root_sha,
        f'(squashed {len(squashed_cks)} older checkpoints) {oldest_kept.message}',
    )

    # Step 3: Walk newer checkpoints (oldest first), updating each parent
    # pointer to the rewritten copy of its parent.
    for ck in reversed(kept_cks[:-1]):
        parent = remap.get(ck.parent_sha, ck.parent_sha)
        remap[ck.sha] = reparent(ck, parent, ck.message)

    # Step 4: Update HEAD
    newest_kept_sha = remap.get(kept_cks[0].sha, kept_cks[0].sha)
    self._set_ref(self.current_thread(), newest_kept_sha)
    self._rebuild_index()

    summary['new_head'] = newest_kept_sha[:8]
    return summary
|
|
625
|
+
|
|
626
|
+
# ── Stats ─────────────────────────────────────────────────────────────────
|
|
627
|
+
|
|
628
|
+
def stats(self) -> dict:
    """Compute token-savings statistics for the memory store.

    Compares the token count of all staged memories against the average
    token count of the top-8 results for a fixed set of sample queries, and
    adds per-type / per-priority counts and checkpoint history bounds.
    Returns `{'total': 0}` when no memories are staged.
    """
    from .tokens import all_memories_tokens, memory_tokens, token_cost_usd
    from .scorer import score as bm25_score

    mnemonics = self.list()
    if not mnemonics:
        return {'total': 0}

    full_tokens = all_memories_tokens(mnemonics)
    avg_mem_tokens = full_tokens / len(mnemonics)

    # Simulate a typical search: top-8 results
    sample_queries = (
        'how should I approach this', 'project rules and conventions',
        'user preferences', 'what to avoid', 'lessons learned',
    )
    per_query_tokens = [
        sum(memory_tokens(hit.mnemonic) for hit in bm25_score(query, mnemonics, top_k=8))
        for query in sample_queries
    ]
    avg_search_tokens = round(sum(per_query_tokens) / len(per_query_tokens))

    critical_tokens = sum(memory_tokens(m) for m in mnemonics if m.priority == 3)

    by_type: dict[str, int] = {}
    for m in mnemonics:
        code = m.type_code
        by_type[code] = by_type.get(code, 0) + 1

    priority_counts = {
        level: sum(1 for m in mnemonics if m.priority == level)
        for level in (3, 2, 1)
    }

    checkpoints = self.log(limit=10_000)
    first_ts = checkpoints[-1].timestamp if checkpoints else None
    last_ts = checkpoints[0].timestamp if checkpoints else None

    if full_tokens:
        reduction_pct = round(100 * (1 - avg_search_tokens / full_tokens))
    else:
        reduction_pct = 0
    weekly_saved = (full_tokens - avg_search_tokens) * 10  # 10 sessions/week

    return {
        'total': len(mnemonics),
        'by_type': by_type,
        'priority_counts': priority_counts,
        'full_tokens': full_tokens,
        'avg_mem_tokens': round(avg_mem_tokens),
        'avg_search_tokens': avg_search_tokens,
        'critical_tokens': critical_tokens,
        'reduction_pct': reduction_pct,
        'weekly_savings_tokens': weekly_saved,
        'weekly_savings_usd': round(token_cost_usd(weekly_saved), 4),
        'checkpoint_count': len(checkpoints),
        'first_checkpoint_ts': first_ts,
        'last_checkpoint_ts': last_ts,
    }
|
|
680
|
+
|
|
681
|
+
# ── Internal ──────────────────────────────────────────────────────────────
|
|
682
|
+
|
|
683
|
+
def _author(self) -> str:
    """Return the author from the config's `core.author`, else from the environment.

    Any failure to read or parse the config falls back to the environment.
    """
    try:
        core = _read_config(self.path / 'config').get('core', {})
        configured = core.get('author', _env_author())
    except Exception:
        return _env_author()
    return configured
|
|
689
|
+
|
|
690
|
+
|
|
691
|
+
# ── Config helpers ────────────────────────────────────────────────────────────
|
|
692
|
+
|
|
693
|
+
def _env_author() -> str:
    """Return the user name from `USER`, else `USERNAME`, else 'unknown'."""
    env = os.environ
    for variable in ('USER', 'USERNAME'):
        if variable in env:
            return env[variable]
    return 'unknown'
|
|
695
|
+
|
|
696
|
+
|
|
697
|
+
def _write_config(path: Path, data: dict):
|
|
698
|
+
lines = []
|
|
699
|
+
for section, values in data.items():
|
|
700
|
+
lines.append(f'[{section}]')
|
|
701
|
+
for k, v in values.items():
|
|
702
|
+
if isinstance(v, str):
|
|
703
|
+
lines.append(f'{k} = "{v}"')
|
|
704
|
+
elif isinstance(v, (int, float)):
|
|
705
|
+
lines.append(f'{k} = {v}')
|
|
706
|
+
elif isinstance(v, bool):
|
|
707
|
+
lines.append(f'{k} = {"true" if v else "false"}')
|
|
708
|
+
lines.append('')
|
|
709
|
+
path.write_text('\n'.join(lines))
|
|
710
|
+
|
|
711
|
+
|
|
712
|
+
def _read_config(path: Path) -> dict:
|
|
713
|
+
with open(path, 'rb') as f:
|
|
714
|
+
return tomllib.load(f)
|