memgit 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
memgit/repo.py ADDED
@@ -0,0 +1,714 @@
1
+ """Repository operations — init, add, commit, log, diff, show, squash, flat-export."""
2
+
3
+ from __future__ import annotations
4
+ import os
5
+ import subprocess
6
+ import tomllib
7
+ import uuid
8
+ from datetime import datetime, timedelta, timezone
9
+ from pathlib import Path
10
+ from typing import Optional
11
+
12
+ from .models import (
13
+ Checkpoint, DiffSummary, MindState, MindStateEntry, Mnemonic, Thread,
14
+ )
15
+ from .store import ObjectStore
16
+
17
+
18
+ class Repository:
19
+ """A memgit repository rooted at `.memgit/`."""
20
+
21
def __init__(self, memgit_dir: Path):
    """Bind this repository to a `.memgit/` directory.

    Args:
        memgit_dir: Path of the `.memgit/` directory itself (not the
            project directory that contains it).
    """
    # The object store is opened on the same directory that holds HEAD,
    # refs and logs.
    self.store = ObjectStore(memgit_dir)
    self.path = memgit_dir
24
+
25
+ # ── Discovery ─────────────────────────────────────────────────────────────
26
+
27
@classmethod
def find(cls, start: Path = None) -> Optional['Repository']:
    """Locate the nearest enclosing repository.

    Checks `start` (the current working directory when omitted) and then
    each of its ancestors, nearest first, for a `.memgit/` directory.

    Returns:
        A repository bound to the first `.memgit/` directory found, or
        None when no ancestor contains one.
    """
    origin = Path(start or Path.cwd())
    # `parents` lists the ancestors nearest-first and stops at the root.
    for directory in (origin, *origin.parents):
        marker = directory / '.memgit'
        if marker.is_dir():
            return cls(marker)
    return None
39
+
40
+ # ── Init ──────────────────────────────────────────────────────────────────
41
+
42
@classmethod
def init(cls, project_dir: Path) -> 'Repository':
    """Initialize a new repository in `project_dir`.

    Creates the `.memgit/` layout (objects, refs, logs), points HEAD at the
    `main` thread, writes the config file, creates the flat `memories/`
    directory, and records a root checkpoint holding an empty MindState.

    Args:
        project_dir: Directory that will contain `.memgit/` and `memories/`.

    Returns:
        A repository bound to the new `.memgit/` directory.
    """
    memgit = project_dir / '.memgit'
    for sub in ('objects', 'refs/threads', 'refs/tags', 'logs/threads'):
        (memgit / sub).mkdir(parents=True, exist_ok=True)

    # Resolve the author once so the config and the root checkpoint agree.
    # _env_author() also consults USERNAME; reading USER alone stored
    # 'unknown' in the config on systems that only set USERNAME, and every
    # later commit then picked that value up from the config.
    author = _env_author()

    (memgit / 'HEAD').write_text('threads/main\n')
    _write_config(memgit / 'config', {
        'core': {'author': author, 'version': 1},
        'thread': {'default': 'main'},
    })

    repo = cls(memgit)

    # Flat memories directory for git-native sync.
    (project_dir / 'memories').mkdir(exist_ok=True)

    # Root checkpoint with an empty MindState.
    now = datetime.now(timezone.utc)
    ms_sha = repo.store.write_mindstate(MindState(timestamp=now, entries=[]))
    root_ck = Checkpoint(
        mindstate_sha=ms_sha,
        timestamp=now,
        trigger='explicit',
        message='Initial checkpoint',
        author=author,
        session_id=str(uuid.uuid4()),
        parent_sha=None,
        diff_summary=DiffSummary(),
    )
    ck_sha = repo.store.write_checkpoint(root_ck)
    repo._set_ref('main', ck_sha)
    repo._write_index({})
    return repo
79
+
80
+ # ── Thread / HEAD ─────────────────────────────────────────────────────────
81
+
82
def current_thread(self) -> str:
    """Return the name of the thread HEAD points at.

    HEAD stores `threads/<name>`; the `threads/` prefix is dropped when
    present.
    """
    head_file = self.path / 'HEAD'
    target = head_file.read_text().strip()
    return target.removeprefix('threads/')
85
+
86
def head_sha(self, thread: str = None) -> Optional[str]:
    """Return the checkpoint SHA that a thread's ref points to.

    Args:
        thread: Thread name; the current thread is used when omitted or
            empty.

    Returns:
        The stripped contents of `refs/threads/<thread>`, or None when
        that ref does not exist.
    """
    name = thread or self.current_thread()
    ref_file = self.path / 'refs' / 'threads' / name
    if not ref_file.exists():
        return None
    return ref_file.read_text().strip()
89
+
90
def _set_ref(self, thread: str, sha: str):
    """Point a thread at `sha` and record the move in the thread's log.

    Overwrites `refs/threads/<thread>` with the SHA and appends a
    `<UTC ISO-8601 timestamp> <sha>` line to `logs/threads/<thread>`.
    Parent directories are created as needed, so thread names may contain
    `/`.
    """
    ref_file = self.path / 'refs' / 'threads' / thread
    log_file = self.path / 'logs' / 'threads' / thread

    ref_file.parent.mkdir(parents=True, exist_ok=True)
    ref_file.write_text(f'{sha}\n')

    log_file.parent.mkdir(parents=True, exist_ok=True)
    stamp = datetime.now(timezone.utc).isoformat()
    with log_file.open('a') as handle:
        handle.write(f'{stamp} {sha}\n')
99
+
100
+ # ── TOON_INDEX (staging area) ─────────────────────────────────────────────
101
+
102
def get_index(self) -> dict[str, str]:
    """Load TOON_INDEX as a `{slug: mnem_sha}` mapping.

    Blank lines and `#` comment lines are skipped, as is any line that
    does not consist of exactly two whitespace-separated fields. Returns
    an empty dict when the index file does not exist.
    """
    index_file = self.path / 'TOON_INDEX'
    if not index_file.exists():
        return {}

    staged: dict[str, str] = {}
    for raw in index_file.read_text().splitlines():
        fields = raw.split()
        # No fields means a blank line; a leading '#' marks a header/comment.
        if not fields or fields[0].startswith('#'):
            continue
        if len(fields) == 2:
            slug, sha = fields
            staged[slug] = sha
    return staged
116
+
117
def _write_index(self, index: dict[str, str]):
    """Write TOON_INDEX from a `{slug: mnem_sha}` mapping.

    The file starts with three `#` header lines (format version, current
    thread, current HEAD checkpoint or `none`) followed by one
    `<slug> <sha>` line per entry, sorted by slug.
    """
    thread = self.current_thread()
    head = self.head_sha() or 'none'
    header = [
        '# TOON_INDEX v1',
        f'# thread: {thread}',
        f'# checkpoint: {head}',
    ]
    entries = [f'{slug} {index[slug]}' for slug in sorted(index)]
    (self.path / 'TOON_INDEX').write_text('\n'.join(header + entries) + '\n')
128
+
129
def _rebuild_index(self):
    """Reset TOON_INDEX to the MindState of the HEAD checkpoint.

    Writes an empty index when the current thread has no HEAD. Whatever
    the index held before is overwritten.
    """
    head = self.head_sha()
    staged: dict[str, str] = {}
    if head:
        checkpoint = self.store.read_checkpoint(head)
        mindstate = self.store.read_mindstate(checkpoint.mindstate_sha)
        staged = {entry.slug: entry.mnem_sha for entry in mindstate.entries}
    self._write_index(staged)
138
+
139
+ # ── Mnemonic operations ───────────────────────────────────────────────────
140
+
141
def add(self, m: Mnemonic) -> str:
    """Store a mnemonic object and stage it under its slug.

    An existing index entry with the same slug is replaced.

    Returns:
        The SHA the object store assigned to the mnemonic.
    """
    sha = self.store.write_mnemonic(m)
    # Merge into the current index; the new SHA wins for an existing slug.
    self._write_index({**self.get_index(), m.slug: sha})
    return sha
148
+
149
def remove(self, slug: str) -> bool:
    """Unstage a slug from the index.

    Only the index entry is dropped; stored objects are left untouched.

    Returns:
        True when the slug was in the index, False otherwise (in which
        case the index file is not rewritten).
    """
    staged = self.get_index()
    try:
        del staged[slug]
    except KeyError:
        return False
    self._write_index(staged)
    return True
157
+
158
def get(self, slug: str) -> Optional[Mnemonic]:
    """Return the staged mnemonic for `slug`, or None when it is not indexed."""
    sha = self.get_index().get(slug)
    if not sha:
        return None
    return self.store.read_mnemonic(sha)
162
+
163
def list(self) -> list[Mnemonic]:
    """Return every staged mnemonic that can be read from the store.

    Best effort: index entries whose object cannot be read are skipped
    silently rather than aborting the listing.
    """
    loaded = []
    for sha in self.get_index().values():
        try:
            mnemonic = self.store.read_mnemonic(sha)
        except Exception:
            continue
        loaded.append(mnemonic)
    return loaded
172
+
173
+ # ── Commit ────────────────────────────────────────────────────────────────
174
+
175
def commit(self, message: str = None, trigger: str = 'explicit') -> Optional[str]:
    """Record the current index as a new checkpoint on the current thread.

    Args:
        message: Checkpoint message. When None, one is generated from the
            added / updated / removed slugs (up to three slugs per group).
        trigger: Stored on the checkpoint as its trigger.

    Returns:
        The new checkpoint SHA, or None when the index's MindState SHA
        equals that of the HEAD checkpoint (nothing to commit).
    """
    now = datetime.now(timezone.utc)
    staged = self.get_index()

    new_ms = MindState(
        timestamp=now,
        entries=[MindStateEntry(slug=slug, mnem_sha=sha) for slug, sha in staged.items()],
    )
    new_ms_sha = self.store.mindstate_sha(new_ms)

    # Compare against HEAD before writing anything.
    head = self.head_sha()
    if not head:
        old_ms = MindState(timestamp=now, entries=[])
    else:
        head_ck = self.store.read_checkpoint(head)
        if head_ck.mindstate_sha == new_ms_sha:
            return None  # nothing changed
        old_ms = self.store.read_mindstate(head_ck.mindstate_sha)

    self.store.write_mindstate(new_ms)

    # Slug-level diff between the previous and the new MindState.
    before = {entry.slug: entry.mnem_sha for entry in old_ms.entries}
    after = {entry.slug: entry.mnem_sha for entry in new_ms.entries}
    shared = [slug for slug in before if slug in after]
    diff = DiffSummary(
        added=[slug for slug in after if slug not in before],
        removed=[slug for slug in before if slug not in after],
        modified=[slug for slug in shared if before[slug] != after[slug]],
        unchanged=[slug for slug in shared if before[slug] == after[slug]],
    )

    if message is None:
        fragments = []
        groups = (
            ('Added', diff.added),
            ('Updated', diff.modified),
            ('Removed', diff.removed),
        )
        for label, slugs in groups:
            if not slugs:
                continue
            preview = ', '.join(slugs[:3])
            ellipsis = '...' if len(slugs) > 3 else ''
            fragments.append(f'{label} {len(slugs)}: {preview}{ellipsis}')
        message = '; '.join(fragments) or 'No changes'

    checkpoint = Checkpoint(
        mindstate_sha=new_ms_sha,
        timestamp=now,
        trigger=trigger,
        message=message,
        author=self._author(),
        session_id=str(uuid.uuid4()),
        parent_sha=head,
        diff_summary=diff,
    )
    ck_sha = self.store.write_checkpoint(checkpoint)
    self._set_ref(self.current_thread(), ck_sha)
    self._rebuild_index()
    return ck_sha
236
+
237
+ # ── History ───────────────────────────────────────────────────────────────
238
+
239
def log(self, limit: int = 10, thread: str = None) -> list[Checkpoint]:
    """Return the checkpoint chain of a thread, newest first.

    Follows `parent_sha` links starting at the thread's HEAD.

    Args:
        limit: Maximum number of checkpoints to return.
        thread: Thread to read; passed through to `head_sha`.

    Returns:
        Up to `limit` checkpoints. The walk also ends at a checkpoint
        without a parent, or at the first checkpoint that cannot be read.
    """
    history = []
    cursor = self.head_sha(thread)
    while len(history) < limit:
        if not cursor:
            break
        try:
            checkpoint = self.store.read_checkpoint(cursor)
        except Exception:
            # Unreadable object: return what was collected so far.
            break
        history.append(checkpoint)
        cursor = checkpoint.parent_sha
    return history
251
+
252
def diff(self, sha1: str = None, sha2: str = None) -> DiffSummary:
    """Summarize the slug-level difference between two checkpoints.

    Args:
        sha1: Base checkpoint. Defaults to the parent of `sha2`; when that
            checkpoint has no parent, the base is an empty MindState.
        sha2: Target checkpoint. Defaults to HEAD of the current thread.

    Returns:
        A DiffSummary listing added, removed, modified and unchanged
        slugs. It is empty when there is no target checkpoint at all.
    """
    sha2 = sha2 or self.head_sha()
    if sha2 is None:
        return DiffSummary()

    # Read the target once: it supplies both the default base (its parent)
    # and the new MindState.
    ck2 = self.store.read_checkpoint(sha2)
    if sha1 is None:
        sha1 = ck2.parent_sha

    if sha1 is None:
        old_entries = []  # root checkpoint: diff against an empty state
    else:
        ck1 = self.store.read_checkpoint(sha1)
        old_entries = self.store.read_mindstate(ck1.mindstate_sha).entries

    new_ms = self.store.read_mindstate(ck2.mindstate_sha)

    old_map = {e.slug: e.mnem_sha for e in old_entries}
    new_map = {e.slug: e.mnem_sha for e in new_ms.entries}

    return DiffSummary(
        added=[s for s in new_map if s not in old_map],
        removed=[s for s in old_map if s not in new_map],
        modified=[s for s in old_map if s in new_map and old_map[s] != new_map[s]],
        unchanged=[s for s in old_map if s in new_map and old_map[s] == new_map[s]],
    )
280
+
281
def diff_full(self, sha1: str = None, sha2: str = None) -> list[tuple[str, str, Optional[Mnemonic], Optional[Mnemonic]]]:
    """Return a detailed diff as a list of (slug, status, old_mnem, new_mnem).

    status: 'added' | 'removed' | 'modified' | 'unchanged'

    Args:
        sha1: Base checkpoint. Defaults to the parent of `sha2`; when that
            checkpoint has no parent, the base is an empty MindState.
        sha2: Target checkpoint. Defaults to HEAD of the current thread.

    Returns:
        One tuple per slug present on either side, sorted by slug. The
        mnemonic for a side on which the slug is absent is None. The list
        is empty when there is no target checkpoint at all.
    """
    sha2 = sha2 or self.head_sha()
    if sha2 is None:
        return []

    # Read the target once: it supplies both the default base (its parent)
    # and the new MindState.
    ck2 = self.store.read_checkpoint(sha2)
    if sha1 is None:
        sha1 = ck2.parent_sha

    if sha1 is None:
        old_entries = []  # root checkpoint: diff against an empty state
    else:
        ck1 = self.store.read_checkpoint(sha1)
        old_entries = self.store.read_mindstate(ck1.mindstate_sha).entries

    new_ms = self.store.read_mindstate(ck2.mindstate_sha)

    old_map = {e.slug: e.mnem_sha for e in old_entries}
    new_map = {e.slug: e.mnem_sha for e in new_ms.entries}

    result = []
    for slug in sorted(set(old_map) | set(new_map)):
        old_sha = old_map.get(slug)
        new_sha = new_map.get(slug)
        old_m = self.store.read_mnemonic(old_sha) if old_sha else None
        new_m = self.store.read_mnemonic(new_sha) if new_sha else None

        if old_sha is None:
            status = 'added'
        elif new_sha is None:
            status = 'removed'
        elif old_sha != new_sha:
            status = 'modified'
        else:
            status = 'unchanged'
        result.append((slug, status, old_m, new_m))

    return result
325
+
326
+ # ── Thread management ─────────────────────────────────────────────────────
327
+
328
def thread_create(self, name: str, description: str = '') -> Thread:
    """Create a new thread pointing at the current HEAD checkpoint.

    HEAD itself is not moved; use `thread_switch` to change threads.

    Args:
        name: Name of the new thread.
        description: Free-form text stored on the returned Thread.

    Returns:
        A Thread whose `created_at` is the HEAD checkpoint's timestamp.

    Raises:
        ValueError: If there is no HEAD checkpoint, the name is empty, or
            a thread with that name already exists.
    """
    head = self.head_sha()
    if head is None:
        raise ValueError('No HEAD checkpoint to branch from')
    if not name:
        raise ValueError('Thread name must not be empty')
    # Refuse to overwrite: resetting an existing ref would silently orphan
    # every checkpoint reachable only from that thread.
    if (self.path / 'refs' / 'threads' / name).exists():
        raise ValueError(f'Thread {name!r} already exists')
    self._set_ref(name, head)
    ck = self.store.read_checkpoint(head)
    return Thread(name=name, head_sha=head, created_at=ck.timestamp, description=description)
335
+
336
def thread_list(self) -> list[Thread]:
    """Return one Thread per ref file found under `refs/threads/`.

    Thread names are the ref paths relative to `refs/threads/`, so nested
    refs produce names containing a path separator. `created_at` is the
    timestamp of the checkpoint the ref points at; when building the
    thread from that checkpoint fails, the current UTC time is used.
    """
    refs_root = self.path / 'refs' / 'threads'
    threads = []
    for ref_file in refs_root.rglob('*'):
        if not ref_file.is_file():
            continue
        name = str(ref_file.relative_to(refs_root))
        sha = ref_file.read_text().strip()
        try:
            created = self.store.read_checkpoint(sha).timestamp
            thread = Thread(name=name, head_sha=sha, created_at=created)
        except Exception:
            thread = Thread(name=name, head_sha=sha, created_at=datetime.now(timezone.utc))
        threads.append(thread)
    return threads
349
+
350
def thread_switch(self, name: str):
    """Make `name` the current thread.

    Rewrites HEAD to `threads/<name>` and then rebuilds the index from
    that thread's HEAD checkpoint.

    Raises:
        ValueError: If `refs/threads/<name>` does not exist.
    """
    if not (self.path / 'refs' / 'threads' / name).exists():
        raise ValueError(f'Thread {name!r} does not exist')
    head_file = self.path / 'HEAD'
    head_file.write_text(f'threads/{name}\n')
    self._rebuild_index()
356
+
357
+ # ── Integrity ─────────────────────────────────────────────────────────────
358
+
359
def fsck(self, rebuild_index: bool = False) -> list[str]:
    """Check that indexed objects and the HEAD checkpoint exist in the store.

    Args:
        rebuild_index: When true, rebuild the index from HEAD after the
            checks have run.

    Returns:
        One message per missing object; an empty list when none is missing.
    """
    problems = [
        f'MISSING mnemonic object: {slug} → {sha[:8]}'
        for slug, sha in self.get_index().items()
        if not self.store.exists(sha)
    ]
    head = self.head_sha()
    if head and not self.store.exists(head):
        problems.append(f'MISSING HEAD checkpoint: {head[:8]}')
    if rebuild_index:
        self._rebuild_index()
    return problems
372
+
373
+ # ── Flat memories/ directory (git-native sync) ────────────────────────────
374
+
375
@property
def memories_dir(self) -> Path:
    """Path of the flat `memories/` directory, a sibling of `.memgit/`."""
    project_root = self.path.parent
    return project_root / 'memories'
378
+
379
def write_flat(self):
    """Write every indexed memory as a readable .toon file under memories/.

    This is the git sync surface — users can `git push` this directory to
    share memories across machines and teammates. Each file is human-readable
    and diffable with standard git tools.

    Export is best effort: a memory that cannot be read or serialized is
    skipped and any file already exported for it is left in place. Only
    files whose slug is no longer in the index are deleted.
    """
    from .toon import serialize_mnemonic
    mdir = self.memories_dir
    mdir.mkdir(exist_ok=True)

    index = self.get_index()

    for slug, sha in index.items():
        try:
            m = self.store.read_mnemonic(sha)
            toon_text = serialize_mnemonic(m)
            (mdir / f'{slug}.toon').write_text(toon_text + '\n')
        except Exception:
            # Keep whatever is already on disk for this slug.
            continue

    # Remove stale files for deleted memories. Staleness is decided by the
    # index alone: a memory that merely failed to export above must not
    # lose its last good flat file.
    for f in mdir.glob('*.toon'):
        if f.stem not in index:
            f.unlink()
406
+
407
def import_flat(self) -> int:
    """Stage every mnemonic found in the flat `memories/*.toon` files.

    Used after a `git pull` to absorb teammate memory updates. Files are
    processed in sorted order; a file that fails to read, parse or stage
    is abandoned at the point of failure and the next file is tried.

    Returns:
        The number of mnemonics passed to `add` (0 when `memories/` does
        not exist).
    """
    from .toon import parse_toon
    from .models import Mnemonic as MnemType
    mdir = self.memories_dir
    if not mdir.exists():
        return 0

    imported = 0
    for toon_file in sorted(mdir.glob('*.toon')):
        try:
            parsed_objects = parse_toon(toon_file.read_text())
            for parsed in parsed_objects:
                if not isinstance(parsed, MnemType):
                    continue
                self.add(parsed)
                imported += 1
        except Exception:
            continue
    return imported
429
+
430
+ # ── Git integration ───────────────────────────────────────────────────────
431
+
432
def git_init(self) -> bool:
    """Prepare the store root (parent of `.memgit/`) as a git repository.

    Writes a `.gitignore` that excludes `.memgit/objects/`, `.memgit/logs/`
    and `*.pyc` unless one already exists, then runs `git init` unless a
    `.git` entry is already present.

    Returns:
        True on success, False when running `git init` fails.
    """
    store_root = self.path.parent
    ignore_file = store_root / '.gitignore'
    if not ignore_file.exists():
        ignore_file.write_text(''.join([
            '# memgit object blobs — large and redundant with memories/\n',
            '.memgit/objects/\n',
            '.memgit/logs/\n',
            '*.pyc\n',
        ]))

    git_dir = store_root / '.git'
    try:
        if not git_dir.exists():
            subprocess.run(['git', 'init'], cwd=store_root, check=True,
                           capture_output=True)
    except Exception:
        return False
    return True
453
+
454
def git_status(self) -> Optional[str]:
    """Return `git status --short` output for the store root.

    Returns:
        The stripped output, or None when the store root has no `.git`
        entry or the git command fails.
    """
    store_root = self.path.parent
    if not (store_root / '.git').exists():
        return None
    try:
        completed = subprocess.run(
            ['git', 'status', '--short'],
            cwd=store_root, capture_output=True, text=True, check=True,
        )
    except Exception:
        return None
    return completed.stdout.strip()
465
+
466
def git_push(self, remote: str = 'origin', branch: str = 'main',
             message: str = None) -> tuple[bool, str]:
    """Write flat files then `git add + commit + push`.

    Stages `memories/` and `.memgit/refs/`, commits when anything is
    staged, and pushes with `-u` to `remote`/`branch`.

    Args:
        remote: Git remote to push to.
        branch: Branch name to push.
        message: Commit message; defaults to one naming the first eight
            characters of the current HEAD checkpoint SHA.

    Returns:
        (success, output_message). Failures of the git commands, including
        git not being runnable at all, are reported as (False, reason)
        rather than raised.
    """
    store_root = self.path.parent
    if not (store_root / '.git').exists():
        return False, 'Not a git repo — run `memgit git init` first'
    self.write_flat()
    head_sha = self.head_sha() or 'none'
    commit_msg = message or f'memgit: checkpoint {head_sha[:8]}'

    def run_git(*args: str, check: bool = True):
        return subprocess.run(['git', *args], cwd=store_root, check=check,
                              capture_output=True)

    try:
        run_git('add', 'memories/', '.memgit/refs/')
        # Exit status 0 from `diff --cached --quiet` means nothing is staged.
        # NOTE(review): this returns before pushing, so a commit left
        # unpushed by an earlier failed push is not retried — confirm
        # that is intended.
        staged = run_git('diff', '--cached', '--quiet', check=False)
        if staged.returncode == 0:
            return True, 'Nothing to push (no changes since last git commit)'
        run_git('commit', '-m', commit_msg)
        run_git('push', '-u', remote, branch)
    except subprocess.CalledProcessError as e:
        return False, e.stderr.decode(errors='replace') if e.stderr else str(e)
    except OSError as e:
        # git is missing or cannot be executed; report it like any other
        # failure instead of letting the exception escape.
        return False, str(e)
    return True, f'Pushed to {remote}/{branch}'
494
+
495
def git_pull(self, remote: str = 'origin', branch: str = 'main') -> tuple[bool, str, int]:
    """Pull from git remote then import flat files.

    After a successful `git pull`, every mnemonic found under `memories/`
    is staged and, when at least one was staged, a checkpoint is committed.

    Args:
        remote: Git remote to pull from.
        branch: Branch name to pull.

    Returns:
        (success, message, memories_imported). The count is the number of
        mnemonics `import_flat` staged, not only those that changed.
        Failures of the git command, including git not being runnable at
        all, are reported as (False, reason, 0) rather than raised.
    """
    store_root = self.path.parent
    if not (store_root / '.git').exists():
        return False, 'Not a git repo', 0
    try:
        subprocess.run(['git', 'pull', remote, branch], cwd=store_root,
                       check=True, capture_output=True)
    except subprocess.CalledProcessError as e:
        return False, e.stderr.decode(errors='replace') if e.stderr else str(e), 0
    except OSError as e:
        # git is missing or cannot be executed.
        return False, str(e), 0

    count = self.import_flat()
    if count > 0:
        # commit() returns None when the imported state equals HEAD; the
        # result is not needed here.
        self.commit(f'pull: imported {count} memories from {remote}/{branch}')
        return True, f'Pulled {count} memories', count
    return True, 'Already up to date', 0
513
+
514
+ # ── Squash (scale to 10k+ commits) ───────────────────────────────────────
515
+
516
def squash(
    self,
    keep_last: int = None,
    older_than_days: int = None,
    dry_run: bool = False,
) -> dict:
    """Squash old checkpoints into a single baseline checkpoint.

    Like `git rebase -i --autosquash`, but for memory history. Keeps the
    full current memory state; collapses old checkpoint metadata. Only the
    current thread's ref is moved. Histories with fewer than three
    checkpoints are left alone.

    Args:
        keep_last: Keep this many recent checkpoints; squash the rest.
            0 collapses the whole history into the baseline.
        older_than_days: Squash all checkpoints older than N days. Used
            only when `keep_last` is None.
        dry_run: Preview only, no changes.

        With neither `keep_last` nor `older_than_days`, the older half of
        the history is squashed.

    Returns:
        dict with squash summary: `kept`, `squashed`, `dry_run`, and — when
        something is squashed — `baseline_sha`, `baseline_ts`, plus
        `new_head` after a real run.

    Raises:
        ValueError: If `keep_last` is negative.
    """
    if keep_last is not None and keep_last < 0:
        # A negative value would slice from the wrong end of the history.
        raise ValueError('keep_last must be >= 0')

    all_cks = self.log(limit=10_000)  # newest first
    if len(all_cks) < 3:
        return {'squashed': 0, 'kept': len(all_cks), 'dry_run': dry_run}

    # Determine the cut point: index of the newest checkpoint to squash.
    now = datetime.now(timezone.utc)
    cut_idx = None

    if keep_last is not None:
        cut_idx = keep_last
    elif older_than_days is not None:
        cutoff = now - timedelta(days=older_than_days)
        cut_idx = next(
            (i for i, ck in enumerate(all_cks) if ck.timestamp < cutoff),
            None,
        )
    else:
        cut_idx = max(1, len(all_cks) // 2)  # default: halve history

    if cut_idx is None or cut_idx >= len(all_cks):
        return {'squashed': 0, 'kept': len(all_cks), 'dry_run': dry_run}

    kept_cks = all_cks[:cut_idx]      # newest N checkpoints (may be empty)
    squashed_cks = all_cks[cut_idx:]  # older ones, collapse to one baseline

    # The list is newest-first, so this is the NEWEST squashed checkpoint:
    # its MindState is the memory state right before the oldest kept one.
    baseline_ck = squashed_cks[0]

    summary = {
        'kept': len(kept_cks),
        'squashed': len(squashed_cks),
        'baseline_sha': baseline_ck.sha[:8] if baseline_ck.sha else '?',
        'baseline_ts': baseline_ck.timestamp.strftime('%Y-%m-%d'),
        'dry_run': dry_run,
    }

    if dry_run:
        return summary

    def write_copy(ck, parent_sha, message):
        """Write a copy of `ck` with a new parent and message; return its SHA."""
        return self.store.write_checkpoint(Checkpoint(
            mindstate_sha=ck.mindstate_sha,
            timestamp=ck.timestamp,
            trigger=ck.trigger,
            message=message,
            author=ck.author,
            session_id=ck.session_id,
            parent_sha=parent_sha,
            diff_summary=ck.diff_summary,
        ))

    # Step 1: Write a new parentless "squash root" carrying the baseline's
    # MindState, so the memory content at that point in time is preserved.
    squash_root_sha = self.store.write_checkpoint(Checkpoint(
        mindstate_sha=baseline_ck.mindstate_sha,
        timestamp=baseline_ck.timestamp,
        trigger='squash',
        message=f'squash root: {len(squashed_cks)} older checkpoints collapsed',
        author=baseline_ck.author,
        session_id=baseline_ck.session_id,
        parent_sha=None,  # no parent — this is the new root
        diff_summary=DiffSummary(),
    ))

    # Steps 2-3: Rewrite the kept chain oldest-first. The oldest kept
    # checkpoint is re-parented onto the squash root; every newer one is
    # re-parented onto the rewritten copy of its original parent.
    remap: dict[str, str] = {}
    for position, ck in enumerate(reversed(kept_cks)):
        if position == 0:
            parent = squash_root_sha
            message = f'(squashed {len(squashed_cks)} older checkpoints) {ck.message}'
        else:
            parent = remap.get(ck.parent_sha, ck.parent_sha)
            message = ck.message
        remap[ck.sha] = write_copy(ck, parent, message)

    # Step 4: Update HEAD. With nothing kept, the squash root is the head.
    if kept_cks:
        new_head_sha = remap.get(kept_cks[0].sha, kept_cks[0].sha)
    else:
        new_head_sha = squash_root_sha
    self._set_ref(self.current_thread(), new_head_sha)
    self._rebuild_index()

    summary['new_head'] = new_head_sha[:8]
    return summary
625
+
626
+ # ── Stats ─────────────────────────────────────────────────────────────────
627
+
628
def stats(self) -> dict:
    """Compute token-savings statistics for the memory store.

    Compares the token count of all memories against the average token
    count of the top-8 search results for a fixed set of sample queries.
    The weekly savings figures multiply that difference by 10 (sessions
    per week).

    Returns:
        `{'total': 0}` when no memory is readable; otherwise a dict with
        memory counts by type and priority, the token figures, the savings
        figures, and the checkpoint count with first/last timestamps.
    """
    from .tokens import all_memories_tokens, memory_tokens, token_cost_usd
    from .scorer import score as bm25_score

    mnemonics = self.list()
    if not mnemonics:
        return {'total': 0}

    full_tokens = all_memories_tokens(mnemonics)
    # `mnemonics` is non-empty past the guard above.
    avg_mem_tokens = full_tokens / len(mnemonics)

    # Simulate a typical search: top-8 results
    sample_queries = [
        'how should I approach this', 'project rules and conventions',
        'user preferences', 'what to avoid', 'lessons learned',
    ]
    per_query_tokens = []
    for query in sample_queries:
        hits = bm25_score(query, mnemonics, top_k=8)
        per_query_tokens.append(sum(memory_tokens(hit.mnemonic) for hit in hits))
    avg_search_tokens = round(sum(per_query_tokens) / len(per_query_tokens))

    critical_tokens = sum(memory_tokens(m) for m in mnemonics if m.priority == 3)

    by_type: dict[str, int] = {}
    for m in mnemonics:
        by_type[m.type_code] = by_type.get(m.type_code, 0) + 1

    checkpoints = self.log(limit=10_000)

    priority_counts = {
        level: sum(1 for m in mnemonics if m.priority == level)
        for level in (3, 2, 1)
    }
    weekly_saved = (full_tokens - avg_search_tokens) * 10  # 10 sessions/week
    if full_tokens:
        reduction_pct = round(100 * (1 - avg_search_tokens / full_tokens))
    else:
        reduction_pct = 0

    return {
        'total': len(mnemonics),
        'by_type': by_type,
        'priority_counts': priority_counts,
        'full_tokens': full_tokens,
        'avg_mem_tokens': round(avg_mem_tokens),
        'avg_search_tokens': avg_search_tokens,
        'critical_tokens': critical_tokens,
        'reduction_pct': reduction_pct,
        'weekly_savings_tokens': weekly_saved,
        'weekly_savings_usd': round(token_cost_usd(weekly_saved), 4),
        'checkpoint_count': len(checkpoints),
        'first_checkpoint_ts': checkpoints[-1].timestamp if checkpoints else None,
        'last_checkpoint_ts': checkpoints[0].timestamp if checkpoints else None,
    }
680
+
681
+ # ── Internal ──────────────────────────────────────────────────────────────
682
+
683
def _author(self) -> str:
    """Return the author name to record on new checkpoints.

    Uses `core.author` from the repository config; falls back to the
    environment-derived name when the key is absent or the config cannot
    be read.
    """
    try:
        core = _read_config(self.path / 'config').get('core', {})
        return core.get('author', _env_author())
    except Exception:
        return _env_author()
689
+
690
+
691
+ # ── Config helpers ────────────────────────────────────────────────────────────
692
+
693
def _env_author() -> str:
    """Return the user name from the environment.

    Prefers `USER`, then `USERNAME`, and falls back to `'unknown'` when
    neither variable is set.
    """
    env = os.environ
    if 'USER' in env:
        return env['USER']
    return env.get('USERNAME', 'unknown')
695
+
696
+
697
+ def _write_config(path: Path, data: dict):
698
+ lines = []
699
+ for section, values in data.items():
700
+ lines.append(f'[{section}]')
701
+ for k, v in values.items():
702
+ if isinstance(v, str):
703
+ lines.append(f'{k} = "{v}"')
704
+ elif isinstance(v, (int, float)):
705
+ lines.append(f'{k} = {v}')
706
+ elif isinstance(v, bool):
707
+ lines.append(f'{k} = {"true" if v else "false"}')
708
+ lines.append('')
709
+ path.write_text('\n'.join(lines))
710
+
711
+
712
+ def _read_config(path: Path) -> dict:
713
+ with open(path, 'rb') as f:
714
+ return tomllib.load(f)