flurryx-code-memory 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. code_memory/__init__.py +1 -0
  2. code_memory/claims/__init__.py +32 -0
  3. code_memory/claims/extractor.py +325 -0
  4. code_memory/claims/indexer.py +258 -0
  5. code_memory/claims/resolver.py +186 -0
  6. code_memory/claims/store.py +424 -0
  7. code_memory/cli.py +1192 -0
  8. code_memory/config.py +268 -0
  9. code_memory/embed/__init__.py +224 -0
  10. code_memory/embed/cache.py +204 -0
  11. code_memory/embed/m3.py +174 -0
  12. code_memory/embed/ollama.py +92 -0
  13. code_memory/embed/tei.py +106 -0
  14. code_memory/episodic/__init__.py +3 -0
  15. code_memory/episodic/sqlite_store.py +278 -0
  16. code_memory/extractor/__init__.py +3 -0
  17. code_memory/extractor/csproj.py +166 -0
  18. code_memory/extractor/dll.py +385 -0
  19. code_memory/extractor/gitignore.py +162 -0
  20. code_memory/extractor/nuget.py +275 -0
  21. code_memory/extractor/sanity.py +124 -0
  22. code_memory/extractor/sln.py +108 -0
  23. code_memory/extractor/treesitter.py +1172 -0
  24. code_memory/graph/__init__.py +3 -0
  25. code_memory/graph/falkor_store.py +740 -0
  26. code_memory/mcp_server.py +1816 -0
  27. code_memory/metrics.py +260 -0
  28. code_memory/orchestrator/__init__.py +13 -0
  29. code_memory/orchestrator/git_delta.py +211 -0
  30. code_memory/orchestrator/ingest_state.py +71 -0
  31. code_memory/orchestrator/pipeline.py +1478 -0
  32. code_memory/orchestrator/reset.py +130 -0
  33. code_memory/orchestrator/resolver.py +825 -0
  34. code_memory/orchestrator/retrieve.py +505 -0
  35. code_memory/resilience.py +73 -0
  36. code_memory/sync/__init__.py +20 -0
  37. code_memory/sync/autostart/__init__.py +42 -0
  38. code_memory/sync/autostart/base.py +106 -0
  39. code_memory/sync/autostart/launchd.py +115 -0
  40. code_memory/sync/autostart/schtasks.py +155 -0
  41. code_memory/sync/autostart/systemd.py +113 -0
  42. code_memory/sync/hooks.py +164 -0
  43. code_memory/sync/safety.py +65 -0
  44. code_memory/sync/snapshot.py +461 -0
  45. code_memory/sync/store.py +399 -0
  46. code_memory/sync/sync.py +405 -0
  47. code_memory/sync/watcher.py +320 -0
  48. code_memory/vector/__init__.py +3 -0
  49. code_memory/vector/qdrant_store.py +302 -0
  50. flurryx_code_memory-0.4.0.dist-info/METADATA +26 -0
  51. flurryx_code_memory-0.4.0.dist-info/RECORD +53 -0
  52. flurryx_code_memory-0.4.0.dist-info/WHEEL +4 -0
  53. flurryx_code_memory-0.4.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,399 @@
1
+ """Snapshot storage backend: orphan git branch ``codemem-snapshots``.
2
+
3
+ Layout on the branch::
4
+
5
+ snapshots/<sha>.cmsnap # one tar.gz blob per ingested commit
6
+ manifests/<sha>.json # mirror of the snapshot manifest (cheap lookup)
7
+ index.json # { sha: {created_at, size, parent_sha?, ...} }
8
+
9
+ The branch has no shared history with ``main``; it is pure storage. Any
10
+ contributor can publish; content-addressing by SHA makes concurrent
11
+ pushes for the same commit converge (identical blob = no-op).
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import json
17
+ import subprocess
18
+ import tempfile
19
+ from dataclasses import dataclass
20
+ from pathlib import Path
21
+ from typing import Iterable
22
+
23
+ DEFAULT_BRANCH = "codemem-snapshots"
24
+
25
+
26
class StoreError(RuntimeError):
    """Raised when a snapshot-store operation or an underlying git call fails."""
28
+
29
+
30
+ def _git(repo: Path, *args: str, check: bool = True, timeout: float = 60.0) -> str:
31
+ out = subprocess.run(
32
+ ["git", "-C", str(repo), *args],
33
+ capture_output=True,
34
+ text=True,
35
+ check=False,
36
+ timeout=timeout,
37
+ )
38
+ if check and out.returncode != 0:
39
+ raise StoreError(
40
+ f"git {' '.join(args)} failed (exit {out.returncode}): {out.stderr.strip()}"
41
+ )
42
+ return out.stdout
43
+
44
+
45
@dataclass(frozen=True)
class StoreEntry:
    """Immutable summary of one snapshot, built from an ``index.json`` record."""

    # Key of the record in index.json (the snapshot's SHA).
    sha: str
    # Value of the record's "size" field; 0 when absent.
    size: int
    # Value of the record's "created_at" field; 0.0 when absent.
    created_at: float
50
+
51
+
52
class SnapshotStore:
    """Git-backed snapshot storage (no external infra).

    Snapshots live on a dedicated branch of the repository. Commits on that
    branch are assembled with git plumbing (``hash-object``, ``mktree``,
    ``commit-tree``, ``update-ref``) rather than through a checkout.

    Operations:
    - ``fetch()`` — fetch the snapshot branch from the remote
    - ``has(sha)`` — check existence (local branch first, then remote-tracking ref)
    - ``read(sha) -> bytes`` — extract blob bytes
    - ``write(sha, data)`` — write blob, commit, push (best-effort)
    - ``list_local() / list_remote()``
    - ``gc(keep_last)`` — prune old snapshots locally + remote
    """

    def __init__(
        self,
        repo_root: Path,
        *,
        branch: str = DEFAULT_BRANCH,
        remote: str = "origin",
    ) -> None:
        """Bind the store to ``repo_root``.

        Raises:
            StoreError: If ``repo_root`` has no ``.git`` entry.
        """
        self.repo = Path(repo_root).resolve()
        self.branch = branch
        self.remote = remote
        if not (self.repo / ".git").exists():
            raise StoreError(f"not a git repo: {self.repo}")

    # ------------------------------------------------------------------
    # Read side
    # ------------------------------------------------------------------

    def fetch(self) -> bool:
        """Fetch the snapshot branch from remote.

        Returns False if the remote is not configured or the fetch fails
        (for instance because the remote does not have the branch yet).
        """
        if not self._has_remote():
            return False
        try:
            _git(
                self.repo,
                "fetch",
                self.remote,
                f"+refs/heads/{self.branch}:refs/remotes/{self.remote}/{self.branch}",
                check=True,
            )
        except (StoreError, subprocess.TimeoutExpired):
            # remote may not have the branch yet — that's not an error
            return False
        return True

    def has(self, sha: str) -> bool:
        """Return True if a snapshot blob for ``sha`` exists locally or on the remote-tracking ref."""
        return self._blob_oid(sha) is not None

    def read(self, sha: str) -> bytes:
        """Return the raw snapshot bytes stored for ``sha``.

        Raises:
            StoreError: If the snapshot does not exist or git cannot read it.
        """
        oid = self._blob_oid(sha)
        if oid is None:
            raise StoreError(f"snapshot {sha} not found in {self.branch}")
        return self._git_bytes("cat-file", "blob", oid)

    def list_local(self) -> list[StoreEntry]:
        """List the snapshots recorded in the local branch's ``index.json``."""
        return self._list_at(self._local_ref())

    def list_remote(self) -> list[StoreEntry]:
        """List the snapshots recorded in the remote-tracking ref's ``index.json``."""
        return self._list_at(self._remote_ref())

    # ------------------------------------------------------------------
    # Write side
    # ------------------------------------------------------------------

    def write(
        self,
        sha: str,
        blob: bytes,
        *,
        manifest: dict[str, object] | None = None,
        message: str | None = None,
        push: bool = True,
    ) -> bool:
        """Add ``blob`` for ``sha`` to the snapshot branch.

        If the SHA already exists with identical content, this is a no-op
        (returns False). Otherwise it commits and (optionally) pushes.
        Pushing is best-effort: a failed push never raises, the commit simply
        stays local. Returns True iff a new commit was created.

        Raises:
            StoreError: If building or recording the local commit fails.
        """
        new_blob_oid = self._hash_object(blob)
        # Object ids are content hashes, so equal ids mean identical bytes;
        # this avoids reading the (potentially large) stored blob back.
        if self._blob_oid(sha) == new_blob_oid:
            return False

        manifest_oid = None if manifest is None else self._hash_json(manifest)

        parent_commit = self._local_commit() or self._remote_commit()
        index_entries: dict[str, dict[str, object]] = {}
        tree_entries: dict[str, tuple[str, str, str]] = {}
        if parent_commit:
            index_entries = self._read_index(parent_commit)
            tree_entries = self._tree_entries(parent_commit)

        index_entries[sha] = {
            "size": len(blob),
            "created_at": _now(),
        }

        # Build a tree with all existing entries + new blob/manifest
        tree_entries[f"snapshots/{sha}.cmsnap"] = ("100644", "blob", new_blob_oid)
        if manifest_oid:
            tree_entries[f"manifests/{sha}.json"] = ("100644", "blob", manifest_oid)
        tree_entries["index.json"] = ("100644", "blob", self._hash_json(index_entries))
        tree_oid = self._mktree(tree_entries)

        commit_args = ["commit-tree", tree_oid]
        if parent_commit:
            commit_args += ["-p", parent_commit]
        commit_args += ["-m", message or f"codememory: add snapshot {sha[:12]}"]
        commit_oid = _git(self.repo, *commit_args).strip()

        _git(self.repo, "update-ref", f"refs/heads/{self.branch}", commit_oid)
        if push and self._has_remote():
            self._push_best_effort()
        return True

    def gc(self, keep_last: int, *, push: bool = True) -> int:
        """Drop all but the ``keep_last`` most recent snapshots.

        Only the tip tree and ``index.json`` are pruned; the new commit keeps
        the previous tip as its parent.

        Returns:
            The number of snapshots (distinct SHAs) removed.

        Raises:
            ValueError: If ``keep_last`` is negative.
        """
        if keep_last < 0:
            # A negative slice bound would silently keep the wrong entries.
            raise ValueError(f"keep_last must be >= 0, got {keep_last}")
        entries = sorted(self.list_local(), key=lambda e: e.created_at, reverse=True)
        if len(entries) <= keep_last:
            return 0
        keep = {e.sha for e in entries[:keep_last]}
        parent_commit = self._local_commit()
        if parent_commit is None:
            return 0
        tree_entries = self._tree_entries(parent_commit)
        dropped: set[str] = set()
        for path in list(tree_entries):
            if not path.startswith(("snapshots/", "manifests/")):
                continue
            sha = Path(path).stem
            if sha not in keep:
                del tree_entries[path]
                # A snapshot and its manifest share one SHA: count it once.
                dropped.add(sha)
        if not dropped:
            return 0
        index_entries = {
            k: v for k, v in self._read_index(parent_commit).items() if k in keep
        }
        tree_entries["index.json"] = ("100644", "blob", self._hash_json(index_entries))
        tree_oid = self._mktree(tree_entries)
        commit_oid = _git(
            self.repo,
            "commit-tree",
            tree_oid,
            "-p",
            parent_commit,
            "-m",
            f"codememory: gc keep_last={keep_last}",
        ).strip()
        _git(self.repo, "update-ref", f"refs/heads/{self.branch}", commit_oid)
        if push and self._has_remote():
            # NOTE(review): if the local branch has diverged from the
            # remote-tracking ref, this forced push replaces the remote tip
            # with the pruned local tree — confirm that is intended.
            try:
                _git(
                    self.repo,
                    "push",
                    self.remote,
                    f"refs/heads/{self.branch}:refs/heads/{self.branch}",
                    "--force-with-lease",
                )
            except (StoreError, subprocess.TimeoutExpired):
                # Remote pruning is best-effort; the local prune already happened.
                pass
        return len(dropped)

    # ------------------------------------------------------------------
    # Internals
    # ------------------------------------------------------------------

    def _push_best_effort(self) -> None:
        """Push the local branch; on rejection merge the remote tip and retry once.

        Never raises: a snapshot that cannot be published stays on the local
        branch. The retry is a plain (non-forced) push so that snapshots
        published by others are never overwritten.
        """
        refspec = f"refs/heads/{self.branch}:refs/heads/{self.branch}"
        try:
            _git(self.repo, "push", self.remote, refspec, check=True)
            return
        except (StoreError, subprocess.TimeoutExpired):
            pass
        try:
            # remote moved (or the push failed transiently): refetch, fold the
            # remote tip into the local branch, then try once more.
            if self.fetch():
                self._merge_remote_into_local()
            _git(self.repo, "push", self.remote, refspec, check=False)
        except (StoreError, subprocess.TimeoutExpired):
            pass

    def _merge_remote_into_local(self) -> None:
        """Create a merge commit combining the remote-tracking tip with the local tip.

        The merged tree is the remote tree overlaid with the local tree (local
        wins on conflicting paths) and ``index.json`` is merged the same way.
        No-op when the remote tip is already contained in local history.
        """
        local_commit = self._local_commit()
        remote_commit = self._remote_commit()
        if local_commit is None or remote_commit is None or local_commit == remote_commit:
            return
        # Commits reachable from the remote tip but not from the local tip;
        # empty output means there is nothing on the remote we lack.
        unseen = _git(
            self.repo,
            "rev-list",
            "--max-count=1",
            remote_commit,
            f"^{local_commit}",
            check=False,
        ).strip()
        if not unseen:
            return
        merged_tree = self._tree_entries(remote_commit)
        merged_tree.update(self._tree_entries(local_commit))
        merged_index = self._read_index(remote_commit)
        merged_index.update(self._read_index(local_commit))
        merged_tree["index.json"] = ("100644", "blob", self._hash_json(merged_index))
        tree_oid = self._mktree(merged_tree)
        # Having the remote tip as a parent makes the follow-up push a fast-forward.
        merge_oid = _git(
            self.repo,
            "commit-tree",
            tree_oid,
            "-p",
            remote_commit,
            "-p",
            local_commit,
            "-m",
            f"codememory: merge {self.remote}/{self.branch}",
        ).strip()
        _git(self.repo, "update-ref", f"refs/heads/{self.branch}", merge_oid)

    def _git_bytes(self, *args: str, stdin: bytes | None = None) -> bytes:
        """Run git in binary mode and return raw stdout; raise ``StoreError`` on failure."""
        label = f"git {' '.join(args)}"
        try:
            proc = subprocess.run(
                ["git", "-C", str(self.repo), *args],
                input=stdin,
                capture_output=True,
                check=False,
            )
        except OSError as err:
            raise StoreError(f"{label} could not be started: {err}") from err
        if proc.returncode != 0:
            detail = proc.stderr.decode(errors="replace").strip()
            raise StoreError(f"{label} failed (exit {proc.returncode}): {detail}")
        return proc.stdout

    def _has_remote(self) -> bool:
        """Return True if ``self.remote`` is among the repository's configured remotes."""
        out = _git(self.repo, "remote", check=False).strip().splitlines()
        return self.remote in out

    def _local_ref(self) -> str | None:
        """Resolve the local snapshot branch to a commit id, or None if it does not exist."""
        out = _git(
            self.repo, "rev-parse", "--verify", f"refs/heads/{self.branch}", check=False
        ).strip()
        return out or None

    def _remote_ref(self) -> str | None:
        """Resolve the remote-tracking snapshot ref to a commit id, or None if absent."""
        out = _git(
            self.repo,
            "rev-parse",
            "--verify",
            f"refs/remotes/{self.remote}/{self.branch}",
            check=False,
        ).strip()
        return out or None

    def _local_commit(self) -> str | None:
        """Alias of ``_local_ref``."""
        return self._local_ref()

    def _remote_commit(self) -> str | None:
        """Alias of ``_remote_ref``."""
        return self._remote_ref()

    def _blob_oid(self, sha: str) -> str | None:
        """Return the blob id of ``snapshots/<sha>.cmsnap``, preferring the local branch."""
        for ref_fn in (self._local_ref, self._remote_ref):
            ref = ref_fn()
            if ref is None:
                continue
            oid = self._lookup(ref, f"snapshots/{sha}.cmsnap")
            if oid:
                return oid
        return None

    def _lookup(self, ref: str, path: str) -> str | None:
        """Return the object id of ``path`` in ``ref``'s tree, or None if not present."""
        out = _git(self.repo, "ls-tree", ref, path, check=False).strip()
        if not out:
            return None
        # ls-tree line format: "<mode> <type> <oid>\t<path>"
        parts = out.split()
        if len(parts) < 3:
            return None
        return parts[2]

    def _tree_entries(self, commit: str) -> dict[str, tuple[str, str, str]]:
        """Return every file in ``commit``'s tree as ``path -> (mode, type, oid)``.

        Raises:
            StoreError: If the tree cannot be listed. Failing loudly matters
                here: an empty result would make the next commit drop every
                existing snapshot.
        """
        out = _git(self.repo, "ls-tree", "-r", commit, check=True)
        entries: dict[str, tuple[str, str, str]] = {}
        for line in out.splitlines():
            if not line:
                continue
            meta, name = line.split("\t", 1)
            mode, otype, oid = meta.split()
            entries[name] = (mode, otype, oid)
        return entries

    def _read_index(self, commit: str) -> dict[str, dict[str, object]]:
        """Load ``index.json`` from ``commit``; missing or malformed content yields ``{}``.

        Raises:
            StoreError: If the index blob exists but git cannot read it.
        """
        oid = self._lookup(commit, "index.json")
        if not oid:
            return {}
        raw = self._git_bytes("cat-file", "blob", oid)
        try:
            data = json.loads(raw.decode() or "{}")
        except ValueError:
            # Covers both json.JSONDecodeError and UnicodeDecodeError.
            return {}
        if not isinstance(data, dict):
            return {}
        return data  # type: ignore[return-value]

    def _list_at(self, ref: str | None) -> list[StoreEntry]:
        """Turn the ``index.json`` at ``ref`` into ``StoreEntry`` objects.

        Records that are not dicts, or whose fields are missing or not
        convertible to numbers, fall back to ``size=0`` / ``created_at=0.0``.
        """
        if ref is None:
            return []
        entries: list[StoreEntry] = []
        for sha, meta in self._read_index(ref).items():
            if not isinstance(meta, dict):
                entries.append(StoreEntry(sha=sha, size=0, created_at=0.0))
                continue
            raw_size = meta.get("size", 0)
            raw_ts = meta.get("created_at", 0.0)
            try:
                size = int(raw_size) if isinstance(raw_size, (int, float, str)) else 0
            except (ValueError, OverflowError):
                size = 0
            try:
                ts = float(raw_ts) if isinstance(raw_ts, (int, float, str)) else 0.0
            except (ValueError, OverflowError):
                ts = 0.0
            entries.append(StoreEntry(sha=sha, size=size, created_at=ts))
        return entries

    def _hash_object(self, blob: bytes) -> str:
        """Store ``blob`` in the object database and return its object id.

        The bytes are fed through stdin so git records them verbatim; hashing
        a file path instead would let attribute/EOL filters rewrite the
        content.
        """
        return self._git_bytes("hash-object", "-w", "--stdin", stdin=blob).decode().strip()

    def _hash_json(self, payload: object) -> str:
        """Serialise ``payload`` as sorted, indented JSON and store it as a blob."""
        return self._hash_object(
            json.dumps(payload, sort_keys=True, indent=2).encode()
        )

    def _mktree(self, entries: dict[str, tuple[str, str, str]]) -> str:
        """Build a (possibly nested) tree from flat path -> (mode, type, oid)."""
        try:
            return _build_tree(self.repo, entries)
        except subprocess.CalledProcessError as err:
            # Keep the store's single error type for callers.
            raise StoreError(f"git mktree failed: {err}") from err
355
+
356
+
357
+ def _build_tree(repo: Path, entries: dict[str, tuple[str, str, str]]) -> str:
358
+ """Recursively materialise a tree object from flat entries."""
359
+ grouped: dict[str, dict[str, tuple[str, str, str]]] = {"": {}}
360
+ for path, meta in entries.items():
361
+ parts = path.split("/")
362
+ if len(parts) == 1:
363
+ grouped[""][parts[0]] = meta
364
+ else:
365
+ sub = parts[0]
366
+ rest = "/".join(parts[1:])
367
+ grouped.setdefault(sub, {})[rest] = meta
368
+
369
+ # Build subtrees recursively
370
+ leaf_lines: list[str] = []
371
+ for name, meta in grouped[""].items():
372
+ mode, otype, oid = meta
373
+ leaf_lines.append(f"{mode} {otype} {oid}\t{name}")
374
+
375
+ for sub, sub_entries in grouped.items():
376
+ if sub == "":
377
+ continue
378
+ sub_oid = _build_tree(repo, sub_entries)
379
+ leaf_lines.append(f"040000 tree {sub_oid}\t{sub}")
380
+
381
+ payload = "\n".join(leaf_lines) + "\n"
382
+ out = subprocess.run(
383
+ ["git", "-C", str(repo), "mktree"],
384
+ input=payload,
385
+ capture_output=True,
386
+ text=True,
387
+ check=True,
388
+ )
389
+ return out.stdout.strip()
390
+
391
+
392
+ def _now() -> float:
393
+ import time
394
+
395
+ return time.time()
396
+
397
+
398
+ # silence unused import warning
399
+ _ = Iterable