flurryx-code-memory 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_memory/__init__.py +1 -0
- code_memory/claims/__init__.py +32 -0
- code_memory/claims/extractor.py +325 -0
- code_memory/claims/indexer.py +258 -0
- code_memory/claims/resolver.py +186 -0
- code_memory/claims/store.py +424 -0
- code_memory/cli.py +1192 -0
- code_memory/config.py +268 -0
- code_memory/embed/__init__.py +224 -0
- code_memory/embed/cache.py +204 -0
- code_memory/embed/m3.py +174 -0
- code_memory/embed/ollama.py +92 -0
- code_memory/embed/tei.py +106 -0
- code_memory/episodic/__init__.py +3 -0
- code_memory/episodic/sqlite_store.py +278 -0
- code_memory/extractor/__init__.py +3 -0
- code_memory/extractor/csproj.py +166 -0
- code_memory/extractor/dll.py +385 -0
- code_memory/extractor/gitignore.py +162 -0
- code_memory/extractor/nuget.py +275 -0
- code_memory/extractor/sanity.py +124 -0
- code_memory/extractor/sln.py +108 -0
- code_memory/extractor/treesitter.py +1172 -0
- code_memory/graph/__init__.py +3 -0
- code_memory/graph/falkor_store.py +740 -0
- code_memory/mcp_server.py +1816 -0
- code_memory/metrics.py +260 -0
- code_memory/orchestrator/__init__.py +13 -0
- code_memory/orchestrator/git_delta.py +211 -0
- code_memory/orchestrator/ingest_state.py +71 -0
- code_memory/orchestrator/pipeline.py +1478 -0
- code_memory/orchestrator/reset.py +130 -0
- code_memory/orchestrator/resolver.py +825 -0
- code_memory/orchestrator/retrieve.py +505 -0
- code_memory/resilience.py +73 -0
- code_memory/sync/__init__.py +20 -0
- code_memory/sync/autostart/__init__.py +42 -0
- code_memory/sync/autostart/base.py +106 -0
- code_memory/sync/autostart/launchd.py +115 -0
- code_memory/sync/autostart/schtasks.py +155 -0
- code_memory/sync/autostart/systemd.py +113 -0
- code_memory/sync/hooks.py +164 -0
- code_memory/sync/safety.py +65 -0
- code_memory/sync/snapshot.py +461 -0
- code_memory/sync/store.py +399 -0
- code_memory/sync/sync.py +405 -0
- code_memory/sync/watcher.py +320 -0
- code_memory/vector/__init__.py +3 -0
- code_memory/vector/qdrant_store.py +302 -0
- flurryx_code_memory-0.4.0.dist-info/METADATA +26 -0
- flurryx_code_memory-0.4.0.dist-info/RECORD +53 -0
- flurryx_code_memory-0.4.0.dist-info/WHEEL +4 -0
- flurryx_code_memory-0.4.0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,399 @@
|
|
|
1
|
+
"""Snapshot storage backend: orphan git branch ``codemem-snapshots``.
|
|
2
|
+
|
|
3
|
+
Layout on the branch::
|
|
4
|
+
|
|
5
|
+
snapshots/<sha>.cmsnap # one tar.gz blob per ingested commit
|
|
6
|
+
manifests/<sha>.json # mirror of the snapshot manifest (cheap lookup)
|
|
7
|
+
index.json # { sha: {created_at, size, parent_sha?, ...} }
|
|
8
|
+
|
|
9
|
+
The branch has no shared history with ``main``; it is pure storage. Any
|
|
10
|
+
contributor can publish; content-addressing by SHA makes concurrent
|
|
11
|
+
pushes for the same commit converge (identical blob = no-op).
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import json
|
|
17
|
+
import subprocess
|
|
18
|
+
import tempfile
|
|
19
|
+
from dataclasses import dataclass
|
|
20
|
+
from pathlib import Path
|
|
21
|
+
from typing import Iterable
|
|
22
|
+
|
|
23
|
+
# Default name of the orphan git branch on which snapshots are stored.
DEFAULT_BRANCH = "codemem-snapshots"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class StoreError(RuntimeError):
    """Error raised when a snapshot-store operation cannot be completed.

    Used for failed git commands, for a path that is not a git repository
    and for snapshots that are missing from the snapshot branch.
    """
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _git(repo: Path, *args: str, check: bool = True, timeout: float = 60.0) -> str:
    """Run ``git -C <repo> <args...>`` and return its captured stdout.

    Args:
        repo: Directory handed to ``git -C``.
        *args: Git sub-command followed by its arguments.
        check: When true, a non-zero exit status raises ``StoreError``;
            when false, stdout is returned whatever the exit status was.
        timeout: Number of seconds git is given to finish.

    Returns:
        The command's standard output, decoded as text.

    Raises:
        StoreError: If git cannot be started, does not finish within
            ``timeout`` seconds, or (with ``check``) exits non-zero.
    """
    command = " ".join(args)
    try:
        out = subprocess.run(
            ["git", "-C", str(repo), *args],
            capture_output=True,
            text=True,
            check=False,
            timeout=timeout,
        )
    except subprocess.TimeoutExpired as exc:
        # Surface hangs (e.g. an unreachable remote) as the module's own
        # error type so callers that treat StoreError as "best effort
        # failed" do not crash on a foreign exception.
        raise StoreError(f"git {command} timed out after {timeout}s") from exc
    except OSError as exc:
        # Typically: the git executable is missing or not executable.
        raise StoreError(f"git {command} could not be executed: {exc}") from exc
    if check and out.returncode != 0:
        raise StoreError(
            f"git {command} failed (exit {out.returncode}): {out.stderr.strip()}"
        )
    return out.stdout
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@dataclass(frozen=True)
class StoreEntry:
    """Immutable summary of one snapshot as listed in ``index.json``."""

    # Key under which the snapshot is recorded in the index.
    sha: str
    # Recorded size of the snapshot blob (0 when the index has no usable value).
    size: int
    # Recorded creation timestamp (0.0 when the index has no usable value).
    created_at: float
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class SnapshotStore:
    """Git-backed snapshot storage (no external infra).

    Snapshots are kept on a dedicated branch that is assembled with git
    plumbing commands (``hash-object``, ``mktree``, ``commit-tree``,
    ``update-ref``) rather than through a checkout.

    Operations:
      - ``fetch()``              — fetch the snapshot branch from the remote
      - ``has(sha)``             — check existence (local branch, then remote-tracking ref)
      - ``read(sha) -> bytes``   — extract blob bytes
      - ``write(sha, data)``     — write blob, commit, push (best-effort)
      - ``list_local() / list_remote()``
      - ``gc(keep_last)``        — prune old snapshots locally + remote
    """

    def __init__(
        self,
        repo_root: Path,
        *,
        branch: str = DEFAULT_BRANCH,
        remote: str = "origin",
    ) -> None:
        """Bind the store to a repository.

        Args:
            repo_root: Root directory of the git repository.
            branch: Name of the snapshot branch.
            remote: Name of the git remote used by ``fetch`` and pushes.

        Raises:
            StoreError: If ``repo_root`` contains no ``.git`` entry.
        """
        self.repo = Path(repo_root).resolve()
        self.branch = branch
        self.remote = remote
        if not (self.repo / ".git").exists():
            raise StoreError(f"not a git repo: {self.repo}")

    # ------------------------------------------------------------------
    # Read side
    # ------------------------------------------------------------------

    def fetch(self) -> bool:
        """Fetch the snapshot branch into the remote-tracking ref.

        Returns:
            True when the fetch succeeded; False when the remote is not
            configured or the fetch failed (for instance because the
            remote does not have the branch yet).
        """
        if not self._has_remote():
            return False
        refspec = f"+refs/heads/{self.branch}:refs/remotes/{self.remote}/{self.branch}"
        try:
            _git(self.repo, "fetch", self.remote, refspec, check=True)
        except StoreError:
            # remote may not have the branch yet — that's not an error
            return False
        return True

    def has(self, sha: str) -> bool:
        """Return True if a snapshot blob for ``sha`` is reachable."""
        return self._blob_oid(sha) is not None

    def read(self, sha: str) -> bytes:
        """Return the raw snapshot bytes stored for ``sha``.

        Raises:
            StoreError: If no snapshot exists for ``sha`` or the blob
                cannot be read from the object database.
        """
        oid = self._blob_oid(sha)
        if oid is None:
            raise StoreError(f"snapshot {sha} not found in {self.branch}")
        return self._cat_blob(oid)

    def list_local(self) -> list[StoreEntry]:
        """List the snapshots recorded in the local branch's index."""
        return self._list_at(self._local_ref())

    def list_remote(self) -> list[StoreEntry]:
        """List the snapshots recorded in the remote-tracking ref's index."""
        return self._list_at(self._remote_ref())

    # ------------------------------------------------------------------
    # Write side
    # ------------------------------------------------------------------

    def write(
        self,
        sha: str,
        blob: bytes,
        *,
        manifest: dict[str, object] | None = None,
        message: str | None = None,
        push: bool = True,
    ) -> bool:
        """Add ``blob`` for ``sha`` to the snapshot branch.

        If the SHA already exists with identical content, this is a no-op
        (returns False). Otherwise it commits and (optionally) pushes.

        Args:
            sha: Key of the snapshot; used in the stored file names.
            blob: Snapshot bytes to store.
            manifest: Optional JSON-serialisable manifest stored next to
                the snapshot under ``manifests/<sha>.json``.
            message: Commit message; a default is generated when omitted.
            push: Whether to push the branch afterwards (best-effort).

        Returns:
            True iff a new commit was created.
        """
        if self.has(sha) and self.read(sha) == blob:
            return False

        new_blob_oid = self._hash_object(blob)
        manifest_oid: str | None = None
        if manifest is not None:
            manifest_oid = self._hash_object(self._dump_json(manifest))

        parent_commit = self._base_commit()
        index_entries = self._read_index(parent_commit) if parent_commit else {}
        index_entries[sha] = {
            "size": len(blob),
            "created_at": _now(),
        }
        index_oid = self._hash_object(self._dump_json(index_entries))

        # Build a tree with all existing entries + new blob/manifest
        tree_entries = self._tree_entries(parent_commit) if parent_commit else {}
        tree_entries[f"snapshots/{sha}.cmsnap"] = ("100644", "blob", new_blob_oid)
        if manifest_oid:
            tree_entries[f"manifests/{sha}.json"] = ("100644", "blob", manifest_oid)
        tree_entries["index.json"] = ("100644", "blob", index_oid)
        tree_oid = self._mktree(tree_entries)

        commit_msg = message or f"codememory: add snapshot {sha[:12]}"
        parent_args = ("-p", parent_commit) if parent_commit else ()
        commit_oid = _git(
            self.repo, "commit-tree", tree_oid, *parent_args, "-m", commit_msg
        ).strip()

        _git(self.repo, "update-ref", f"refs/heads/{self.branch}", commit_oid)
        if push and self._has_remote():
            self._publish()
        return True

    def gc(self, keep_last: int, *, push: bool = True) -> int:
        """Drop all but the ``keep_last`` most recent snapshots.

        Args:
            keep_last: Number of snapshots (by ``created_at``) to keep.
            push: Whether to push the pruned branch (best-effort).

        Returns:
            The number of snapshots removed. A snapshot and its manifest
            count as one.

        Raises:
            ValueError: If ``keep_last`` is negative.
        """
        if keep_last < 0:
            # A negative slice bound would silently keep the wrong entries.
            raise ValueError(f"keep_last must be >= 0, got {keep_last}")
        entries = sorted(self.list_local(), key=lambda e: e.created_at, reverse=True)
        if len(entries) <= keep_last:
            return 0
        keep = {e.sha for e in entries[:keep_last]}
        parent_commit = self._local_commit()
        if parent_commit is None:
            return 0

        tree_entries = self._tree_entries(parent_commit)
        dropped: set[str] = set()
        for path in list(tree_entries):
            if not path.startswith(("snapshots/", "manifests/")):
                continue
            sha = Path(path).stem
            if sha not in keep:
                del tree_entries[path]
                dropped.add(sha)
        if not dropped:
            return 0

        index_entries = {
            k: v for k, v in self._read_index(parent_commit).items() if k in keep
        }
        tree_entries["index.json"] = (
            "100644",
            "blob",
            self._hash_object(self._dump_json(index_entries)),
        )
        tree_oid = self._mktree(tree_entries)
        commit_oid = _git(
            self.repo,
            "commit-tree",
            tree_oid,
            "-p",
            parent_commit,
            "-m",
            f"codememory: gc keep_last={keep_last}",
        ).strip()
        _git(self.repo, "update-ref", f"refs/heads/{self.branch}", commit_oid)
        if push and self._has_remote():
            try:
                _git(
                    self.repo,
                    "push",
                    self.remote,
                    f"refs/heads/{self.branch}:refs/heads/{self.branch}",
                    "--force-with-lease",
                )
            except StoreError:
                # Pushing is best-effort: the local branch is already pruned
                # and a later push can publish it.
                pass
        return len(dropped)

    # ------------------------------------------------------------------
    # Internals
    # ------------------------------------------------------------------

    @staticmethod
    def _dump_json(payload: object) -> bytes:
        """Serialise ``payload`` the way every stored JSON document is written."""
        return json.dumps(payload, sort_keys=True, indent=2).encode()

    def _has_remote(self) -> bool:
        """Return True if ``self.remote`` is a configured git remote."""
        out = _git(self.repo, "remote", check=False).strip().splitlines()
        return self.remote in out

    def _local_ref(self) -> str | None:
        """Resolve the local snapshot branch to a commit id, or None."""
        out = _git(
            self.repo, "rev-parse", "--verify", f"refs/heads/{self.branch}", check=False
        ).strip()
        return out or None

    def _remote_ref(self) -> str | None:
        """Resolve the remote-tracking snapshot ref to a commit id, or None."""
        out = _git(
            self.repo,
            "rev-parse",
            "--verify",
            f"refs/remotes/{self.remote}/{self.branch}",
            check=False,
        ).strip()
        return out or None

    def _local_commit(self) -> str | None:
        """Alias of ``_local_ref``."""
        return self._local_ref()

    def _remote_commit(self) -> str | None:
        """Alias of ``_remote_ref``."""
        return self._remote_ref()

    def _is_ancestor(self, ancestor: str, descendant: str) -> bool:
        """Return True if ``ancestor`` is reachable from ``descendant``."""
        try:
            _git(self.repo, "merge-base", "--is-ancestor", ancestor, descendant)
        except StoreError:
            # Exit status 1 means "not an ancestor"; any other failure is
            # treated the same way so callers fall back to merging.
            return False
        return True

    def _base_commit(self) -> str | None:
        """Pick the commit a new snapshot commit should be based on.

        The local branch is preferred. When the remote-tracking ref is
        strictly ahead of it, the remote commit is used instead so the new
        commit fast-forwards both and the push is not rejected.
        """
        local = self._local_commit()
        remote = self._remote_commit()
        if local and remote and local != remote and self._is_ancestor(local, remote):
            return remote
        return local or remote

    def _publish(self) -> None:
        """Push the snapshot branch; on rejection merge the remote and retry.

        Pushing is best-effort and never raises ``StoreError``. A rejected
        push is not retried with a forced push: after a refetch that would
        overwrite snapshots other contributors have published. Instead the
        remote branch is merged into the local one and pushed normally.
        """
        branch_ref = f"refs/heads/{self.branch}"
        refspec = f"{branch_ref}:{branch_ref}"
        try:
            _git(self.repo, "push", self.remote, refspec, check=True)
            return
        except StoreError:
            pass
        try:
            if not self.fetch():
                return
            local = self._local_commit()
            remote = self._remote_commit()
            if local is None or remote is None:
                return
            if local != remote and not self._is_ancestor(remote, local):
                merged = self._merge_with_remote(local, remote)
                _git(self.repo, "update-ref", branch_ref, merged)
            _git(self.repo, "push", self.remote, refspec, check=False)
        except StoreError:
            # The snapshot is committed locally; publishing can be retried
            # by a later write.
            pass

    def _merge_with_remote(self, local: str, remote: str) -> str:
        """Create a merge commit holding the union of both branches' entries.

        Entries from ``local`` win when both sides contain the same path or
        index key. Returns the id of the new merge commit.
        """
        tree_entries = self._tree_entries(remote)
        tree_entries.update(self._tree_entries(local))
        index_entries = self._read_index(remote)
        index_entries.update(self._read_index(local))
        tree_entries["index.json"] = (
            "100644",
            "blob",
            self._hash_object(self._dump_json(index_entries)),
        )
        tree_oid = self._mktree(tree_entries)
        return _git(
            self.repo,
            "commit-tree",
            tree_oid,
            "-p",
            local,
            "-p",
            remote,
            "-m",
            f"codememory: merge {self.remote}/{self.branch}",
        ).strip()

    def _blob_oid(self, sha: str) -> str | None:
        """Find the blob id of ``sha``'s snapshot, local branch first."""
        for ref_fn in (self._local_ref, self._remote_ref):
            ref = ref_fn()
            if ref is None:
                continue
            oid = self._lookup(ref, f"snapshots/{sha}.cmsnap")
            if oid:
                return oid
        return None

    def _lookup(self, ref: str, path: str) -> str | None:
        """Return the object id of ``path`` in ``ref``'s tree, or None."""
        out = _git(self.repo, "ls-tree", ref, path, check=False).strip()
        if not out:
            return None
        # ls-tree prints "<mode> <type> <oid>\t<path>"
        parts = out.split()
        if len(parts) < 3:
            return None
        return parts[2]

    def _tree_entries(self, commit: str) -> dict[str, tuple[str, str, str]]:
        """Return ``path -> (mode, type, oid)`` for every file in ``commit``.

        Raises:
            StoreError: If the tree cannot be listed or a line cannot be
                parsed. The result is used to rebuild the branch, so a
                partial listing must never be mistaken for the full tree.
        """
        out = _git(self.repo, "ls-tree", "-r", commit, check=True).strip()
        entries: dict[str, tuple[str, str, str]] = {}
        for line in out.splitlines():
            if not line:
                continue
            meta, tab, name = line.partition("\t")
            fields = meta.split()
            if not tab or len(fields) != 3:
                raise StoreError(f"unexpected ls-tree output: {line!r}")
            mode, otype, oid = fields
            entries[name] = (mode, otype, oid)
        return entries

    def _cat_blob(self, oid: str) -> bytes:
        """Return the raw content of blob ``oid``.

        Raises:
            StoreError: If ``git cat-file`` exits with a non-zero status.
        """
        proc = subprocess.run(
            ["git", "-C", str(self.repo), "cat-file", "blob", oid],
            capture_output=True,
            check=False,
        )
        if proc.returncode != 0:
            detail = proc.stderr.decode(errors="replace").strip()
            raise StoreError(
                f"git cat-file blob {oid} failed (exit {proc.returncode}): {detail}"
            )
        return proc.stdout

    def _read_index(self, commit: str) -> dict[str, dict[str, object]]:
        """Load ``index.json`` from ``commit``.

        Returns an empty dict when the file is absent, is not valid UTF-8
        JSON, or does not contain a JSON object.
        """
        oid = self._lookup(commit, "index.json")
        if not oid:
            return {}
        raw = self._cat_blob(oid)
        try:
            data = json.loads(raw.decode() or "{}")
        except (UnicodeDecodeError, json.JSONDecodeError):
            return {}
        if not isinstance(data, dict):
            return {}
        return data  # type: ignore[return-value]

    def _list_at(self, ref: str | None) -> list[StoreEntry]:
        """Turn the index stored at ``ref`` into ``StoreEntry`` objects.

        Missing or unusable ``size`` / ``created_at`` values fall back to
        ``0`` / ``0.0`` instead of raising.
        """
        if ref is None:
            return []
        entries: list[StoreEntry] = []
        for sha, meta in self._read_index(ref).items():
            if not isinstance(meta, dict):
                entries.append(StoreEntry(sha=sha, size=0, created_at=0.0))
                continue
            raw_size = meta.get("size", 0)
            raw_ts = meta.get("created_at", 0.0)
            size = 0
            if isinstance(raw_size, (int, float, str)):
                try:
                    size = int(raw_size)
                except (ValueError, OverflowError):
                    # non-numeric string, NaN or infinity
                    size = 0
            ts = 0.0
            if isinstance(raw_ts, (int, float, str)):
                try:
                    ts = float(raw_ts)
                except (ValueError, OverflowError):
                    ts = 0.0
            entries.append(StoreEntry(sha=sha, size=size, created_at=ts))
        return entries

    def _hash_object(self, blob: bytes) -> str:
        """Write ``blob`` to the object database and return its object id.

        The bytes are fed through ``--stdin`` so that git stores them
        verbatim; hashing a file by path lets git apply path-based input
        filters to it.

        Raises:
            StoreError: If ``git hash-object`` exits with a non-zero status.
        """
        proc = subprocess.run(
            ["git", "-C", str(self.repo), "hash-object", "-w", "--stdin"],
            input=blob,
            capture_output=True,
            check=False,
        )
        if proc.returncode != 0:
            detail = proc.stderr.decode(errors="replace").strip()
            raise StoreError(
                f"git hash-object -w --stdin failed (exit {proc.returncode}): {detail}"
            )
        return proc.stdout.decode().strip()

    def _mktree(self, entries: dict[str, tuple[str, str, str]]) -> str:
        """Build a (possibly nested) tree from flat path -> (mode, type, oid)."""
        return _build_tree(self.repo, entries)
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
def _build_tree(repo: Path, entries: dict[str, tuple[str, str, str]]) -> str:
    """Recursively materialise a git tree object from flat entries.

    Args:
        repo: Repository in which the tree objects are written.
        entries: Mapping of slash-separated path -> ``(mode, type, oid)``.
            Paths with more than one component are grouped by their first
            component and built as nested sub-trees. Empty components
            (leading, trailing or doubled slashes) are ignored.

    Returns:
        The object id printed by ``git mktree`` for the top-level tree. An
        empty mapping yields the id of the empty tree.

    Raises:
        StoreError: If ``git mktree`` exits with a non-zero status.
    """
    files: dict[str, tuple[str, str, str]] = {}
    subdirs: dict[str, dict[str, tuple[str, str, str]]] = {}
    for path, meta in entries.items():
        parts = [part for part in path.split("/") if part]
        if not parts:
            continue
        if len(parts) == 1:
            files[parts[0]] = meta
        else:
            subdirs.setdefault(parts[0], {})["/".join(parts[1:])] = meta

    lines = [
        f"{mode} {otype} {oid}\t{name}" for name, (mode, otype, oid) in files.items()
    ]
    # Build subtrees recursively
    for name, sub_entries in subdirs.items():
        lines.append(f"040000 tree {_build_tree(repo, sub_entries)}\t{name}")

    # One newline per entry and nothing else: a lone blank line is rejected
    # by ``git mktree`` outside ``--batch`` mode, while empty input is the
    # valid spelling of the empty tree.
    payload = "".join(f"{line}\n" for line in lines)
    proc = subprocess.run(
        ["git", "-C", str(repo), "mktree"],
        input=payload,
        capture_output=True,
        text=True,
        check=False,
    )
    if proc.returncode != 0:
        raise StoreError(
            f"git mktree failed (exit {proc.returncode}): {proc.stderr.strip()}"
        )
    return proc.stdout.strip()
|
|
390
|
+
|
|
391
|
+
|
|
392
|
+
def _now() -> float:
    """Return the current time as reported by ``time.time()``."""
    from time import time as _clock

    return _clock()
|
|
396
|
+
|
|
397
|
+
|
|
398
|
+
# silence unused import warning
|
|
399
|
+
_ = Iterable  # references the ``Iterable`` import so it is not reported as unused
|