flurryx-code-memory 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_memory/__init__.py +1 -0
- code_memory/claims/__init__.py +32 -0
- code_memory/claims/extractor.py +325 -0
- code_memory/claims/indexer.py +258 -0
- code_memory/claims/resolver.py +186 -0
- code_memory/claims/store.py +424 -0
- code_memory/cli.py +1192 -0
- code_memory/config.py +268 -0
- code_memory/embed/__init__.py +224 -0
- code_memory/embed/cache.py +204 -0
- code_memory/embed/m3.py +174 -0
- code_memory/embed/ollama.py +92 -0
- code_memory/embed/tei.py +106 -0
- code_memory/episodic/__init__.py +3 -0
- code_memory/episodic/sqlite_store.py +278 -0
- code_memory/extractor/__init__.py +3 -0
- code_memory/extractor/csproj.py +166 -0
- code_memory/extractor/dll.py +385 -0
- code_memory/extractor/gitignore.py +162 -0
- code_memory/extractor/nuget.py +275 -0
- code_memory/extractor/sanity.py +124 -0
- code_memory/extractor/sln.py +108 -0
- code_memory/extractor/treesitter.py +1172 -0
- code_memory/graph/__init__.py +3 -0
- code_memory/graph/falkor_store.py +740 -0
- code_memory/mcp_server.py +1816 -0
- code_memory/metrics.py +260 -0
- code_memory/orchestrator/__init__.py +13 -0
- code_memory/orchestrator/git_delta.py +211 -0
- code_memory/orchestrator/ingest_state.py +71 -0
- code_memory/orchestrator/pipeline.py +1478 -0
- code_memory/orchestrator/reset.py +130 -0
- code_memory/orchestrator/resolver.py +825 -0
- code_memory/orchestrator/retrieve.py +505 -0
- code_memory/resilience.py +73 -0
- code_memory/sync/__init__.py +20 -0
- code_memory/sync/autostart/__init__.py +42 -0
- code_memory/sync/autostart/base.py +106 -0
- code_memory/sync/autostart/launchd.py +115 -0
- code_memory/sync/autostart/schtasks.py +155 -0
- code_memory/sync/autostart/systemd.py +113 -0
- code_memory/sync/hooks.py +164 -0
- code_memory/sync/safety.py +65 -0
- code_memory/sync/snapshot.py +461 -0
- code_memory/sync/store.py +399 -0
- code_memory/sync/sync.py +405 -0
- code_memory/sync/watcher.py +320 -0
- code_memory/vector/__init__.py +3 -0
- code_memory/vector/qdrant_store.py +302 -0
- flurryx_code_memory-0.4.0.dist-info/METADATA +26 -0
- flurryx_code_memory-0.4.0.dist-info/RECORD +53 -0
- flurryx_code_memory-0.4.0.dist-info/WHEEL +4 -0
- flurryx_code_memory-0.4.0.dist-info/entry_points.txt +3 -0
code_memory/sync/sync.py
ADDED
|
@@ -0,0 +1,405 @@
|
|
|
1
|
+
"""Smart sync: snapshot pull + incremental ingest in one command.
|
|
2
|
+
|
|
3
|
+
Decision tree::
|
|
4
|
+
|
|
5
|
+
1. HEAD unchanged AND local state matches AND no dirty files
|
|
6
|
+
-> noop
|
|
7
|
+
|
|
8
|
+
2. Local state empty
|
|
9
|
+
a. snapshot for HEAD exists in store -> pull + apply
|
|
10
|
+
b. snapshot for nearest ancestor exists -> pull + apply + incremental
|
|
11
|
+
c. nothing in store -> full local ingest
|
|
12
|
+
(then optionally publish if on canonical branch)
|
|
13
|
+
|
|
14
|
+
3. Local state present
|
|
15
|
+
a. HEAD == state.sha -> dirty-files incremental only
|
|
16
|
+
b. HEAD newer, snapshot for HEAD exists -> pull + apply
|
|
17
|
+
c. HEAD newer, otherwise                -> incremental from state.sha (if state.sha is no longer reachable: ancestor snapshot + incremental, else full ingest)
|
|
18
|
+
|
|
19
|
+
The goal: O(seconds) on every event, never block, always converge.
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
24
|
+
import logging
|
|
25
|
+
from dataclasses import dataclass, field
|
|
26
|
+
from pathlib import Path
|
|
27
|
+
from typing import Iterable, Literal
|
|
28
|
+
|
|
29
|
+
from ..config import CONFIG, detect_project_slug
|
|
30
|
+
from ..orchestrator import git_delta
|
|
31
|
+
from ..orchestrator.ingest_state import IngestStateStore
|
|
32
|
+
from ..orchestrator.pipeline import IngestStats, Pipeline
|
|
33
|
+
from .snapshot import (
|
|
34
|
+
Snapshot,
|
|
35
|
+
apply_snapshot,
|
|
36
|
+
build_snapshot,
|
|
37
|
+
verify_snapshot,
|
|
38
|
+
)
|
|
39
|
+
from .store import SnapshotStore
|
|
40
|
+
|
|
41
|
+
# Module-level logger used by the sync entry point.
log = logging.getLogger("codememory.sync")

# Closed set of strategy names a sync run reports in ``SyncResult.action``.
Action = Literal[
    "noop", "pull_snapshot", "pull_then_incremental",
    "incremental", "full_ingest", "dirty_only",
]
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@dataclass
class SyncResult:
    """Outcome of a single sync run.

    Only ``action`` and ``head_sha`` are required; every other field has a
    neutral default and is filled in by whichever strategy actually ran.
    """

    # Name of the strategy that was executed.
    action: Action
    # HEAD commit at sync time; None when the root is not a git repository.
    head_sha: str | None

    # Commit the incremental step started from, when one was run.
    base_sha: str | None = None
    # Commit whose snapshot was pulled and applied, when one was.
    snapshot_sha: str | None = None
    # Set by the publish step to the value returned by the snapshot store write.
    publish: bool = False

    # Counters reported by the strategy that ran.
    files_changed: int = 0
    files_deleted: int = 0

    # Free-form, human-readable remarks accumulated during the run.
    notes: list[str] = field(default_factory=list)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def sync_repo(
    root: str | Path,
    *,
    project: str | None = None,
    publish: bool = False,
    canonical_branch: str = "main",
    trigger: str = "manual",
    fetch: bool = True,
) -> SyncResult:
    """Bring the local code-memory state in line with git HEAD.

    The cheapest applicable strategy is chosen: do nothing, re-index dirty
    files only, apply a published snapshot, run an incremental ingest, or
    fall back to a full ingest.

    Parameters
    ----------
    root :
        Repo root.
    project :
        Project slug (auto-detected from ``root`` when falsy).
    publish :
        When True and the current branch equals ``canonical_branch``, the
        incremental and full-ingest strategies push a snapshot for HEAD to
        the snapshot store. The snapshot-pull strategies never publish.
    canonical_branch :
        Branch whose tip is considered canonical (default ``main``).
    trigger :
        Free-form tag for logging (e.g. ``post-merge``, ``watcher``).
    fetch :
        If True, call the snapshot store's ``fetch()`` before lookup.

    Returns
    -------
    SyncResult
        Describes which strategy ran and what it touched.
    """
    repo = Path(root).resolve()
    slug = project if project else detect_project_slug(repo)
    log.info("sync start trigger=%s project=%s root=%s", trigger, slug, repo)

    # Without git there is no HEAD to reconcile against: ingest everything.
    if not git_delta.is_git_repo(repo):
        full_stats = Pipeline(project=slug).ingest_repo(repo, mode="full")
        return SyncResult(
            action="full_ingest",
            head_sha=None,
            files_changed=full_stats.files,
            notes=["not a git repository; performed full ingest"],
        )

    head = git_delta.head_sha(repo)
    branch = git_delta.current_branch(repo)
    store = SnapshotStore(repo)
    if fetch:
        store.fetch()

    prior = IngestStateStore(CONFIG.for_project(slug).episodic_db).get(repo)
    dirty = git_delta.dirty_files(repo)
    dirty_deleted = git_delta.dirty_deleted_files(repo)

    # Local state already sits at HEAD: at most the worktree needs re-indexing.
    if prior is not None and prior.last_sha == head:
        if dirty or dirty_deleted:
            return _run_dirty_only(repo, slug, head, dirty, dirty_deleted)
        log.info("sync noop (head=%s, clean)", head[:12])
        return SyncResult(action="noop", head_sha=head)

    # From here on local state is either missing or stale. An exact snapshot
    # for HEAD wins in both situations.
    if store.has(head):
        return _pull_and_apply(repo, slug, head, branch, store, publish=False)

    may_publish = publish and branch == canonical_branch

    # No usable diff base (no prior state, or the recorded commit is no
    # longer reachable): start from the nearest ancestor snapshot if one
    # exists, otherwise re-ingest from scratch.
    if prior is None or not git_delta.is_reachable(repo, prior.last_sha):
        ancestor = _find_ancestor_snapshot(repo, store, head)
        if ancestor:
            return _pull_and_apply_then_incremental(
                repo, slug, head, branch, store, ancestor, dirty
            )
        return _run_full_ingest(
            repo, slug, head, branch, store, publish=may_publish
        )

    # Stale but reachable state: diff forward from the recorded commit.
    return _run_incremental(
        repo,
        slug,
        head,
        branch,
        base=prior.last_sha,
        store=store,
        publish=may_publish,
    )
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
# ---------------------------------------------------------------------------
|
|
180
|
+
# Strategies
|
|
181
|
+
# ---------------------------------------------------------------------------
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def _run_dirty_only(
    root: Path,
    slug: str,
    head: str,
    dirty: Iterable[Path],
    dirty_deleted: Iterable[Path] = (),
) -> SyncResult:
    """Re-index worktree changes while HEAD is unchanged.

    Every entry of ``dirty`` that is a regular file is re-ingested and
    counted when ``reingest_file`` returns something other than None.
    Entries of ``dirty_deleted`` are then removed from the index.
    ``root`` is accepted for signature symmetry with the other strategies
    and is not used here.
    """
    pipeline = Pipeline(project=slug)

    # Non-files are skipped before any re-ingest attempt.
    reindexed = sum(
        1
        for candidate in dirty
        if candidate.is_file() and pipeline.reingest_file(candidate) is not None
    )

    # Files that vanished from the worktree must be torn down here as well:
    # otherwise a plain ``rm tracked.ts`` (or ``git rm``) leaves the file's
    # graph node and vectors orphaned until the deletion is committed and a
    # later sync goes through the diff path.
    removed = pipeline.delete_paths(list(dirty_deleted), head_sha=head)

    return SyncResult(
        action="dirty_only",
        head_sha=head,
        base_sha=head,
        files_changed=reindexed,
        files_deleted=removed,
        notes=["worktree dirty; re-indexed locally without changing state"],
    )
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def _run_incremental(
    root: Path,
    slug: str,
    head: str,
    branch: str | None,
    *,
    base: str,
    store: SnapshotStore,
    publish: bool,
) -> SyncResult:
    """Run an incremental ingest from ``base`` and optionally publish.

    The pipeline is asked to ingest ``root`` in ``incremental`` mode since
    ``base``; its file and deletion counters are copied into the result.
    When ``publish`` is truthy a snapshot for ``head`` is pushed afterwards.
    """
    stats = Pipeline(project=slug).ingest_repo(
        root, mode="incremental", since=base
    )
    outcome = SyncResult(
        action="incremental",
        head_sha=head,
        base_sha=base,
        files_changed=stats.files,
        files_deleted=stats.deleted,
    )
    if not publish:
        return outcome
    _publish(store, slug, head, branch, outcome)
    return outcome
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def _run_full_ingest(
    root: Path,
    slug: str,
    head: str,
    branch: str | None,
    store: SnapshotStore,
    *,
    publish: bool,
) -> SyncResult:
    """Ingest the whole repository and optionally publish a snapshot.

    Used when there is neither usable local state nor a snapshot to start
    from. When ``publish`` is truthy a snapshot for ``head`` is pushed once
    the ingest has finished.
    """
    stats = Pipeline(project=slug).ingest_repo(root, mode="full")
    outcome = SyncResult(
        action="full_ingest",
        head_sha=head,
        files_changed=stats.files,
    )
    if not publish:
        return outcome
    _publish(store, slug, head, branch, outcome)
    return outcome
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
def _pull_and_apply(
    root: Path,
    slug: str,
    head: str,
    branch: str | None,
    store: SnapshotStore,
    *,
    publish: bool,
) -> SyncResult:
    """Apply the snapshot published for ``head`` and record it as local state.

    The snapshot is read from ``store``, verified, and applied. ``publish``
    is part of the shared strategy signature but is not consulted here.

    Raises
    ------
    RuntimeError
        If snapshot verification fails.
    """
    snap = _load_and_verify(store.read(head), slug)
    apply_snapshot(snap)

    # Record the snapshot's commit in the local ingest state so that a later
    # incremental run can diff from it.
    Pipeline(project=slug).state.set(root, sha=head, branch=branch)

    # The manifest's "vectors" count is what this strategy reports as
    # ``files_changed``.
    vector_count = snap.manifest.counts.get("vectors", 0)
    return SyncResult(
        action="pull_snapshot",
        head_sha=head,
        snapshot_sha=head,
        files_changed=vector_count,
    )
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
def _pull_and_apply_then_incremental(
    root: Path,
    slug: str,
    head: str,
    branch: str | None,
    store: SnapshotStore,
    ancestor: str,
    dirty: Iterable[Path],
) -> SyncResult:
    """Apply the snapshot of ``ancestor``, then ingest forward from it.

    The ancestor snapshot is read, verified and applied; local state is set
    to ``ancestor``; an incremental ingest since ``ancestor`` follows.
    ``dirty`` is accepted but not used by this function.

    Raises
    ------
    RuntimeError
        If snapshot verification fails.
    """
    snap = _load_and_verify(store.read(ancestor), slug)
    apply_snapshot(snap)

    pipeline = Pipeline(project=slug)
    # Record the ancestor as the current state before ingesting forward.
    pipeline.state.set(root, sha=ancestor, branch=branch)
    stats = pipeline.ingest_repo(root, mode="incremental", since=ancestor)

    return SyncResult(
        action="pull_then_incremental",
        head_sha=head,
        base_sha=ancestor,
        snapshot_sha=ancestor,
        files_changed=stats.files,
        files_deleted=stats.deleted,
    )
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
def _publish(
    store: SnapshotStore,
    slug: str,
    head: str,
    branch: str | None,
    result: SyncResult,
) -> None:
    """Build a snapshot for ``head`` and write it to the snapshot store.

    ``result`` is updated in place: ``publish`` receives the value returned
    by the store write, and ``notes`` gains a line describing what happened.
    Nothing is built when the store already has a snapshot for ``head``.
    """
    import tempfile

    if store.has(head):
        result.notes.append("snapshot already published")
        return

    snap = build_snapshot(
        project=slug,
        head_sha=head,
        branch=branch,
        state={"last_sha": head, "branch": branch},
    )

    # The snapshot is serialised through a scratch file to obtain its bytes.
    # delete=False lets the path be reused after the handle closes; the file
    # is removed in the finally clause.
    with tempfile.NamedTemporaryFile(suffix=".cmsnap", delete=False) as handle:
        scratch = Path(handle.name)
    try:
        snap.write(scratch)
        data = scratch.read_bytes()
    finally:
        scratch.unlink(missing_ok=True)

    # Manifest attributes passed to the store alongside the snapshot bytes.
    manifest_keys = (
        "format_version",
        "project",
        "head_sha",
        "branch",
        "embed_model",
        "embed_dim",
        "created_at",
        "created_by",
        "tool_version",
        "counts",
        "content_sha256",
    )
    manifest_dict: dict[str, object] = {
        key: getattr(snap.manifest, key) for key in manifest_keys
    }

    created = store.write(head, data, manifest=manifest_dict)
    result.publish = created
    if created:
        result.notes.append(f"published snapshot {head[:12]}")
|
|
353
|
+
|
|
354
|
+
|
|
355
|
+
def _load_and_verify(blob: bytes, slug: str) -> Snapshot:
    """Parse snapshot bytes and verify them against the project's config.

    ``blob`` is written to a scratch file so ``Snapshot.read`` can load it
    from a path. The parsed snapshot is then checked against the embedding
    model and dimension configured for ``slug``.

    Raises
    ------
    RuntimeError
        If verification reports a failure; the message carries the reason.
    """
    import tempfile

    # delete=False keeps the file after the handle closes so it can be read
    # by path; it is removed in the finally clause.
    with tempfile.NamedTemporaryFile(suffix=".cmsnap", delete=False) as handle:
        handle.write(blob)
        scratch = Path(handle.name)
    try:
        snap = Snapshot.read(scratch)
    finally:
        scratch.unlink(missing_ok=True)

    config = CONFIG.for_project(slug)
    verdict = verify_snapshot(
        snap=snap,
        expected_model=config.embed_model,
        expected_dim=config.embed_dim,
    )
    if verdict.ok:
        return snap
    raise RuntimeError(f"snapshot verification failed: {verdict.reason}")
|
|
374
|
+
|
|
375
|
+
|
|
376
|
+
def _find_ancestor_snapshot(
    root: Path, store: SnapshotStore, head: str, *, max_depth: int = 200
) -> str | None:
    """Walk back from HEAD on first-parent looking for a published snapshot.

    Parameters
    ----------
    root :
        Repository directory passed to ``git -C``.
    store :
        Snapshot store; the shas from ``list_local()`` and ``list_remote()``
        form the candidate set.
    head :
        Commit to start walking from (it is itself a candidate).
    max_depth :
        Maximum number of first-parent commits to inspect.

    Returns
    -------
    str | None
        The nearest commit (closest to ``head``) that has a snapshot, or
        None when there are no snapshots, none match within ``max_depth``,
        or git could not be run successfully.
    """
    import subprocess

    available = {e.sha for e in store.list_local()} | {
        e.sha for e in store.list_remote()
    }
    if not available:
        # Nothing published anywhere: skip spawning git altogether.
        return None

    command = [
        "git",
        "-C",
        str(root),
        "rev-list",
        "--first-parent",
        f"-n{max_depth}",
        head,
    ]
    try:
        out = subprocess.run(
            command,
            capture_output=True,
            text=True,
            check=False,
        )
    except OSError as exc:
        # A git executable that cannot be launched is treated like any other
        # git failure below: no ancestor found, so the caller falls back.
        logging.getLogger("codememory.sync").debug(
            "git rev-list could not be started: %s", exc
        )
        return None
    if out.returncode != 0:
        return None

    # rev-list prints newest first, so the first hit is the nearest ancestor.
    for line in out.stdout.splitlines():
        sha = line.strip()
        if sha and sha in available:
            return sha
    return None
|