flurryx-code-memory 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. code_memory/__init__.py +1 -0
  2. code_memory/claims/__init__.py +32 -0
  3. code_memory/claims/extractor.py +325 -0
  4. code_memory/claims/indexer.py +258 -0
  5. code_memory/claims/resolver.py +186 -0
  6. code_memory/claims/store.py +424 -0
  7. code_memory/cli.py +1192 -0
  8. code_memory/config.py +268 -0
  9. code_memory/embed/__init__.py +224 -0
  10. code_memory/embed/cache.py +204 -0
  11. code_memory/embed/m3.py +174 -0
  12. code_memory/embed/ollama.py +92 -0
  13. code_memory/embed/tei.py +106 -0
  14. code_memory/episodic/__init__.py +3 -0
  15. code_memory/episodic/sqlite_store.py +278 -0
  16. code_memory/extractor/__init__.py +3 -0
  17. code_memory/extractor/csproj.py +166 -0
  18. code_memory/extractor/dll.py +385 -0
  19. code_memory/extractor/gitignore.py +162 -0
  20. code_memory/extractor/nuget.py +275 -0
  21. code_memory/extractor/sanity.py +124 -0
  22. code_memory/extractor/sln.py +108 -0
  23. code_memory/extractor/treesitter.py +1172 -0
  24. code_memory/graph/__init__.py +3 -0
  25. code_memory/graph/falkor_store.py +740 -0
  26. code_memory/mcp_server.py +1816 -0
  27. code_memory/metrics.py +260 -0
  28. code_memory/orchestrator/__init__.py +13 -0
  29. code_memory/orchestrator/git_delta.py +211 -0
  30. code_memory/orchestrator/ingest_state.py +71 -0
  31. code_memory/orchestrator/pipeline.py +1478 -0
  32. code_memory/orchestrator/reset.py +130 -0
  33. code_memory/orchestrator/resolver.py +825 -0
  34. code_memory/orchestrator/retrieve.py +505 -0
  35. code_memory/resilience.py +73 -0
  36. code_memory/sync/__init__.py +20 -0
  37. code_memory/sync/autostart/__init__.py +42 -0
  38. code_memory/sync/autostart/base.py +106 -0
  39. code_memory/sync/autostart/launchd.py +115 -0
  40. code_memory/sync/autostart/schtasks.py +155 -0
  41. code_memory/sync/autostart/systemd.py +113 -0
  42. code_memory/sync/hooks.py +164 -0
  43. code_memory/sync/safety.py +65 -0
  44. code_memory/sync/snapshot.py +461 -0
  45. code_memory/sync/store.py +399 -0
  46. code_memory/sync/sync.py +405 -0
  47. code_memory/sync/watcher.py +320 -0
  48. code_memory/vector/__init__.py +3 -0
  49. code_memory/vector/qdrant_store.py +302 -0
  50. flurryx_code_memory-0.4.0.dist-info/METADATA +26 -0
  51. flurryx_code_memory-0.4.0.dist-info/RECORD +53 -0
  52. flurryx_code_memory-0.4.0.dist-info/WHEEL +4 -0
  53. flurryx_code_memory-0.4.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,405 @@
1
+ """Smart sync: snapshot pull + incremental ingest in one command.
2
+
3
+ Decision tree::
4
+
5
+ 1. HEAD unchanged AND local state matches AND no dirty files
6
+ -> noop
7
+
8
+ 2. Local state empty
9
+ a. snapshot for HEAD exists in store -> pull + apply
10
+ b. snapshot for nearest ancestor exists -> pull + apply + incremental
11
+ c. nothing in store -> full local ingest
12
+ (then optionally publish if on canonical branch)
13
+
14
+ 3. Local state present
15
+ a. HEAD == state.sha -> dirty-files incremental only
16
+ b. HEAD newer, snapshot for HEAD exists -> pull + apply
17
+ c. HEAD newer, otherwise -> incremental from state.sha
18
+
19
+ The goal: O(seconds) on every event, never block, always converge.
20
+ """
21
+
22
+ from __future__ import annotations
23
+
24
+ import logging
25
+ from dataclasses import dataclass, field
26
+ from pathlib import Path
27
+ from typing import Iterable, Literal
28
+
29
+ from ..config import CONFIG, detect_project_slug
30
+ from ..orchestrator import git_delta
31
+ from ..orchestrator.ingest_state import IngestStateStore
32
+ from ..orchestrator.pipeline import IngestStats, Pipeline
33
+ from .snapshot import (
34
+ Snapshot,
35
+ apply_snapshot,
36
+ build_snapshot,
37
+ verify_snapshot,
38
+ )
39
+ from .store import SnapshotStore
40
+
41
+ log = logging.getLogger("codememory.sync")
42
+
43
# Closed set of strategy names reported in ``SyncResult.action``.
Action = Literal[
    # state sha equals HEAD and the worktree is clean; nothing was done
    "noop",
    # a stored snapshot for HEAD itself was applied
    "pull_snapshot",
    # an ancestor's snapshot was applied, then an incremental ingest ran
    "pull_then_incremental",
    # incremental ingest from the previously recorded sha
    "incremental",
    # full ingest of the whole tree (also used for non-git roots)
    "full_ingest",
    # HEAD unchanged; only dirty / deleted worktree files were re-indexed
    "dirty_only",
]
51
+
52
+
53
@dataclass
class SyncResult:
    """Summary of what a single sync run did.

    Instances are built by the strategy function that handled the run and
    returned to the caller of ``sync_repo``.
    """

    # Which strategy was executed.
    action: Action
    # HEAD commit at sync time; None when the root is not a git repository.
    head_sha: str | None
    # Commit the incremental work started from (HEAD itself for dirty-only
    # runs, the prior state sha or the ancestor snapshot sha otherwise).
    base_sha: str | None = None
    # Commit of the snapshot that was applied, if any.
    snapshot_sha: str | None = None
    # Set by ``_publish`` to the value returned by ``store.write``.
    publish: bool = False
    # Number of files (re)ingested; for ``pull_snapshot`` this carries the
    # snapshot manifest's "vectors" count instead.
    files_changed: int = 0
    # Number of files removed from the index.
    files_deleted: int = 0
    # Free-form, human-readable remarks about the run.
    notes: list[str] = field(default_factory=list)
63
+
64
+
65
def sync_repo(
    root: str | Path,
    *,
    project: str | None = None,
    publish: bool = False,
    canonical_branch: str = "main",
    trigger: str = "manual",
    fetch: bool = True,
) -> SyncResult:
    """Bring the local code-memory state in line with the repo's git HEAD.

    The cheapest applicable strategy is chosen, in this order: no-op or
    dirty-files-only when the recorded sha already equals HEAD; applying a
    stored snapshot for HEAD; incremental ingest from the recorded sha when
    that sha is still reachable; otherwise an ancestor snapshot followed by
    an incremental ingest, or a full ingest as the last resort.

    Parameters
    ----------
    root :
        Repo root.
    project :
        Project slug (auto-detected via ``detect_project_slug`` if None).
    publish :
        Request publishing of a freshly built snapshot. Only honoured by the
        incremental and full-ingest strategies, and only when the current
        branch equals ``canonical_branch``.
    canonical_branch :
        Branch whose tip is considered canonical (default ``main``).
    trigger :
        Free-form tag for logging (e.g. ``post-merge``, ``watcher``).
    fetch :
        If True, call ``store.fetch()`` before looking up snapshots.

    Returns
    -------
    SyncResult
        Description of the strategy that ran and what it touched.
    """
    repo = Path(root).resolve()
    slug = project or detect_project_slug(repo)
    log.info("sync start trigger=%s project=%s root=%s", trigger, slug, repo)

    if not git_delta.is_git_repo(repo):
        # No git metadata means no HEAD to diff against: ingest everything.
        stats = Pipeline(project=slug).ingest_repo(repo, mode="full")
        return SyncResult(
            action="full_ingest",
            head_sha=None,
            files_changed=stats.files,
            notes=["not a git repository; performed full ingest"],
        )

    head = git_delta.head_sha(repo)
    branch = git_delta.current_branch(repo)
    store = SnapshotStore(repo)
    if fetch:
        store.fetch()

    prior = IngestStateStore(CONFIG.for_project(slug).episodic_db).get(repo)
    dirty = git_delta.dirty_files(repo)
    dirty_deleted = git_delta.dirty_deleted_files(repo)

    # Publishing is only ever allowed from the canonical branch.
    may_publish = publish and branch == canonical_branch

    # ---- Recorded state already points at HEAD ----------------------------
    if prior is not None and prior.last_sha == head:
        if dirty or dirty_deleted:
            return _run_dirty_only(repo, slug, head, dirty, dirty_deleted)
        log.info("sync noop (head=%s, clean)", head[:12])
        return SyncResult(action="noop", head_sha=head)

    # ---- A snapshot for HEAD exists: always the preferred path ------------
    if store.has(head):
        return _pull_and_apply(repo, slug, head, branch, store, publish=False)

    # ---- Recorded state is a usable diff base -----------------------------
    if prior is not None and git_delta.is_reachable(repo, prior.last_sha):
        return _run_incremental(
            repo,
            slug,
            head,
            branch,
            base=prior.last_sha,
            store=store,
            publish=may_publish,
        )

    # ---- No state, or its sha is no longer reachable ----------------------
    # Start from the nearest ancestor snapshot when one exists; otherwise
    # rebuild from scratch.
    ancestor = _find_ancestor_snapshot(repo, store, head)
    if ancestor:
        return _pull_and_apply_then_incremental(
            repo, slug, head, branch, store, ancestor, dirty
        )
    return _run_full_ingest(repo, slug, head, branch, store, publish=may_publish)
177
+
178
+
179
+ # ---------------------------------------------------------------------------
180
+ # Strategies
181
+ # ---------------------------------------------------------------------------
182
+
183
+
184
def _run_dirty_only(
    root: Path,
    slug: str,
    head: str,
    dirty: Iterable[Path],
    dirty_deleted: Iterable[Path] = (),
) -> SyncResult:
    """Re-index modified worktree files and drop deleted ones; HEAD is unchanged.

    ``root`` is accepted for signature parity with the other strategies and
    is not used here.

    Returns a ``dirty_only`` result whose ``files_changed`` counts the files
    for which ``reingest_file`` returned something other than None, and whose
    ``files_deleted`` is whatever ``delete_paths`` reports.
    """
    pipeline = Pipeline(project=slug)

    # Entries that are not regular files (directories, vanished paths) are
    # skipped; a None result from reingest_file is not counted.
    reindexed = sum(
        1
        for candidate in dirty
        if candidate.is_file() and pipeline.reingest_file(candidate) is not None
    )

    # Deletions are handled even though HEAD has not moved: otherwise a plain
    # ``rm tracked.ts`` (or ``git rm``) would leave that file's graph node and
    # vectors orphaned until the deletion is committed and a later sync goes
    # through the diff path.
    removed = pipeline.delete_paths(list(dirty_deleted), head_sha=head)

    return SyncResult(
        action="dirty_only",
        head_sha=head,
        base_sha=head,
        files_changed=reindexed,
        files_deleted=removed,
        notes=["worktree dirty; re-indexed locally without changing state"],
    )
212
+
213
+
214
def _run_incremental(
    root: Path,
    slug: str,
    head: str,
    branch: str | None,
    *,
    base: str,
    store: SnapshotStore,
    publish: bool,
) -> SyncResult:
    """Ingest only what changed since ``base``, optionally publishing afterwards.

    Runs the pipeline in ``incremental`` mode with ``since=base`` and reports
    the file and deletion counts from the returned stats. When ``publish`` is
    true, ``_publish`` is invoked for ``head`` and may annotate the result.
    """
    stats = Pipeline(project=slug).ingest_repo(root, mode="incremental", since=base)
    outcome = SyncResult(
        action="incremental",
        head_sha=head,
        base_sha=base,
        files_changed=stats.files,
        files_deleted=stats.deleted,
    )
    if not publish:
        return outcome
    _publish(store, slug, head, branch, outcome)
    return outcome
236
+
237
+
238
def _run_full_ingest(
    root: Path,
    slug: str,
    head: str,
    branch: str | None,
    store: SnapshotStore,
    *,
    publish: bool,
) -> SyncResult:
    """Ingest the whole tree from scratch, optionally publishing afterwards.

    Runs the pipeline in ``full`` mode and reports the file count from the
    returned stats. When ``publish`` is true, ``_publish`` is invoked for
    ``head`` and may annotate the result.
    """
    stats: IngestStats = Pipeline(project=slug).ingest_repo(root, mode="full")
    outcome = SyncResult(
        action="full_ingest",
        head_sha=head,
        files_changed=stats.files,
    )
    if not publish:
        return outcome
    _publish(store, slug, head, branch, outcome)
    return outcome
257
+
258
+
259
def _pull_and_apply(
    root: Path,
    slug: str,
    head: str,
    branch: str | None,
    store: SnapshotStore,
    *,
    publish: bool,
) -> SyncResult:
    """Apply the stored snapshot for ``head`` and record ``head`` as the state.

    ``publish`` is accepted for signature parity with the other strategies
    and is not used here.

    Raises ``RuntimeError`` (from ``_load_and_verify``) when the snapshot
    fails verification; in that case nothing is applied.
    """
    snapshot = _load_and_verify(store.read(head), slug)
    apply_snapshot(snapshot)

    # Record the snapshot's commit as the local ingest state so that a later
    # incremental run has a base to diff from.
    Pipeline(project=slug).state.set(root, sha=head, branch=branch)

    vector_count = snapshot.manifest.counts.get("vectors", 0)
    return SyncResult(
        action="pull_snapshot",
        head_sha=head,
        snapshot_sha=head,
        files_changed=vector_count,
    )
281
+
282
+
283
def _pull_and_apply_then_incremental(
    root: Path,
    slug: str,
    head: str,
    branch: str | None,
    store: SnapshotStore,
    ancestor: str,
    dirty: Iterable[Path],
) -> SyncResult:
    """Apply the snapshot stored for ``ancestor``, then ingest forward from it.

    ``dirty`` is accepted but not used by this strategy.

    Raises ``RuntimeError`` (from ``_load_and_verify``) when the snapshot
    fails verification; in that case nothing is applied.
    """
    snapshot = _load_and_verify(store.read(ancestor), slug)
    apply_snapshot(snapshot)

    pipeline = Pipeline(project=slug)
    # Record the ancestor as the state first, then run the incremental ingest
    # with that same commit as its base.
    pipeline.state.set(root, sha=ancestor, branch=branch)
    stats = pipeline.ingest_repo(root, mode="incremental", since=ancestor)

    return SyncResult(
        action="pull_then_incremental",
        head_sha=head,
        base_sha=ancestor,
        snapshot_sha=ancestor,
        files_changed=stats.files,
        files_deleted=stats.deleted,
    )
306
+
307
+
308
def _publish(
    store: SnapshotStore,
    slug: str,
    head: str,
    branch: str | None,
    result: SyncResult,
) -> None:
    """Build a snapshot for ``head`` and write it to ``store``.

    Does nothing except add a note when the store already has a snapshot for
    ``head``. Otherwise the snapshot is serialised through a temporary file,
    handed to ``store.write`` together with a plain-dict copy of its
    manifest, and ``result`` is updated in place: ``result.publish`` takes
    the value returned by ``store.write`` and a note is appended when that
    value is truthy.
    """
    if store.has(head):
        result.notes.append("snapshot already published")
        return

    snap = build_snapshot(
        project=slug,
        head_sha=head,
        branch=branch,
        state={"last_sha": head, "branch": branch},
    )

    import tempfile

    # The snapshot is written to a path, so reserve a named temp file, close
    # it, let the snapshot write to it, and read the bytes back.
    with tempfile.NamedTemporaryFile(suffix=".cmsnap", delete=False) as handle:
        scratch = Path(handle.name)
    try:
        snap.write(scratch)
        payload = scratch.read_bytes()
    finally:
        scratch.unlink(missing_ok=True)

    manifest_fields = (
        "format_version",
        "project",
        "head_sha",
        "branch",
        "embed_model",
        "embed_dim",
        "created_at",
        "created_by",
        "tool_version",
        "counts",
        "content_sha256",
    )
    manifest_dict: dict[str, object] = {
        name: getattr(snap.manifest, name) for name in manifest_fields
    }

    created = store.write(head, payload, manifest=manifest_dict)
    result.publish = created
    if created:
        result.notes.append(f"published snapshot {head[:12]}")
353
+
354
+
355
def _load_and_verify(blob: bytes, slug: str) -> Snapshot:
    """Parse a serialised snapshot and check it against the project config.

    The bytes are written to a temporary ``.cmsnap`` file because
    ``Snapshot.read`` takes a path. The temp file is removed on every exit
    path, including when writing the bytes fails.

    Parameters
    ----------
    blob :
        Raw snapshot bytes as returned by the snapshot store.
    slug :
        Project slug; its config supplies the expected embedding model and
        dimension.

    Returns
    -------
    Snapshot
        The parsed snapshot, once ``verify_snapshot`` reports it as ok.

    Raises
    ------
    RuntimeError
        If ``verify_snapshot`` does not report ok.
    """
    import tempfile

    tmp = tempfile.NamedTemporaryFile(suffix=".cmsnap", delete=False)
    path = Path(tmp.name)
    try:
        # Write inside the cleanup scope so a failed write (e.g. disk full)
        # does not leave the delete=False temp file behind. The handle is
        # closed before the snapshot is read and before the file is unlinked.
        with tmp:
            tmp.write(blob)
        snap = Snapshot.read(path)
    finally:
        path.unlink(missing_ok=True)

    cfg = CONFIG.for_project(slug)
    res = verify_snapshot(
        snap=snap,
        expected_model=cfg.embed_model,
        expected_dim=cfg.embed_dim,
    )
    if not res.ok:
        raise RuntimeError(f"snapshot verification failed: {res.reason}")
    return snap
374
+
375
+
376
+ def _find_ancestor_snapshot(
377
+ root: Path, store: SnapshotStore, head: str, *, max_depth: int = 200
378
+ ) -> str | None:
379
+ """Walk back from HEAD on first-parent looking for a published snapshot."""
380
+ available = {e.sha for e in store.list_local()} | {e.sha for e in store.list_remote()}
381
+ if not available:
382
+ return None
383
+ import subprocess
384
+
385
+ out = subprocess.run(
386
+ [
387
+ "git",
388
+ "-C",
389
+ str(root),
390
+ "rev-list",
391
+ "--first-parent",
392
+ f"-n{max_depth}",
393
+ head,
394
+ ],
395
+ capture_output=True,
396
+ text=True,
397
+ check=False,
398
+ )
399
+ if out.returncode != 0:
400
+ return None
401
+ for sha in out.stdout.splitlines():
402
+ sha = sha.strip()
403
+ if sha in available:
404
+ return sha
405
+ return None