flurryx-code-memory 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. code_memory/__init__.py +1 -0
  2. code_memory/claims/__init__.py +32 -0
  3. code_memory/claims/extractor.py +325 -0
  4. code_memory/claims/indexer.py +258 -0
  5. code_memory/claims/resolver.py +186 -0
  6. code_memory/claims/store.py +424 -0
  7. code_memory/cli.py +1192 -0
  8. code_memory/config.py +268 -0
  9. code_memory/embed/__init__.py +224 -0
  10. code_memory/embed/cache.py +204 -0
  11. code_memory/embed/m3.py +174 -0
  12. code_memory/embed/ollama.py +92 -0
  13. code_memory/embed/tei.py +106 -0
  14. code_memory/episodic/__init__.py +3 -0
  15. code_memory/episodic/sqlite_store.py +278 -0
  16. code_memory/extractor/__init__.py +3 -0
  17. code_memory/extractor/csproj.py +166 -0
  18. code_memory/extractor/dll.py +385 -0
  19. code_memory/extractor/gitignore.py +162 -0
  20. code_memory/extractor/nuget.py +275 -0
  21. code_memory/extractor/sanity.py +124 -0
  22. code_memory/extractor/sln.py +108 -0
  23. code_memory/extractor/treesitter.py +1172 -0
  24. code_memory/graph/__init__.py +3 -0
  25. code_memory/graph/falkor_store.py +740 -0
  26. code_memory/mcp_server.py +1816 -0
  27. code_memory/metrics.py +260 -0
  28. code_memory/orchestrator/__init__.py +13 -0
  29. code_memory/orchestrator/git_delta.py +211 -0
  30. code_memory/orchestrator/ingest_state.py +71 -0
  31. code_memory/orchestrator/pipeline.py +1478 -0
  32. code_memory/orchestrator/reset.py +130 -0
  33. code_memory/orchestrator/resolver.py +825 -0
  34. code_memory/orchestrator/retrieve.py +505 -0
  35. code_memory/resilience.py +73 -0
  36. code_memory/sync/__init__.py +20 -0
  37. code_memory/sync/autostart/__init__.py +42 -0
  38. code_memory/sync/autostart/base.py +106 -0
  39. code_memory/sync/autostart/launchd.py +115 -0
  40. code_memory/sync/autostart/schtasks.py +155 -0
  41. code_memory/sync/autostart/systemd.py +113 -0
  42. code_memory/sync/hooks.py +164 -0
  43. code_memory/sync/safety.py +65 -0
  44. code_memory/sync/snapshot.py +461 -0
  45. code_memory/sync/store.py +399 -0
  46. code_memory/sync/sync.py +405 -0
  47. code_memory/sync/watcher.py +320 -0
  48. code_memory/vector/__init__.py +3 -0
  49. code_memory/vector/qdrant_store.py +302 -0
  50. flurryx_code_memory-0.4.0.dist-info/METADATA +26 -0
  51. flurryx_code_memory-0.4.0.dist-info/RECORD +53 -0
  52. flurryx_code_memory-0.4.0.dist-info/WHEEL +4 -0
  53. flurryx_code_memory-0.4.0.dist-info/entry_points.txt +3 -0
code_memory/cli.py ADDED
@@ -0,0 +1,1192 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import os
5
+ import sys
6
+ from dataclasses import asdict
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
+ import typer
11
+ from rich import print as rprint
12
+
13
+ from dataclasses import asdict as _asdict
14
+
15
+ from .config import CONFIG, detect_project_slug
16
+ from .episodic import Episode
17
+ from .graph import FalkorStore
18
+ from .orchestrator import Pipeline, Retriever, list_projects, reset_all, reset_project
19
+ from .orchestrator import git_delta as _git_delta
20
+
21
+
22
def _graph_for(project: str | None) -> FalkorStore:
    """Build the ``FalkorStore`` for a project.

    When ``project`` is empty or ``None`` the slug comes from
    ``detect_project_slug()`` called without arguments.
    """
    if not project:
        project = detect_project_slug()
    graph_name = CONFIG.for_project(project).falkor_graph
    return FalkorStore(graph_name=graph_name)
26
+
27
# Root Typer application; the ``@app.command()`` functions below register on it.
app = typer.Typer(
    no_args_is_help=True,
    add_completion=False,
    help="code-memory CLI",
)


# Shared ``--project`` / ``-p`` option, used as the default value of the
# ``project`` parameter on the commands in this module.
ProjectOpt = typer.Option(
    None,
    "--project",
    "-p",
    help="Project slug for namespaced storage. Auto-detected if omitted.",
)

# Shared ``--json`` flag; commands hand its value to ``_emit(..., as_json=...)``.
JsonOpt = typer.Option(
    False,
    "--json",
    help="Emit machine-readable JSON to stdout instead of rich output.",
)
42
+
43
+
44
+ def _emit(payload: Any, *, as_json: bool) -> None:
45
+ if as_json:
46
+ sys.stdout.write(json.dumps(payload, default=str))
47
+ sys.stdout.write("\n")
48
+ sys.stdout.flush()
49
+ else:
50
+ rprint(payload)
51
+
52
+
53
+ def _resolve_or_none(resolver: Any, text: str) -> str | None:
54
+ """Run entity resolution defensively; swallow any failure to None.
55
+
56
+ The CLI's extract-claims path runs from a detached hook process —
57
+ we'd rather store a claim with a NULL entity ID than crash the whole
58
+ extraction because Qdrant blipped or Ollama timed out.
59
+ """
60
+ if resolver is None:
61
+ return None
62
+ try:
63
+ ref = resolver.resolve(text)
64
+ except Exception: # noqa: BLE001
65
+ return None
66
+ return ref.id if ref is not None else None
67
+
68
+
69
@app.command()
def ingest(
    root: Path = typer.Argument(..., exists=True, file_okay=False, dir_okay=True),
    project: str | None = ProjectOpt,
    full: bool = typer.Option(
        False, "--full", help="Force a full walk; ignore stored state."
    ),
    since: str | None = typer.Option(
        None, "--since", help="Base ref (branch/tag/sha) to diff against HEAD."
    ),
    dry_run: bool = typer.Option(
        False, "--dry-run", help="Show what would be ingested; don't write."
    ),
    no_vectors: bool = typer.Option(
        False,
        "--no-vectors",
        help=(
            "Skip embedding + vector store writes. Builds only the symbol "
            "graph (callers/definitions/importers still work; semantic "
            "retrieve will be empty). Drops Ollama from the critical path "
            "— large repos that don't need semantic recall finish in a "
            "fraction of the time."
        ),
    ),
    as_json: bool = JsonOpt,
) -> None:
    """Ingest a repository.

    Default: git-aware incremental — diff prior state to HEAD.
    """
    # An explicit --project wins; otherwise the slug is derived from ROOT.
    slug = project if project else detect_project_slug(root)
    mode = "full" if full else "auto"

    pipeline = Pipeline(project=slug, skip_vectors=no_vectors)
    stats = pipeline.ingest_repo(root, mode=mode, since=since, dry_run=dry_run)

    payload = {"project": slug, "dry_run": dry_run, "ingested": asdict(stats)}
    _emit(payload, as_json=as_json)
111
+
112
+
113
@app.command("ingest-status")
def ingest_status(
    root: Path = typer.Argument(Path("."), exists=True, file_okay=False, dir_okay=True),
    project: str | None = ProjectOpt,
    as_json: bool = JsonOpt,
) -> None:
    """Show stored ingest state for ROOT (last commit, branch, drift vs HEAD)."""
    slug = project or detect_project_slug(root)
    prior = Pipeline(project=slug).state.get(root)

    if prior is None:
        stored_state = None
    else:
        stored_state = {
            "last_sha": prior.last_sha,
            "last_ts": prior.last_ts,
            "branch": prior.branch,
        }
    payload: dict[str, object] = {
        "project": slug,
        "repo_root": str(Path(root).resolve()),
        "state": stored_state,
    }

    if not _git_delta.is_git_repo(root):
        payload["git"] = False
    else:
        try:
            # Read both values before touching the payload so a failure on
            # either leaves neither key behind.
            head = _git_delta.head_sha(root)
            branch = _git_delta.current_branch(root)
            payload["head_sha"] = head
            payload["branch"] = branch
            # Drift is only reported when the stored commit is still reachable.
            if prior is not None and _git_delta.is_reachable(root, prior.last_sha):
                delta = _git_delta.diff(root, prior.last_sha, head)
                payload["drift"] = {
                    "changed": len(delta.changed),
                    "deleted": len(delta.deleted),
                }
            payload["dirty"] = len(_git_delta.dirty_files(root))
        except _git_delta.GitError as err:
            payload["git_error"] = str(err)

    _emit(payload, as_json=as_json)
154
+
155
+
156
@app.command("ingest-watch")
def ingest_watch(
    file: Path | None = typer.Option(
        None,
        "--file",
        help=(
            "Override snapshot path. Defaults to "
            "$CODEMEMORY_PROGRESS_FILE or ~/.cache/code-memory/"
            "ingest-progress.json (same path the ingest pipeline "
            "writes to)."
        ),
    ),
    interval: float = typer.Option(
        0.25, "--interval", help="Poll cadence in seconds."
    ),
    stale_after: float = typer.Option(
        10.0,
        "--stale-after",
        help="Show 'idle' state if no snapshot update for this many seconds.",
    ),
) -> None:
    """Live ingest progressbar.

    Run in any real terminal (your own iTerm pane, tmux split, etc.)
    while an agent or another process runs ``code-memory ingest``.
    Renders a rich live bar reading the same snapshot file the pipeline
    writes on every tick. Exits when the snapshot reports ``done`` or on
    Ctrl-C.
    """
    from .orchestrator.pipeline import _default_progress_file

    path = file or _default_progress_file()

    try:
        from rich.console import Console
        from rich.progress import (
            BarColumn,
            MofNCompleteColumn,
            Progress,
            SpinnerColumn,
            TextColumn,
            TimeElapsedColumn,
            TimeRemainingColumn,
        )
    except Exception as exc:  # noqa: BLE001
        rprint(f"[red]rich not available: {exc}[/red]")
        raise typer.Exit(code=1) from exc

    progress = Progress(
        SpinnerColumn(style="cyan"),
        TextColumn("[bold cyan]code-memory[/] {task.description}"),
        BarColumn(bar_width=None),
        MofNCompleteColumn(),
        TextColumn(
            "[green]{task.fields[symbols]}[/]sym "
            "[magenta]{task.fields[chunks]}[/]chk "
            "[yellow]{task.fields[skipped]}[/]skip "
            "[dim]{task.fields[rate]}/s {task.fields[state]}[/]"
        ),
        TimeElapsedColumn(),
        TimeRemainingColumn(),
        console=Console(),
        refresh_per_second=8,
    )

    import time as _time

    progress.start()
    try:
        # Registered inside the try so the live display is always stopped,
        # even if task creation fails.
        task_id = progress.add_task(
            "waiting…", total=None, symbols=0, chunks=0, skipped=0, rate="0.0", state="idle"
        )
        while True:
            snap: dict[str, Any] | None = None
            try:
                loaded = json.loads(path.read_text()) if path.exists() else None
            except Exception:  # noqa: BLE001 — race with writer; retry
                loaded = None
            # Only a JSON object is a usable snapshot; any other JSON value
            # (list, string, number) is treated like a missing file.
            if isinstance(loaded, dict):
                snap = loaded

            if snap:
                finished = bool(snap.get("done"))
                try:
                    ts = float(snap.get("ts", 0.0))
                    state = "running"
                    if _time.time() - ts > stale_after:
                        state = "stale"
                    if finished:
                        state = "done"
                    fields: dict[str, Any] | None = {
                        "description": snap.get("label", "ingest"),
                        "completed": int(snap.get("files", 0)),
                        "total": snap.get("total"),
                        "symbols": int(snap.get("symbols", 0)),
                        "chunks": int(snap.get("chunks", 0)),
                        "skipped": int(snap.get("skipped", 0)),
                        "rate": f"{float(snap.get('rate', 0.0)):.1f}",
                        "state": state,
                    }
                except (TypeError, ValueError):
                    # Null or non-numeric field: skip this tick's redraw
                    # instead of crashing the watcher.
                    fields = None
                if fields is not None:
                    progress.update(task_id, **fields)
                if finished:
                    progress.refresh()
                    break
            _time.sleep(interval)
    except KeyboardInterrupt:
        pass
    finally:
        progress.stop()
261
+
262
+
263
@app.command()
def reingest(
    path: Path = typer.Argument(..., exists=True, file_okay=True, dir_okay=False),
    project: str | None = ProjectOpt,
    as_json: bool = JsonOpt,
) -> None:
    """Re-ingest a single file."""
    slug = project or detect_project_slug(path)
    extraction = Pipeline(project=slug).reingest_file(path)

    # ``reingest_file`` returning None is reported as an unsupported file
    # type and exits with status 1.
    if extraction is None:
        failure = {"error": "unsupported file type", "path": str(path)}
        _emit(failure, as_json=as_json)
        raise typer.Exit(code=1)

    summary = {
        "project": slug,
        "path": extraction.path,
        "symbols": len(extraction.symbols),
        "imports": len(extraction.imports),
    }
    _emit(summary, as_json=as_json)
285
+
286
+
287
@app.command()
def retrieve(
    query: str = typer.Argument(...),
    k: int = typer.Option(8, "--k", help="top-k code"),
    eps: int = typer.Option(5, "--eps", help="top-k episodes"),
    include_idle_episodes: bool = typer.Option(
        False,
        "--include-idle-episodes",
        help="Include episodes with verdict='idle' (suppressed by default).",
    ),
    project: str | None = ProjectOpt,
    as_json: bool = JsonOpt,
) -> None:
    """Retrieve context pack for a natural-language query."""
    retriever = Retriever(project=project)
    pack = retriever.retrieve(
        query,
        top_k_code=k,
        top_k_eps=eps,
        include_idle_episodes=include_idle_episodes,
    )
    # Human output is the pack's own rendering; JSON output is its dict form.
    if not as_json:
        rprint(pack.render())
        return
    _emit(pack.to_dict(), as_json=True)
312
+
313
+
314
@app.command()
def record(
    prompt: str = typer.Option(..., "--prompt"),
    plan: str = typer.Option("", "--plan"),
    patch: str = typer.Option("", "--patch"),
    verdict: str = typer.Option("", "--verdict"),
    project: str | None = ProjectOpt,
    as_json: bool = JsonOpt,
) -> None:
    """Record a task episode."""
    pipeline = Pipeline(project=project)
    # Empty-string options are stored as None rather than "".
    episode = Episode(
        prompt=prompt,
        plan=plan if plan else None,
        patch=patch if patch else None,
        verdict=verdict if verdict else None,
    )
    episode_id = pipeline.record_episode(episode)
    _emit({"project": pipeline.slug, "id": episode_id}, as_json=as_json)
333
+
334
+
335
@app.command("record-read")
def record_read(
    tool: str = typer.Option(..., "--tool", help="Filesystem tool name (grep, read, bash, glob)"),
    path: str = typer.Option("", "--path", help="File path or pattern accessed"),
    chars: int = typer.Option(0, "--chars", help="Output character count"),
    session_id: str = typer.Option("", "--session-id"),
    project: str | None = ProjectOpt,
    as_json: bool = JsonOpt,
) -> None:
    """Record a filesystem read for MCP efficiency tracking.

    Fire-and-forget metrics call — best-effort, never crashes.
    Writes to $CODEMEMORY_METRICS_DB when set, otherwise to
    ``metrics.db`` under the configured data directory.
    """
    try:
        from .metrics import MetricsStore

        # Resolved inside the try so a failure while locating the database
        # is reported as ``recorded: false`` like any other error.
        db_path = os.environ.get("CODEMEMORY_METRICS_DB") or str(
            CONFIG.data_dir / "metrics.db"
        )
        ms = MetricsStore(Path(db_path))
        ms.record_fs_read(
            tool=tool,
            path=path,
            project=project or "",
            output_chars=chars,
            session_id=session_id,
        )
        _emit({"recorded": True}, as_json=as_json)
    except Exception as exc:  # noqa: BLE001 — metrics must never break the caller
        _emit({"recorded": False, "error": str(exc)}, as_json=as_json)
364
+
365
+
366
@app.command("dedupe-episodes")
def dedupe_episodes(
    project: str | None = ProjectOpt,
    as_json: bool = JsonOpt,
) -> None:
    """Compact duplicate episodes by prompt hash, prune their vectors.

    Same prompt asserted N times collapses to one row whose ts is the
    most-recent observation. Matching Qdrant points are deleted so the
    vector store stays aligned with SQLite.
    """
    pipeline = Pipeline(project=project)
    outcome = pipeline.dedupe_episodes()
    # The pipeline's result mapping is merged after the project key.
    report = {"project": pipeline.slug}
    report.update(outcome)
    _emit(report, as_json=as_json)
380
+
381
+
382
@app.command("extract-claims")
def extract_claims(
    # No ``-p`` short flag here: ``ProjectOpt`` already owns ``-p`` and is
    # declared later in this signature, so Click bound ``-p`` to
    # ``--project`` and a second declaration was silently shadowed.
    prompt: list[str] = typer.Option(
        [],
        "--prompt",
        help="User prompt text. Repeat for multiple prompts.",
    ),
    session_id: str = typer.Option("", "--session-id"),
    project: str | None = ProjectOpt,
    as_json: bool = JsonOpt,
) -> None:
    """Run Graphiti-style claim extraction over user prompts.

    Honors ``CLAIMS_EXTRACTION`` env var: when disabled, emits a
    ``{"status": "disabled"}`` payload and exits 0 (callers can treat
    this as a no-op).
    """
    from .claims import ClaimExtractor, ClaimRecord, ClaimsStore, EntityResolver
    from .claims.extractor import ExtractionError
    from .config import CONFIG
    from .orchestrator import git_delta
    import time as _t

    if not CONFIG.claims_enabled:
        _emit(
            {
                "status": "disabled",
                "hint": (
                    "set CLAIMS_EXTRACTION=true after "
                    "`ollama pull gemma2:9b`."
                ),
            },
            as_json=as_json,
        )
        raise typer.Exit(code=0)

    # Drop empty / whitespace-only prompts before doing any work.
    prompts = [p.strip() for p in prompt if p and p.strip()]
    if not prompts:
        _emit({"claims_added": 0, "claims": []}, as_json=as_json)
        raise typer.Exit(code=0)

    slug = detect_project_slug() if project is None else project
    cfg = CONFIG.for_project(slug)

    # Claims are stamped with the HEAD sha of the current working directory
    # when it is a git repo; otherwise head stays None.
    repo = Path.cwd()
    head = None
    if git_delta.is_git_repo(repo):
        try:
            head = git_delta.head_sha(repo)
        except git_delta.GitError:
            head = None

    store = ClaimsStore(path=cfg.claims_db)
    extractor = ClaimExtractor()
    # Entity resolution is best-effort: a Qdrant outage shouldn't lose
    # claims, so a None resolver means "skip resolution, persist with
    # NULL entity IDs". The resolver itself constructs lazily and only
    # warms on the first resolve() call.
    resolver: EntityResolver | None
    try:
        resolver = EntityResolver(project=slug, cfg=cfg)
    except Exception:  # noqa: BLE001
        resolver = None
    added = 0
    samples: list[dict[str, object]] = []
    try:
        for text in prompts:
            try:
                claims = extractor.extract(text)
            except ExtractionError as exc:
                # Report the failure along with what was stored so far and
                # exit 0; the surrounding ``finally`` still closes resources.
                _emit(
                    {
                        "error": "ExtractionError",
                        "message": str(exc),
                        "claims_added": added,
                    },
                    as_json=as_json,
                )
                raise typer.Exit(code=0)
            now = _t.time()
            for c in claims:
                subj_id = _resolve_or_none(resolver, c.subject)
                obj_id = _resolve_or_none(resolver, c.object)
                rec = ClaimRecord(
                    subject=c.subject,
                    predicate=c.predicate,
                    object=c.object,
                    polarity=c.polarity,
                    confidence=c.confidence,
                    evidence_span=c.evidence_span,
                    valid_at=now,
                    head_sha=head,
                    session_id=session_id or None,
                    entity_subject_id=subj_id,
                    entity_object_id=obj_id,
                )
                store.upsert(rec)
                added += 1
                # Only the first five claims are echoed back as a sample.
                if len(samples) < 5:
                    samples.append(
                        {
                            "subject": rec.subject,
                            "predicate": rec.predicate,
                            "object": rec.object,
                            "confidence": rec.confidence,
                        }
                    )
    finally:
        extractor.close()
        store.close()

    _emit(
        {"project": slug, "claims_added": added, "sample": samples},
        as_json=as_json,
    )
498
+
499
+
500
@app.command()
def project(
    root: Path | None = typer.Argument(None, exists=True, file_okay=False, dir_okay=True),
    as_json: bool = JsonOpt,
) -> None:
    """Print the resolved project slug for ROOT (or cwd)."""
    resolved = detect_project_slug(root)
    _emit({"slug": resolved}, as_json=as_json)
507
+
508
+
509
@app.command()
def projects(as_json: bool = JsonOpt) -> None:
    """List every project slug known to the storage backends."""
    known = list_projects()
    _emit({"projects": known}, as_json=as_json)
513
+
514
+
515
@app.command()
def reset(
    root: Path | None = typer.Argument(
        None,
        exists=True,
        file_okay=False,
        dir_okay=True,
        help="Path used to auto-detect the project slug. Ignored with --all.",
    ),
    project: str | None = ProjectOpt,
    all_: bool = typer.Option(
        False, "--all", help="Wipe every project (use with care)."
    ),
    include_episodes: bool = typer.Option(
        False,
        "--include-episodes",
        help="Also drop episodic memory (conversation history). Destructive.",
    ),
    yes: bool = typer.Option(
        False, "--yes", "-y", help="Skip confirmation prompt."
    ),
    as_json: bool = JsonOpt,
) -> None:
    """Erase code-index data for a project (or every project).

    Default scope: Qdrant code collection + FalkorDB graph + ingest_state.
    Episodes (conversation memory) are preserved unless --include-episodes.
    """
    # Work out which projects are in scope and how to describe them.
    if not all_:
        slug = project or detect_project_slug(root)
        targets = [slug]
        scope_desc = f"project '{slug}'"
    else:
        targets = list_projects()
        scope_desc = f"all {len(targets)} projects"

    if not targets:
        _emit({"reset": [], "note": "nothing to reset"}, as_json=as_json)
        return

    # Ask before destroying anything unless --yes was given; declining
    # exits with status 1.
    if not yes:
        suffix = " + episodes" if include_episodes else ""
        proceed = typer.confirm(
            f"Reset {scope_desc}{suffix}? This drops vectors + graph + ingest_state.",
            default=False,
        )
        if not proceed:
            raise typer.Exit(code=1)

    if all_:
        results = reset_all(include_episodes=include_episodes)
    else:
        results = []
        for name in targets:
            results.append(reset_project(name, include_episodes=include_episodes))

    _emit({"reset": [asdict(item) for item in results]}, as_json=as_json)
575
+
576
+
577
@app.command()
def resolve(
    project: str | None = ProjectOpt,
    as_json: bool = JsonOpt,
) -> None:
    """Re-run the symbol resolver against the current graph.

    Use after writes that mutated cross-file call relationships (rename,
    move, delete). Cheaper than a full re-ingest because it skips
    tree-sitter and embedding — it only re-points placeholder CALLS
    edges to real Symbol nodes.
    """
    from .orchestrator.resolver import resolve_graph

    pipeline = Pipeline(project=project)
    stats = resolve_graph(pipeline.graph)

    # Flatten the resolver's counters into the CLI's output keys.
    report = {
        "project": pipeline.slug,
        "placeholders": stats.placeholders,
        "edges_total": stats.edges_total,
        "resolved_same_file": stats.edges_resolved_same_file,
        "resolved_imported": stats.edges_resolved_imported,
        "resolved_unique": stats.edges_resolved_unique,
        "ambiguous": stats.edges_left_ambiguous,
        "external": stats.edges_left_external,
        "placeholders_deleted": stats.placeholders_deleted,
        "import_aliases_added": stats.import_aliases_added,
    }
    _emit(report, as_json=as_json)
608
+
609
+
610
+ def _parse_duration(spec: str) -> float:
611
+ """Parse strings like ``30d`` / ``12h`` / ``45m`` / ``900s`` into seconds."""
612
+ spec = spec.strip().lower()
613
+ if not spec:
614
+ raise typer.BadParameter("duration is empty")
615
+ unit_to_secs = {"s": 1.0, "m": 60.0, "h": 3600.0, "d": 86400.0, "w": 604800.0}
616
+ unit = spec[-1]
617
+ if unit not in unit_to_secs:
618
+ # treat as bare seconds for ergonomics — ``--older-than 600`` works
619
+ try:
620
+ return float(spec)
621
+ except ValueError as e:
622
+ raise typer.BadParameter(
623
+ f"unknown duration unit in {spec!r}; use s/m/h/d/w"
624
+ ) from e
625
+ try:
626
+ value = float(spec[:-1])
627
+ except ValueError as e:
628
+ raise typer.BadParameter(f"could not parse duration {spec!r}") from e
629
+ return value * unit_to_secs[unit]
630
+
631
+
632
@app.command()
def vacuum(
    project: str | None = ProjectOpt,
    before: str | None = typer.Option(
        None,
        "--before",
        help=(
            "Drop tombstones invalidated at or before this git ref "
            "(branch / tag / sha). Mutually exclusive with --older-than / --all."
        ),
    ),
    older_than: str | None = typer.Option(
        None,
        "--older-than",
        help=(
            "Drop tombstones older than this duration (e.g. 30d, 12h). "
            "Mutually exclusive with --before / --all."
        ),
    ),
    drop_all: bool = typer.Option(
        False,
        "--all",
        help="Drop every tombstone regardless of age. Mutually exclusive with the other modes.",
    ),
    repo: Path = typer.Option(
        Path("."),
        "--repo",
        exists=True,
        file_okay=False,
        dir_okay=True,
        help="Repo root used to resolve --before refs to topological ordinals.",
    ),
    dry_run: bool = typer.Option(
        False,
        "--dry-run",
        help="Show what would be removed without writing.",
    ),
    as_json: bool = JsonOpt,
) -> None:
    """Drop tombstoned graph elements to bound monotonic growth.

    Tombstones accumulate because temporal deletes preserve history.
    Once a SHA is "ancient" for your workflow (released, archived, or
    just irrelevant), vacuum reclaims the space.
    """
    # Exactly one of the three modes must be selected.
    selected = sum((before is not None, older_than is not None, bool(drop_all)))
    if selected != 1:
        raise typer.BadParameter(
            "specify exactly one of --before / --older-than / --all"
        )

    # Auto-detect the slug from --repo (which defaults to the current
    # directory) so the graph being vacuumed is the one belonging to the
    # repository whose refs are resolved below. Resolved once and reused
    # for both the store and the report.
    slug = project or detect_project_slug(repo)
    graph = _graph_for(slug)
    kwargs: dict[str, Any] = {"dry_run": dry_run}
    payload: dict[str, Any] = {"project": slug, "dry_run": dry_run}

    if before is not None:
        try:
            sha = _git_delta.resolve_ref(repo, before)
        except _git_delta.GitError as e:
            raise typer.BadParameter(f"could not resolve --before {before!r}: {e}") from e
        ord_ = _git_delta.commit_ordinal(repo, sha)
        if ord_ is None:
            raise typer.BadParameter(
                f"could not compute ordinal for {sha} (shallow clone?)"
            )
        kwargs["before_ord"] = ord_
        payload["mode"] = "before"
        payload["before_sha"] = sha
        payload["before_ord"] = ord_
    elif older_than is not None:
        seconds = _parse_duration(older_than)
        kwargs["older_than_seconds"] = seconds
        payload["mode"] = "older_than"
        payload["older_than_seconds"] = seconds
    else:
        kwargs["drop_all"] = True
        payload["mode"] = "all"

    payload["removed"] = graph.vacuum(**kwargs)
    _emit(payload, as_json=as_json)
718
+
719
+
720
@app.command()
def drift(
    project: str | None = ProjectOpt,
    repo: Path = typer.Option(
        Path("."),
        "--repo",
        exists=True,
        file_okay=False,
        dir_okay=True,
        help="Repo root used to read HEAD.",
    ),
    as_json: bool = JsonOpt,
) -> None:
    """List symbols whose ``last_seen_sha`` doesn't match HEAD.

    Useful for sanity-checking a long-running watcher and for surfacing
    references in comments / docs that point at code the most recent
    ingest no longer confirms.
    """
    try:
        head = _git_delta.head_sha(repo)
    except _git_delta.GitError as e:
        raise typer.BadParameter(f"could not read HEAD from {repo}: {e}") from e

    # Auto-detect the slug from the same --repo that HEAD was read from
    # (it defaults to the current directory), so the HEAD sha and the graph
    # being queried always belong to the same repository.
    slug = project or detect_project_slug(repo)
    rows = _graph_for(slug).drift(head)
    _emit(
        {
            "project": slug,
            "head_sha": head,
            "count": len(rows),
            "items": rows,
        },
        as_json=as_json,
    )
754
+
755
+
756
@app.command()
def callers(
    symbol: str = typer.Argument(..., help="Symbol name to look up callers for."),
    depth: int = typer.Option(1, "--depth", help="Traversal depth (1-3)."),
    project: str | None = ProjectOpt,
    as_json: bool = JsonOpt,
) -> None:
    """List files that call a symbol (reverse CALLS edges)."""
    graph = _graph_for(project)
    found = graph.callers(symbol, depth=depth)
    _emit({"symbol": symbol, "callers": found}, as_json=as_json)
766
+
767
+
768
@app.command()
def callees(
    symbol: str = typer.Argument(..., help="Symbol name to look up callees for."),
    depth: int = typer.Option(1, "--depth", help="Traversal depth (1-3)."),
    project: str | None = ProjectOpt,
    as_json: bool = JsonOpt,
) -> None:
    """List symbols called from the file that defines ``symbol``."""
    graph = _graph_for(project)
    found = graph.callees(symbol, depth=depth)
    _emit({"symbol": symbol, "callees": found}, as_json=as_json)
778
+
779
+
780
@app.command()
def importers(
    target: str = typer.Argument(..., help="Module / package / path."),
    project: str | None = ProjectOpt,
    as_json: bool = JsonOpt,
) -> None:
    """List files that import a module or package."""
    graph = _graph_for(project)
    found = graph.importers(target)
    _emit({"target": target, "importers": found}, as_json=as_json)
789
+
790
+
791
@app.command()
def dependencies(
    file: str = typer.Argument(..., help="Absolute file path."),
    depth: int = typer.Option(1, "--depth", help="Traversal depth (1-3)."),
    project: str | None = ProjectOpt,
    as_json: bool = JsonOpt,
) -> None:
    """List modules imported by a file (forward IMPORTS edges)."""
    graph = _graph_for(project)
    found = graph.dependencies(file, depth=depth)
    _emit({"file": file, "dependencies": found}, as_json=as_json)
801
+
802
+
803
@app.command()
def injects(
    symbol: str = typer.Argument(..., help="Symbol whose defining file is inspected."),
    project: str | None = ProjectOpt,
    as_json: bool = JsonOpt,
) -> None:
    """List DI tokens injected by the file that defines ``symbol``."""
    graph = _graph_for(project)
    found = graph.injects(symbol)
    _emit({"symbol": symbol, "injects": found}, as_json=as_json)
812
+
813
+
814
@app.command()
def injectors(
    token: str = typer.Argument(..., help="DI token name."),
    project: str | None = ProjectOpt,
    as_json: bool = JsonOpt,
) -> None:
    """List files that inject ``token`` (reverse INJECTS edges)."""
    graph = _graph_for(project)
    found = graph.injectors(token)
    _emit({"token": token, "injectors": found}, as_json=as_json)
823
+
824
+
825
@app.command()
def definitions(
    symbol: str = typer.Argument(..., help="Symbol name to locate."),
    project: str | None = ProjectOpt,
    as_json: bool = JsonOpt,
) -> None:
    """List all files+line ranges that define ``symbol``."""
    graph = _graph_for(project)
    found = graph.definitions(symbol)
    _emit({"symbol": symbol, "definitions": found}, as_json=as_json)
834
+
835
+
836
+ # ---------------------------------------------------------------------------
837
+ # Team sync (snapshot + watcher + autostart + hooks)
838
+ # ---------------------------------------------------------------------------
839
+
840
+
841
# Sub-applications mounted on the root ``app`` as the ``snapshot``,
# ``hooks`` and ``autostart`` command groups.
snapshot_app = typer.Typer(help="Snapshot management (publish, list, gc).")
hooks_app = typer.Typer(help="Git hooks installer.")
autostart_app = typer.Typer(help="Cross-platform autostart service.")

app.add_typer(snapshot_app, name="snapshot")
app.add_typer(hooks_app, name="hooks")
app.add_typer(autostart_app, name="autostart")
847
+
848
+
849
@app.command()
def sync(
    root: Path = typer.Argument(
        Path("."), exists=True, file_okay=False, dir_okay=True, help="Repo root."
    ),
    project: str | None = ProjectOpt,
    publish: bool = typer.Option(
        False,
        "--publish",
        help="If on the canonical branch, publish a fresh snapshot after sync.",
    ),
    canonical_branch: str = typer.Option(
        "main", "--canonical-branch", help="Branch whose tip publishes snapshots."
    ),
    trigger: str = typer.Option(
        "manual", "--trigger", help="Free-form tag (e.g. post-merge, watcher)."
    ),
    no_fetch: bool = typer.Option(
        False, "--no-fetch", help="Skip `git fetch` of the snapshot branch."
    ),
    as_json: bool = JsonOpt,
) -> None:
    """Reconcile local code-memory state with git HEAD.

    Pulls a snapshot if one exists for HEAD or a recent ancestor,
    otherwise runs an incremental ingest. Idempotent: cheap on
    quiet repos, fast on small diffs, falls back to a full ingest
    only when nothing else is available.
    """
    from .sync import sync_repo

    # The CLI exposes the negative flag; ``sync_repo`` takes the positive one.
    should_fetch = not no_fetch
    outcome = sync_repo(
        root,
        project=project,
        publish=publish,
        canonical_branch=canonical_branch,
        trigger=trigger,
        fetch=should_fetch,
    )
    _emit(_asdict(outcome), as_json=as_json)
889
+
890
+
891
@app.command()
def watch(
    root: Path = typer.Argument(
        Path("."), exists=True, file_okay=False, dir_okay=True, help="Repo root."
    ),
    project: str | None = ProjectOpt,
) -> None:
    """Run the filesystem watcher in the foreground until interrupted."""
    from .sync.safety import UnsafeWatchRootError, assert_safe_watch_root
    from .sync.watcher import run_foreground

    # Refuse roots the safety check rejects: print the reason to stderr
    # and exit with status 2.
    try:
        checked_root = assert_safe_watch_root(root)
    except UnsafeWatchRootError as err:
        typer.echo(f"error: {err}", err=True)
        raise typer.Exit(code=2) from err

    run_foreground(checked_root, project=project)
909
+
910
+
911
@app.command()
def status(
    root: Path = typer.Argument(
        Path("."), exists=True, file_okay=False, dir_okay=True, help="Repo root."
    ),
    project: str | None = ProjectOpt,
    as_json: bool = JsonOpt,
) -> None:
    """Show a unified sync status (autostart, hooks, snapshot, drift)."""
    from .sync.autostart import ensure_autostart  # noqa: F401 - imported for side-types
    from .sync.autostart.base import get_adapter
    from .sync.hooks import hook_status
    from .sync.store import SnapshotStore

    slug = project or detect_project_slug(root)
    # Resolve the root once; every probe that needs an absolute path shares it.
    resolved_root = Path(root).resolve()
    payload: dict[str, object] = {"project": slug, "root": str(resolved_root)}

    # Each section below is an independent, best-effort probe: a failure is
    # recorded in the payload instead of aborting the whole report.

    # autostart
    try:
        adapter = get_adapter()
        st = adapter.status(resolved_root)
        payload["autostart"] = {
            "installed": st.installed,
            "running": st.running,
            "label": st.label,
            "unit_path": st.unit_path,
            "note": st.note,
        }
    except Exception as e:  # noqa: BLE001
        payload["autostart"] = {"error": str(e)}

    # hooks
    # Guarded like the other probes so a hook-inspection failure cannot hide
    # the remaining sections. The "hooks" key is always present, mirroring
    # how the autostart probe reports its own errors.
    try:
        payload["hooks"] = hook_status(resolved_root)
    except Exception as e:  # noqa: BLE001
        payload["hooks"] = {"error": str(e)}

    # snapshot drift
    try:
        # Snapshot keys are only added when the root is a git repository.
        if _git_delta.is_git_repo(root):
            head = _git_delta.head_sha(root)
            store = SnapshotStore(resolved_root)
            store.fetch()
            payload["head_sha"] = head
            payload["snapshot_for_head"] = store.has(head)
            payload["local_snapshots"] = len(store.list_local())
            payload["remote_snapshots"] = len(store.list_remote())
    except Exception as e:  # noqa: BLE001
        payload["snapshot_error"] = str(e)

    # ingest state
    try:
        cfg = CONFIG.for_project(slug)
        from .orchestrator.ingest_state import IngestStateStore

        # NOTE(review): the lookup uses the root as given (not the resolved
        # path), exactly as before - confirm this matches how the ingest
        # pipeline keys its state.
        prior = IngestStateStore(cfg.episodic_db).get(root)
        payload["ingest_state"] = (
            None
            if prior is None
            else {"last_sha": prior.last_sha, "branch": prior.branch, "last_ts": prior.last_ts}
        )
    except Exception as e:  # noqa: BLE001
        payload["ingest_state_error"] = str(e)

    _emit(payload, as_json=as_json)
973
+
974
+
975
+ # ---- snapshot subcommands -------------------------------------------------
976
+
977
+
978
@snapshot_app.command("publish")
def snapshot_publish(
    root: Path = typer.Argument(
        Path("."), exists=True, file_okay=False, dir_okay=True
    ),
    project: str | None = ProjectOpt,
    push: bool = typer.Option(True, "--push/--no-push", help="Push the snapshot branch."),
    as_json: bool = JsonOpt,
) -> None:
    """Build a snapshot for HEAD and push it to the snapshot branch."""
    import tempfile

    from .sync.snapshot import build_snapshot
    from .sync.store import SnapshotStore

    # Snapshots are keyed by commit, so a non-git root is reported and ends
    # the command with exit status 1.
    if not _git_delta.is_git_repo(root):
        _emit({"error": "not a git repo"}, as_json=as_json)
        raise typer.Exit(code=1)

    slug = project or detect_project_slug(root)
    head = _git_delta.head_sha(root)
    branch = _git_delta.current_branch(root)
    snapshot = build_snapshot(
        project=slug,
        head_sha=head,
        branch=branch,
        state={"last_sha": head, "branch": branch},
    )

    # Reserve a scratch path (the handle is closed on leaving the `with`),
    # serialise the snapshot to it, read the bytes back, and always remove
    # the file afterwards.
    with tempfile.NamedTemporaryFile(suffix=".cmsnap", delete=False) as handle:
        scratch = Path(handle.name)
    try:
        snapshot.write(scratch)
        blob = scratch.read_bytes()
    finally:
        scratch.unlink(missing_ok=True)

    store = SnapshotStore(Path(root).resolve())
    info = snapshot.manifest
    size = len(blob)
    manifest: dict[str, object] = {
        "head_sha": head,
        "branch": branch,
        "size": size,
        "embed_model": info.embed_model,
        "embed_dim": info.embed_dim,
        "counts": info.counts,
        "content_sha256": info.content_sha256,
    }
    created = store.write(head, blob, manifest=manifest, push=push)

    summary = {
        "project": slug,
        "head": head,
        "created": created,
        "size": size,
        "counts": info.counts,
    }
    _emit(summary, as_json=as_json)
1033
+
1034
+
1035
@snapshot_app.command("list")
def snapshot_list(
    root: Path = typer.Argument(
        Path("."), exists=True, file_okay=False, dir_okay=True
    ),
    remote_only: bool = typer.Option(False, "--remote", help="Only list remote entries."),
    as_json: bool = JsonOpt,
) -> None:
    """List snapshots present on the snapshot branch."""
    from .sync.store import SnapshotStore

    repo_root = Path(root).resolve()
    store = SnapshotStore(repo_root)
    # The store is fetched before listing, whichever listing is requested.
    store.fetch()

    if remote_only:
        entries = store.list_remote()
    else:
        entries = store.list_local()

    serialised = [_asdict(entry) for entry in entries]
    _emit({"snapshots": serialised}, as_json=as_json)
1050
+
1051
+
1052
@snapshot_app.command("gc")
def snapshot_gc(
    root: Path = typer.Argument(
        Path("."), exists=True, file_okay=False, dir_okay=True
    ),
    # `min=0` makes Typer reject a negative count with a usage error.
    # "Keep the most recent N" has no meaning for N < 0, and how the store
    # would treat such a value is not visible from this command, so it is
    # stopped at the CLI boundary instead of being passed to a destructive
    # operation.
    keep: int = typer.Option(
        20,
        "--keep",
        min=0,
        help="Number of recent snapshots to keep.",
    ),
    push: bool = typer.Option(True, "--push/--no-push"),
    as_json: bool = JsonOpt,
) -> None:
    """Prune all but the most recent ``--keep`` snapshots."""
    from .sync.store import SnapshotStore

    store = SnapshotStore(Path(root).resolve())
    removed = store.gc(keep, push=push)
    # "kept" echoes the requested retention count, not a measured total.
    _emit({"removed": removed, "kept": keep}, as_json=as_json)
1067
+
1068
+
1069
+ # ---- hooks subcommands ----------------------------------------------------
1070
+
1071
+
1072
@hooks_app.command("install")
def hooks_install(
    root: Path = typer.Argument(Path("."), exists=True, file_okay=False, dir_okay=True),
    with_autostart: bool = typer.Option(
        True, "--autostart/--no-autostart", help="Also register OS autostart."
    ),
    as_json: bool = JsonOpt,
) -> None:
    """Install git hooks (and OS autostart) for this repo."""
    from .sync.hooks import install_hooks

    repo_root = Path(root).resolve()
    outcome = install_hooks(repo_root)
    payload: dict[str, object] = {
        "hooks_dir": outcome.hooks_dir,
        "installed": outcome.installed,
        "skipped": outcome.skipped,
    }

    if with_autostart:
        # Autostart registration is best-effort: any failure is reported in
        # the payload and never undoes or hides the hook installation result.
        try:
            from .sync.autostart import ensure_autostart

            unit = ensure_autostart(repo_root)
            payload["autostart"] = {
                field: getattr(unit, field)
                for field in ("installed", "running", "label", "unit_path", "note")
            }
        except Exception as e:  # noqa: BLE001
            payload["autostart_error"] = str(e)

    _emit(payload, as_json=as_json)
1104
+
1105
+
1106
@hooks_app.command("uninstall")
def hooks_uninstall(
    root: Path = typer.Argument(Path("."), exists=True, file_okay=False, dir_okay=True),
    with_autostart: bool = typer.Option(True, "--autostart/--no-autostart"),
    as_json: bool = JsonOpt,
) -> None:
    """Remove code-memory git hooks (and OS autostart)."""
    from .sync.hooks import uninstall_hooks

    repo_root = Path(root).resolve()
    outcome = uninstall_hooks(repo_root)
    # The result's `installed` field is what gets reported under "removed".
    payload: dict[str, object] = {
        "removed": outcome.installed,
        "skipped": outcome.skipped,
    }

    if with_autostart:
        # Best-effort: a failure while removing the autostart unit is
        # recorded in the payload instead of failing the command.
        try:
            from .sync.autostart.base import get_adapter

            unit = get_adapter().uninstall(repo_root)
            payload["autostart"] = {
                field: getattr(unit, field) for field in ("installed", "label")
            }
        except Exception as e:  # noqa: BLE001
            payload["autostart_error"] = str(e)

    _emit(payload, as_json=as_json)
1132
+
1133
+
1134
+ # ---- autostart subcommands ------------------------------------------------
1135
+
1136
+
1137
@autostart_app.command("install")
def autostart_install(
    root: Path = typer.Argument(Path("."), exists=True, file_okay=False, dir_okay=True),
    as_json: bool = JsonOpt,
) -> None:
    """Register the OS-level autostart service."""
    from .sync.autostart import ensure_autostart

    unit = ensure_autostart(Path(root).resolve())
    # Report the returned state field by field, in a fixed key order.
    report = {
        field: getattr(unit, field)
        for field in ("installed", "running", "label", "unit_path", "note")
    }
    _emit(report, as_json=as_json)
1156
+
1157
+
1158
@autostart_app.command("uninstall")
def autostart_uninstall(
    root: Path = typer.Argument(Path("."), exists=True, file_okay=False, dir_okay=True),
    as_json: bool = JsonOpt,
) -> None:
    """Remove the OS-level autostart service."""
    from .sync.autostart.base import get_adapter

    adapter = get_adapter()
    unit = adapter.uninstall(Path(root).resolve())
    # Only the installed flag and the label are reported after removal.
    report = {"installed": unit.installed, "label": unit.label}
    _emit(report, as_json=as_json)
1168
+
1169
+
1170
@autostart_app.command("status")
def autostart_status(
    root: Path = typer.Argument(Path("."), exists=True, file_okay=False, dir_okay=True),
    as_json: bool = JsonOpt,
) -> None:
    """Show OS autostart status for this repo."""
    from .sync.autostart.base import get_adapter

    adapter = get_adapter()
    unit = adapter.status(Path(root).resolve())
    # Report the adapter's state field by field, in a fixed key order.
    report = {
        field: getattr(unit, field)
        for field in ("installed", "running", "label", "unit_path", "note")
    }
    _emit(report, as_json=as_json)
1189
+
1190
+
1191
# Invoke the Typer application only when this module is executed as a script;
# importing the module must not start the CLI.
if __name__ == "__main__":
    app()