cherry-docs 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. app/__init__.py +0 -0
  2. app/repo_scope.py +24 -0
  3. app/services/__init__.py +0 -0
  4. app/services/agent_protocol.py +59 -0
  5. app/services/auto_promote_sessions.py +245 -0
  6. app/services/capture_adapters.py +89 -0
  7. app/services/capture_core.py +164 -0
  8. app/services/internal_memory_agent.py +214 -0
  9. app/services/memory_evidence.py +89 -0
  10. app/services/memory_extraction_normalize.py +134 -0
  11. app/services/memory_lifecycle.py +258 -0
  12. app/services/memory_profiles.py +88 -0
  13. app/services/memory_providers.py +113 -0
  14. app/services/memory_retrieval.py +327 -0
  15. app/services/memory_retrieval_scoring.py +106 -0
  16. app/services/memory_retrieval_text.py +113 -0
  17. app/services/memory_similarity.py +135 -0
  18. app/services/privacy.py +72 -0
  19. app/services/promoted_memory_answer.py +157 -0
  20. app/services/promoted_memory_pipeline.py +194 -0
  21. app/services/promoted_memory_store.py +57 -0
  22. cherry_docs-0.2.0.dist-info/METADATA +143 -0
  23. cherry_docs-0.2.0.dist-info/RECORD +42 -0
  24. cherry_docs-0.2.0.dist-info/WHEEL +5 -0
  25. cherry_docs-0.2.0.dist-info/entry_points.txt +4 -0
  26. cherry_docs-0.2.0.dist-info/top_level.txt +3 -0
  27. cherrydocs/__init__.py +3 -0
  28. cherrydocs/cli.py +213 -0
  29. cherrydocs/hook.py +27 -0
  30. cherrydocs/mcp.py +22 -0
  31. scripts/__init__.py +0 -0
  32. scripts/auto_promote_capture.py +63 -0
  33. scripts/check_size_limits.py +115 -0
  34. scripts/ci_auto_capture.py +289 -0
  35. scripts/claude_hooks/__init__.py +0 -0
  36. scripts/claude_hooks/state_manager.py +526 -0
  37. scripts/coverage_regression_gate.py +121 -0
  38. scripts/eval_projects.py +247 -0
  39. scripts/install.py +212 -0
  40. scripts/pr_gate_report.py +282 -0
  41. scripts/promptfoo_regression_gate.py +176 -0
  42. scripts/render_agent_prompts.py +57 -0
@@ -0,0 +1,247 @@
1
+ #!/usr/bin/env python3
2
+ """cherry eval — evaluate CherryDocs memory quality across all projects.
3
+
4
+ Runs in two passes:
5
+ 1. Heuristic — memory count, confidence distribution, kind diversity
6
+ 2. LLM judge — Ollama grades each project's answer on a 1-5 usefulness scale
7
+
8
+ Reads exclusively from ~/.cherrydocs/ — never touches downstream project repos.
9
+
10
+ Usage:
11
+ python scripts/eval_projects.py
12
+ python scripts/eval_projects.py --project footcorn
13
+ python scripts/eval_projects.py --no-llm # heuristic only
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import argparse
19
+ import json
20
+ import os
21
+ import sys
22
+ from pathlib import Path
23
+
24
+ ROOT = Path(__file__).resolve().parents[1]
25
+ if str(ROOT) not in sys.path:
26
+ sys.path.insert(0, str(ROOT))
27
+
28
+ import httpx
29
+
30
+ from app.services.promoted_memory_answer import answer_from_promoted_memory
31
+ from app.services.promoted_memory_store import DEFAULT_PROMOTED_ROOT, LocalPromotedMemoryStore
32
+
33
+ _HOME_CHERRY = Path.home() / ".cherrydocs"
34
+ _DEFAULT_BUFFER = os.environ.get("CHERRY_CAPTURE_BUFFER_DIR", str(_HOME_CHERRY / "capture"))
35
+ _OLLAMA_URL = os.getenv("OLLAMA_CHAT_URL", "http://127.0.0.1:11434/api/chat")
36
+ _OLLAMA_MODEL = os.getenv("CHERRY_OLLAMA_MODEL", "qwen2.5:7b-instruct")
37
+
38
+ # Questions used for every project — generic enough to work without knowing the domain
39
+ _EVAL_QUESTIONS = [
40
+ "What is this project and what is its current state?",
41
+ "What are the key technical decisions made so far?",
42
+ "What should I know before making changes to this codebase?",
43
+ ]
44
+
45
+ _JUDGE_PROMPT = """\
46
+ You are evaluating whether an AI memory system gave a useful answer to a developer.
47
+
48
+ Question: {question}
49
+
50
+ Answer received:
51
+ {answer}
52
+
53
+ Rate the answer on a scale from 1 to 5:
54
+ 1 = useless (empty, generic, no project-specific info)
55
+ 2 = weak (vague, mostly generic)
56
+ 3 = ok (some specific info but incomplete)
57
+ 4 = good (specific and actionable)
58
+ 5 = excellent (highly specific, immediately actionable, would save real time)
59
+
60
+ Respond with JSON only: {{"score": <1-5>, "reason": "<one sentence>"}}"""
61
+
62
+
63
+
64
+ def _list_projects(promoted_root: str) -> list[str]:
65
+ root = Path(promoted_root)
66
+ if not root.exists():
67
+ return []
68
+ return sorted(p.stem for p in root.glob("*.json"))
69
+
70
+
71
def _heuristic(project_id: str, promoted_root: str) -> dict:
    """Summarise a project's promoted memories without calling an LLM.

    Loads the project's records from the promoted store and reports how many
    are active/inactive, the mean confidence of the active ones, and the set
    of kinds they cover. The ``score`` badge is:

    * ``✅`` — at least 3 active records, mean confidence >= 0.7, >= 2 kinds
    * ``⚠️ `` — at least one active record
    * ``❌`` — no active records
    """
    store = LocalPromotedMemoryStore(promoted_root)
    all_records = store.load_records(project_id)
    live = [rec for rec in all_records if rec.status == "active"]
    retired = [rec for rec in all_records if rec.status != "active"]

    live_count = len(live)
    # Guard the division: a project with no active records averages to 0.0.
    mean_conf = sum(rec.confidence for rec in live) / live_count if live_count else 0.0
    distinct_kinds = {rec.kind for rec in live}

    if live_count >= 3 and mean_conf >= 0.7 and len(distinct_kinds) >= 2:
        badge = "✅"
    elif live_count >= 1:
        badge = "⚠️ "
    else:
        badge = "❌"

    return {
        "score": badge,
        "active": live_count,
        "inactive": len(retired),
        "avg_conf": mean_conf,
        "kinds": sorted(distinct_kinds),
    }
94
+
95
+
96
+ def _ask(project_id: str, question: str, promoted_root: str, buffer_dir: str) -> str:
97
+ try:
98
+ answer = answer_from_promoted_memory(
99
+ project_id=project_id,
100
+ question=question,
101
+ buffer_dir=buffer_dir,
102
+ promoted_root=promoted_root,
103
+ )
104
+ return answer.answer or ""
105
+ except Exception as e:
106
+ return f"[error: {e}]"
107
+
108
+
109
+ def _judge(question: str, answer_text: str, timeout: float = 60.0) -> dict:
110
+ if not answer_text or answer_text.startswith("[error"):
111
+ return {"score": 1, "reason": "no answer returned"}
112
+ prompt = _JUDGE_PROMPT.format(question=question, answer=answer_text[:800])
113
+ payload = {
114
+ "model": _OLLAMA_MODEL,
115
+ "messages": [{"role": "user", "content": prompt}],
116
+ "stream": False,
117
+ "format": "json",
118
+ "options": {"temperature": 0.1},
119
+ }
120
+ try:
121
+ with httpx.Client(timeout=timeout) as client:
122
+ r = client.post(_OLLAMA_URL, json=payload)
123
+ r.raise_for_status()
124
+ content = (r.json().get("message") or {}).get("content") or ""
125
+ return json.loads(content)
126
+ except Exception as e:
127
+ return {"score": 0, "reason": f"ollama error: {e}"}
128
+
129
+
130
+ def _bar(score: int, max_score: int = 5) -> str:
131
+ filled = round(score / max_score * 10)
132
+ return "█" * filled + "░" * (10 - filled)
133
+
134
+
135
def run_eval(
    projects: list[str],
    promoted_root: str,
    buffer_dir: str,
    use_llm: bool = True,
    verbose: bool = False,
) -> list[dict]:
    """Evaluate each project and print a per-project report.

    For every project id the heuristic pass is always run. When *use_llm* is
    true, each question in ``_EVAL_QUESTIONS`` is answered from promoted
    memory and graded by the judge; *verbose* additionally prints the first
    200 characters of each non-empty answer.

    Returns one dict per project with keys ``project_id``, ``heuristic``,
    ``llm_avg`` (``None`` when no LLM scores were collected) and
    ``llm_scores``.
    """
    results: list[dict] = []
    for pid in projects:
        for banner in (f"\n{'─'*60}", f" Project: {pid}", f"{'─'*60}"):
            print(banner)

        # --- Heuristic pass ---
        h = _heuristic(pid, promoted_root)
        for line in (
            f" Memories : {h['active']} active ({h['inactive']} inactive)",
            f" Avg conf : {h['avg_conf']:.2f}",
            f" Kinds : {', '.join(h['kinds']) or 'none'}",
            f" Heuristic: {h['score']}",
        ):
            print(line)

        # --- LLM judge pass ---
        llm_scores = []
        if use_llm:
            print()
            for q in _EVAL_QUESTIONS:
                print(f" Q: {q}")
                ans = _ask(pid, q, promoted_root, buffer_dir)
                verdict = _judge(q, ans)
                s = verdict.get("score", 0)
                llm_scores.append(s)
                bar = _bar(s)
                print(f" [{bar}] {s}/5 — {verdict.get('reason', '')}")
                if ans and verbose:
                    print(f" Answer: {ans[:200]}")
            print()

        if llm_scores:
            avg_llm = sum(llm_scores) / len(llm_scores)
        else:
            avg_llm = None
        results.append({
            "project_id": pid,
            "heuristic": h,
            "llm_avg": avg_llm,
            "llm_scores": llm_scores,
        })

    return results
180
+
181
+
182
+ def _summary(results: list[dict], use_llm: bool) -> None:
183
+ print(f"\n{'═'*60}")
184
+ print(" SUMMARY")
185
+ print(f"{'═'*60}")
186
+ header = f" {'Project':<30} {'Mem':>4} {'Conf':>5} {'H':>3}"
187
+ if use_llm:
188
+ header += f" {'LLM':>5}"
189
+ print(header)
190
+ print(f" {'─'*56}")
191
+ for r in results:
192
+ h = r["heuristic"]
193
+ row = f" {r['project_id']:<30} {h['active']:>4} {h['avg_conf']:>5.2f} {h['score']:>3}"
194
+ if use_llm and r["llm_avg"] is not None:
195
+ row += f" {r['llm_avg']:>5.1f}"
196
+ print(row)
197
+
198
+ if use_llm:
199
+ all_scores = [s for r in results for s in r["llm_scores"]]
200
+ if all_scores:
201
+ overall = sum(all_scores) / len(all_scores)
202
+ bar = _bar(round(overall))
203
+ print(f"\n Overall LLM score: {bar} {overall:.1f}/5")
204
+ if overall >= 4:
205
+ print(" → Memory quality is GOOD. CherryDocs is delivering value.")
206
+ elif overall >= 2.5:
207
+ print(" → Memory quality is OK. Run /bootstrap-project to improve.")
208
+ else:
209
+ print(" → Memory quality is WEAK. Sessions may not have been distilled yet.")
210
+
211
+
212
def _parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the evaluation script."""
    parser = argparse.ArgumentParser(
        description="Evaluate CherryDocs memory quality across projects"
    )
    # (flag, keyword arguments) pairs, registered in display order.
    option_table = (
        ("--project", {"help": "Evaluate a single project (default: all)"}),
        ("--promoted-root", {"default": DEFAULT_PROMOTED_ROOT}),
        ("--buffer-dir", {"default": _DEFAULT_BUFFER}),
        ("--no-llm", {"action": "store_true", "help": "Heuristic only (no Ollama)"}),
        ("--verbose", {"action": "store_true", "help": "Print answer text"}),
    )
    for flag, options in option_table:
        parser.add_argument(flag, **options)
    return parser
220
+
221
+
222
def main() -> int:
    """Run the evaluation from the command line.

    Evaluates the project given with ``--project``, or every project found
    under the promoted root. Returns 1 when there is nothing to evaluate and
    0 after a completed run.
    """
    args = _parser().parse_args()

    promoted_root, buffer_dir = args.promoted_root, args.buffer_dir
    use_llm = not args.no_llm

    if args.project:
        projects = [args.project]
    else:
        projects = _list_projects(promoted_root)

    if not projects:
        print(f"No projects found in {promoted_root}")
        print("Run /bootstrap-project in a Claude Code session first.")
        return 1

    for line in (
        f"\n🍒 CherryDocs Eval — {len(projects)} project(s)",
        f" Promoted root: {promoted_root}",
        f" Buffer dir: {buffer_dir}",
        f" LLM judge: {'on (' + _OLLAMA_MODEL + ')' if use_llm else 'off'}",
    ):
        print(line)

    results = run_eval(
        projects,
        promoted_root,
        buffer_dir,
        use_llm=use_llm,
        verbose=args.verbose,
    )
    _summary(results, use_llm)
    print()
    return 0
244
+
245
+
246
+ if __name__ == "__main__":
247
+ raise SystemExit(main())
scripts/install.py ADDED
@@ -0,0 +1,212 @@
1
+ #!/usr/bin/env python3
2
+ """cherry install — wire CherryDocs into Claude Code globally.
3
+
4
+ Works two ways:
5
+ A) Package install (recommended):
6
+ pip install cherry-docs
7
+ cherry install
8
+
9
+ B) Source install (development):
10
+ python /path/to/cherry-docs/scripts/install.py
11
+
12
+ What it does:
13
+ 1. Adds cherry-docs-mcp to Claude Code user scope
14
+ (every project gets log_activity / onboard / answer / save_checkpoint)
15
+ 2. Adds cherry-hook capture hooks to ~/.claude/settings.json
16
+ (every session is captured, auto-distilled on stop)
17
+ 3. Creates ~/.cherrydocs/ as the central store
18
+ 4. Migrates any project-local .cherrydocs/promoted/ data to the central store
19
+
20
+ After install, open any project in Claude Code — CherryDocs is active.
21
+ Run /bootstrap-project on existing projects to seed their memory.
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ import json
27
+ import shutil
28
+ import subprocess
29
+ import sys
30
+ from pathlib import Path
31
+
32
# Directory two levels above this file (the parent of scripts/).
CHERRY_ROOT = Path(__file__).resolve().parent.parent
# Claude Code's user-level settings file, where the capture hooks are written.
GLOBAL_SETTINGS = Path.home().joinpath(".claude", "settings.json")
# Central CherryDocs store shared by every project, with its two sub-stores.
CENTRAL_DIR = Path.home().joinpath(".cherrydocs")
CENTRAL_CAPTURE = CENTRAL_DIR.joinpath("capture")
CENTRAL_PROMOTED = CENTRAL_DIR.joinpath("promoted")
37
+
38
+
39
+ def _run(cmd: list[str], check: bool = True) -> subprocess.CompletedProcess:
40
+ return subprocess.run(cmd, capture_output=True, text=True, check=check)
41
+
42
+
43
def _mcp_command() -> list[str]:
    """Return the argv used to launch the MCP server.

    Uses the ``cherry-docs-mcp`` executable when it is found on PATH;
    otherwise runs ``mcp_server.py`` from the source root with the current
    Python interpreter.
    """
    installed = shutil.which("cherry-docs-mcp")
    if not installed:
        return [sys.executable, str(CHERRY_ROOT / "mcp_server.py")]
    return [installed]
54
+
55
+
56
def _hook_command(subcommand: str) -> str:
    """Return the shell command string for a hook subcommand.

    The command is prefixed with ``CHERRY_CAPTURE_BUFFER_DIR`` and
    ``CHERRY_PROMOTED_ROOT`` assignments pointing at the central store, so
    the hook process sees the central locations.

    Uses the ``cherry-hook`` executable when it is found on PATH; otherwise
    runs ``scripts/claude_hooks/state_manager.py`` from the source root with
    the current interpreter.

    Every path is shell-quoted: the result is a single command line, so an
    unquoted home directory or interpreter path containing spaces would be
    split into several words and break the hook.
    """
    import shlex  # local import: only this function needs it

    env = (
        f"CHERRY_CAPTURE_BUFFER_DIR={shlex.quote(str(CENTRAL_CAPTURE))}"
        f" CHERRY_PROMOTED_ROOT={shlex.quote(str(CENTRAL_PROMOTED))}"
    )
    ep = shutil.which("cherry-hook")
    if ep:
        launcher = shlex.quote(ep)
    else:
        sm = CHERRY_ROOT / "scripts" / "claude_hooks" / "state_manager.py"
        launcher = f"{shlex.quote(sys.executable)} {shlex.quote(str(sm))}"
    return f"{env} {launcher} {subcommand}"
74
+
75
+
76
def _install_mcp() -> bool:
    """Add the cherry-docs MCP server at user scope via the ``claude`` CLI.

    Returns:
        True when the server is already registered at user scope or was
        registered successfully; False when the ``claude`` CLI is missing or
        ``claude mcp add`` fails (a manual command is printed in both cases).
    """
    print("→ Adding cherry-docs MCP server (user scope)…")
    # Without this guard a missing CLI surfaces as an uncaught
    # FileNotFoundError from subprocess, even with check=False.
    if shutil.which("claude") is None:
        cmd = _mcp_command()
        print(" ✗ claude CLI not found on PATH.")
        print(f" Add manually: claude mcp add cherry-docs {' '.join(cmd)} --scope user")
        return False

    result = _run(["claude", "mcp", "get", "cherry-docs"], check=False)
    if _reports_user_scope(result.stdout):
        print(" ✓ Already installed at user scope.")
        return True

    # Remove project/local-scoped entries (they would shadow user scope)
    _run(["claude", "mcp", "remove", "cherry-docs", "-s", "project"], check=False)
    _run(["claude", "mcp", "remove", "cherry-docs", "-s", "local"], check=False)

    cmd = _mcp_command()
    result = _run(
        ["claude", "mcp", "add", "cherry-docs", *cmd, "--scope", "user"],
        check=False,
    )
    if result.returncode != 0:
        print(f" ✗ claude mcp add failed: {result.stderr.strip()}")
        print(f" Add manually: claude mcp add cherry-docs {' '.join(cmd)} --scope user")
        return False

    print(f" ✓ MCP server registered ({' '.join(cmd)}).")
    return True


def _reports_user_scope(mcp_get_output: str) -> bool:
    """Return True when ``claude mcp get`` output indicates user scope.

    A plain substring test for "user" also matches paths such as
    ``/Users/...`` in the printed command, so only lines starting with
    "scope" are inspected when any exist.
    NOTE(review): assumes the CLI prints a ``Scope: ...`` line — confirm
    against the installed version. If no such line is found, the original
    whole-output substring test is used.
    """
    scope_lines = [
        line.lower()
        for line in mcp_get_output.splitlines()
        if line.strip().lower().startswith("scope")
    ]
    if scope_lines:
        return any("user" in line for line in scope_lines)
    return "user" in mcp_get_output.lower()
100
+
101
+
102
def _build_hooks() -> dict:
    """Return the CherryDocs hook configuration keyed by Claude Code event name.

    ``UserPromptSubmit`` and ``Stop`` run unconditionally; ``PostToolUse``
    runs only for the tools named in its matcher pattern.
    """

    def command_hook(subcommand: str) -> dict:
        # One "command"-type hook entry invoking the given hook subcommand.
        return {"type": "command", "command": _hook_command(subcommand)}

    hooks: dict = {}
    hooks["UserPromptSubmit"] = [{"hooks": [command_hook("session-start")]}]
    hooks["PostToolUse"] = [{
        "matcher": "Edit|Write|NotebookEdit|Bash|mcp__cherry-docs__log_activity|mcp__cherry-docs__save_checkpoint",
        "hooks": [command_hook("post-tool-use")],
    }]
    hooks["Stop"] = [{"hooks": [command_hook("stop")]}]
    return hooks
115
+
116
+
117
def _install_hooks() -> None:
    """Merge CherryDocs hooks into ~/.claude/settings.json.

    Existing settings are preserved. For each event CherryDocs uses, stale
    CherryDocs entries are dropped and the fresh ones are placed ahead of
    the user's remaining entries.

    If the existing file cannot be parsed as a JSON object, it is copied to
    ``settings.json.cherry-backup`` before being replaced, so the user's
    content is never silently discarded.
    """
    print("→ Writing capture hooks to ~/.claude/settings.json…")
    settings: dict = {}
    if GLOBAL_SETTINGS.exists():
        raw = GLOBAL_SETTINGS.read_text(encoding="utf-8")
        try:
            loaded = json.loads(raw)
        except json.JSONDecodeError:
            loaded = None
        if isinstance(loaded, dict):
            settings = loaded
        elif raw.strip():
            # Non-empty but unusable: keep a copy before overwriting it.
            backup = GLOBAL_SETTINGS.with_name(GLOBAL_SETTINGS.name + ".cherry-backup")
            shutil.copy2(GLOBAL_SETTINGS, backup)
            print(f" ! Existing settings.json is not a valid JSON object; backed up to {backup}")

    existing = settings.get("hooks")
    if not isinstance(existing, dict):
        existing = {}

    for event, matchers in _build_hooks().items():
        current = existing.get(event)
        if not isinstance(current, list):
            current = []
        # Drop stale CherryDocs hook entries (identified by state_manager or cherry-hook)
        kept = [m for m in current if not _is_cherrydocs_matcher(m)]
        existing[event] = matchers + kept

    settings["hooks"] = existing
    GLOBAL_SETTINGS.parent.mkdir(parents=True, exist_ok=True)
    GLOBAL_SETTINGS.write_text(json.dumps(settings, indent=2), encoding="utf-8")
    print(" ✓ Hooks installed.")


def _is_cherrydocs_matcher(matcher: object) -> bool:
    """Return True when any hook command in *matcher* invokes CherryDocs.

    Entries that are not shaped like hook matchers are treated as foreign
    and kept by the caller.
    """
    if not isinstance(matcher, dict):
        return False
    hooks = matcher.get("hooks")
    if not isinstance(hooks, list):
        return False
    for hook in hooks:
        command = hook.get("command", "") if isinstance(hook, dict) else ""
        if isinstance(command, str) and (
            "state_manager.py" in command or "cherry-hook" in command
        ):
            return True
    return False
146
+
147
+
148
def _create_store() -> None:
    """Ensure the central capture and promoted directories exist."""
    print(f"→ Creating central store at {CENTRAL_DIR}…")
    for folder in (CENTRAL_CAPTURE, CENTRAL_PROMOTED):
        folder.mkdir(parents=True, exist_ok=True)
    print(" ✓ Store ready.")
154
+
155
+
156
def _migrate_local_promoted() -> None:
    """Copy ``.cherrydocs/promoted/*.json`` under CHERRY_ROOT into the central store.

    Files whose name already exists in the central promoted directory are
    skipped, never overwritten. Does nothing when the local directory is
    missing or holds no JSON files.
    """
    local_promoted = CHERRY_ROOT / ".cherrydocs" / "promoted"
    jsons = list(local_promoted.glob("*.json")) if local_promoted.exists() else []
    if not jsons:
        return

    print(f"→ Migrating {len(jsons)} project file(s) from local store to central store…")
    for src in jsons:
        dst = CENTRAL_PROMOTED / src.name
        if not dst.exists():
            shutil.copy2(src, dst)
            print(f" ✓ Migrated {src.name}")
            continue
        print(f" ↷ Skipping {src.name} (already in central store)")
    print(" ✓ Migration complete.")
178
+
179
+
180
def main() -> int:
    """Run the installer: store, migration, MCP registration, then hooks.

    Returns:
        0 when every step succeeded; 1 when the MCP server could not be
        registered. Hooks are still installed in that case, but the success
        banner is not printed because it would claim MCP is live.
    """
    print(f"\nCherryDocs install — v{_version()}\n")

    _create_store()
    _migrate_local_promoted()
    mcp_ok = _install_mcp()
    _install_hooks()

    if not mcp_ok:
        print("""
⚠️ CherryDocs hooks are installed, but the MCP server was NOT registered.

Register it with the command shown above, then re-run the installer.
""")
        return 1

    print("""
✅ CherryDocs is now active for ALL Claude Code projects.

Next steps:
• Open any project in Claude Code — hooks + MCP are live immediately
• Run: /bootstrap-project
(reads codebase, seeds 10-20 memories via log_activity)
• Use 'cherry status' to verify at any time

Project IDs are detected from git remote (e.g. freebeiro-myapp)
or fall back to the directory name.
""")
    return 0
201
+
202
+
203
+ def _version() -> str:
204
+ try:
205
+ from importlib.metadata import version
206
+ return version("cherry-docs")
207
+ except Exception:
208
+ return "dev"
209
+
210
+
211
+ if __name__ == "__main__":
212
+ sys.exit(main())