cherry-docs 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. cherry_docs-0.2.0/PKG-INFO +143 -0
  2. cherry_docs-0.2.0/README.md +114 -0
  3. cherry_docs-0.2.0/app/__init__.py +0 -0
  4. cherry_docs-0.2.0/app/repo_scope.py +24 -0
  5. cherry_docs-0.2.0/app/services/__init__.py +0 -0
  6. cherry_docs-0.2.0/app/services/agent_protocol.py +59 -0
  7. cherry_docs-0.2.0/app/services/auto_promote_sessions.py +245 -0
  8. cherry_docs-0.2.0/app/services/capture_adapters.py +89 -0
  9. cherry_docs-0.2.0/app/services/capture_core.py +164 -0
  10. cherry_docs-0.2.0/app/services/internal_memory_agent.py +214 -0
  11. cherry_docs-0.2.0/app/services/memory_evidence.py +89 -0
  12. cherry_docs-0.2.0/app/services/memory_extraction_normalize.py +134 -0
  13. cherry_docs-0.2.0/app/services/memory_lifecycle.py +258 -0
  14. cherry_docs-0.2.0/app/services/memory_profiles.py +88 -0
  15. cherry_docs-0.2.0/app/services/memory_providers.py +113 -0
  16. cherry_docs-0.2.0/app/services/memory_retrieval.py +327 -0
  17. cherry_docs-0.2.0/app/services/memory_retrieval_scoring.py +106 -0
  18. cherry_docs-0.2.0/app/services/memory_retrieval_text.py +113 -0
  19. cherry_docs-0.2.0/app/services/memory_similarity.py +135 -0
  20. cherry_docs-0.2.0/app/services/privacy.py +72 -0
  21. cherry_docs-0.2.0/app/services/promoted_memory_answer.py +157 -0
  22. cherry_docs-0.2.0/app/services/promoted_memory_pipeline.py +194 -0
  23. cherry_docs-0.2.0/app/services/promoted_memory_store.py +57 -0
  24. cherry_docs-0.2.0/cherry_docs.egg-info/PKG-INFO +143 -0
  25. cherry_docs-0.2.0/cherry_docs.egg-info/SOURCES.txt +62 -0
  26. cherry_docs-0.2.0/cherry_docs.egg-info/dependency_links.txt +1 -0
  27. cherry_docs-0.2.0/cherry_docs.egg-info/entry_points.txt +4 -0
  28. cherry_docs-0.2.0/cherry_docs.egg-info/requires.txt +14 -0
  29. cherry_docs-0.2.0/cherry_docs.egg-info/top_level.txt +3 -0
  30. cherry_docs-0.2.0/cherrydocs/__init__.py +3 -0
  31. cherry_docs-0.2.0/cherrydocs/cli.py +213 -0
  32. cherry_docs-0.2.0/cherrydocs/hook.py +27 -0
  33. cherry_docs-0.2.0/cherrydocs/mcp.py +22 -0
  34. cherry_docs-0.2.0/pyproject.toml +66 -0
  35. cherry_docs-0.2.0/scripts/__init__.py +0 -0
  36. cherry_docs-0.2.0/scripts/auto_promote_capture.py +63 -0
  37. cherry_docs-0.2.0/scripts/check_size_limits.py +115 -0
  38. cherry_docs-0.2.0/scripts/ci_auto_capture.py +289 -0
  39. cherry_docs-0.2.0/scripts/claude_hooks/__init__.py +0 -0
  40. cherry_docs-0.2.0/scripts/claude_hooks/state_manager.py +526 -0
  41. cherry_docs-0.2.0/scripts/coverage_regression_gate.py +121 -0
  42. cherry_docs-0.2.0/scripts/eval_projects.py +247 -0
  43. cherry_docs-0.2.0/scripts/install.py +212 -0
  44. cherry_docs-0.2.0/scripts/pr_gate_report.py +282 -0
  45. cherry_docs-0.2.0/scripts/promptfoo_regression_gate.py +176 -0
  46. cherry_docs-0.2.0/scripts/render_agent_prompts.py +57 -0
  47. cherry_docs-0.2.0/setup.cfg +4 -0
  48. cherry_docs-0.2.0/tests/test_agent_protocol.py +31 -0
  49. cherry_docs-0.2.0/tests/test_answer_quality_evals.py +163 -0
  50. cherry_docs-0.2.0/tests/test_auto_promote_sessions.py +214 -0
  51. cherry_docs-0.2.0/tests/test_capture_core.py +119 -0
  52. cherry_docs-0.2.0/tests/test_ci_auto_capture.py +112 -0
  53. cherry_docs-0.2.0/tests/test_claude_capture_hooks.py +169 -0
  54. cherry_docs-0.2.0/tests/test_claude_hooks_state.py +431 -0
  55. cherry_docs-0.2.0/tests/test_coverage_regression_gate.py +101 -0
  56. cherry_docs-0.2.0/tests/test_internal_memory_agent.py +337 -0
  57. cherry_docs-0.2.0/tests/test_mcp_server.py +58 -0
  58. cherry_docs-0.2.0/tests/test_memory_lifecycle.py +321 -0
  59. cherry_docs-0.2.0/tests/test_memory_retrieval.py +617 -0
  60. cherry_docs-0.2.0/tests/test_pr_gate_report.py +168 -0
  61. cherry_docs-0.2.0/tests/test_promoted_memory_answer.py +393 -0
  62. cherry_docs-0.2.0/tests/test_promoted_memory_pipeline.py +227 -0
  63. cherry_docs-0.2.0/tests/test_promoted_memory_store.py +51 -0
  64. cherry_docs-0.2.0/tests/test_promptfoo_regression_gate.py +102 -0
@@ -0,0 +1,143 @@
1
+ Metadata-Version: 2.4
2
+ Name: cherry-docs
3
+ Version: 0.2.0
4
+ Summary: Local-first AI memory for Claude Code — capture, distill, and retrieve project knowledge automatically.
5
+ License: MIT
6
+ Project-URL: Homepage, https://github.com/freebeiro/cherry-docs
7
+ Project-URL: Repository, https://github.com/freebeiro/cherry-docs
8
+ Keywords: ai,memory,claude,mcp,developer-tools
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Requires-Python: >=3.11
16
+ Description-Content-Type: text/markdown
17
+ Requires-Dist: python-dotenv>=1.0
18
+ Requires-Dist: pydantic>=2.0
19
+ Requires-Dist: httpx>=0.27
20
+ Requires-Dist: mcp>=1.0
21
+ Provides-Extra: anthropic
22
+ Requires-Dist: anthropic>=0.40; extra == "anthropic"
23
+ Provides-Extra: dev
24
+ Requires-Dist: pytest>=8.0; extra == "dev"
25
+ Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
26
+ Requires-Dist: coverage>=7.0; extra == "dev"
27
+ Requires-Dist: ruff>=0.4; extra == "dev"
28
+ Requires-Dist: mypy>=1.9; extra == "dev"
29
+
30
+ # CherryDocs
31
+
32
+ CherryDocs is a local-first memory layer for AI coding chats.
33
+
34
+ The intended flow is simple:
35
+
36
+ 1. connect your AI client to CherryDocs via MCP
37
+ 2. start with `onboard()` — get project context in one call
38
+ 3. work normally in the repo
39
+ 4. ask `answer()` when continuity matters
40
+
41
+ ## What It Does
42
+
43
+ CherryDocs helps an AI answer questions like:
44
+
45
+ - Why is this code here?
46
+ - What did we already try?
47
+ - What failed before?
48
+ - How do I continue this work without rereading everything?
49
+
50
+ The core product shape is:
51
+
52
+ - `onboard()` for the smallest useful startup view
53
+ - passive capture of work traces via Claude Code hooks
54
+ - local Ollama distillation of sessions into durable project memory
55
+ - `answer()` for retrieval when a new chat needs context
56
+
57
+ ## Current Architecture
58
+
59
+ - **Durable memory store**: local JSON at `~/.cherrydocs/promoted/{project_id}.json`
60
+ - **Transport**: MCP via stdio (FastMCP) — 4 tools
61
+ - **Distillation**: local Ollama (qwen2.5:7b-instruct by default)
62
+ - **Capture**: Claude Code hooks + MCP log tools
63
+
64
+ CherryDocs is project-scoped first and branch-aware second.
65
+
66
+ ## MCP Tools
67
+
68
+ | Tool | Purpose |
69
+ |---|---|
70
+ | `onboard` | Session start — loads top memories + recent sessions |
71
+ | `log_activity` | Record a decision, fix, or insight to the capture buffer |
72
+ | `save_checkpoint` | Structured handoff — blind AI must be able to continue |
73
+ | `answer` | Query promoted memory for project questions |
74
+
75
+ ## Setup
76
+
77
+ ```bash
78
+ pip install cherry-docs
79
+ cherry install # installs Claude Code hooks
80
+ ```
81
+
82
+ Then add to your `.mcp.json`:
83
+
84
+ ```json
85
+ {
86
+ "mcpServers": {
87
+ "cherry-docs": {
88
+ "command": "cherry-docs-mcp"
89
+ }
90
+ }
91
+ }
92
+ ```
93
+
94
+ ## Minimal AI Rule
95
+
96
+ ```md
97
+ Use CherryDocs.
98
+ - On start: call `onboard()`.
99
+ - Work normally.
100
+ - Use `answer()` when history could change the decision.
101
+ - Use `log_activity()` when something important would otherwise be lost.
102
+ ```
103
+
104
+ The canonical source for generated agent rules is [docs/agent_protocol.toml](docs/agent_protocol.toml).
105
+
106
+ ## Workflow
107
+
108
+ In a new session:
109
+
110
+ 1. Claude calls `onboard()` — gets top memories + recent session state
111
+ 2. Work happens normally; hooks capture tool use and code changes
112
+ 3. On git commit, auto-distillation fires via Ollama
113
+ 4. Ask `answer("Why did we change this?")` in any future session
114
+
115
+ ## What Works Today
116
+
117
+ - Local file-backed promoted memory (no cloud, no graph DB)
118
+ - MCP stdio server with 4 tools
119
+ - Claude Code hook-based passive capture
120
+ - Ollama distillation pipeline (per-session + commit-triggered)
121
+ - `cherry eval` — heuristic + LLM judge for memory quality
122
+ - `cherry why <file>` — show memories anchored to commits touching a file
123
+
124
+ ## Development
125
+
126
+ ```bash
127
+ pip install -e .
128
+ python -m pytest tests/ -q
129
+ python scripts/check_size_limits.py
130
+ ```
131
+
132
+ For PR hardening:
133
+
134
+ ```bash
135
+ bash scripts/local_pr_gate.sh fast
136
+ ```
137
+
138
+ ## Documentation
139
+
140
+ - [Product Brief](docs/PRODUCT_BRIEF.md)
141
+ - [System Deep Dive](docs/SYSTEM_DEEP_DIVE.md)
142
+
143
+ > Would another AI actually want to keep this on because it helps achieve the goal?
@@ -0,0 +1,114 @@
1
+ # CherryDocs
2
+
3
+ CherryDocs is a local-first memory layer for AI coding chats.
4
+
5
+ The intended flow is simple:
6
+
7
+ 1. connect your AI client to CherryDocs via MCP
8
+ 2. start with `onboard()` — get project context in one call
9
+ 3. work normally in the repo
10
+ 4. ask `answer()` when continuity matters
11
+
12
+ ## What It Does
13
+
14
+ CherryDocs helps an AI answer questions like:
15
+
16
+ - Why is this code here?
17
+ - What did we already try?
18
+ - What failed before?
19
+ - How do I continue this work without rereading everything?
20
+
21
+ The core product shape is:
22
+
23
+ - `onboard()` for the smallest useful startup view
24
+ - passive capture of work traces via Claude Code hooks
25
+ - local Ollama distillation of sessions into durable project memory
26
+ - `answer()` for retrieval when a new chat needs context
27
+
28
+ ## Current Architecture
29
+
30
+ - **Durable memory store**: local JSON at `~/.cherrydocs/promoted/{project_id}.json`
31
+ - **Transport**: MCP via stdio (FastMCP) — 4 tools
32
+ - **Distillation**: local Ollama (qwen2.5:7b-instruct by default)
33
+ - **Capture**: Claude Code hooks + MCP log tools
34
+
35
+ CherryDocs is project-scoped first and branch-aware second.
36
+
37
+ ## MCP Tools
38
+
39
+ | Tool | Purpose |
40
+ |---|---|
41
+ | `onboard` | Session start — loads top memories + recent sessions |
42
+ | `log_activity` | Record a decision, fix, or insight to the capture buffer |
43
+ | `save_checkpoint` | Structured handoff — blind AI must be able to continue |
44
+ | `answer` | Query promoted memory for project questions |
45
+
46
+ ## Setup
47
+
48
+ ```bash
49
+ pip install cherry-docs
50
+ cherry install # installs Claude Code hooks
51
+ ```
52
+
53
+ Then add to your `.mcp.json`:
54
+
55
+ ```json
56
+ {
57
+ "mcpServers": {
58
+ "cherry-docs": {
59
+ "command": "cherry-docs-mcp"
60
+ }
61
+ }
62
+ }
63
+ ```
64
+
65
+ ## Minimal AI Rule
66
+
67
+ ```md
68
+ Use CherryDocs.
69
+ - On start: call `onboard()`.
70
+ - Work normally.
71
+ - Use `answer()` when history could change the decision.
72
+ - Use `log_activity()` when something important would otherwise be lost.
73
+ ```
74
+
75
+ The canonical source for generated agent rules is [docs/agent_protocol.toml](docs/agent_protocol.toml).
76
+
77
+ ## Workflow
78
+
79
+ In a new session:
80
+
81
+ 1. Claude calls `onboard()` — gets top memories + recent session state
82
+ 2. Work happens normally; hooks capture tool use and code changes
83
+ 3. On git commit, auto-distillation fires via Ollama
84
+ 4. Ask `answer("Why did we change this?")` in any future session
85
+
86
+ ## What Works Today
87
+
88
+ - Local file-backed promoted memory (no cloud, no graph DB)
89
+ - MCP stdio server with 4 tools
90
+ - Claude Code hook-based passive capture
91
+ - Ollama distillation pipeline (per-session + commit-triggered)
92
+ - `cherry eval` — heuristic + LLM judge for memory quality
93
+ - `cherry why <file>` — show memories anchored to commits touching a file
94
+
95
+ ## Development
96
+
97
+ ```bash
98
+ pip install -e .
99
+ python -m pytest tests/ -q
100
+ python scripts/check_size_limits.py
101
+ ```
102
+
103
+ For PR hardening:
104
+
105
+ ```bash
106
+ bash scripts/local_pr_gate.sh fast
107
+ ```
108
+
109
+ ## Documentation
110
+
111
+ - [Product Brief](docs/PRODUCT_BRIEF.md)
112
+ - [System Deep Dive](docs/SYSTEM_DEEP_DIVE.md)
113
+
114
+ > Would another AI actually want to keep this on because it helps achieve the goal?
File without changes
@@ -0,0 +1,24 @@
1
+ from typing import Optional
2
+
3
+
4
def normalize_project_id(project_id: Optional[str], default: str = "default-project") -> str:
    """
    Normalize repository/project identifiers to the internal dashed format.

    Examples:
    - github.com/owner/repo -> owner-repo
    - https://github.com/owner/repo.git -> owner-repo
    - https://github.com/owner/repo.git/ -> owner-repo
    - git@github.com:owner/repo.git -> owner-repo

    Args:
        project_id: Raw identifier (URL, SSH remote, or plain name). May be
            ``None`` or blank.
        default: Value returned when nothing usable remains after normalization.

    Returns:
        The lower-cased identifier with path separators replaced by dashes,
        or ``default`` when the input is missing, blank, or normalizes to
        an empty string.
    """
    if not project_id:
        return default

    normalized = project_id.strip()
    if not normalized:
        return default

    normalized = normalized.replace("https://", "").replace("http://", "")
    normalized = normalized.replace("git@github.com:", "").replace("github.com/", "")
    # Strip slashes *before* dropping ".git": a trailing "/" would otherwise
    # hide the suffix and leave ids such as "owner-repo.git". Strip once more
    # afterwards in case removing the suffix exposed another separator.
    normalized = normalized.strip("/").removesuffix(".git").strip("/")
    normalized = normalized.replace("/", "-")
    return normalized.lower() or default
File without changes
@@ -0,0 +1,59 @@
1
+ """Canonical agent protocol rendering for all supported client rule files."""
2
+ from __future__ import annotations
3
+
4
+ import tomllib
5
+ from hashlib import sha256
6
+ from pathlib import Path
7
+
8
# Third ancestor of this module's resolved path (parents[2]); presumably the
# repository root given the app/services/ layout -- confirm if the file moves.
ROOT_DIR = Path(__file__).resolve().parents[2]
# Absolute location of the protocol TOML file that the renderers read.
PROTOCOL_PATH = ROOT_DIR / "docs" / "agent_protocol.toml"
# Relative path string embedded in generated metadata and header comments.
PROTOCOL_SOURCE = "docs/agent_protocol.toml"
# Maps each generated rule-file path to the platform key understood by
# render_platform_prompt().
PROMPT_OUTPUTS = {
    ".claude/CLAUDE.md": "claude",
    "AGENTS.md": "agents",
    "GEMINI.md": "gemini",
    ".cursorrules": "cursorrules",
    ".cursor/rules/cherrydocs.mdc": "cursor_mdc",
}
18
+
19
+
20
def _load_protocol() -> tuple[dict, str]:
    """Read the protocol file and return ``(parsed_toml, raw_text)``.

    The raw text is returned alongside the parsed document so callers can
    hash the exact file contents.
    """
    source_text = PROTOCOL_PATH.read_text(encoding="utf-8")
    parsed = tomllib.loads(source_text)
    return parsed, source_text
23
+
24
+
25
def _protocol_metadata(protocol: dict, raw: str) -> dict[str, str]:
    """Describe the protocol document: its source path, version, and short hash.

    Args:
        protocol: Parsed protocol document; must contain ``meta.version``.
        raw: Exact text of the protocol file, used for the content hash.

    Returns:
        A dict with ``source``, ``version`` (stringified) and ``hash`` (the
        first 12 hex characters of the SHA-256 of ``raw``).
    """
    version_text = str(protocol["meta"]["version"])
    full_digest = sha256(raw.encode("utf-8")).hexdigest()
    return {"source": PROTOCOL_SOURCE, "version": version_text, "hash": full_digest[:12]}
31
+
32
+
33
def _generated_comment(version: str, protocol_hash: str) -> str:
    """Build the HTML comment that marks a rule file as generated output."""
    provenance = f"Generated from {PROTOCOL_SOURCE} version={version} hash={protocol_hash}"
    return f"<!-- {provenance}; do not edit by hand. -->"
35
+
36
+
37
def _render_body(protocol: dict, version: str, protocol_hash: str) -> str:
    """Render the shared prompt body used by every platform.

    The body is the generated-file comment, the protocol title, a blank line,
    and one ``- `` bullet per entry in ``shared.bullets``. Trailing whitespace
    is trimmed and exactly one newline terminates the result.
    """
    meta_section = protocol["meta"]
    shared_section = protocol["shared"]
    rendered = [_generated_comment(version, protocol_hash), meta_section["title"], ""]
    for bullet in shared_section["bullets"]:
        rendered.append(f"- {bullet}")
    text = "\n".join(rendered)
    return text.rstrip() + "\n"
43
+
44
+
45
def render_platform_prompt(platform: str) -> str:
    """Render the agent rule text for one supported platform key.

    Args:
        platform: One of ``claude``, ``agents``, ``gemini``, ``cursorrules``
            or ``cursor_mdc``.

    Returns:
        The shared body, prefixed with the platform-specific preamble (none
        for ``claude``).

    Raises:
        ValueError: If ``platform`` is not a supported key. The protocol file
            is loaded before this check, so read errors surface first.
    """
    protocol, raw = _load_protocol()
    details = _protocol_metadata(protocol, raw)
    body = _render_body(protocol, details["version"], details["hash"])

    rules_heading = "# Project Rules\n\n"
    prefix_by_platform = {
        "claude": "",
        "agents": rules_heading,
        "gemini": rules_heading,
        "cursorrules": rules_heading,
        "cursor_mdc": "---\ndescription: CherryDocs project protocol\nalwaysApply: true\n---\n\n",
    }
    if platform not in prefix_by_platform:
        raise ValueError(f"Unsupported platform: {platform}")
    return prefix_by_platform[platform] + body
56
+
57
+
58
def build_prompt_file_map() -> dict[str, str]:
    """Return the rendered prompt text for every path in ``PROMPT_OUTPUTS``."""
    rendered_by_path: dict[str, str] = {}
    for output_path, platform_key in PROMPT_OUTPUTS.items():
        rendered_by_path[output_path] = render_platform_prompt(platform_key)
    return rendered_by_path
@@ -0,0 +1,245 @@
1
+ """Background-friendly auto-promotion for captured AI sessions."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import hashlib
6
+ import json
7
+ from datetime import UTC, datetime
8
+ from pathlib import Path
9
+
10
+ from pydantic import BaseModel, ConfigDict, Field
11
+
12
+ from app.repo_scope import normalize_project_id
13
+ from app.services.capture_core import LocalCaptureBuffer
14
+ from app.services.internal_memory_agent import MemoryModelProvider
15
+ from app.services.memory_providers import resolve_provider
16
+ from app.services.promoted_memory_pipeline import run_session_promotion
17
+ from app.services.promoted_memory_store import DEFAULT_PROMOTED_ROOT, LocalPromotedMemoryStore
18
+
19
+
20
class AutoPromotionPolicy(BaseModel):
    """Thresholds that control which captured sessions get auto-promoted."""

    model_config = ConfigDict(extra="ignore")

    # Sessions with fewer captured events than this are skipped.
    min_event_count: int = 3
    # Passed to the promotion pipeline as its ``min_confidence`` argument.
    min_candidate_confidence: float = 0.8
    # Upper bound on how many (most recently modified) sessions one run examines.
    max_sessions: int = 10
26
+
27
+
28
class AutoPromotionState(BaseModel):
    """Per-session marker persisted after a promotion attempt.

    A later run compares the stored ``signature`` with a freshly computed one
    to decide whether the session gained new evidence.
    """

    model_config = ConfigDict(extra="ignore")

    session_id: str
    project_id: str
    # Hash of the session's events (see _session_signature).
    signature: str
    # Number of events in the session when the state was built.
    event_count: int
    # Last non-blank event timestamp in list order, if any.
    last_event_timestamp: str | None = None
    # ISO-8601 UTC time at which this state object was created.
    last_promoted_at: str = Field(default_factory=lambda: datetime.now(UTC).isoformat())
37
+
38
+
39
class AutoPromotionSessionResult(BaseModel):
    """Outcome of examining one captured session during an auto-promotion run."""

    model_config = ConfigDict(extra="ignore")

    session_id: str
    # "skip" or "promote" as set by auto_promote_captured_sessions.
    action: str
    # Human-readable explanation; filled in for skipped sessions.
    reason: str = ""
    # Number of non-noise records produced for the session.
    promoted_count: int = 0
    # Summaries of up to the first three promoted records.
    highlights: list[str] = Field(default_factory=list)
    # JSON-mode dump of the pipeline's distillation trace, when promoted.
    distillation_trace: dict[str, object] | None = None
48
+
49
+
50
class AutoPromotionRunReport(BaseModel):
    """Aggregate result of one auto-promotion run for a project."""

    model_config = ConfigDict(extra="ignore")

    project_id: str
    # Sessions whose records were promoted into the store.
    processed: list[AutoPromotionSessionResult] = Field(default_factory=list)
    # Sessions that were examined but not promoted, each with a reason.
    skipped: list[AutoPromotionSessionResult] = Field(default_factory=list)
56
+
57
+
58
+ def _state_dir(buffer_dir: str | Path) -> Path:
59
+ return Path(buffer_dir).expanduser().resolve() / ".promotion-state"
60
+
61
+
62
def _state_path(buffer_dir: str | Path, session_id: str) -> Path:
    """Return the JSON state file path for ``session_id``.

    Forward and backward slashes in the session id are replaced with
    underscores so the id cannot introduce extra path components.
    """
    separator_map = str.maketrans({"/": "_", "\\": "_"})
    file_stem = session_id.translate(separator_map)
    return _state_dir(buffer_dir) / f"{file_stem}.json"
65
+
66
+
67
def _load_state(buffer_dir: str | Path, session_id: str) -> AutoPromotionState | None:
    """Load the stored promotion state for a session, if one is usable.

    Returns ``None`` when the state file does not exist or when reading,
    decoding, or validating it fails for any reason (best-effort load).
    """
    state_file = _state_path(buffer_dir, session_id)
    if not state_file.exists():
        return None
    try:
        raw_text = state_file.read_text(encoding="utf-8")
        decoded = json.loads(raw_text)
        restored = AutoPromotionState.model_validate(decoded)
    except Exception:
        # Unreadable or invalid state is treated the same as no state.
        return None
    return restored
76
+
77
+
78
def _save_state(buffer_dir: str | Path, state: AutoPromotionState) -> None:
    """Write ``state`` as indented JSON to its per-session state file.

    The parent directory is created on demand.
    """
    target = _state_path(buffer_dir, state.session_id)
    target.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(state.model_dump(mode="json"), indent=2)
    target.write_text(serialized, encoding="utf-8")
82
+
83
+
84
+ def list_capture_sessions(buffer_dir: str | Path) -> list[str]:
85
+ root = Path(buffer_dir).expanduser().resolve()
86
+ if not root.exists():
87
+ return []
88
+ return [
89
+ path.stem
90
+ for path in sorted(root.glob("*.jsonl"), key=lambda p: p.stat().st_mtime, reverse=True)
91
+ ]
92
+
93
+
94
def _session_signature(events: list[dict]) -> str:
    """Return a SHA-1 hex digest fingerprinting a session's events.

    Only ``event_type``, ``timestamp``, ``command``, ``exit_code`` and the
    first 400 characters of ``text`` contribute, so other fields can change
    without altering the signature. The hash is a change detector, not a
    security measure.
    """
    fingerprint_rows = []
    for event in events:
        fingerprint_rows.append(
            {
                "event_type": event.get("event_type"),
                "timestamp": event.get("timestamp"),
                "text": str(event.get("text") or "")[:400],
                "command": event.get("command"),
                "exit_code": event.get("exit_code"),
            }
        )
    encoded = json.dumps(fingerprint_rows, sort_keys=True, ensure_ascii=False).encode("utf-8")
    hasher = hashlib.sha1(usedforsecurity=False)
    hasher.update(encoded)
    return hasher.hexdigest()
107
+
108
+
109
def _new_state(project_id: str, session_id: str, events: list[dict]) -> AutoPromotionState:
    """Build the promotion state describing the session's current events.

    ``last_event_timestamp`` is the last non-blank timestamp in list order
    (events are not re-sorted), or ``None`` when no event carries one.
    """
    last_seen_timestamp: str | None = None
    for event in events:
        stamp = str(event.get("timestamp") or "").strip()
        if stamp:
            last_seen_timestamp = stamp
    return AutoPromotionState(
        session_id=session_id,
        project_id=project_id,
        signature=_session_signature(events),
        event_count=len(events),
        last_event_timestamp=last_seen_timestamp,
    )
122
+
123
+
124
def _session_matches_scope(events: list[dict], *, project_id: str, branch: str | None) -> bool:
    """Decide whether a captured session belongs to the requested scope.

    Project check: if any event has a non-blank ``repo``, the normalized
    project id must be among the normalized repos. Otherwise the check falls
    back to the directory name of each non-blank ``cwd``; a session with
    neither field passes.

    Branch check (only when ``branch`` is given): if any event records a
    branch, ``branch`` must be one of them.

    An empty event list never matches.
    """
    if not events:
        return False

    def _non_blank(field: str) -> list[str]:
        # Stringified field values, keeping only those with visible content.
        as_text = (str(event.get(field) or "") for event in events)
        return [value for value in as_text if value.strip()]

    wanted_project = normalize_project_id(project_id)
    repo_ids = {normalize_project_id(value) for value in _non_blank("repo")}
    if repo_ids:
        project_ok = wanted_project in repo_ids
    else:
        # No repo field — fall back to cwd directory name match
        cwd_ids = {normalize_project_id(Path(value).name) for value in _non_blank("cwd")}
        project_ok = not cwd_ids or wanted_project in cwd_ids
    if not project_ok:
        return False

    if branch:
        seen_branches = {value.strip() for value in _non_blank("branch")}
        if seen_branches and branch not in seen_branches:
            return False
    return True
154
+
155
+
156
def auto_promote_captured_sessions(
    *,
    project_id: str,
    buffer_dir: str | Path = ".cherrydocs/capture",
    promoted_root: str | Path = DEFAULT_PROMOTED_ROOT,
    provider: MemoryModelProvider | None = None,
    project_hint: str | None = None,
    branch: str | None = None,
    commit: str | None = None,
    policy: AutoPromotionPolicy | None = None,
    memory_profile: str | None = None,
) -> AutoPromotionRunReport:
    """Promote recently captured sessions into the project's promoted memory store.

    For each of the most recently modified capture sessions (capped by
    ``policy.max_sessions``) the function skips the session when it is out of
    scope, has too few events, or is unchanged since its saved state;
    otherwise it runs the promotion pipeline and upserts the resulting
    records.

    Args:
        project_id: Project whose store is read and updated.
        buffer_dir: Directory holding the ``*.jsonl`` capture files and the
            per-session promotion state.
        promoted_root: Root passed to ``LocalPromotedMemoryStore``.
        provider: Model provider; ``resolve_provider()`` is used when omitted.
        project_hint: Forwarded unchanged to the promotion pipeline.
        branch: When set, restricts both the existing records considered and
            the sessions matched.
        commit: Forwarded unchanged to the promotion pipeline.
        policy: Thresholds; defaults to ``AutoPromotionPolicy()``.
        memory_profile: Forwarded unchanged to the promotion pipeline.

    Returns:
        A report listing promoted sessions and skipped sessions with reasons.
    """
    resolved_policy = policy or AutoPromotionPolicy()
    resolved_provider = provider or resolve_provider()
    buffer = LocalCaptureBuffer(buffer_dir)
    store = LocalPromotedMemoryStore(promoted_root)
    # list_capture_sessions returns newest-first, so the slice keeps the
    # most recently modified sessions.
    sessions = list_capture_sessions(buffer_dir)[: resolved_policy.max_sessions]

    # Existing records given to the pipeline: with a branch filter, keep
    # records that have no branch or that match the requested branch.
    existing_records = [
        r for r in store.load_records(project_id)
        if not branch or not r.branch or r.branch == branch
    ]
    processed: list[AutoPromotionSessionResult] = []
    skipped: list[AutoPromotionSessionResult] = []

    for session_id in sessions:
        events = buffer.read(session_id)
        if not _session_matches_scope(events, project_id=project_id, branch=branch):
            skipped.append(AutoPromotionSessionResult(
                session_id=session_id, action="skip",
                reason="session outside requested project/branch scope",
            ))
            continue
        if len(events) < resolved_policy.min_event_count:
            skipped.append(AutoPromotionSessionResult(
                session_id=session_id, action="skip",
                reason=f"too few events ({len(events)} < {resolved_policy.min_event_count})",
            ))
            continue

        # Compare the saved signature with the current one; an identical
        # signature for the same project means nothing new was captured.
        state = _load_state(buffer_dir, session_id)
        current_state = _new_state(project_id, session_id, events)
        if state and state.project_id == project_id and state.signature == current_state.signature:
            skipped.append(AutoPromotionSessionResult(
                session_id=session_id, action="skip",
                reason="no new captured evidence since last promotion",
            ))
            continue

        report = run_session_promotion(
            events=events,
            session_id=session_id,
            project_id=project_id,
            provider=resolved_provider,
            project_hint=project_hint,
            branch=branch,
            commit=commit,
            existing_records=existing_records,
            min_confidence=resolved_policy.min_candidate_confidence,
            memory_profile=memory_profile,
        )
        session_records = [r for r in report.session_records if r.memory_type != "noise"]
        if not session_records:
            skipped.append(AutoPromotionSessionResult(
                session_id=session_id, action="skip",
                reason="no high-confidence durable memory candidates",
            ))
            # State is saved even though nothing was promoted, so the same
            # unchanged session is not re-distilled on the next run.
            _save_state(buffer_dir, current_state)
            continue

        # The upsert result replaces existing_records and is what later
        # sessions in this run are compared against.
        # NOTE(review): the result is not re-filtered by branch here -- confirm
        # that upsert_records returns the intended record set.
        existing_records = store.upsert_records(project_id, report.promotion.records)
        _save_state(buffer_dir, current_state)
        processed.append(AutoPromotionSessionResult(
            session_id=session_id,
            action="promote",
            promoted_count=len(session_records),
            highlights=[r.summary for r in session_records[:3]],
            distillation_trace=report.distillation_trace.model_dump(mode="json"),
        ))

    return AutoPromotionRunReport(project_id=project_id, processed=processed, skipped=skipped)
237
+
238
+
239
# Public API of this module; the state helpers and AutoPromotionState are
# intentionally left out of the star-import surface.
__all__ = [
    "AutoPromotionPolicy",
    "AutoPromotionRunReport",
    "AutoPromotionSessionResult",
    "auto_promote_captured_sessions",
    "list_capture_sessions",
]
@@ -0,0 +1,89 @@
1
+ """Shared append helpers for capture integrations."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+ from typing import Any
7
+
8
+ from app.services.capture_core import (
9
+ CaptureEvent,
10
+ CaptureEventType,
11
+ LocalCaptureBuffer,
12
+ build_capture_event,
13
+ )
14
+
15
# Substrings that mark a command as a test run. infer_capture_event_type()
# checks them against the lower-cased, whitespace-collapsed command, so a
# marker matches anywhere in the command line.
_TEST_COMMAND_MARKERS = (
    "pytest",
    "pnpm test",
    "npm test",
    "yarn test",
    "bun test",
    "vitest",
    "jest",
    "go test",
    "cargo test",
    "mix test",
    "rspec",
    "phpunit",
)
29
+
30
+
31
def infer_capture_event_type(*, tool_name: str | None = None, command: str | None = None) -> CaptureEventType:
    """Classify a captured tool invocation.

    Returns ``TEST_RESULT`` when the command contains a known test-runner
    marker, ``SHELL_RESULT`` when the tool name is exactly ``Bash`` (after
    trimming), and ``TOOL_RESULT`` otherwise. The test check takes priority.
    """
    command_text = " ".join(str(command or "").split()).lower()
    looks_like_test = bool(command_text) and any(
        marker in command_text for marker in _TEST_COMMAND_MARKERS
    )
    if looks_like_test:
        return CaptureEventType.TEST_RESULT

    is_bash = str(tool_name or "").strip() == "Bash"
    return CaptureEventType.SHELL_RESULT if is_bash else CaptureEventType.TOOL_RESULT
38
+
39
+
40
def enrich_capture_metadata(
    *,
    event_type: CaptureEventType,
    command: str | None = None,
    exit_code: int | None = None,
    metadata: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Return a copy of ``metadata`` with default classification keys added.

    Existing keys are never overwritten. Test results gain ``capture_kind``,
    ``verification_kind`` and, when an exit code is known,
    ``verification_status``. Shell results with a command gain
    ``capture_kind``. The input mapping is not mutated.
    """
    enriched = dict(metadata or {})

    if event_type == CaptureEventType.TEST_RESULT:
        defaults: dict[str, Any] = {
            "capture_kind": "verification",
            "verification_kind": "test",
        }
        if exit_code is not None:
            defaults["verification_status"] = "passed" if exit_code == 0 else "failed"
        for key, value in defaults.items():
            enriched.setdefault(key, value)
        return enriched

    if event_type == CaptureEventType.SHELL_RESULT and command:
        enriched.setdefault("capture_kind", "command")
    return enriched
56
+
57
+
58
def append_capture_event(
    *,
    buffer_dir: str | Path,
    source: str,
    event_type: CaptureEventType,
    session_id: str,
    cwd: str,
    text: str | None = None,
    files: list[str] | None = None,
    command: str | None = None,
    exit_code: int | None = None,
    metadata: dict[str, Any] | None = None,
) -> CaptureEvent:
    """Build a capture event with enriched metadata and append it to the buffer.

    Returns:
        The event that was appended to the ``LocalCaptureBuffer`` at
        ``buffer_dir``.
    """
    enriched_metadata = enrich_capture_metadata(
        event_type=event_type, command=command, exit_code=exit_code, metadata=metadata
    )
    captured = build_capture_event(
        source=source,
        event_type=event_type,
        session_id=session_id,
        cwd=cwd,
        text=text,
        files=files,
        command=command,
        exit_code=exit_code,
        metadata=enriched_metadata,
    )
    capture_buffer = LocalCaptureBuffer(buffer_dir)
    capture_buffer.append(captured)
    return captured