cherry-docs 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cherry_docs-0.2.0/PKG-INFO +143 -0
- cherry_docs-0.2.0/README.md +114 -0
- cherry_docs-0.2.0/app/__init__.py +0 -0
- cherry_docs-0.2.0/app/repo_scope.py +24 -0
- cherry_docs-0.2.0/app/services/__init__.py +0 -0
- cherry_docs-0.2.0/app/services/agent_protocol.py +59 -0
- cherry_docs-0.2.0/app/services/auto_promote_sessions.py +245 -0
- cherry_docs-0.2.0/app/services/capture_adapters.py +89 -0
- cherry_docs-0.2.0/app/services/capture_core.py +164 -0
- cherry_docs-0.2.0/app/services/internal_memory_agent.py +214 -0
- cherry_docs-0.2.0/app/services/memory_evidence.py +89 -0
- cherry_docs-0.2.0/app/services/memory_extraction_normalize.py +134 -0
- cherry_docs-0.2.0/app/services/memory_lifecycle.py +258 -0
- cherry_docs-0.2.0/app/services/memory_profiles.py +88 -0
- cherry_docs-0.2.0/app/services/memory_providers.py +113 -0
- cherry_docs-0.2.0/app/services/memory_retrieval.py +327 -0
- cherry_docs-0.2.0/app/services/memory_retrieval_scoring.py +106 -0
- cherry_docs-0.2.0/app/services/memory_retrieval_text.py +113 -0
- cherry_docs-0.2.0/app/services/memory_similarity.py +135 -0
- cherry_docs-0.2.0/app/services/privacy.py +72 -0
- cherry_docs-0.2.0/app/services/promoted_memory_answer.py +157 -0
- cherry_docs-0.2.0/app/services/promoted_memory_pipeline.py +194 -0
- cherry_docs-0.2.0/app/services/promoted_memory_store.py +57 -0
- cherry_docs-0.2.0/cherry_docs.egg-info/PKG-INFO +143 -0
- cherry_docs-0.2.0/cherry_docs.egg-info/SOURCES.txt +62 -0
- cherry_docs-0.2.0/cherry_docs.egg-info/dependency_links.txt +1 -0
- cherry_docs-0.2.0/cherry_docs.egg-info/entry_points.txt +4 -0
- cherry_docs-0.2.0/cherry_docs.egg-info/requires.txt +14 -0
- cherry_docs-0.2.0/cherry_docs.egg-info/top_level.txt +3 -0
- cherry_docs-0.2.0/cherrydocs/__init__.py +3 -0
- cherry_docs-0.2.0/cherrydocs/cli.py +213 -0
- cherry_docs-0.2.0/cherrydocs/hook.py +27 -0
- cherry_docs-0.2.0/cherrydocs/mcp.py +22 -0
- cherry_docs-0.2.0/pyproject.toml +66 -0
- cherry_docs-0.2.0/scripts/__init__.py +0 -0
- cherry_docs-0.2.0/scripts/auto_promote_capture.py +63 -0
- cherry_docs-0.2.0/scripts/check_size_limits.py +115 -0
- cherry_docs-0.2.0/scripts/ci_auto_capture.py +289 -0
- cherry_docs-0.2.0/scripts/claude_hooks/__init__.py +0 -0
- cherry_docs-0.2.0/scripts/claude_hooks/state_manager.py +526 -0
- cherry_docs-0.2.0/scripts/coverage_regression_gate.py +121 -0
- cherry_docs-0.2.0/scripts/eval_projects.py +247 -0
- cherry_docs-0.2.0/scripts/install.py +212 -0
- cherry_docs-0.2.0/scripts/pr_gate_report.py +282 -0
- cherry_docs-0.2.0/scripts/promptfoo_regression_gate.py +176 -0
- cherry_docs-0.2.0/scripts/render_agent_prompts.py +57 -0
- cherry_docs-0.2.0/setup.cfg +4 -0
- cherry_docs-0.2.0/tests/test_agent_protocol.py +31 -0
- cherry_docs-0.2.0/tests/test_answer_quality_evals.py +163 -0
- cherry_docs-0.2.0/tests/test_auto_promote_sessions.py +214 -0
- cherry_docs-0.2.0/tests/test_capture_core.py +119 -0
- cherry_docs-0.2.0/tests/test_ci_auto_capture.py +112 -0
- cherry_docs-0.2.0/tests/test_claude_capture_hooks.py +169 -0
- cherry_docs-0.2.0/tests/test_claude_hooks_state.py +431 -0
- cherry_docs-0.2.0/tests/test_coverage_regression_gate.py +101 -0
- cherry_docs-0.2.0/tests/test_internal_memory_agent.py +337 -0
- cherry_docs-0.2.0/tests/test_mcp_server.py +58 -0
- cherry_docs-0.2.0/tests/test_memory_lifecycle.py +321 -0
- cherry_docs-0.2.0/tests/test_memory_retrieval.py +617 -0
- cherry_docs-0.2.0/tests/test_pr_gate_report.py +168 -0
- cherry_docs-0.2.0/tests/test_promoted_memory_answer.py +393 -0
- cherry_docs-0.2.0/tests/test_promoted_memory_pipeline.py +227 -0
- cherry_docs-0.2.0/tests/test_promoted_memory_store.py +51 -0
- cherry_docs-0.2.0/tests/test_promptfoo_regression_gate.py +102 -0
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: cherry-docs
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Local-first AI memory for Claude Code — capture, distill, and retrieve project knowledge automatically.
|
|
5
|
+
License: MIT
|
|
6
|
+
Project-URL: Homepage, https://github.com/freebeiro/cherry-docs
|
|
7
|
+
Project-URL: Repository, https://github.com/freebeiro/cherry-docs
|
|
8
|
+
Keywords: ai,memory,claude,mcp,developer-tools
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Requires-Python: >=3.11
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
Requires-Dist: python-dotenv>=1.0
|
|
18
|
+
Requires-Dist: pydantic>=2.0
|
|
19
|
+
Requires-Dist: httpx>=0.27
|
|
20
|
+
Requires-Dist: mcp>=1.0
|
|
21
|
+
Provides-Extra: anthropic
|
|
22
|
+
Requires-Dist: anthropic>=0.40; extra == "anthropic"
|
|
23
|
+
Provides-Extra: dev
|
|
24
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
25
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
|
|
26
|
+
Requires-Dist: coverage>=7.0; extra == "dev"
|
|
27
|
+
Requires-Dist: ruff>=0.4; extra == "dev"
|
|
28
|
+
Requires-Dist: mypy>=1.9; extra == "dev"
|
|
29
|
+
|
|
30
|
+
# CherryDocs
|
|
31
|
+
|
|
32
|
+
CherryDocs is a local-first memory layer for AI coding chats.
|
|
33
|
+
|
|
34
|
+
The intended flow is simple:
|
|
35
|
+
|
|
36
|
+
1. connect your AI client to CherryDocs via MCP
|
|
37
|
+
2. start with `onboard()` — get project context in one call
|
|
38
|
+
3. work normally in the repo
|
|
39
|
+
4. ask `answer()` when continuity matters
|
|
40
|
+
|
|
41
|
+
## What It Does
|
|
42
|
+
|
|
43
|
+
CherryDocs helps an AI answer questions like:
|
|
44
|
+
|
|
45
|
+
- Why is this code here?
|
|
46
|
+
- What did we already try?
|
|
47
|
+
- What failed before?
|
|
48
|
+
- How do I continue this work without rereading everything?
|
|
49
|
+
|
|
50
|
+
The core product shape is:
|
|
51
|
+
|
|
52
|
+
- `onboard()` for the smallest useful startup view
|
|
53
|
+
- passive capture of work traces via Claude Code hooks
|
|
54
|
+
- local Ollama distillation of sessions into durable project memory
|
|
55
|
+
- `answer()` for retrieval when a new chat needs context
|
|
56
|
+
|
|
57
|
+
## Current Architecture
|
|
58
|
+
|
|
59
|
+
- **Durable memory store**: local JSON at `~/.cherrydocs/promoted/{project_id}.json`
|
|
60
|
+
- **Transport**: MCP via stdio (FastMCP) — 4 tools
|
|
61
|
+
- **Distillation**: local Ollama (qwen2.5:7b-instruct by default)
|
|
62
|
+
- **Capture**: Claude Code hooks + MCP log tools
|
|
63
|
+
|
|
64
|
+
CherryDocs is project-scoped first and branch-aware second.
|
|
65
|
+
|
|
66
|
+
## MCP Tools
|
|
67
|
+
|
|
68
|
+
| Tool | Purpose |
|
|
69
|
+
|---|---|
|
|
70
|
+
| `onboard` | Session start — loads top memories + recent sessions |
|
|
71
|
+
| `log_activity` | Record a decision, fix, or insight to the capture buffer |
|
|
72
|
+
| `save_checkpoint` | Structured handoff — blind AI must be able to continue |
|
|
73
|
+
| `answer` | Query promoted memory for project questions |
|
|
74
|
+
|
|
75
|
+
## Setup
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
pip install cherry-docs
|
|
79
|
+
cherry install # installs Claude Code hooks
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
Then add to your `.mcp.json`:
|
|
83
|
+
|
|
84
|
+
```json
|
|
85
|
+
{
|
|
86
|
+
"mcpServers": {
|
|
87
|
+
"cherry-docs": {
|
|
88
|
+
"command": "cherry-docs-mcp"
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
## Minimal AI Rule
|
|
95
|
+
|
|
96
|
+
```md
|
|
97
|
+
Use CherryDocs.
|
|
98
|
+
- On start: call `onboard()`.
|
|
99
|
+
- Work normally.
|
|
100
|
+
- Use `answer()` when history could change the decision.
|
|
101
|
+
- Use `log_activity()` when something important would otherwise be lost.
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
The canonical source for generated agent rules is [docs/agent_protocol.toml](docs/agent_protocol.toml).
|
|
105
|
+
|
|
106
|
+
## Workflow
|
|
107
|
+
|
|
108
|
+
In a new session:
|
|
109
|
+
|
|
110
|
+
1. Claude calls `onboard()` — gets top memories + recent session state
|
|
111
|
+
2. Work happens normally; hooks capture tool use and code changes
|
|
112
|
+
3. On git commit, auto-distillation fires via Ollama
|
|
113
|
+
4. Ask `answer("Why did we change this?")` in any future session
|
|
114
|
+
|
|
115
|
+
## What Works Today
|
|
116
|
+
|
|
117
|
+
- Local file-backed promoted memory (no cloud, no graph DB)
|
|
118
|
+
- MCP stdio server with 4 tools
|
|
119
|
+
- Claude Code hook-based passive capture
|
|
120
|
+
- Ollama distillation pipeline (per-session + commit-triggered)
|
|
121
|
+
- `cherry eval` — heuristic + LLM judge for memory quality
|
|
122
|
+
- `cherry why <file>` — show memories anchored to commits touching a file
|
|
123
|
+
|
|
124
|
+
## Development
|
|
125
|
+
|
|
126
|
+
```bash
|
|
127
|
+
pip install -e ".[dev]"  # the dev extra provides pytest, coverage, ruff and mypy
|
|
128
|
+
python -m pytest tests/ -q
|
|
129
|
+
python scripts/check_size_limits.py
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
For PR hardening:
|
|
133
|
+
|
|
134
|
+
```bash
|
|
135
|
+
bash scripts/local_pr_gate.sh fast
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
## Documentation
|
|
139
|
+
|
|
140
|
+
- [Product Brief](docs/PRODUCT_BRIEF.md)
|
|
141
|
+
- [System Deep Dive](docs/SYSTEM_DEEP_DIVE.md)
|
|
142
|
+
|
|
143
|
+
> Would another AI actually want to keep this on because it helps achieve the goal?
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
# CherryDocs
|
|
2
|
+
|
|
3
|
+
CherryDocs is a local-first memory layer for AI coding chats.
|
|
4
|
+
|
|
5
|
+
The intended flow is simple:
|
|
6
|
+
|
|
7
|
+
1. connect your AI client to CherryDocs via MCP
|
|
8
|
+
2. start with `onboard()` — get project context in one call
|
|
9
|
+
3. work normally in the repo
|
|
10
|
+
4. ask `answer()` when continuity matters
|
|
11
|
+
|
|
12
|
+
## What It Does
|
|
13
|
+
|
|
14
|
+
CherryDocs helps an AI answer questions like:
|
|
15
|
+
|
|
16
|
+
- Why is this code here?
|
|
17
|
+
- What did we already try?
|
|
18
|
+
- What failed before?
|
|
19
|
+
- How do I continue this work without rereading everything?
|
|
20
|
+
|
|
21
|
+
The core product shape is:
|
|
22
|
+
|
|
23
|
+
- `onboard()` for the smallest useful startup view
|
|
24
|
+
- passive capture of work traces via Claude Code hooks
|
|
25
|
+
- local Ollama distillation of sessions into durable project memory
|
|
26
|
+
- `answer()` for retrieval when a new chat needs context
|
|
27
|
+
|
|
28
|
+
## Current Architecture
|
|
29
|
+
|
|
30
|
+
- **Durable memory store**: local JSON at `~/.cherrydocs/promoted/{project_id}.json`
|
|
31
|
+
- **Transport**: MCP via stdio (FastMCP) — 4 tools
|
|
32
|
+
- **Distillation**: local Ollama (qwen2.5:7b-instruct by default)
|
|
33
|
+
- **Capture**: Claude Code hooks + MCP log tools
|
|
34
|
+
|
|
35
|
+
CherryDocs is project-scoped first and branch-aware second.
|
|
36
|
+
|
|
37
|
+
## MCP Tools
|
|
38
|
+
|
|
39
|
+
| Tool | Purpose |
|
|
40
|
+
|---|---|
|
|
41
|
+
| `onboard` | Session start — loads top memories + recent sessions |
|
|
42
|
+
| `log_activity` | Record a decision, fix, or insight to the capture buffer |
|
|
43
|
+
| `save_checkpoint` | Structured handoff — blind AI must be able to continue |
|
|
44
|
+
| `answer` | Query promoted memory for project questions |
|
|
45
|
+
|
|
46
|
+
## Setup
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
pip install cherry-docs
|
|
50
|
+
cherry install # installs Claude Code hooks
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
Then add to your `.mcp.json`:
|
|
54
|
+
|
|
55
|
+
```json
|
|
56
|
+
{
|
|
57
|
+
"mcpServers": {
|
|
58
|
+
"cherry-docs": {
|
|
59
|
+
"command": "cherry-docs-mcp"
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
## Minimal AI Rule
|
|
66
|
+
|
|
67
|
+
```md
|
|
68
|
+
Use CherryDocs.
|
|
69
|
+
- On start: call `onboard()`.
|
|
70
|
+
- Work normally.
|
|
71
|
+
- Use `answer()` when history could change the decision.
|
|
72
|
+
- Use `log_activity()` when something important would otherwise be lost.
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
The canonical source for generated agent rules is [docs/agent_protocol.toml](docs/agent_protocol.toml).
|
|
76
|
+
|
|
77
|
+
## Workflow
|
|
78
|
+
|
|
79
|
+
In a new session:
|
|
80
|
+
|
|
81
|
+
1. Claude calls `onboard()` — gets top memories + recent session state
|
|
82
|
+
2. Work happens normally; hooks capture tool use and code changes
|
|
83
|
+
3. On git commit, auto-distillation fires via Ollama
|
|
84
|
+
4. Ask `answer("Why did we change this?")` in any future session
|
|
85
|
+
|
|
86
|
+
## What Works Today
|
|
87
|
+
|
|
88
|
+
- Local file-backed promoted memory (no cloud, no graph DB)
|
|
89
|
+
- MCP stdio server with 4 tools
|
|
90
|
+
- Claude Code hook-based passive capture
|
|
91
|
+
- Ollama distillation pipeline (per-session + commit-triggered)
|
|
92
|
+
- `cherry eval` — heuristic + LLM judge for memory quality
|
|
93
|
+
- `cherry why <file>` — show memories anchored to commits touching a file
|
|
94
|
+
|
|
95
|
+
## Development
|
|
96
|
+
|
|
97
|
+
```bash
|
|
98
|
+
pip install -e ".[dev]"  # the dev extra provides pytest, coverage, ruff and mypy
|
|
99
|
+
python -m pytest tests/ -q
|
|
100
|
+
python scripts/check_size_limits.py
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
For PR hardening:
|
|
104
|
+
|
|
105
|
+
```bash
|
|
106
|
+
bash scripts/local_pr_gate.sh fast
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
## Documentation
|
|
110
|
+
|
|
111
|
+
- [Product Brief](docs/PRODUCT_BRIEF.md)
|
|
112
|
+
- [System Deep Dive](docs/SYSTEM_DEEP_DIVE.md)
|
|
113
|
+
|
|
114
|
+
> Would another AI actually want to keep this on because it helps achieve the goal?
|
|
File without changes
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def normalize_project_id(project_id: Optional[str], default: str = "default-project") -> str:
    """Normalize a repository/project identifier to the internal dashed format.

    Examples:
    - github.com/owner/repo -> owner-repo
    - https://github.com/owner/repo.git -> owner-repo
    - git@github.com:owner/repo.git -> owner-repo
    - HTTPS://GitHub.com/Owner/Repo.git/ -> owner-repo

    Args:
        project_id: Raw identifier (URL, SSH remote, ``owner/repo`` or a plain
            name). ``None`` and blank strings fall back to ``default``.
        default: Value returned, unchanged, when nothing usable remains.

    Returns:
        The lower-cased identifier with URL scheme, GitHub host prefix and a
        trailing ``.git`` removed and ``/`` replaced by ``-``; or ``default``.
    """
    if not project_id:
        return default

    # Lower-case up front so scheme, host and ".git" matching is
    # case-insensitive (the final result is lower-case anyway).
    normalized = project_id.strip().lower()
    if not normalized:
        return default

    normalized = normalized.replace("https://", "").replace("http://", "")
    normalized = normalized.replace("git@github.com:", "").replace("github.com/", "")
    # Drop surrounding slashes *before* removing ".git" so "repo.git/" is
    # handled, then once more in case ".git" was preceded by a slash.
    normalized = normalized.strip("/")
    normalized = normalized.removesuffix(".git")
    normalized = normalized.strip("/")
    normalized = normalized.replace("/", "-")
    return normalized or default
|
|
File without changes
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
"""Canonical agent protocol rendering for all supported client rule files."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import tomllib
|
|
5
|
+
from hashlib import sha256
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
# Directory two levels above this module's own directory.
ROOT_DIR = Path(__file__).resolve().parents[2]

# Location of the canonical protocol definition, relative to ROOT_DIR.
PROTOCOL_SOURCE = "docs/agent_protocol.toml"
PROTOCOL_PATH = ROOT_DIR / PROTOCOL_SOURCE

# Generated rule file (relative path) -> platform key understood by
# render_platform_prompt().
PROMPT_OUTPUTS = {
    ".claude/CLAUDE.md": "claude",
    "AGENTS.md": "agents",
    "GEMINI.md": "gemini",
    ".cursorrules": "cursorrules",
    ".cursor/rules/cherrydocs.mdc": "cursor_mdc",
}
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _load_protocol() -> tuple[dict, str]:
    """Read the protocol TOML file and return ``(parsed_document, raw_text)``.

    The raw text is returned next to the parsed mapping so callers can
    fingerprint the exact file contents.
    """
    source_text = PROTOCOL_PATH.read_text(encoding="utf-8")
    parsed = tomllib.loads(source_text)
    return parsed, source_text
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _protocol_metadata(protocol: dict, raw: str) -> dict[str, str]:
    """Build the provenance fields for a rendered protocol.

    Returns a mapping with the protocol source path, the version from
    ``protocol["meta"]["version"]`` (as a string) and the first 12 hex
    characters of the SHA-256 digest of the UTF-8 encoded ``raw`` text.
    """
    version = str(protocol["meta"]["version"])
    digest = sha256(raw.encode("utf-8")).hexdigest()
    return {"source": PROTOCOL_SOURCE, "version": version, "hash": digest[:12]}
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _generated_comment(version: str, protocol_hash: str) -> str:
    """Return the HTML comment that marks a file as generated from the protocol."""
    provenance = f"Generated from {PROTOCOL_SOURCE} version={version} hash={protocol_hash}"
    return f"<!-- {provenance}; do not edit by hand. -->"
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _render_body(protocol: dict, version: str, protocol_hash: str) -> str:
    """Render the shared prompt body: generated-marker, title, blank line, bullets.

    Trailing whitespace is stripped and the result ends with exactly one newline.
    """
    header = protocol["meta"]
    common = protocol["shared"]
    rendered = [_generated_comment(version, protocol_hash), header["title"], ""]
    for bullet in common["bullets"]:
        rendered.append(f"- {bullet}")
    text = "\n".join(rendered)
    return text.rstrip() + "\n"
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def render_platform_prompt(platform: str) -> str:
    """Render the protocol prompt for one platform key.

    The protocol file is loaded and rendered first; the platform key then only
    selects the text placed in front of the shared body.

    Raises:
        ValueError: If ``platform`` is not one of the supported keys.
    """
    protocol, raw = _load_protocol()
    stamp = _protocol_metadata(protocol, raw)
    body = _render_body(protocol, stamp["version"], stamp["hash"])

    # Platform key -> text prepended to the shared body.
    project_rules_heading = "# Project Rules\n\n"
    prefixes = {
        "claude": "",
        "agents": project_rules_heading,
        "gemini": project_rules_heading,
        "cursorrules": project_rules_heading,
        "cursor_mdc": "---\ndescription: CherryDocs project protocol\nalwaysApply: true\n---\n\n",
    }
    if platform not in prefixes:
        raise ValueError(f"Unsupported platform: {platform}")
    return prefixes[platform] + body
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def build_prompt_file_map() -> dict[str, str]:
    """Return ``{output path: rendered prompt}`` for every entry in PROMPT_OUTPUTS."""
    rendered: dict[str, str] = {}
    for relative_path, platform in PROMPT_OUTPUTS.items():
        rendered[relative_path] = render_platform_prompt(platform)
    return rendered
|
|
@@ -0,0 +1,245 @@
|
|
|
1
|
+
"""Background-friendly auto-promotion for captured AI sessions."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import hashlib
|
|
6
|
+
import json
|
|
7
|
+
from datetime import UTC, datetime
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
11
|
+
|
|
12
|
+
from app.repo_scope import normalize_project_id
|
|
13
|
+
from app.services.capture_core import LocalCaptureBuffer
|
|
14
|
+
from app.services.internal_memory_agent import MemoryModelProvider
|
|
15
|
+
from app.services.memory_providers import resolve_provider
|
|
16
|
+
from app.services.promoted_memory_pipeline import run_session_promotion
|
|
17
|
+
from app.services.promoted_memory_store import DEFAULT_PROMOTED_ROOT, LocalPromotedMemoryStore
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# Tunable thresholds for one auto-promotion run. Unknown keys are ignored.
class AutoPromotionPolicy(BaseModel):
    model_config = ConfigDict(extra="ignore")

    # Sessions with fewer captured events than this are skipped.
    min_event_count: int = 3
    # Passed to the promotion pipeline as its minimum candidate confidence.
    min_candidate_confidence: float = 0.8
    # Upper bound on how many (most recently modified) sessions are examined.
    max_sessions: int = 10
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# Per-session bookkeeping persisted after a session has been evaluated, used to
# detect whether its captured events changed since then. Unknown keys are ignored.
class AutoPromotionState(BaseModel):
    model_config = ConfigDict(extra="ignore")

    session_id: str
    project_id: str
    # Hash of the session's events at evaluation time.
    signature: str
    event_count: int
    # Timestamp of the last event that carried one, if any.
    last_event_timestamp: str | None = None
    # Defaults to the current UTC time (ISO 8601) when the state is created.
    last_promoted_at: str = Field(default_factory=lambda: datetime.now(UTC).isoformat())
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
# Outcome for a single captured session within an auto-promotion run.
# Unknown keys are ignored.
class AutoPromotionSessionResult(BaseModel):
    model_config = ConfigDict(extra="ignore")

    session_id: str
    # What was done with the session, e.g. "promote" or "skip".
    action: str
    # Human-readable explanation; filled in for skipped sessions.
    reason: str = ""
    promoted_count: int = 0
    # Short summaries of some of the promoted records.
    highlights: list[str] = Field(default_factory=list)
    # JSON-compatible dump of the distillation trace, when available.
    distillation_trace: dict[str, object] | None = None
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
# Summary of one auto-promotion run for a project: which sessions produced
# promoted memory and which were skipped. Unknown keys are ignored.
class AutoPromotionRunReport(BaseModel):
    model_config = ConfigDict(extra="ignore")

    project_id: str
    processed: list[AutoPromotionSessionResult] = Field(default_factory=list)
    skipped: list[AutoPromotionSessionResult] = Field(default_factory=list)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _state_dir(buffer_dir: str | Path) -> Path:
|
|
59
|
+
return Path(buffer_dir).expanduser().resolve() / ".promotion-state"
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _state_path(buffer_dir: str | Path, session_id: str) -> Path:
    """Return the JSON state file path for ``session_id``.

    Forward and backward slashes in the session id are replaced with
    underscores so the id maps to a single file name.
    """
    file_stem = session_id
    for separator in ("/", "\\"):
        file_stem = file_stem.replace(separator, "_")
    return _state_dir(buffer_dir) / f"{file_stem}.json"
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _load_state(buffer_dir: str | Path, session_id: str) -> AutoPromotionState | None:
    """Load the saved promotion state for a session.

    Returns ``None`` when no state file exists or when it cannot be read,
    parsed or validated.
    """
    state_file = _state_path(buffer_dir, session_id)
    if not state_file.exists():
        return None
    try:
        raw_state = state_file.read_text(encoding="utf-8")
        return AutoPromotionState.model_validate(json.loads(raw_state))
    except Exception:
        # Best-effort: an unreadable or invalid state file is treated the
        # same as a missing one.
        return None
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _save_state(buffer_dir: str | Path, state: AutoPromotionState) -> None:
    """Write ``state`` as indented JSON to its per-session state file.

    The state directory is created if it does not exist yet.
    """
    target = _state_path(buffer_dir, state.session_id)
    target.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(state.model_dump(mode="json"), indent=2)
    target.write_text(serialized, encoding="utf-8")
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def list_capture_sessions(buffer_dir: str | Path) -> list[str]:
|
|
85
|
+
root = Path(buffer_dir).expanduser().resolve()
|
|
86
|
+
if not root.exists():
|
|
87
|
+
return []
|
|
88
|
+
return [
|
|
89
|
+
path.stem
|
|
90
|
+
for path in sorted(root.glob("*.jsonl"), key=lambda p: p.stat().st_mtime, reverse=True)
|
|
91
|
+
]
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _session_signature(events: list[dict]) -> str:
|
|
95
|
+
relevant = [
|
|
96
|
+
{
|
|
97
|
+
"event_type": event.get("event_type"),
|
|
98
|
+
"timestamp": event.get("timestamp"),
|
|
99
|
+
"text": str(event.get("text") or "")[:400],
|
|
100
|
+
"command": event.get("command"),
|
|
101
|
+
"exit_code": event.get("exit_code"),
|
|
102
|
+
}
|
|
103
|
+
for event in events
|
|
104
|
+
]
|
|
105
|
+
payload = json.dumps(relevant, sort_keys=True, ensure_ascii=False)
|
|
106
|
+
return hashlib.sha1(payload.encode("utf-8"), usedforsecurity=False).hexdigest()
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def _new_state(project_id: str, session_id: str, events: list[dict]) -> AutoPromotionState:
    """Build the promotion state describing ``events`` as they are right now.

    ``last_event_timestamp`` is the timestamp of the last event in list order
    that has a non-blank one, or ``None`` when no event carries a timestamp.
    """
    seen_timestamps: list[str] = []
    for event in events:
        stamp = str(event.get("timestamp") or "").strip()
        if stamp:
            seen_timestamps.append(stamp)
    latest = seen_timestamps[-1] if seen_timestamps else None

    return AutoPromotionState(
        session_id=session_id,
        project_id=project_id,
        signature=_session_signature(events),
        event_count=len(events),
        last_event_timestamp=latest,
    )
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def _session_matches_scope(events: list[dict], *, project_id: str, branch: str | None) -> bool:
    """Decide whether a captured session belongs to the requested project/branch.

    Rules, in order:
    - A session without events never matches.
    - If any event names a ``repo``, the normalized project id must be among
      the normalized repo values.
    - Otherwise, if any event has a ``cwd``, the normalized project id must
      equal the normalized final path component of one of them.
    - If ``branch`` is given and any event names a branch, ``branch`` must be
      one of them.
    """
    if not events:
        return False

    def non_blank(field: str) -> list[str]:
        # Raw (unstripped) string values of ``field`` that are not blank.
        return [
            str(event.get(field) or "")
            for event in events
            if str(event.get(field) or "").strip()
        ]

    wanted = normalize_project_id(project_id)

    repo_ids = {normalize_project_id(value) for value in non_blank("repo")}
    if repo_ids:
        if wanted not in repo_ids:
            return False
    else:
        # No repo field — fall back to cwd directory name match
        cwd_ids = {normalize_project_id(Path(value).name) for value in non_blank("cwd")}
        if cwd_ids and wanted not in cwd_ids:
            return False

    if branch:
        branch_names = {value.strip() for value in non_blank("branch")}
        if branch_names and branch not in branch_names:
            return False

    return True
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def auto_promote_captured_sessions(
    *,
    project_id: str,
    buffer_dir: str | Path = ".cherrydocs/capture",
    promoted_root: str | Path = DEFAULT_PROMOTED_ROOT,
    provider: MemoryModelProvider | None = None,
    project_hint: str | None = None,
    branch: str | None = None,
    commit: str | None = None,
    policy: AutoPromotionPolicy | None = None,
    memory_profile: str | None = None,
) -> AutoPromotionRunReport:
    """Promote durable memory from recently captured sessions of one project.

    The most recently modified sessions (at most ``policy.max_sessions``) are
    examined in turn. A session is skipped when it is outside the requested
    project/branch scope, has too few events, has not changed since it was last
    evaluated, or yields no non-noise records. Otherwise the promotion records
    are upserted into the promoted-memory store. The session's state is saved
    whenever the promotion pipeline was run for it.

    Returns:
        A report listing processed (promoted) and skipped sessions.
    """
    active_policy = policy or AutoPromotionPolicy()
    active_provider = provider or resolve_provider()
    capture_buffer = LocalCaptureBuffer(buffer_dir)
    memory_store = LocalPromotedMemoryStore(promoted_root)
    session_ids = list_capture_sessions(buffer_dir)[: active_policy.max_sessions]

    # Records already promoted for this project; when a branch is requested,
    # keep only records with no branch or with that same branch.
    existing_records = [
        record
        for record in memory_store.load_records(project_id)
        if not branch or not record.branch or record.branch == branch
    ]
    processed: list[AutoPromotionSessionResult] = []
    skipped: list[AutoPromotionSessionResult] = []

    def note_skip(skipped_session: str, why: str) -> None:
        skipped.append(
            AutoPromotionSessionResult(session_id=skipped_session, action="skip", reason=why)
        )

    for session_id in session_ids:
        events = capture_buffer.read(session_id)

        if not _session_matches_scope(events, project_id=project_id, branch=branch):
            note_skip(session_id, "session outside requested project/branch scope")
            continue

        if len(events) < active_policy.min_event_count:
            note_skip(
                session_id,
                f"too few events ({len(events)} < {active_policy.min_event_count})",
            )
            continue

        previous_state = _load_state(buffer_dir, session_id)
        current_state = _new_state(project_id, session_id, events)
        unchanged = (
            previous_state
            and previous_state.project_id == project_id
            and previous_state.signature == current_state.signature
        )
        if unchanged:
            note_skip(session_id, "no new captured evidence since last promotion")
            continue

        report = run_session_promotion(
            events=events,
            session_id=session_id,
            project_id=project_id,
            provider=active_provider,
            project_hint=project_hint,
            branch=branch,
            commit=commit,
            existing_records=existing_records,
            min_confidence=active_policy.min_candidate_confidence,
            memory_profile=memory_profile,
        )
        durable_records = [
            record for record in report.session_records if record.memory_type != "noise"
        ]

        if not durable_records:
            note_skip(session_id, "no high-confidence durable memory candidates")
            # Remember the evaluated signature so unchanged sessions are not
            # run through the pipeline again.
            _save_state(buffer_dir, current_state)
            continue

        # Later sessions in this run are compared against the updated store.
        existing_records = memory_store.upsert_records(project_id, report.promotion.records)
        _save_state(buffer_dir, current_state)
        processed.append(
            AutoPromotionSessionResult(
                session_id=session_id,
                action="promote",
                promoted_count=len(durable_records),
                highlights=[record.summary for record in durable_records[:3]],
                distillation_trace=report.distillation_trace.model_dump(mode="json"),
            )
        )

    return AutoPromotionRunReport(project_id=project_id, processed=processed, skipped=skipped)
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
# Public API of this module.
__all__ = [
    "AutoPromotionPolicy",
    "AutoPromotionRunReport",
    "AutoPromotionSessionResult",
    "auto_promote_captured_sessions",
    "list_capture_sessions",
]
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
"""Shared append helpers for capture integrations."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from app.services.capture_core import (
|
|
9
|
+
CaptureEvent,
|
|
10
|
+
CaptureEventType,
|
|
11
|
+
LocalCaptureBuffer,
|
|
12
|
+
build_capture_event,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
# Lower-case command fragments that identify a test-runner invocation.
_TEST_COMMAND_MARKERS = (
    "pytest",
    "pnpm test",
    "npm test",
    "yarn test",
    "bun test",
    "vitest",
    "jest",
    "go test",
    "cargo test",
    "mix test",
    "rspec",
    "phpunit",
)


def infer_capture_event_type(*, tool_name: str | None = None, command: str | None = None) -> CaptureEventType:
    """Classify a captured tool invocation.

    Args:
        tool_name: Name of the tool that ran, if known.
        command: Command line that was executed, if any.

    Returns:
        ``TEST_RESULT`` when the command contains a known test-runner marker
        as a whole word, ``SHELL_RESULT`` for any other ``Bash`` tool call,
        and ``TOOL_RESULT`` otherwise.
    """
    # Function-scope import: ``re`` is not among this module's top-level imports.
    import re

    # Collapse runs of whitespace so multi-word markers match however the
    # command was spaced, and compare case-insensitively.
    normalized_command = " ".join(str(command or "").split()).lower()
    if normalized_command:
        # Word boundaries stop markers from matching inside unrelated words,
        # e.g. "rspec" in "perspective" or "jest" in "majestic".
        alternatives = "|".join(re.escape(marker) for marker in _TEST_COMMAND_MARKERS)
        if re.search(rf"\b(?:{alternatives})\b", normalized_command):
            return CaptureEventType.TEST_RESULT
    if str(tool_name or "").strip() == "Bash":
        return CaptureEventType.SHELL_RESULT
    return CaptureEventType.TOOL_RESULT
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def enrich_capture_metadata(
    *,
    event_type: CaptureEventType,
    command: str | None = None,
    exit_code: int | None = None,
    metadata: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Return a copy of ``metadata`` with default capture annotations added.

    Existing keys are never overwritten. Test results are tagged as a
    verification of kind "test", plus a passed/failed status when an exit code
    is known. Shell results with a command are tagged as a command.
    The input mapping is not modified.
    """
    result = dict(metadata) if metadata else {}

    if event_type == CaptureEventType.TEST_RESULT:
        result.setdefault("capture_kind", "verification")
        result.setdefault("verification_kind", "test")
        if exit_code is not None:
            status = "passed" if exit_code == 0 else "failed"
            result.setdefault("verification_status", status)
        return result

    if event_type == CaptureEventType.SHELL_RESULT and command:
        result.setdefault("capture_kind", "command")
    return result
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def append_capture_event(
    *,
    buffer_dir: str | Path,
    source: str,
    event_type: CaptureEventType,
    session_id: str,
    cwd: str,
    text: str | None = None,
    files: list[str] | None = None,
    command: str | None = None,
    exit_code: int | None = None,
    metadata: dict[str, Any] | None = None,
) -> CaptureEvent:
    """Build a capture event, append it to the local buffer, and return it.

    The metadata is first enriched with default annotations derived from the
    event type, command and exit code.
    """
    annotated_metadata = enrich_capture_metadata(
        event_type=event_type,
        command=command,
        exit_code=exit_code,
        metadata=metadata,
    )
    captured = build_capture_event(
        source=source,
        event_type=event_type,
        session_id=session_id,
        cwd=cwd,
        text=text,
        files=files,
        command=command,
        exit_code=exit_code,
        metadata=annotated_metadata,
    )
    capture_buffer = LocalCaptureBuffer(buffer_dir)
    capture_buffer.append(captured)
    return captured
|