cherry-docs 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. app/__init__.py +0 -0
  2. app/repo_scope.py +24 -0
  3. app/services/__init__.py +0 -0
  4. app/services/agent_protocol.py +59 -0
  5. app/services/auto_promote_sessions.py +245 -0
  6. app/services/capture_adapters.py +89 -0
  7. app/services/capture_core.py +164 -0
  8. app/services/internal_memory_agent.py +214 -0
  9. app/services/memory_evidence.py +89 -0
  10. app/services/memory_extraction_normalize.py +134 -0
  11. app/services/memory_lifecycle.py +258 -0
  12. app/services/memory_profiles.py +88 -0
  13. app/services/memory_providers.py +113 -0
  14. app/services/memory_retrieval.py +327 -0
  15. app/services/memory_retrieval_scoring.py +106 -0
  16. app/services/memory_retrieval_text.py +113 -0
  17. app/services/memory_similarity.py +135 -0
  18. app/services/privacy.py +72 -0
  19. app/services/promoted_memory_answer.py +157 -0
  20. app/services/promoted_memory_pipeline.py +194 -0
  21. app/services/promoted_memory_store.py +57 -0
  22. cherry_docs-0.2.0.dist-info/METADATA +143 -0
  23. cherry_docs-0.2.0.dist-info/RECORD +42 -0
  24. cherry_docs-0.2.0.dist-info/WHEEL +5 -0
  25. cherry_docs-0.2.0.dist-info/entry_points.txt +4 -0
  26. cherry_docs-0.2.0.dist-info/top_level.txt +3 -0
  27. cherrydocs/__init__.py +3 -0
  28. cherrydocs/cli.py +213 -0
  29. cherrydocs/hook.py +27 -0
  30. cherrydocs/mcp.py +22 -0
  31. scripts/__init__.py +0 -0
  32. scripts/auto_promote_capture.py +63 -0
  33. scripts/check_size_limits.py +115 -0
  34. scripts/ci_auto_capture.py +289 -0
  35. scripts/claude_hooks/__init__.py +0 -0
  36. scripts/claude_hooks/state_manager.py +526 -0
  37. scripts/coverage_regression_gate.py +121 -0
  38. scripts/eval_projects.py +247 -0
  39. scripts/install.py +212 -0
  40. scripts/pr_gate_report.py +282 -0
  41. scripts/promptfoo_regression_gate.py +176 -0
  42. scripts/render_agent_prompts.py +57 -0
@@ -0,0 +1,282 @@
1
+ #!/usr/bin/env python3
2
+ """Render a markdown + JSON PR gate report from CI artifacts."""
3
+
4
+ from __future__ import annotations
5
+
6
+ import argparse
7
+ import json
8
+ import os
9
+ import xml.etree.ElementTree as ET
10
+ from pathlib import Path
11
+ from typing import Any
12
+
13
+
14
def _load_json(path: Path) -> Any:
    """Return the parsed JSON document stored at *path*, or ``None`` if absent.

    The file is read as bytes so that ``json.loads`` detects the encoding
    (UTF-8, UTF-16 or UTF-32) itself instead of depending on the platform's
    locale encoding.

    Raises:
        json.JSONDecodeError: If the file exists but does not contain valid JSON.
    """
    if not path.exists():
        return None
    return json.loads(path.read_bytes())
16
+
17
+
18
def _job_result(name: str) -> str:
    """Return the whitespace-trimmed value of environment variable *name*.

    Yields ``"missing"`` when the variable is unset or contains only
    whitespace.
    """
    raw = os.environ.get(name)
    if raw is None:
        return "missing"
    trimmed = raw.strip()
    return trimmed if trimmed else "missing"
20
+
21
+
22
def _summarize_coverage(path: Path) -> dict[str, Any] | None:
    """Summarize the root-element attributes of the coverage XML at *path*.

    Returns ``None`` when the file does not exist. Otherwise returns a dict
    with ``line_rate`` and ``branch_rate`` (the ``line-rate`` / ``branch-rate``
    attributes multiplied by 100 and rounded to two decimals) and
    ``lines_covered`` / ``lines_valid`` as integers. Attributes missing from
    the root element default to zero.

    Raises:
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
        ValueError: If an attribute is present but not numeric.
    """
    if not path.exists():
        return None
    # Parse from bytes so the XML declaration, not the locale, selects the
    # encoding used to decode the document.
    attrs = ET.fromstring(path.read_bytes()).attrib
    return {
        "line_rate": round(float(attrs.get("line-rate", 0.0)) * 100, 2),
        "branch_rate": round(float(attrs.get("branch-rate", 0.0)) * 100, 2),
        "lines_covered": int(attrs.get("lines-covered", 0)),
        "lines_valid": int(attrs.get("lines-valid", 0)),
    }
32
+
33
+
34
def _summarize_coverage_regression(path: Path) -> dict[str, Any] | None:
    """Extract the coverage-regression figures from the JSON report at *path*.

    Returns ``None`` when the report is missing or empty. Rate fields are
    copied verbatim (``None`` if absent); ``errors`` is always a list.
    """
    report = _load_json(path)
    if not report:
        return None
    copied_fields = (
        "baseline_line_rate",
        "candidate_line_rate",
        "line_rate_drop",
        "baseline_branch_rate",
        "candidate_branch_rate",
        "branch_rate_drop",
    )
    summary: dict[str, Any] = {field: report.get(field) for field in copied_fields}
    summary["errors"] = report.get("errors") or []
    return summary
47
+
48
+
49
def _summarize_jscpd(path: Path) -> dict[str, Any] | None:
    """Summarize the ``statistics.total`` section of a jscpd JSON report.

    Returns ``None`` when the report is missing or empty. ``percentage`` is
    rounded to two decimals; the other fields are copied as found.
    """
    report = _load_json(path)
    if not report:
        return None
    statistics = report.get("statistics", {})
    totals = statistics.get("total", {})
    source_count = totals.get("sources")
    clone_count = totals.get("clones")
    duplicated_pct = round(float(totals.get("percentage", 0.0)), 2)
    return {
        "files": source_count,
        "clones": clone_count,
        "percentage": duplicated_pct,
        "duplicated_lines": totals.get("duplicatedLines"),
    }
60
+
61
+
62
def _summarize_bandit(path: Path) -> dict[str, Any] | None:
    """Summarize a bandit JSON report: finding count plus severity totals.

    Returns ``None`` when the report is missing or empty. Severity counts are
    read from ``metrics._totals`` and default to zero.
    """
    report = _load_json(path)
    if not report:
        return None
    findings = report.get("results", [])
    totals = report.get("metrics", {}).get("_totals", {})
    summary: dict[str, Any] = {"findings": len(findings)}
    for level in ("high", "medium", "low"):
        summary[level] = totals.get(f"SEVERITY.{level.upper()}", 0)
    return summary
74
+
75
+
76
def _summarize_pip_audit(path: Path) -> dict[str, Any] | None:
    """Count vulnerable dependencies and vulnerabilities in a pip-audit report.

    Accepts either a top-level object with a ``dependencies`` list or a bare
    list of dependency entries; each entry's ``vulns`` (or ``vulnerabilities``)
    list is counted. Returns ``None`` only when the report file is missing.
    """
    report = _load_json(path)
    if report is None:
        return None
    if isinstance(report, dict):
        dependencies = report.get("dependencies", [])
    else:
        dependencies = report
    per_dependency = [
        len(entry.get("vulns") or entry.get("vulnerabilities") or [])
        for entry in dependencies or []
    ]
    return {
        "affected_dependencies": len([count for count in per_dependency if count]),
        "vulnerabilities": sum(per_dependency),
    }
89
+
90
+
91
def _summarize_gitleaks(path: Path) -> dict[str, Any] | None:
    """Return the number of entries in the gitleaks JSON report at *path*.

    Returns ``None`` when the report file is missing; an empty report still
    yields ``{"findings": 0}``.
    """
    findings = _load_json(path)
    return None if findings is None else {"findings": len(findings)}
96
+
97
+
98
def _summarize_semgrep(path: Path) -> dict[str, Any] | None:
    """Summarize a semgrep JSON report by finding severity.

    Returns ``None`` when the report is missing or empty. ``errors`` is the
    length of the report's own ``errors`` list, while ``error`` / ``warning`` /
    ``info`` count findings by ``extra.severity`` (upper-cased, defaulting to
    ``INFO``). Other severities are tallied but not reported.
    """
    report = _load_json(path)
    if not report:
        return None
    findings = report.get("results", [])
    tally = dict.fromkeys(("ERROR", "WARNING", "INFO"), 0)
    for entry in findings:
        extra = entry.get("extra") or {}
        level = (extra.get("severity") or "INFO").upper()
        tally[level] = tally.get(level, 0) + 1
    return {
        "findings": len(findings),
        "errors": len(report.get("errors", [])),
        "error": tally.get("ERROR", 0),
        "warning": tally.get("WARNING", 0),
        "info": tally.get("INFO", 0),
    }
114
+
115
+
116
def _summarize_promptfoo(root: Path) -> dict[str, Any] | None:
    """Summarize the Promptfoo artifacts found in directory *root*.

    Reads ``combined-summary.json`` for the overall figures and every
    ``*-regression.json`` file for per-report regressions. Returns ``None``
    when the combined summary is missing or empty.

    The returned dict carries the rounded overall score, passing/failing case
    counts, the suite count, regression counts and details keyed by report
    file name, each suite's failing cases tagged with a ``suite`` key, and the
    untouched combined summary under ``raw``.
    """
    combined = _load_json(root / "combined-summary.json")
    if not combined:
        return None

    # Sort the glob: directory iteration order is arbitrary, and the markdown
    # renderer lists reports in this dict's insertion order.
    reports = {
        report_path.name: _load_json(report_path) or {}
        for report_path in sorted(root.glob("*-regression.json"))
    }

    # Tolerate an explicit ``null`` for "suites" in both the loop and the count.
    suites = combined.get("suites") or {}
    failing_cases: list[dict[str, Any]] = [
        {"suite": suite_name, **case}
        for suite_name, suite in suites.items()
        for case in ((suite or {}).get("summary") or {}).get("failing_cases") or []
    ]

    return {
        "overall_score": round(float(combined.get("overall_score", 0.0)), 3),
        "passing_cases": combined.get("passing_cases", 0),
        "failing_cases": combined.get("failing_cases", 0),
        "suite_count": len(suites),
        "regressions": {
            name: len(report.get("regressions", []))
            for name, report in reports.items()
        },
        "regression_details": {
            name: report.get("regressions", [])
            for name, report in reports.items()
        },
        "failing_case_details": failing_cases,
        "raw": combined,
    }
144
+
145
+
146
def _format_number(value: Any, spec: str, suffix: str = "") -> str:
    """Format *value* with *spec* plus *suffix*, or return ``"n/a"`` if it is not numeric."""
    try:
        return format(float(value), spec) + suffix
    except (TypeError, ValueError):
        return "n/a"


def _render_markdown(summary: dict[str, Any]) -> str:
    """Render the PR gate *summary* as a markdown document.

    ``summary["job_results"]`` is required; the ``coverage``,
    ``coverage_regression``, ``promptfoo``, ``duplication`` and ``security``
    sections are each emitted only when present and non-empty.

    Coverage-regression figures may be ``None`` when the upstream report lacks
    a key; those are rendered as ``n/a`` instead of raising ``TypeError``.

    Returns:
        The markdown text, terminated by a newline.
    """
    lines = ["## CherryDocs PR Gate", "", "### Job results"]
    lines.extend(
        f"- `{label}`: **{value}**" for label, value in summary["job_results"].items()
    )
    lines.append("")

    coverage = summary.get("coverage")
    if coverage:
        lines.append("### Tests / coverage")
        lines.append(
            f"- coverage: **{coverage['line_rate']:.2f}%** line, **{coverage['branch_rate']:.2f}%** branch"
        )
        lines.append(
            f"- covered lines: `{coverage['lines_covered']}` / `{coverage['lines_valid']}`"
        )
        lines.append("")

    coverage_regression = summary.get("coverage_regression")
    if coverage_regression:
        baseline = _format_number(coverage_regression.get("baseline_line_rate"), ".2f", "%")
        candidate = _format_number(coverage_regression.get("candidate_line_rate"), ".2f", "%")
        line_delta = _format_number(coverage_regression.get("line_rate_drop"), ".2f")
        branch_delta = _format_number(coverage_regression.get("branch_rate_drop"), ".2f")
        lines.append("### Coverage regression")
        lines.append(f"- baseline vs candidate: **{baseline}** -> **{candidate}** line")
        lines.append(
            f"- line delta: `{line_delta}` points; branch delta: `{branch_delta}` points"
        )
        blocking_errors = coverage_regression.get("errors") or []
        if blocking_errors:
            lines.append("- blocking coverage errors:")
            lines.extend(f" - {error}" for error in blocking_errors)
        lines.append("")

    promptfoo = summary.get("promptfoo")
    if promptfoo:
        lines.append("### Promptfoo")
        lines.append(
            f"- overall score: **{promptfoo['overall_score']:.3f}** across `{promptfoo['suite_count']}` suite(s)"
        )
        lines.append(
            f"- cases: `{promptfoo['passing_cases']}` passing / `{promptfoo['failing_cases']}` failing"
        )
        for name, count in (promptfoo["regressions"] or {}).items():
            lines.append(f"- regressions `{name}`: `{count}`")
        if promptfoo.get("failing_case_details"):
            lines.append("- failing cases:")
            # Only the first five failing cases are listed.
            for case in promptfoo["failing_case_details"][:5]:
                reason = case.get("failure_reason") or "no reason captured"
                lines.append(
                    f" - `{case.get('suite')}` / {case.get('description', 'unknown case')}: {reason}"
                )
        # NOTE(review): the reviewed copy of this file had lost its indentation;
        # this loop is assumed to sit beside, not inside, the failing-cases
        # branch -- confirm against the upstream source.
        for name, regressions in (promptfoo.get("regression_details") or {}).items():
            for regression in regressions[:5]:
                lines.append(
                    f" - regression `{name}`: {regression.get('description', 'unknown case')}"
                )
        lines.append("")

    duplication = summary.get("duplication")
    if duplication:
        lines.append("### Duplication")
        lines.append(
            f"- duplicated lines: **{duplication['percentage']:.2f}%** (`{duplication['duplicated_lines']}` lines across `{duplication['clones']}` clones)"
        )
        lines.append("")

    security = summary.get("security")
    if security:
        lines.append("### Security")
        bandit = security.get("bandit")
        if bandit:
            lines.append(
                f"- bandit findings: `{bandit['findings']}` (high `{bandit['high']}`, medium `{bandit['medium']}`, low `{bandit['low']}`)"
            )
        pip_audit = security.get("pip_audit")
        if pip_audit:
            lines.append(
                f"- pip-audit: `{pip_audit['affected_dependencies']}` affected dependencies / `{pip_audit['vulnerabilities']}` vulnerabilities"
            )
        gitleaks = security.get("gitleaks")
        if gitleaks:
            lines.append(f"- gitleaks findings: `{gitleaks['findings']}`")
        semgrep = security.get("semgrep")
        if semgrep:
            lines.append(
                f"- semgrep findings: `{semgrep['findings']}` (error `{semgrep['error']}`, warning `{semgrep['warning']}`, info `{semgrep['info']}`)"
            )
        lines.append("")

    lines.append("_Generated by the canonical CherryDocs CI workflow._")
    lines.append("")
    return "\n".join(lines)
239
+
240
+
241
def main() -> int:
    """Collect CI artifacts into a summary and write it as JSON and markdown.

    Job results come from ``*_RESULT`` environment variables; tool reports are
    read from fixed sub-paths of ``--artifacts-root``. The JSON summary goes to
    ``--output-json``; the markdown report goes to ``--output-md`` and is also
    printed to stdout. Missing parent directories of either output path are
    created.

    Returns:
        ``0`` on completion.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--artifacts-root", required=True)
    parser.add_argument("--output-md", required=True)
    parser.add_argument("--output-json", required=True)
    args = parser.parse_args()

    root = Path(args.artifacts_root)
    summary = {
        "job_results": {
            "quality_fast": _job_result("QUALITY_FAST_RESULT"),
            "quality_integration": _job_result("QUALITY_INTEGRATION_RESULT"),
            "security_scan": _job_result("SECURITY_SCAN_RESULT"),
            "typecheck_targeted": _job_result("TYPECHECK_TARGETED_RESULT"),
            "duplication_check": _job_result("DUPLICATION_CHECK_RESULT"),
            "browser_e2e": _job_result("BROWSER_E2E_RESULT"),
            "safety_e2e": _job_result("SAFETY_E2E_RESULT"),
            "mcp_parity": _job_result("MCP_PARITY_RESULT"),
            "eval_pr_local": _job_result("EVAL_PR_LOCAL_RESULT"),
        },
        "coverage": _summarize_coverage(root / "coverage-report" / "coverage.xml"),
        "coverage_regression": _summarize_coverage_regression(
            root / "coverage-report" / "coverage-regression.json"
        ),
        "duplication": _summarize_jscpd(root / "jscpd-report" / "jscpd-report.json"),
        "promptfoo": _summarize_promptfoo(root / "promptfoo-pr-gate-results"),
        "security": {
            "bandit": _summarize_bandit(root / "security-scan-report" / "bandit-report.json"),
            "pip_audit": _summarize_pip_audit(root / "security-scan-report" / "pip-audit-report.json"),
            "gitleaks": _summarize_gitleaks(root / "security-scan-report" / "gitleaks-report.json"),
            "semgrep": _summarize_semgrep(root / "security-scan-report" / "semgrep-report.json"),
        },
    }

    # The JSON summary is written first so it survives a markdown rendering failure.
    json_target = Path(args.output_json)
    json_target.parent.mkdir(parents=True, exist_ok=True)
    json_target.write_text(json.dumps(summary, indent=2, sort_keys=True) + "\n")

    # Render once and reuse the text rather than reading the file back to print it.
    rendered = _render_markdown(summary)
    markdown_target = Path(args.output_md)
    markdown_target.parent.mkdir(parents=True, exist_ok=True)
    markdown_target.write_text(rendered)
    print(rendered)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
@@ -0,0 +1,176 @@
1
+ #!/usr/bin/env python3
2
+ """Summarize Promptfoo output and enforce a regression gate against a baseline."""
3
+
4
+ from __future__ import annotations
5
+
6
+ import argparse
7
+ import json
8
+ from pathlib import Path
9
+ from typing import Any
10
+
11
+
12
def _load_json(path: str) -> dict[str, Any]:
    """Return the parsed JSON document stored at *path*.

    The file is read as bytes so that ``json.loads`` detects the encoding
    (UTF-8, UTF-16 or UTF-32) itself instead of depending on the platform's
    locale encoding.

    Raises:
        FileNotFoundError: If *path* does not exist.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    return json.loads(Path(path).read_bytes())
14
+
15
+
16
def _normalize_eval(path: str) -> dict[str, Any]:
    """Load *path* and return it in the normalized summary shape.

    A document that already has ``cases`` and ``overall_score`` is returned
    untouched. Otherwise the rows under ``results.results`` are reduced to
    ``description`` / ``success`` / ``score`` / ``failure_reason`` cases, and
    the mean score and pass/fail counts are computed.

    Raises:
        ValueError: If ``results.results`` is not a list.
    """
    payload = _load_json(path)
    already_normalized = "cases" in payload and "overall_score" in payload
    if already_normalized:
        return payload

    rows = payload.get("results", {}).get("results", [])
    if not isinstance(rows, list):
        raise ValueError(f"{path} does not look like Promptfoo output.")

    cases: list[dict[str, Any]] = []
    score_sum = 0.0
    for position, row in enumerate(rows):
        nested_case = row.get("testCase") or {}
        # Prefer the nested description, then the row's own, then a positional name.
        label = nested_case.get("description") or row.get("description") or f"case-{position}"
        row_score = float(row.get("score") or 0.0)
        cases.append(
            {
                "description": label,
                "success": bool(row.get("success")),
                "score": row_score,
                "failure_reason": row.get("failureReason") or "",
            }
        )
        score_sum += row_score

    case_count = len(cases)
    passed = len([case for case in cases if case["success"]])
    return {
        "overall_score": score_sum / case_count if cases else 0.0,
        "passing": passed,
        "failing": case_count - passed,
        "case_count": case_count,
        "cases": cases,
    }
57
+
58
+
59
def _write_json(path: str | None, payload: dict[str, Any]) -> None:
    """Write *payload* to *path* as indented, key-sorted JSON.

    Does nothing when *path* is ``None`` or empty. Missing parent directories
    are created, and the output ends with a newline.
    """
    if path:
        destination = Path(path)
        destination.parent.mkdir(parents=True, exist_ok=True)
        serialized = json.dumps(payload, indent=2, sort_keys=True)
        destination.write_text(f"{serialized}\n")
65
+
66
+
67
def _compare(
    baseline: dict[str, Any],
    candidate: dict[str, Any],
    *,
    min_score: float,
    max_score_drop: float,
) -> tuple[dict[str, Any], list[str]]:
    """Compare a candidate summary with a baseline and collect gate errors.

    An error is recorded when the candidate's overall score is below
    *min_score*, when it dropped by more than *max_score_drop* relative to the
    baseline, or when any baseline case is missing from the candidate or went
    from passing to failing. Cases are matched by ``description``.

    Returns:
        ``(report, errors)``; ``report["errors"]`` is the same list as *errors*.
    """
    problems: list[str] = []
    before_by_name = {case["description"]: case for case in baseline["cases"]}
    after_by_name = {case["description"]: case for case in candidate["cases"]}

    baseline_score = float(baseline["overall_score"])
    candidate_score = float(candidate["overall_score"])
    overall_drop = baseline_score - candidate_score
    if candidate_score < min_score:
        problems.append(
            f"overall score {candidate['overall_score']:.3f} is below minimum {min_score:.3f}"
        )
    if overall_drop > max_score_drop:
        problems.append(
            f"overall score dropped by {overall_drop:.3f}, exceeding max drop {max_score_drop:.3f}"
        )

    regressions: list[dict[str, Any]] = []
    for description, before in before_by_name.items():
        after = after_by_name.get(description)
        if after is None:
            regressions.append(
                {"description": description, "reason": "missing from candidate results"}
            )
        elif before["success"] and not after["success"]:
            regressions.append(
                {
                    "description": description,
                    "reason": "pass->fail regression",
                    "baseline_score": before["score"],
                    "candidate_score": after["score"],
                    "candidate_failure_reason": after["failure_reason"],
                }
            )

    if regressions:
        problems.append(f"{len(regressions)} regression case(s) detected")

    report = {
        "baseline_overall_score": baseline["overall_score"],
        "candidate_overall_score": candidate["overall_score"],
        "overall_drop": overall_drop,
        "min_score": min_score,
        "max_score_drop": max_score_drop,
        "baseline_passing": baseline["passing"],
        "candidate_passing": candidate["passing"],
        "baseline_failing": baseline["failing"],
        "candidate_failing": candidate["failing"],
        "regressions": regressions,
        "errors": problems,
    }
    return report, problems
127
+
128
+
129
def main() -> int:
    """Summarize a Promptfoo result and optionally gate it against a baseline.

    The candidate is always normalized, optionally written to
    ``--summary-output``, and summarized on stdout. When ``--baseline`` is
    supplied the two are compared, the report is optionally written to
    ``--report-output``, and every gate error is printed.

    Returns:
        ``1`` when a baseline was supplied and the comparison produced errors,
        otherwise ``0``.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--candidate", required=True, help="Promptfoo result JSON to summarize/compare.")
    parser.add_argument("--baseline", help="Baseline summary JSON to compare against.")
    parser.add_argument("--summary-output", help="Where to write the normalized summary JSON.")
    parser.add_argument("--report-output", help="Where to write the comparison report JSON.")
    parser.add_argument("--min-score", type=float, default=0.8)
    parser.add_argument("--max-score-drop", type=float, default=0.05)
    options = parser.parse_args()

    candidate = _normalize_eval(options.candidate)
    _write_json(options.summary_output, candidate)

    summary_fields = [
        f"score={candidate['overall_score']:.3f}",
        f"passing={candidate['passing']}",
        f"failing={candidate['failing']}",
    ]
    print("Promptfoo summary:", *summary_fields)

    if not options.baseline:
        return 0

    report, errors = _compare(
        _normalize_eval(options.baseline),
        candidate,
        min_score=options.min_score,
        max_score_drop=options.max_score_drop,
    )
    _write_json(options.report_output, report)

    check_fields = [
        f"baseline={report['baseline_overall_score']:.3f}",
        f"candidate={report['candidate_overall_score']:.3f}",
        f"drop={report['overall_drop']:.3f}",
        f"regressions={len(report['regressions'])}",
    ]
    print("Promptfoo regression check:", *check_fields)

    for message in errors:
        print(f"ERROR: {message}")
    return 1 if errors else 0


if __name__ == "__main__":
    raise SystemExit(main())
@@ -0,0 +1,57 @@
1
+ #!/usr/bin/env python
2
+ """Render or check generated agent prompt files from the canonical protocol source."""
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ from pathlib import Path
7
+ import sys
8
+
9
+ REPO_ROOT = Path(__file__).resolve().parents[1]
10
+ if str(REPO_ROOT) not in sys.path:
11
+ sys.path.insert(0, str(REPO_ROOT))
12
+
13
+ from app.services.agent_protocol import build_prompt_file_map # noqa: E402
14
+
15
+
16
def write_outputs(repo_root: Path) -> None:
    """Write every generated prompt file beneath *repo_root*.

    Each entry from ``build_prompt_file_map()`` is written as UTF-8 to its
    relative path, creating parent directories as needed and printing one
    ``wrote ...`` line per file.
    """
    generated = build_prompt_file_map()
    for relative_path, text in generated.items():
        destination = repo_root / relative_path
        destination.parent.mkdir(parents=True, exist_ok=True)
        destination.write_text(text, encoding="utf-8")
        print(f"wrote {relative_path}")
22
+
23
+
24
def check_outputs(repo_root: Path) -> int:
    """Check that the prompt files under *repo_root* match the generated content.

    A file counts as out of date when it is missing or its UTF-8 text differs
    from the generated text. Stale paths are listed on stderr together with
    the command that regenerates them.

    Returns:
        ``1`` if any file is out of date, otherwise ``0``.
    """

    def _on_disk(relative_path):
        """Return the current text of *relative_path*, or ``None`` if it is absent."""
        existing = repo_root / relative_path
        if not existing.exists():
            return None
        return existing.read_text(encoding="utf-8")

    stale = [
        relative_path
        for relative_path, expected in build_prompt_file_map().items()
        if _on_disk(relative_path) != expected
    ]
    if not stale:
        print("Generated prompt files are in sync.")
        return 0

    print("Generated prompt files are out of date:", file=sys.stderr)
    for relative_path in stale:
        print(f" - {relative_path}", file=sys.stderr)
    print("Run: python scripts/render_agent_prompts.py --write", file=sys.stderr)
    return 1
39
+
40
+
41
def main() -> int:
    """Parse the command line and either write or check the prompt files.

    Exactly one of ``--write`` and ``--check`` must be given; any other
    combination ends in ``parser.error``.

    Returns:
        ``0`` after writing, or the result of ``check_outputs`` when checking.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--write", action="store_true", help="Write generated prompt files to the repo.")
    parser.add_argument("--check", action="store_true", help="Fail if generated prompt files are out of sync.")
    args = parser.parse_args()

    exactly_one_mode = args.write != args.check
    if not exactly_one_mode:
        parser.error("Choose exactly one of --write or --check")

    if not args.write:
        return check_outputs(REPO_ROOT)
    write_outputs(REPO_ROOT)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())