cherry-docs 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. app/__init__.py +0 -0
  2. app/repo_scope.py +24 -0
  3. app/services/__init__.py +0 -0
  4. app/services/agent_protocol.py +59 -0
  5. app/services/auto_promote_sessions.py +245 -0
  6. app/services/capture_adapters.py +89 -0
  7. app/services/capture_core.py +164 -0
  8. app/services/internal_memory_agent.py +214 -0
  9. app/services/memory_evidence.py +89 -0
  10. app/services/memory_extraction_normalize.py +134 -0
  11. app/services/memory_lifecycle.py +258 -0
  12. app/services/memory_profiles.py +88 -0
  13. app/services/memory_providers.py +113 -0
  14. app/services/memory_retrieval.py +327 -0
  15. app/services/memory_retrieval_scoring.py +106 -0
  16. app/services/memory_retrieval_text.py +113 -0
  17. app/services/memory_similarity.py +135 -0
  18. app/services/privacy.py +72 -0
  19. app/services/promoted_memory_answer.py +157 -0
  20. app/services/promoted_memory_pipeline.py +194 -0
  21. app/services/promoted_memory_store.py +57 -0
  22. cherry_docs-0.2.0.dist-info/METADATA +143 -0
  23. cherry_docs-0.2.0.dist-info/RECORD +42 -0
  24. cherry_docs-0.2.0.dist-info/WHEEL +5 -0
  25. cherry_docs-0.2.0.dist-info/entry_points.txt +4 -0
  26. cherry_docs-0.2.0.dist-info/top_level.txt +3 -0
  27. cherrydocs/__init__.py +3 -0
  28. cherrydocs/cli.py +213 -0
  29. cherrydocs/hook.py +27 -0
  30. cherrydocs/mcp.py +22 -0
  31. scripts/__init__.py +0 -0
  32. scripts/auto_promote_capture.py +63 -0
  33. scripts/check_size_limits.py +115 -0
  34. scripts/ci_auto_capture.py +289 -0
  35. scripts/claude_hooks/__init__.py +0 -0
  36. scripts/claude_hooks/state_manager.py +526 -0
  37. scripts/coverage_regression_gate.py +121 -0
  38. scripts/eval_projects.py +247 -0
  39. scripts/install.py +212 -0
  40. scripts/pr_gate_report.py +282 -0
  41. scripts/promptfoo_regression_gate.py +176 -0
  42. scripts/render_agent_prompts.py +57 -0
@@ -0,0 +1,214 @@
1
+ """Provider-agnostic internal memory-agent: prompt building, LLM extraction, chunked pipeline."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from collections.abc import Iterable
7
+ from typing import Any, Protocol
8
+
9
+ from app.services.memory_extraction_normalize import (
10
+ MemoryCandidate,
11
+ MemoryExtractionResult,
12
+ _is_near_duplicate,
13
+ normalize_memory_candidates,
14
+ )
15
+ from app.services.memory_profiles import (
16
+ MemoryPromptProfile,
17
+ MemoryPromptProfileName,
18
+ resolve_memory_prompt_profile,
19
+ )
20
+ from app.services.memory_providers import AnthropicMemoryProvider, OllamaMemoryProvider
21
+
22
# Number of filtered capture events rendered into one prompt; also the
# default window size of build_recent_capture_window.
_CHUNK_SIZE = 25  # events per LLM call
# Once this many candidates have been kept, extraction stops and the result
# is truncated to this length.
_MAX_TOTAL_CANDIDATES = 12  # cap across all chunks for one session
24
+
25
+
26
class MemoryModelProvider(Protocol):
    """Structural interface for a backend that can answer an extraction prompt."""

    def extract(self, prompt: str) -> dict[str, Any]:
        """Return parsed JSON-like extraction payload."""
        ...
29
+
30
+
31
+ def _trim_text(value: str, limit: int = 1200) -> str:
32
+ text = " ".join((value or "").split())
33
+ if len(text) <= limit:
34
+ return text
35
+ return f"{text[: limit - 3]}..."
36
+
37
+
38
def build_recent_capture_window(events: Iterable[dict[str, Any]], limit: int = _CHUNK_SIZE) -> list[dict[str, Any]]:
    """Return the last *limit* events that are worth showing to the model.

    An event is kept when its ``event_type`` is one of the recognised capture
    types and it either carries non-blank ``text`` or is a result-style event
    (shell, tool or test result), which is kept even without text. At least
    one event is returned whenever any event qualifies, because the window
    size is floored at 1.
    """
    accepted_types = {
        "user_prompt",
        "assistant_output",
        "shell_result",
        "tool_result",
        "remember",
        "test_result",
    }
    text_optional_types = {"shell_result", "tool_result", "test_result"}

    kept: list[dict[str, Any]] = []
    for item in events:
        kind = str(item.get("event_type") or "")
        if kind not in accepted_types:
            continue
        has_text = bool(str(item.get("text") or "").strip())
        if has_text or kind in text_optional_types:
            kept.append(item)

    window = max(1, limit)
    return kept[-window:]
56
+
57
+
58
def build_internal_memory_prompt(
    events: Iterable[dict[str, Any]],
    *,
    project_hint: str | None = None,
    existing_context: list[str] | None = None,
    profile: MemoryPromptProfileName | str | None = None,
) -> str:
    """Render the extraction prompt for one window of capture events.

    Args:
        events: Raw capture events; they are filtered and truncated by
            ``build_recent_capture_window`` with its default window size.
        project_hint: Project label inserted into the prompt; falls back to
            "current project" when falsy.
        existing_context: Previously remembered lines, rendered as a bullet
            list; falsy entries are skipped and an empty list renders "- none".
        profile: Prompt profile name, resolved through
            ``resolve_memory_prompt_profile``.

    Returns:
        The complete prompt text, ending with the JSON schema the model is
        asked to follow.
    """
    prompt_profile = resolve_memory_prompt_profile(profile)
    window = build_recent_capture_window(events)
    event_lines: list[str] = []
    for event in window:
        # Each event is flattened into one "key=value | key=value" line.
        event_type = str(event.get("event_type") or "unknown")
        text = _trim_text(str(event.get("text") or ""))
        command = _trim_text(str(event.get("command") or ""), 240)
        exit_code = event.get("exit_code")
        _meta = event.get("metadata")
        # Non-dict metadata is treated as absent.
        metadata: dict = _meta if isinstance(_meta, dict) else {}
        parts = [f"type={event_type}"]
        if command:
            parts.append(f"command={command}")
        # Compared against None so that an exit code of 0 is still rendered.
        if exit_code is not None:
            parts.append(f"exit_code={exit_code}")
        verification_status = str(metadata.get("verification_status") or "").strip()
        verification_kind = str(metadata.get("verification_kind") or "").strip()
        if verification_kind:
            parts.append(f"verification_kind={verification_kind}")
        if verification_status:
            parts.append(f"verification_status={verification_status}")
        if text:
            parts.append(f"text={text}")
        event_lines.append(" | ".join(parts))

    # Bullet lists; the two that may be empty fall back to a "- none" line.
    prior = "\n".join(f"- {item}" for item in (existing_context or []) if item) or "- none"
    rendered_events = "\n".join(f"- {line}" for line in event_lines) or "- none"
    project_label = project_hint or "current project"
    keep_rules = "\n".join(f"- {line}" for line in prompt_profile.keep_rules)
    extra_rules = "\n".join(f"- {line}" for line in prompt_profile.extra_rules)
    extraction_bias = "\n".join(f"- {line}" for line in prompt_profile.extraction_bias)
    # Example payload shown to the model; values describe the allowed options.
    schema = {
        "candidates": [
            {
                "memory_type": "episodic|heuristic|procedural|factual|noise",
                "kind": "attempt|decision|conclusion|warning|procedure|pattern|fact",
                "summary": "short useful memory",
                "rationale": "why this matters for a future AI",
                "confidence": 0.0,
                "should_store": True,
                "sensitive": False,
            }
        ],
        "notes": ["optional short notes"],
    }
    return f"""You are CherryDocs' internal memory distiller for {project_label}.

Your job is to turn recent AI work traces into useful shared project memory.

Active profile: {prompt_profile.name}
Profile intent: {prompt_profile.description}

{prompt_profile.system_focus}
{keep_rules}

Memory type rules:
- factual = what is permanently true right now: a command, path, config, API, or project state
- episodic = a concrete attempt, decision, or conclusion from this work
- heuristic = a reusable warning, hidden constraint, or recurring pattern
- procedural = an explicit step-by-step way of doing something repeatedly
- noise = routine chatter, weak summaries, or anything cheap to rediscover

Kind rules:
- fact = stable project truth: entry points, config paths, APIs, project-level invariants. NOT test results or error counts.
- attempt = "we tried X"
- decision = "we chose X over Y because..."
- conclusion = "we learned/found that..."
- warning = "avoid X because..."
- pattern = "X keeps happening when..."
- procedure = a repeatable workflow with actual steps

Important:
- Never store a fact whose value changes daily: test pass counts, lint error totals, CI status, PR review status.
- Use procedure only for explicit repeatable workflows; prefer decision for refactors/pivots.
- If a candidate is noise, set memory_type=noise and should_store=false.
- If no durable memory is present, return an empty candidates list.
- Return at most 3 non-overlapping candidates.
{extra_rules}

Prefer extracting:
{extraction_bias}

Output strict JSON only.

Prior remembered context:
{prior}

Recent captured events:
{rendered_events}

Return JSON matching this schema:
{json.dumps(schema, indent=2)}
"""
158
+
159
+
160
def extract_memory_candidates(
    events: Iterable[dict[str, Any]],
    *,
    provider: MemoryModelProvider,
    project_hint: str | None = None,
    existing_context: list[str] | None = None,
    profile: MemoryPromptProfileName | str | None = None,
) -> MemoryExtractionResult:
    """Run chunked extraction over a whole session and merge the results.

    All relevant events are split into chunks of ``_CHUNK_SIZE``; each chunk
    is prompted separately. Summaries of candidates kept so far are appended
    to the context passed to later chunks. Noise, non-storable and
    near-duplicate candidates are dropped, and at most
    ``_MAX_TOTAL_CANDIDATES`` candidates are returned. Notes from every
    processed chunk are concatenated.
    """
    session_events = list(events)
    # The window limit equals the event count so nothing relevant is cut off.
    usable = build_recent_capture_window(session_events, limit=len(session_events) or 1)
    if not usable:
        return MemoryExtractionResult()

    kept: list[Any] = []
    context_lines = list(existing_context or [])
    collected_notes: list[str] = []

    for start in range(0, len(usable), _CHUNK_SIZE):
        prompt = build_internal_memory_prompt(
            usable[start: start + _CHUNK_SIZE],
            project_hint=project_hint,
            existing_context=context_lines,
            profile=profile,
        )
        extraction = normalize_memory_candidates(provider.extract(prompt))
        collected_notes.extend(extraction.notes)

        for item in extraction.candidates:
            if item.memory_type == "noise" or not item.should_store:
                continue
            if any(_is_near_duplicate(item, earlier) for earlier in kept):
                continue
            kept.append(item)
            context_lines = context_lines + [item.summary]

        # Stop prompting further chunks once the session cap has been reached.
        if len(kept) >= _MAX_TOTAL_CANDIDATES:
            break

    return MemoryExtractionResult(candidates=kept[:_MAX_TOTAL_CANDIDATES], notes=collected_notes)
198
+
199
+
200
__all__ = [
    # Re-exported from sub-modules so callers import from here unchanged.
    # The provider, candidate/result and profile names below are imported at
    # the top of this module; the build_*/extract_* functions and
    # MemoryModelProvider are defined here.
    "AnthropicMemoryProvider",
    "MemoryCandidate",
    "MemoryExtractionResult",
    "MemoryModelProvider",
    "MemoryPromptProfile",
    "MemoryPromptProfileName",
    "OllamaMemoryProvider",
    "build_internal_memory_prompt",
    "build_recent_capture_window",
    "extract_memory_candidates",
    "normalize_memory_candidates",
    "resolve_memory_prompt_profile",
]
@@ -0,0 +1,89 @@
1
+ """Evidence linking: connect memory candidates to their supporting raw capture events."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import TYPE_CHECKING
6
+
7
+ from app.services.memory_similarity import _similarity
8
+
9
+ if TYPE_CHECKING:
10
+ from app.services.memory_extraction_normalize import MemoryCandidate
11
+
12
+
13
def build_event_evidence_id(event: dict) -> str:
    """Return the ``"<event_type>:<timestamp>"`` identifier for *event*.

    A missing or falsy field is replaced by the literal ``"unknown"``.
    """
    kind = event.get("event_type") or "unknown"
    stamp = event.get("timestamp") or "unknown"
    return ":".join((str(kind), str(stamp)))
17
+
18
+
19
def collect_candidate_evidence_refs(
    candidate: MemoryCandidate,
    events: list[dict],
    *,
    limit: int = 3,
) -> list[str]:
    """Return evidence ids of the events that best support *candidate*.

    Only non-empty assistant/remember/tool/shell/test events are considered.
    An event qualifies when its similarity to the summary is at least 0.35,
    or when both summary and rationale similarity are at least 0.2. Qualifying
    events are ranked by a weighted score (summary weighted highest, with a
    small bonus for assistant output) and the top ``max(1, limit)`` ids are
    returned, best first.
    """
    summary_text = candidate.summary.strip()
    rationale_text = candidate.rationale.strip()
    joined_text = f"{summary_text} {rationale_text}".strip()
    evidence_types = {"assistant_output", "remember", "tool_result", "shell_result", "test_result"}

    scored: list[tuple[float, str]] = []
    for item in events:
        kind = str(item.get("event_type") or "")
        if kind not in evidence_types:
            continue
        body = str(item.get("text") or "").strip()
        if not body:
            continue

        summary_sim = _similarity(summary_text, body)
        rationale_sim = _similarity(rationale_text, body) if rationale_text else 0.0
        joined_sim = _similarity(joined_text, body)

        corroborated = summary_sim >= 0.2 and rationale_sim >= 0.2
        if summary_sim < 0.35 and not corroborated:
            continue

        score = (summary_sim * 1.7) + rationale_sim + (joined_sim * 0.4)
        if kind == "assistant_output":
            score += 0.2
        scored.append((score, build_event_evidence_id(item)))

    # sorted() is stable, so equally scored events keep their input order.
    best_first = sorted(scored, key=lambda pair: pair[0], reverse=True)
    return [ref for _score, ref in best_first[: max(1, limit)]]
47
+
48
+
49
def collect_candidate_context(
    candidate: MemoryCandidate,
    events: list[dict],
    *,
    evidence_refs: list[str] | None = None,
    max_files_per_event: int = 20,
    max_files_total: int = 5,
) -> dict:
    """Derive repo, commit and most-touched files from a candidate's evidence.

    Evidence ids come from *evidence_refs* when given and non-empty, otherwise
    from ``collect_candidate_evidence_refs``. The first non-blank ``repo`` and
    ``commit`` among the matching events win. File paths are counted across
    matching events, ignoring events whose file list is empty or longer than
    *max_files_per_event*, and the *max_files_total* most frequent paths are
    returned, with ties broken alphabetically.

    Returns:
        A dict with keys ``"repo"``, ``"commit"`` and ``"files"``.
    """
    wanted = set(evidence_refs or collect_candidate_evidence_refs(candidate, events))
    if not wanted:
        return {"repo": None, "commit": None, "files": []}

    repo: str | None = None
    commit: str | None = None
    hits: dict[str, int] = {}

    for item in events:
        if build_event_evidence_id(item) not in wanted:
            continue
        if not repo:
            repo = str(item.get("repo") or "").strip() or None
        if not commit:
            commit = str(item.get("commit") or "").strip() or None

        listed = item.get("files")
        if not isinstance(listed, list):
            continue
        paths = [cleaned for cleaned in (str(entry).strip() for entry in listed) if cleaned]
        if not paths or len(paths) > max_files_per_event:
            continue
        for path in paths:
            hits[path] = hits.get(path, 0) + 1

    by_frequency = sorted(hits.items(), key=lambda pair: (-pair[1], pair[0]))
    top_paths = [path for path, _hits in by_frequency[:max_files_total]]
    return {"repo": repo, "commit": commit, "files": top_paths}
@@ -0,0 +1,134 @@
1
+ """Output normalization and deduplication for LLM-extracted memory candidates."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ from typing import Any
7
+
8
+ from pydantic import BaseModel, ConfigDict, Field
9
+
10
# Memory types and kinds accepted from the model (compared after lower-casing).
_VALID_MEMORY_TYPES = {"episodic", "heuristic", "procedural", "factual", "noise"}
_VALID_KINDS = {"attempt", "decision", "conclusion", "warning", "procedure", "pattern", "fact"}
# Kinds permitted under each memory type; "noise" accepts every kind.
_KINDS_BY_MEMORY_TYPE = {
    "episodic": {"attempt", "decision", "conclusion"},
    "heuristic": {"warning", "pattern"},
    "procedural": {"procedure"},
    "factual": {"fact"},
    "noise": _VALID_KINDS,
}
# Inverse lookup used to infer or repair the memory type from the kind.
_MEMORY_TYPE_BY_KIND = {
    "attempt": "episodic",
    "decision": "episodic",
    "conclusion": "episodic",
    "warning": "heuristic",
    "pattern": "heuristic",
    "procedure": "procedural",
    "fact": "factual",
}
# Words removed by _canonical_tokens before texts are compared.
_STOPWORDS = {
    "a", "an", "and", "are", "as", "at", "be", "because", "for", "from", "how",
    "if", "in", "into", "is", "it", "of", "on", "or", "over", "that", "the",
    "this", "to", "use", "we", "with",
}
# Hint words checked against canonical tokens when deciding whether a
# "procedure" candidate is really a decision.
_PROCEDURE_HINTS = {"step", "steps", "first", "then", "next", "run", "repeat", "workflow", "checklist"}
# NOTE(review): "use" is also in _STOPWORDS, and _canonical_tokens strips
# stopwords, so the "use" hint can never match tokens produced by that
# function - confirm whether this is intended.
_DECISION_HINTS = {"choose", "decide", "switch", "move", "refactor", "introduce", "keep", "adopt", "use"}
# Summaries matching this pattern are skipped by normalize_memory_candidates
# (tool names, test/error counts, PR status wording).
_TRANSIENT_RE = re.compile(
    r"ruff |pytest |tests? (pass|fail)|\d+ (error|test)|pr (is |was )?(clean|ready)|dead code removed",
    re.IGNORECASE,
)
39
+
40
+
41
class MemoryCandidate(BaseModel):
    """One memory proposed by the model; unknown payload keys are ignored."""

    model_config = ConfigDict(extra="ignore")

    # Expected to be one of _VALID_MEMORY_TYPES once normalised.
    memory_type: str
    # Expected to be one of _VALID_KINDS once normalised.
    kind: str
    # Short statement of the memory itself.
    summary: str
    # Optional justification; defaults to an empty string.
    rationale: str = ""
    # normalize_memory_candidates clamps this into [0.0, 1.0].
    confidence: float = 0.0
    # False marks a candidate that must not be persisted.
    should_store: bool = True
    # True marks a candidate flagged as containing sensitive content.
    sensitive: bool = False
51
+
52
+
53
class MemoryExtractionResult(BaseModel):
    """Container for extracted candidates plus free-form notes."""

    model_config = ConfigDict(extra="ignore")

    # Both fields default to fresh empty lists.
    candidates: list[MemoryCandidate] = Field(default_factory=list)
    notes: list[str] = Field(default_factory=list)
58
+
59
+
60
def _canonical_tokens(value: str) -> set[str]:
    """Return the distinctive lowercase alphanumeric tokens of *value*.

    Stopwords and tokens of one or two characters are discarded.
    """
    words = set(re.findall(r"[a-z0-9]+", value.lower()))
    return {word for word in words - _STOPWORDS if len(word) > 2}
66
+
67
+
68
def _is_near_duplicate(left: MemoryCandidate, right: MemoryCandidate) -> bool:
    """Tell whether two candidates express essentially the same memory.

    The canonical tokens of summary plus rationale are compared: the
    candidates are duplicates when at least 70% of the smaller token set is
    shared. If either side has no canonical tokens, the summaries are compared
    case-insensitively instead.
    """
    tokens_a = _canonical_tokens(f"{left.summary} {left.rationale}")
    tokens_b = _canonical_tokens(f"{right.summary} {right.rationale}")
    if tokens_a and tokens_b:
        shared = len(tokens_a & tokens_b)
        return (shared / min(len(tokens_a), len(tokens_b))) >= 0.7
    return left.summary.strip().lower() == right.summary.strip().lower()
76
+
77
+
78
def normalize_memory_candidates(payload: dict[str, Any]) -> MemoryExtractionResult:
    """Validate, clean and de-duplicate a raw extraction payload.

    Args:
        payload: Parsed model output, expected to contain ``candidates`` and
            optionally ``notes``.

    Returns:
        A result holding at most three candidates, ordered storable-first and
        by descending confidence, plus the non-blank notes. A payload that
        fails validation yields an empty result rather than an exception.
    """
    import math  # local import: only the NaN guard below needs it

    try:
        parsed = MemoryExtractionResult.model_validate(payload)
    except Exception:
        # Deliberate best-effort: malformed model output means "no memories".
        return MemoryExtractionResult()

    normalized: list[MemoryCandidate] = []
    for item in parsed.candidates:
        memory_type = (item.memory_type or "").strip().lower()
        kind = (item.kind or "").strip().lower()
        if kind not in _VALID_KINDS:
            continue
        # An unknown memory type is inferred from the (valid) kind.
        if memory_type not in _VALID_MEMORY_TYPES:
            memory_type = _MEMORY_TYPE_BY_KIND.get(kind, "")
        if not memory_type:
            continue
        # A kind that does not belong to the stated type wins over the type.
        if kind not in _KINDS_BY_MEMORY_TYPE.get(memory_type, set()):
            inferred_type = _MEMORY_TYPE_BY_KIND.get(kind)
            if inferred_type:
                memory_type = inferred_type
        summary = " ".join(item.summary.split()).strip()
        if not summary or _TRANSIENT_RE.search(summary):
            continue
        rationale = " ".join(item.rationale.split()).strip()
        should_store = bool(item.should_store)
        if memory_type == "procedural" and kind == "procedure":
            # A "procedure" with decision wording and no step wording is
            # reclassified as an episodic decision.
            tokens = _canonical_tokens(f"{summary} {rationale}")
            if not (tokens & _PROCEDURE_HINTS) and tokens & _DECISION_HINTS:
                memory_type = "episodic"
                kind = "decision"
        if memory_type == "noise":
            should_store = False
        confidence = float(item.confidence or 0.0)
        if math.isnan(confidence):
            # max(0.0, min(1.0, nan)) evaluates to 1.0, which would turn an
            # invalid value into maximum confidence and also break the sort
            # key below. Treat NaN as "no confidence".
            confidence = 0.0
        candidate = item.model_copy(
            update={
                "memory_type": memory_type,
                "kind": kind,
                "summary": summary,
                "rationale": rationale,
                "confidence": max(0.0, min(1.0, confidence)),
                "should_store": should_store,
            }
        )
        normalized.append(candidate)
    # Storable first, noise last, then highest confidence, then shortest summary.
    normalized.sort(
        key=lambda c: (not c.should_store, c.memory_type == "noise", -c.confidence, len(c.summary)),
    )
    deduped: list[MemoryCandidate] = []
    for candidate in normalized:
        if any(_is_near_duplicate(candidate, existing) for existing in deduped):
            continue
        deduped.append(candidate)
        # Hard cap of three candidates per payload.
        if len(deduped) >= 3:
            break
    return MemoryExtractionResult(
        candidates=deduped,
        notes=[note for note in parsed.notes if str(note).strip()],
    )
@@ -0,0 +1,258 @@
1
+ """Deterministic memory lifecycle helpers for capture-first CherryDocs."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from datetime import UTC, datetime
6
+ from typing import Literal
7
+ from uuid import uuid4
8
+
9
+ from pydantic import BaseModel, ConfigDict, Field
10
+
11
+ from app.services.memory_evidence import (
12
+ build_event_evidence_id,
13
+ collect_candidate_context,
14
+ collect_candidate_evidence_refs,
15
+ )
16
+ from app.services.memory_extraction_normalize import MemoryCandidate
17
+ from app.services.memory_similarity import (
18
+ _best_matching_record,
19
+ _is_conflict,
20
+ _is_repeatable_procedure,
21
+ _should_merge,
22
+ _should_supersede,
23
+ topic_key_for_text,
24
+ )
25
+ from app.services.privacy import detect_blocking_secret_fields, redact_text
26
+
27
# Allowed values of MemoryRecord.status.
MemoryStatus = Literal["active", "tentative", "superseded", "stale", "contradicted", "archived"]
# Allowed values of MemoryLifecycleDecision.action.
MemoryAction = Literal["create", "merge", "supersede", "conflict", "ignore"]
29
+
30
+
31
class MemoryRecord(BaseModel):
    """A promoted memory with provenance, lifecycle status and usage counters.

    Unknown keys are ignored when a record is validated.
    """

    model_config = ConfigDict(extra="ignore")

    # Identity and content, copied from the originating candidate.
    memory_id: str
    memory_type: str
    kind: str
    summary: str
    rationale: str = ""
    confidence: float = 0.0
    status: MemoryStatus = "active"
    # Set from topic_key_for_text(summary, rationale) when a record is created.
    topic_key: str = ""
    # Provenance of the capture that produced the memory.
    project_id: str | None = None
    source: str | None = None
    session_id: str | None = None
    repo: str | None = None
    branch: str | None = None
    commit: str | None = None
    files: list[str] = Field(default_factory=list)
    evidence: list[str] = Field(default_factory=list)
    derived_from_paths: list[str] = Field(default_factory=list)
    # Supersession links: ids this record replaces / id of its replacement.
    supersedes: list[str] = Field(default_factory=list)
    superseded_by: str | None = None
    # Usage counters and timestamps; not written in this module -
    # presumably maintained by retrieval code, TODO confirm.
    retrieval_count: int = 0
    evidence_hit_count: int = 0
    last_retrieved_at: str | None = None
    last_evidence_hit_at: str | None = None
    # ISO-8601 UTC timestamps, defaulting to the moment of construction.
    created_at: str = Field(default_factory=lambda: datetime.now(UTC).isoformat())
    updated_at: str = Field(default_factory=lambda: datetime.now(UTC).isoformat())
59
+
60
+
61
class MemoryLifecycleDecision(BaseModel):
    """Outcome of evaluating one candidate against the existing records."""

    model_config = ConfigDict(extra="ignore")

    # What was done with the candidate and a human-readable explanation.
    action: MemoryAction
    reason: str
    candidate: MemoryCandidate
    # Id of the existing record that was merged into, superseded or conflicted with.
    target_memory_id: str | None = None
    # The record created or updated by the action; left None for "ignore".
    resulting_record: MemoryRecord | None = None
69
+
70
+
71
class MemoryPromotionResult(BaseModel):
    """Result of a promotion run: one decision per candidate plus the record set."""

    model_config = ConfigDict(extra="ignore")

    # Both fields default to fresh empty lists.
    decisions: list[MemoryLifecycleDecision] = Field(default_factory=list)
    records: list[MemoryRecord] = Field(default_factory=list)
76
+
77
+
78
def build_existing_memory_context(records: list[MemoryRecord], limit: int = 8) -> list[str]:
    """Render the most relevant live memories as one-line context strings.

    Only active and tentative records are considered. Active records come
    first, then higher confidence, then ascending ``updated_at``. At most
    ``max(1, limit)`` lines are returned, each shaped like
    ``"<type>/<kind>: <summary> | why=<rationale> | status=<status>"``; the
    ``why`` part is omitted for an empty rationale and the ``status`` part is
    omitted for active records.
    """
    live = [entry for entry in records if entry.status in {"active", "tentative"}]
    live.sort(key=lambda entry: (entry.status != "active", -entry.confidence, entry.updated_at))

    rendered: list[str] = []
    for entry in live[: max(1, limit)]:
        pieces = [f"{entry.memory_type}/{entry.kind}: {entry.summary}"]
        if entry.rationale:
            pieces.append(f"why={entry.rationale}")
        if entry.status != "active":
            pieces.append(f"status={entry.status}")
        rendered.append(" | ".join(pieces))
    return rendered
92
+
93
+
94
def sanitize_candidate(candidate: MemoryCandidate) -> MemoryCandidate:
    """Return a copy of *candidate* that is safe to evaluate for storage.

    When ``detect_blocking_secret_fields`` reports anything for the summary or
    rationale, the copy is flagged ``sensitive`` and ``should_store`` is
    cleared, with the text left untouched. Otherwise both texts are passed
    through ``redact_text``.
    """
    secret_hits = detect_blocking_secret_fields(
        {"summary": candidate.summary, "rationale": candidate.rationale}
    )
    if not secret_hits:
        redacted = {
            "summary": redact_text(candidate.summary),
            "rationale": redact_text(candidate.rationale),
        }
        return candidate.model_copy(update=redacted)
    return candidate.model_copy(update={"sensitive": True, "should_store": False})
103
+
104
+
105
+ def age_memory_records(
106
+ records: list[MemoryRecord],
107
+ *,
108
+ now: datetime | None = None,
109
+ stale_after_days: int = 180,
110
+ ) -> list[MemoryRecord]:
111
+ reference = now or datetime.now(UTC)
112
+ aged: list[MemoryRecord] = []
113
+ for record in records:
114
+ if record.status not in {"active", "tentative"}:
115
+ aged.append(record)
116
+ continue
117
+ try:
118
+ updated_at = datetime.fromisoformat(record.updated_at)
119
+ except Exception:
120
+ aged.append(record)
121
+ continue
122
+ age_days = (reference - updated_at).days
123
+ aged.append(record.model_copy(update={"status": "stale"}) if age_days >= stale_after_days else record)
124
+ return aged
125
+
126
+
127
def _make_record(
    candidate: MemoryCandidate,
    *,
    project_id: str | None,
    source: str | None,
    session_id: str | None,
    repo: str | None,
    branch: str | None,
    commit: str | None,
    files: list[str] | None,
    evidence: list[str] | None,
    derived_from_paths: list[str] | None = None,
    supersedes: list[str] | None = None,
) -> MemoryRecord:
    """Build a fresh ``MemoryRecord`` from *candidate* and its provenance.

    The record gets a new ``mem-<12 hex chars>`` id and a topic key derived
    from summary and rationale. It starts as ``"tentative"`` when confidence
    is below 0.65 and as ``"active"`` otherwise. List arguments are copied,
    with ``None`` becoming an empty list.
    """
    record_id = f"mem-{uuid4().hex[:12]}"
    initial_status = "active"
    if candidate.confidence < 0.65:
        initial_status = "tentative"
    topic = topic_key_for_text(candidate.summary, candidate.rationale)
    return MemoryRecord(
        memory_id=record_id,
        memory_type=candidate.memory_type,
        kind=candidate.kind,
        summary=candidate.summary,
        rationale=candidate.rationale,
        confidence=candidate.confidence,
        status=initial_status,
        topic_key=topic,
        project_id=project_id,
        source=source,
        session_id=session_id,
        repo=repo,
        branch=branch,
        commit=commit,
        files=list(files or []),
        evidence=list(evidence or []),
        derived_from_paths=list(derived_from_paths or []),
        supersedes=list(supersedes or []),
    )
161
+
162
+
163
def _decide_for_candidate(
    candidate: MemoryCandidate,
    records: list[MemoryRecord],
    *,
    make_kwargs: dict,
) -> tuple[MemoryLifecycleDecision, list[MemoryRecord]]:
    """Return (decision, updated_records) for one candidate.

    The best matching existing record, if any, is tested in a fixed order:
    supersede, then merge, then conflict. When none applies, or there is no
    match, a new record is created. The input list is never mutated; a new
    list is returned each time.
    """
    match = _best_matching_record(candidate, records)

    if match:
        if _should_supersede(candidate, match):
            # The new record replaces the old one, with links in both directions.
            successor = _make_record(candidate, supersedes=[match.memory_id], **make_kwargs)
            retired = match.model_copy(
                update={"status": "superseded", "superseded_by": successor.memory_id}
            )
            next_records = [retired if item.memory_id == match.memory_id else item for item in records]
            next_records.append(successor)
            decision = MemoryLifecycleDecision(
                action="supersede",
                reason=f"supersedes {match.memory_id}",
                candidate=candidate,
                target_memory_id=match.memory_id,
                resulting_record=successor,
            )
            return decision, next_records

        if _should_merge(candidate, match):
            # The existing record is kept and enriched: higher confidence
            # wins, newer rationale/repo/commit win when present, and files
            # and evidence are unioned in first-seen order.
            extra_files = make_kwargs.get("files") or []
            extra_evidence = make_kwargs.get("evidence") or []
            merged = match.model_copy(update={
                "confidence": max(match.confidence, candidate.confidence),
                "updated_at": datetime.now(UTC).isoformat(),
                "rationale": candidate.rationale or match.rationale,
                "repo": make_kwargs.get("repo") or match.repo,
                "commit": make_kwargs.get("commit") or match.commit,
                "files": list(dict.fromkeys([*match.files, *extra_files])),
                "evidence": list(dict.fromkeys([*match.evidence, *extra_evidence])),
            })
            next_records = [merged if item.memory_id == match.memory_id else item for item in records]
            decision = MemoryLifecycleDecision(
                action="merge",
                reason=f"merged into {match.memory_id}",
                candidate=candidate,
                target_memory_id=match.memory_id,
                resulting_record=merged,
            )
            return decision, next_records

        if _is_conflict(candidate, match):
            # The contradicting memory is stored next to the one it disputes.
            disputed = _make_record(candidate, **make_kwargs)
            disputed.status = "contradicted"
            decision = MemoryLifecycleDecision(
                action="conflict",
                reason=f"conflicts with {match.memory_id}",
                candidate=candidate,
                target_memory_id=match.memory_id,
                resulting_record=disputed,
            )
            return decision, [*records, disputed]

    fresh = _make_record(candidate, **make_kwargs)
    decision = MemoryLifecycleDecision(
        action="create",
        reason="new durable memory candidate",
        candidate=candidate,
        resulting_record=fresh,
    )
    return decision, [*records, fresh]
210
+
211
+
212
def promote_memory_candidates(
    candidates: list[MemoryCandidate],
    *,
    existing_records: list[MemoryRecord] | None = None,
    project_id: str | None = None,
    source: str | None = None,
    session_id: str | None = None,
    repo: str | None = None,
    branch: str | None = None,
    commit: str | None = None,
    files: list[str] | None = None,
    evidence: list[str] | None = None,
) -> MemoryPromotionResult:
    """Apply each candidate to a copy of the existing records, in order.

    Every candidate is sanitised first. Sensitive, non-storable and noise
    candidates are recorded as ``"ignore"`` decisions. A procedural candidate
    that ``_is_repeatable_procedure`` rejects is downgraded to an episodic
    decision. The remaining candidates go through ``_decide_for_candidate``.
    The final record set is passed through ``age_memory_records`` before it
    is returned. *existing_records* is deep-copied and never modified.
    """
    working = [existing.model_copy(deep=True) for existing in (existing_records or [])]
    outcomes: list[MemoryLifecycleDecision] = []
    record_fields = {
        "project_id": project_id,
        "source": source,
        "session_id": session_id,
        "repo": repo,
        "branch": branch,
        "commit": commit,
        "files": files,
        "evidence": evidence,
    }

    for raw in candidates:
        cleaned = sanitize_candidate(raw)

        rejected = cleaned.sensitive or not cleaned.should_store or cleaned.memory_type == "noise"
        if rejected:
            outcomes.append(
                MemoryLifecycleDecision(action="ignore", reason="noise or sensitive", candidate=cleaned)
            )
            continue

        claims_procedure = cleaned.memory_type == "procedural" and cleaned.kind == "procedure"
        if claims_procedure and not _is_repeatable_procedure(cleaned.summary, cleaned.rationale):
            cleaned = cleaned.model_copy(update={"memory_type": "episodic", "kind": "decision"})

        outcome, working = _decide_for_candidate(cleaned, working, make_kwargs=record_fields)
        outcomes.append(outcome)

    return MemoryPromotionResult(decisions=outcomes, records=age_memory_records(working))
244
+
245
+
246
# Re-export evidence helpers so callers can import them from here unchanged.
# build_event_evidence_id, collect_candidate_context and
# collect_candidate_evidence_refs come from app.services.memory_evidence;
# every other name is defined in this module.
__all__ = [
    "MemoryLifecycleDecision",
    "MemoryPromotionResult",
    "MemoryRecord",
    "age_memory_records",
    "build_event_evidence_id",
    "build_existing_memory_context",
    "collect_candidate_context",
    "collect_candidate_evidence_refs",
    "promote_memory_candidates",
    "sanitize_candidate",
]