cherry-docs 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- app/__init__.py +0 -0
- app/repo_scope.py +24 -0
- app/services/__init__.py +0 -0
- app/services/agent_protocol.py +59 -0
- app/services/auto_promote_sessions.py +245 -0
- app/services/capture_adapters.py +89 -0
- app/services/capture_core.py +164 -0
- app/services/internal_memory_agent.py +214 -0
- app/services/memory_evidence.py +89 -0
- app/services/memory_extraction_normalize.py +134 -0
- app/services/memory_lifecycle.py +258 -0
- app/services/memory_profiles.py +88 -0
- app/services/memory_providers.py +113 -0
- app/services/memory_retrieval.py +327 -0
- app/services/memory_retrieval_scoring.py +106 -0
- app/services/memory_retrieval_text.py +113 -0
- app/services/memory_similarity.py +135 -0
- app/services/privacy.py +72 -0
- app/services/promoted_memory_answer.py +157 -0
- app/services/promoted_memory_pipeline.py +194 -0
- app/services/promoted_memory_store.py +57 -0
- cherry_docs-0.2.0.dist-info/METADATA +143 -0
- cherry_docs-0.2.0.dist-info/RECORD +42 -0
- cherry_docs-0.2.0.dist-info/WHEEL +5 -0
- cherry_docs-0.2.0.dist-info/entry_points.txt +4 -0
- cherry_docs-0.2.0.dist-info/top_level.txt +3 -0
- cherrydocs/__init__.py +3 -0
- cherrydocs/cli.py +213 -0
- cherrydocs/hook.py +27 -0
- cherrydocs/mcp.py +22 -0
- scripts/__init__.py +0 -0
- scripts/auto_promote_capture.py +63 -0
- scripts/check_size_limits.py +115 -0
- scripts/ci_auto_capture.py +289 -0
- scripts/claude_hooks/__init__.py +0 -0
- scripts/claude_hooks/state_manager.py +526 -0
- scripts/coverage_regression_gate.py +121 -0
- scripts/eval_projects.py +247 -0
- scripts/install.py +212 -0
- scripts/pr_gate_report.py +282 -0
- scripts/promptfoo_regression_gate.py +176 -0
- scripts/render_agent_prompts.py +57 -0
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
"""Prompt profiles for the CherryDocs internal memory distiller."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from typing import Literal
|
|
7
|
+
|
|
8
|
+
# Closed set of profile names that have an entry in the built-in registry.
MemoryPromptProfileName = Literal["default", "noise_strict", "verification_first"]


@dataclass(frozen=True)
class MemoryPromptProfile:
    """Immutable bundle of prompt fragments describing one distiller profile.

    Instances are frozen, so a profile can be shared freely without being
    mutated by a consumer.
    """

    # Identifier of the profile.
    name: str
    # One-sentence human-readable summary of what the profile is tuned for.
    description: str
    # Headline instruction for the distiller
    # (presumably rendered into the system prompt — confirm against caller).
    system_focus: str
    # Rules about what should or should not be preserved as memory.
    keep_rules: tuple[str, ...]
    # Additional tie-breaking / de-duplication guidance.
    extra_rules: tuple[str, ...]
    # Kinds of information the extraction should lean toward.
    extraction_bias: tuple[str, ...]
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# Registry of the built-in prompt profiles, keyed by each profile's own name so
# the key and the ``name`` field cannot drift apart.
_PROMPT_PROFILES: dict[str, MemoryPromptProfile] = {
    entry.name: entry
    for entry in (
        MemoryPromptProfile(
            name="default",
            description="Balanced default profile for general engineering continuity.",
            system_focus="Keep only information that would be expensive for another AI to rediscover later.",
            keep_rules=(
                "Do not preserve generic chatter, routine status updates, or cheap file-edit summaries.",
                "Do not preserve secrets, passwords, API keys, tokens, or obviously sensitive content.",
                "Never extract test counts, lint error counts, CI status, or PR review status as facts — these change daily.",
            ),
            extra_rules=(
                "Prefer the strongest concrete decision or warning over multiple paraphrases of the same idea.",
                "Avoid meta commentary about model choice or architecture quality unless it is itself a lasting project decision.",
            ),
            extraction_bias=(
                "what was attempted",
                "what failed and why",
                "what decision was made and why",
                "what tradeoff mattered",
                "what reusable procedure was discovered",
                "what warning another AI should not repeat",
                "whether an explicit verification/test step passed or failed",
            ),
        ),
        MemoryPromptProfile(
            name="noise_strict",
            description="More aggressive profile for cutting broad chatter and weak notes.",
            system_focus="Keep only high-signal memories with clear rediscovery cost; aggressively drop broad planning chatter.",
            keep_rules=(
                "Discard routine planning updates, generic progress notes, and repeated summaries unless they record a concrete decision, failure, or constraint.",
                "Do not preserve secrets, passwords, API keys, tokens, or obviously sensitive content.",
            ),
            extra_rules=(
                "If two candidates overlap, keep only the more concrete one.",
                "When uncertain whether something is durable memory or chatter, prefer noise unless there is explicit evidence of a decision, failure, or verification result.",
            ),
            extraction_bias=(
                "what failed and why",
                "what hidden constraint or warning matters later",
                "what exact decision changed the direction of work",
                "what verification/test result invalidated or confirmed an approach",
            ),
        ),
        MemoryPromptProfile(
            name="verification_first",
            description="Bias toward preserving test and verification outcomes alongside the reasoning they proved.",
            system_focus="Prefer memories that connect decisions or warnings to explicit verification outcomes.",
            keep_rules=(
                "Preserve decisions, warnings, and conclusions that were confirmed or falsified by tests or verification runs.",
                "Do not preserve secrets, passwords, API keys, tokens, or obviously sensitive content.",
            ),
            extra_rules=(
                "When a verification step passed or failed, connect that outcome to the decision, warning, or conclusion it supports.",
                "Avoid procedural memory unless the trace clearly shows a repeatable verification workflow another AI should reuse.",
            ),
            extraction_bias=(
                "which verification/test step passed or failed",
                "what decision or warning that verification supports",
                "what failure mode was proven by a test result",
                "what workflow reliably verifies the same thing again",
            ),
        ),
    )
}
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def resolve_memory_prompt_profile(profile: MemoryPromptProfileName | str | None = None) -> MemoryPromptProfile:
    """Look up a prompt profile by name, falling back to the default profile.

    The name is stringified, whitespace-collapsed and lower-cased before the
    lookup. A falsy, blank or unknown name yields the ``"default"`` profile
    instead of raising.
    """
    fallback = _PROMPT_PROFILES["default"]
    if not profile:
        return fallback
    # split()/join collapses internal whitespace runs and trims both ends.
    normalized = " ".join(str(profile).split()).lower()
    return _PROMPT_PROFILES.get(normalized, fallback)
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
"""LLM provider implementations for memory distillation.
|
|
2
|
+
|
|
3
|
+
Priority when no provider is specified:
|
|
4
|
+
1. ANTHROPIC_API_KEY set → AnthropicMemoryProvider (claude-haiku — best JSON quality)
|
|
5
|
+
2. Fallback → OllamaMemoryProvider (local, no API key needed)
|
|
6
|
+
|
|
7
|
+
Override with CHERRY_DISTILL_PROVIDER=anthropic|ollama.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import json
|
|
13
|
+
import os
|
|
14
|
+
import re
|
|
15
|
+
from typing import Any
|
|
16
|
+
|
|
17
|
+
import httpx
|
|
18
|
+
|
|
19
|
+
# Endpoint and model defaults; each is read from the environment once, when
# this module is imported, and falls back to the literal shown.
_OLLAMA_CHAT_URL = os.environ.get("OLLAMA_CHAT_URL", "http://127.0.0.1:11434/api/chat")
_DEFAULT_OLLAMA_MODEL = os.environ.get("CHERRY_OLLAMA_MODEL", "qwen2.5:7b-instruct")
_DEFAULT_ANTHROPIC_MODEL = os.environ.get("CHERRY_ANTHROPIC_MODEL", "claude-3-5-haiku-20241022")
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class OllamaMemoryProvider:
    """Local Ollama provider — no API key required. Good for dev/offline use."""

    def __init__(
        self,
        *,
        model: str = _DEFAULT_OLLAMA_MODEL,
        url: str = _OLLAMA_CHAT_URL,
        timeout: float = 60.0,
    ) -> None:
        """Store the model name, chat endpoint URL and request timeout (seconds)."""
        self.model = model
        self.url = url
        self.timeout = timeout

    def extract(self, prompt: str) -> dict[str, Any]:
        """Send ``prompt`` as a single user message and return the parsed JSON object.

        Returns an empty dict when the reply has no usable content, is not
        valid JSON, or is valid JSON that is not an object. HTTP-level errors
        from ``raise_for_status`` (and transport errors) propagate to the caller.
        """
        payload = {
            "model": self.model,
            "messages": [{"role": "user", "content": prompt}],
            "stream": False,
            "format": "json",
            "options": {"temperature": 0.1},
        }
        with httpx.Client(timeout=self.timeout) as client:
            response = client.post(self.url, json=payload)
            response.raise_for_status()
            data = response.json()

        # Walk the payload defensively: an unexpected shape means "no result",
        # not an AttributeError.
        message = data.get("message") if isinstance(data, dict) else None
        content = message.get("content") if isinstance(message, dict) else None
        if not isinstance(content, str):
            return {}
        content = content.strip()
        if not content:
            return {}
        try:
            parsed = json.loads(content)
        except json.JSONDecodeError:
            return {}
        # The declared contract is a dict; a bare list/string/number is treated
        # the same as an unparseable reply.
        return parsed if isinstance(parsed, dict) else {}
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class AnthropicMemoryProvider:
    """Anthropic Claude provider — best JSON extraction quality.

    Requires ANTHROPIC_API_KEY or explicit api_key param.
    Default model: claude-3-5-haiku-20241022 (fast, cheap, excellent schema adherence).
    Override with CHERRY_ANTHROPIC_MODEL env var.
    """

    def __init__(
        self,
        *,
        model: str = _DEFAULT_ANTHROPIC_MODEL,
        api_key: str | None = None,
        timeout: float = 60.0,
    ) -> None:
        """Store the model name, optional API key and request timeout (seconds)."""
        self.model = model
        self.api_key = api_key  # None → SDK reads ANTHROPIC_API_KEY from env
        self.timeout = timeout

    def extract(self, prompt: str) -> dict[str, Any]:
        """Send ``prompt`` as a single user message and return the parsed JSON object.

        Returns an empty dict when the reply carries no text, is not valid
        JSON after stripping a surrounding Markdown fence, or is valid JSON
        that is not an object.

        Raises:
            ImportError: if the optional ``anthropic`` package is not installed.
        """
        # Imported lazily so the package stays optional for Ollama-only users.
        try:
            import anthropic as _anthropic
        except ImportError as exc:
            raise ImportError(
                "anthropic package required: pip install 'cherry-docs[anthropic]'"
            ) from exc

        client = _anthropic.Anthropic(api_key=self.api_key, timeout=self.timeout)
        response = client.messages.create(
            model=self.model,
            max_tokens=1024,
            messages=[{"role": "user", "content": prompt}],
        )

        # Use the first content block that actually carries text; a block
        # without a ``text`` attribute must not raise AttributeError.
        content = ""
        for block in response.content or []:
            block_text = getattr(block, "text", None)
            if isinstance(block_text, str):
                content = block_text.strip()
                break
        if not content:
            return {}

        # Strip markdown fences Claude sometimes wraps around JSON
        content = re.sub(r"^```(?:json)?\s*\n?", "", content)
        content = re.sub(r"\n?```\s*$", "", content)
        try:
            parsed = json.loads(content)
        except json.JSONDecodeError:
            return {}
        # The declared contract is a dict; any other JSON value counts as no result.
        return parsed if isinstance(parsed, dict) else {}
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def resolve_provider() -> OllamaMemoryProvider | AnthropicMemoryProvider:
    """Pick a distillation provider from the environment.

    ``CHERRY_DISTILL_PROVIDER=anthropic`` forces Anthropic. When the variable
    is unset or blank, Anthropic is chosen only if ``ANTHROPIC_API_KEY`` is
    non-empty. Every other case — including an unrecognised explicit value —
    yields the Ollama provider.
    """
    requested = os.getenv("CHERRY_DISTILL_PROVIDER", "").strip().lower()
    if requested == "anthropic":
        return AnthropicMemoryProvider()
    if not requested and os.getenv("ANTHROPIC_API_KEY"):
        return AnthropicMemoryProvider()
    return OllamaMemoryProvider()
|
|
@@ -0,0 +1,327 @@
|
|
|
1
|
+
"""Deterministic retrieval over promoted memory plus raw captured evidence."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from datetime import UTC, datetime
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
9
|
+
|
|
10
|
+
from app.services.memory_lifecycle import MemoryRecord, build_event_evidence_id
|
|
11
|
+
from app.services.memory_retrieval_scoring import (
|
|
12
|
+
_EVENT_TYPE_RANK,
|
|
13
|
+
_KIND_BONUS_BY_QUESTION,
|
|
14
|
+
_compute_trust_score,
|
|
15
|
+
_compute_utility_score,
|
|
16
|
+
_parse_timestamp,
|
|
17
|
+
_recentness_bonus,
|
|
18
|
+
_specificity_bonus,
|
|
19
|
+
)
|
|
20
|
+
from app.services.memory_retrieval_text import (
|
|
21
|
+
_file_overlap,
|
|
22
|
+
_file_query_hint,
|
|
23
|
+
_question_mode,
|
|
24
|
+
_score_overlap,
|
|
25
|
+
_trim_text,
|
|
26
|
+
_wants_recent_context,
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# One scored excerpt of a captured event, as returned by evidence collection.
# (Documented with comments rather than a docstring so the model's generated
# schema description is left untouched.)
class EvidenceSnippet(BaseModel):
    # Unknown keys in the input are dropped instead of rejected.
    model_config = ConfigDict(extra="ignore")

    # Identifier of the source event.
    evidence_id: str
    # Event category string, e.g. the keys of the event-type rank table.
    event_type: str
    # Raw timestamp string from the event, if any.
    timestamp: str | None = None
    # Event text (trimmed by the collector before it is stored here).
    text: str = ""
    # File paths attached to the event.
    files: list[str] = Field(default_factory=list)
    # Ranking score assigned by the collector.
    score: float = 0.0
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# A promoted memory record together with the scores that ranked it.
# (Documented with comments rather than a docstring so the model's generated
# schema description is left untouched.)
class RetrievedMemory(BaseModel):
    # Unknown keys in the input are dropped instead of rejected.
    model_config = ConfigDict(extra="ignore")

    # The underlying memory record.
    memory: MemoryRecord
    # Final combined ranking score.
    score: float
    # Components of the final score, kept separately for inspection.
    trust_score: float = 0.0
    utility_score: float = 0.0
    relevance_score: float = 0.0
    # Whole days since the record was last updated.
    age_days: int = 0
    # Human-readable tags explaining which signals contributed to the score.
    reasons: list[str] = Field(default_factory=list)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
# Final answer object: text plus the memories and evidence behind it.
# (Documented with comments rather than a docstring so the model's generated
# schema description is left untouched.)
class RetrievalAnswer(BaseModel):
    # Unknown keys in the input are dropped instead of rejected.
    model_config = ConfigDict(extra="ignore")

    # The question being answered, echoed back.
    question: str
    # Full answer text (may span several lines).
    answer: str
    # Short one-line summary of the answer.
    summary: str
    # Ranked memories that back the answer (empty for evidence-only answers).
    memories: list[RetrievedMemory] = Field(default_factory=list)
    # Raw event snippets supporting the answer.
    evidence: list[EvidenceSnippet] = Field(default_factory=list)
    # Caveats the reader should know about (missing evidence, weak memory, ...).
    gaps: list[str] = Field(default_factory=list)
    # True when the top memory is considered old.
    stale: bool = False
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _event_by_id(events: list[dict[str, Any]]) -> dict[str, dict[str, Any]]:
    """Index events by their evidence id.

    When two events produce the same id, the later one in ``events`` wins.
    """
    index: dict[str, dict[str, Any]] = {}
    for event in events:
        index[build_event_evidence_id(event)] = event
    return index
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def collect_event_evidence(
    question: str,
    events: list[dict[str, Any]],
    *,
    memory_hint: str | None = None,
    preferred_evidence_ids: list[str] | None = None,
    file_path: str | None = None,
    limit: int = 4,
) -> list[EvidenceSnippet]:
    """Score captured events against a question and return the best snippets.

    Events are matched against an anchor text built from the question, the
    optional ``memory_hint`` and a hint derived from ``file_path``. Events
    named in ``preferred_evidence_ids`` are scored first with a higher base
    score and a stricter text-overlap threshold. If any of them qualify, only
    those are returned; otherwise every event is ranked.

    Args:
        question: The user question.
        events: Raw captured events (dicts with ``text``, ``files``,
            ``event_type`` and ``timestamp`` keys, all optional).
        memory_hint: Extra text appended to the anchor.
        preferred_evidence_ids: Evidence ids to try first.
        file_path: File the question is about, if any.
        limit: Maximum number of snippets; values below 1 are treated as 1.

    Returns:
        Snippets sorted by score, then by timestamp, both descending.
    """
    event_index = _event_by_id(events)
    file_hint = _file_query_hint(file_path)
    anchor_text = f"{question} {memory_hint or ''} {file_hint}".strip()
    wants_recent = _wants_recent_context(question)
    now = datetime.now(UTC)
    earliest = datetime.min.replace(tzinfo=UTC)
    max_items = max(1, limit)

    def _aware_timestamp(raw: str | None) -> datetime | None:
        # Timestamps without an offset are assumed to be UTC so they can be
        # subtracted from / compared with the aware ``now`` and ``earliest``
        # values instead of raising TypeError.
        parsed = _parse_timestamp(raw)
        if parsed is not None and parsed.tzinfo is None:
            return parsed.replace(tzinfo=UTC)
        return parsed

    def _score_event(event: dict, evidence_id: str, base: float, min_overlap: float) -> EvidenceSnippet | None:
        text = str(event.get("text") or "").strip()
        if not text:
            return None
        overlap = _score_overlap(anchor_text, text)
        ev_files = [str(p) for p in (event.get("files") or []) if str(p).strip()]
        file_score = _file_overlap(file_path, ev_files)
        # Keep the event if either the text or the file list matches.
        if overlap < min_overlap and file_score <= 0:
            return None
        event_type = str(event.get("event_type") or "unknown")
        timestamp = str(event.get("timestamp") or "")
        recent_bonus = 0.0
        parsed_ts = _aware_timestamp(timestamp)
        if wants_recent and parsed_ts is not None:
            recent_bonus = _recentness_bonus(
                max(0, (now - parsed_ts).days), strong_window_days=7, medium_window_days=30
            )
        return EvidenceSnippet(
            evidence_id=evidence_id,
            event_type=event_type,
            timestamp=timestamp,
            text=_trim_text(text),
            files=ev_files,
            score=base + overlap + (file_score * 1.25) + _EVENT_TYPE_RANK.get(event_type, 0.5) + recent_bonus,
        )

    def _sort_key(item: EvidenceSnippet) -> tuple:
        return (item.score, _aware_timestamp(item.timestamp or "") or earliest)

    preferred: list[EvidenceSnippet] = []
    seen_ids: set[str] = set()
    for evidence_id in preferred_evidence_ids or []:
        # A repeated id must not yield a duplicate snippet.
        if evidence_id in seen_ids:
            continue
        event = event_index.get(evidence_id)
        if not event:
            continue
        snippet = _score_event(event, evidence_id, base=2.5, min_overlap=0.2)
        if snippet is not None:
            preferred.append(snippet)
            seen_ids.add(evidence_id)

    if preferred:
        # NOTE(review): preferred evidence is returned on its own and is never
        # topped up from the general ranking, even when fewer than ``limit``
        # snippets qualified. That matches the previous behaviour — confirm it
        # is intended. Returning here also avoids scoring every event only to
        # discard the result.
        preferred.sort(key=_sort_key, reverse=True)
        return preferred[:max_items]

    ranked: list[EvidenceSnippet] = []
    for event in events:
        snippet = _score_event(event, build_event_evidence_id(event), base=0.0, min_overlap=0.01)
        if snippet is not None:
            ranked.append(snippet)
    ranked.sort(key=_sort_key, reverse=True)
    return ranked[:max_items]
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def rank_memories(
    question: str,
    records: list[MemoryRecord],
    *,
    branch: str | None = None,
    file_path: str | None = None,
    limit: int = 5,
) -> list[RetrievedMemory]:
    """Rank promoted memory records against a question.

    Archived records are skipped, as are records with neither text overlap
    nor file overlap. The final score is::

        relevance + trust + 0.35 * utility + confidence - freshness_penalty

    where relevance sums text overlap, branch match, file overlap, a
    question-mode bonus by kind, an optional recency bonus and a specificity
    bonus.

    Args:
        question: The user question.
        records: Candidate memory records.
        branch: Current branch; an exact match adds a small bonus.
        file_path: File the question is about, if any.
        limit: Maximum number of results; values below 1 are treated as 1.

    Returns:
        Matches sorted by score, then by ``updated_at``, both descending.
    """
    mode = _question_mode(question)
    wants_recent = _wants_recent_context(question)
    now = datetime.now(UTC)
    file_hint = _file_query_hint(file_path)
    # Loop invariants hoisted out of the per-record work.
    query_text = f"{question} {file_hint}".strip()
    kind_bonuses = _KIND_BONUS_BY_QUESTION.get(mode, {})
    earliest = datetime.min.replace(tzinfo=UTC)

    # (score, updated_at, match) — the timestamp is parsed once and reused as
    # the sort tie-breaker.
    scored: list[tuple[float, datetime, RetrievedMemory]] = []
    for record in records:
        if record.status == "archived":
            continue
        text = f"{record.kind} {record.memory_type} {record.summary} {record.rationale} {record.topic_key}".strip()
        overlap = _score_overlap(query_text, text)
        branch_score = 0.3 if branch and record.branch and branch == record.branch else 0.0
        file_score = _file_overlap(file_path, record.files) * 1.25
        if overlap <= 0 and file_score <= 0:
            continue
        confidence_score = max(0.0, min(1.0, record.confidence))
        kind_bonus = kind_bonuses.get(record.kind, 0.0)

        # Parse with the same helper the sort key uses, and treat a timestamp
        # without an offset as UTC. Previously such a record silently got
        # age 0 and could raise TypeError when compared during sorting.
        updated_at = _parse_timestamp(record.updated_at)
        if updated_at is not None and updated_at.tzinfo is None:
            updated_at = updated_at.replace(tzinfo=UTC)
        age_days = max(0, (now - updated_at).days) if updated_at is not None else 0

        freshness_penalty = min(0.75, age_days / 3650)
        recent_bonus = _recentness_bonus(age_days) if wants_recent else 0.0
        trust_score = _compute_trust_score(record, age_days=age_days)
        utility_score = _compute_utility_score(record)
        specificity_bonus = _specificity_bonus(
            record, wants_recent=wants_recent, file_path=file_path, file_score=file_score, overlap=overlap,
        )
        relevance_score = overlap + branch_score + file_score + kind_bonus + recent_bonus + specificity_bonus
        score = relevance_score + trust_score + (utility_score * 0.35) + confidence_score - freshness_penalty

        reasons = [f"status={record.status}", f"overlap={overlap:.2f}"]
        if branch_score:
            reasons.append("branch_match")
        if file_score:
            reasons.append(f"file_overlap=+{file_score:.2f}")
        if kind_bonus:
            reasons.append(f"kind_bonus={kind_bonus:+.1f}")
        if freshness_penalty:
            reasons.append(f"freshness_penalty=-{freshness_penalty:.2f}")
        if recent_bonus:
            reasons.append(f"recent_bonus=+{recent_bonus:.2f}")
        if specificity_bonus:
            reasons.append(f"specificity_bonus={specificity_bonus:+.2f}")
        if record.supersedes:
            reasons.append("supersedes_older_memory")
        if record.retrieval_count:
            reasons.append(f"retrieval_count={record.retrieval_count}")
        if record.evidence_hit_count:
            reasons.append(f"evidence_hits={record.evidence_hit_count}")

        match = RetrievedMemory(
            memory=record, score=score, trust_score=trust_score,
            utility_score=utility_score, relevance_score=relevance_score,
            age_days=age_days, reasons=reasons,
        )
        scored.append((score, updated_at or earliest, match))

    # Sort on (score, timestamp) only: the models themselves are not orderable.
    scored.sort(key=lambda entry: (entry[0], entry[1]), reverse=True)
    return [entry[2] for entry in scored[: max(1, limit)]]
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def apply_retrieval_feedback(
    records: list[MemoryRecord],
    retrieved: list[RetrievedMemory],
    evidence: list[EvidenceSnippet],
    *,
    now: datetime | None = None,
) -> list[MemoryRecord]:
    """Return records with usage counters bumped for the memories just retrieved.

    Every record whose ``memory_id`` appears in ``retrieved`` gets
    ``retrieval_count`` incremented and ``last_retrieved_at`` stamped. If any
    of its evidence ids also appear in ``evidence``, ``evidence_hit_count`` is
    raised by the number of such ids and ``last_evidence_hit_at`` is stamped.
    Other records pass through unchanged. Updates are applied through
    ``model_copy`` rather than by assigning to the input records.

    When ``retrieved`` is empty the input list itself is returned.
    """
    if not retrieved:
        return records

    stamp = (now or datetime.now(UTC)).isoformat()
    hit_memory_ids = {match.memory.memory_id for match in retrieved}
    surfaced_evidence_ids = {snippet.evidence_id for snippet in evidence}

    def _bumped(record: MemoryRecord) -> MemoryRecord:
        matched = len([ref for ref in record.evidence if ref in surfaced_evidence_ids])
        changes: dict[str, Any] = {
            "retrieval_count": max(0, record.retrieval_count) + 1,
            "last_retrieved_at": stamp,
        }
        if matched:
            changes["evidence_hit_count"] = max(0, record.evidence_hit_count) + matched
            changes["last_evidence_hit_at"] = stamp
        return record.model_copy(update=changes)

    return [
        _bumped(record) if record.memory_id in hit_memory_ids else record
        for record in records
    ]
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
def synthesize_retrieval_answer(
    question: str,
    records: list[MemoryRecord],
    events: list[dict[str, Any]],
    *,
    branch: str | None = None,
    file_path: str | None = None,
) -> RetrievalAnswer:
    """Build a deterministic answer from ranked memories and raw evidence.

    Three outcomes are possible:

    * a memory matched — the answer is built from the top memory, with caveats
      collected in ``gaps`` and ``stale`` set when it is more than 30 days old;
    * no memory matched but evidence did — an evidence-only fallback answer;
    * nothing matched — a fixed "could not find" answer.
    """
    matches = rank_memories(question, records, branch=branch, file_path=file_path)
    best = matches[0] if matches else None

    # The top memory (if any) steers the evidence search.
    hint = ""
    if best is not None:
        hint = f"{best.memory.summary} {best.memory.rationale}".strip()
    evidence = collect_event_evidence(
        question, events, memory_hint=hint,
        preferred_evidence_ids=best.memory.evidence if best is not None else None,
        file_path=file_path,
    )

    if best is None:
        if not evidence:
            return RetrievalAnswer(
                question=question, answer="I could not find a strong memory-backed answer yet.",
                summary="No strong matching memory found.", memories=[], evidence=evidence,
                gaps=["Need more captured evidence or promoted memory for this topic."],
            )
        closest = evidence[0]
        fallback_gaps = ["No promoted memory matched yet; this answer is evidence-only."]
        if file_path:
            answer = (
                f"No promoted memory matched `{file_path}` yet. "
                f"Closest raw evidence for that file is from `{closest.event_type}`: {closest.text}"
            )
            fallback_gaps.append(f"Used file hint `{file_path}` to rank raw evidence.")
        else:
            answer = f"Closest captured evidence for this question is from `{closest.event_type}`: {closest.text}"
        return RetrievalAnswer(
            question=question, answer=answer, summary="Evidence-only fallback answer.",
            memories=[], evidence=evidence, gaps=fallback_gaps,
        )

    top = best.memory

    # Answer body: the summary first, then only the qualifiers that apply.
    lines = [top.summary]
    if top.rationale:
        lines.append(f"Why: {top.rationale}")
    if top.status != "active":
        lines.append(f"Status: {top.status}")
    if top.confidence < 0.8:
        lines.append(f"Confidence: {top.confidence:.2f}")
    if top.supersedes:
        lines.append(f"Supersedes: {', '.join(top.supersedes)}")

    extra_matches = len(matches) - 1
    summary = top.summary
    if extra_matches > 0:
        summary += f" (+{extra_matches} related memory matches)"

    stale = best.age_days > 30
    gaps: list[str] = []
    if not evidence:
        gaps.append("No direct raw-evidence snippet matched this question.")
    if top.status != "active":
        gaps.append("Top memory is not currently active.")
    if best.trust_score < 1.0:
        gaps.append("Top memory is relatively weak or provisional; verify against current code or newer evidence.")
    if stale:
        gaps.append(f"Top memory is {best.age_days} days old — verify it still reflects current code.")

    return RetrievalAnswer(
        question=question, answer="\n".join(lines), summary=summary,
        memories=matches, evidence=evidence, gaps=gaps, stale=stale,
    )
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
# Public API of this module. ``build_event_evidence_id`` is imported from the
# lifecycle module and re-exported here.
__all__ = [
    "EvidenceSnippet",
    "RetrievalAnswer",
    "RetrievedMemory",
    "apply_retrieval_feedback",
    "build_event_evidence_id",
    "collect_event_evidence",
    "rank_memories",
    "synthesize_retrieval_answer",
]
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
"""Memory ranking scoring helpers — trust, utility, freshness, and specificity."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from datetime import UTC, datetime
|
|
6
|
+
from typing import TYPE_CHECKING
|
|
7
|
+
|
|
8
|
+
if TYPE_CHECKING:
|
|
9
|
+
from app.services.memory_lifecycle import MemoryRecord
|
|
10
|
+
|
|
11
|
+
# Relative weight of each memory status; the trust score divides by the
# largest value, so "active" maps to 1.0.
_STATUS_RANK = {
    "active": 4.0,
    "tentative": 2.5,
    "stale": 2.0,
    "contradicted": 1.5,
    "superseded": 1.0,
    "archived": 0.5,
}

# Score contribution of an evidence snippet by the type of its source event.
_EVENT_TYPE_RANK = {
    "assistant_output": 1.5,
    "remember": 1.4,
    "tool_result": 1.2,
    "shell_result": 1.1,
    "test_result": 1.3,
    "user_prompt": 0.8,
}

# Bonus (or penalty) per memory kind, keyed by question mode.
_KIND_BONUS_BY_QUESTION = {
    "why": {"decision": 0.8, "warning": 0.4, "pattern": 0.2, "procedure": -0.2},
    "what": {"fact": 0.4, "conclusion": 0.3, "decision": 0.1, "warning": 0.1, "attempt": -0.1},
    "what_failed": {"attempt": 0.8, "warning": 0.5, "decision": 0.1},
    "how": {"procedure": 0.8, "warning": 0.2, "decision": 0.1},
}
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _parse_timestamp(value: str | None) -> datetime | None:
|
|
36
|
+
text = (value or "").strip().replace("Z", "+00:00")
|
|
37
|
+
if not text:
|
|
38
|
+
return None
|
|
39
|
+
try:
|
|
40
|
+
return datetime.fromisoformat(text)
|
|
41
|
+
except Exception:
|
|
42
|
+
return None
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _recentness_bonus(age_days: int, *, strong_window_days: int = 14, medium_window_days: int = 45) -> float:
    """Return a recency bonus that steps down as ``age_days`` grows.

    Tiers are checked in order and the first one whose upper bound covers
    ``age_days`` wins: up to 1 day → 0.5, up to ``strong_window_days`` → 0.35,
    up to ``medium_window_days`` → 0.18, up to 120 days → 0.08, otherwise 0.0.
    """
    tiers = (
        (1, 0.5),
        (strong_window_days, 0.35),
        (medium_window_days, 0.18),
        (120, 0.08),
    )
    for upper_bound, bonus in tiers:
        if age_days <= upper_bound:
            return bonus
    return 0.0
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _compute_trust_score(record: MemoryRecord, *, age_days: int) -> float:
    """Estimate how far a memory record can be trusted, clamped to [0.0, 1.5].

    Starts from the record's status rank normalised by the highest rank
    (unknown statuses count as rank 1.0), adds a share of the clamped
    confidence, small bonuses for having a commit and for attached evidence,
    and subtracts a capped age penalty.
    """
    status_part = _STATUS_RANK.get(record.status, 1.0) / max(_STATUS_RANK.values())
    confidence_part = max(0.0, min(1.0, record.confidence)) * 0.35
    total = status_part + confidence_part

    if record.commit:
        total += 0.1
    if record.evidence:
        # 0.03 per evidence item, capped at 0.15.
        total += min(0.15, 0.03 * len(record.evidence))
    if age_days:
        # Roughly 0.1 per year of age, capped at 0.2.
        total -= min(0.2, age_days / 3650)

    return max(0.0, min(1.5, total))
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _compute_utility_score(record: MemoryRecord) -> float:
    """Score how useful a record has proven in past retrievals, in [0.0, 1.25].

    Combines a capped contribution from ``retrieval_count`` (0.08 each, max
    0.6), a capped contribution from ``evidence_hit_count`` (0.06 each, max
    0.4) and a flat 0.08 when the record supersedes another. Negative counters
    count as zero.
    """
    times_retrieved = max(0, record.retrieval_count)
    times_evidenced = max(0, record.evidence_hit_count)

    from_retrievals = min(0.6, 0.08 * times_retrieved)
    from_evidence = min(0.4, 0.06 * times_evidenced)
    from_supersedes = 0.08 if record.supersedes else 0.0

    combined = from_retrievals + from_evidence + from_supersedes
    return max(0.0, min(1.25, combined))
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _specificity_bonus(
    record: MemoryRecord,
    *,
    wants_recent: bool,
    file_path: str | None,
    file_score: float,
    overlap: float,
) -> float:
    """Reward narrowly scoped memories and penalise sprawling ones.

    Two independent adjustments are summed:

    * When a ``file_path`` was given and ``file_score`` is at least 1.0, the
      number of non-blank files on the record decides the adjustment
      (≤1 → +0.45, ≤3 → +0.22, ≥6 → -0.15).
    * When recent context is wanted and ``overlap`` is at least 0.25, both the
      file count (≤2 → +0.2, ≥6 → -0.12) and the non-blank evidence count
      (≤2 → +0.12, ≥6 → -0.08) contribute.
    """
    n_files = sum(1 for path in record.files if str(path).strip())
    n_evidence = sum(1 for item in record.evidence if str(item).strip())

    total = 0.0

    file_was_matched = bool(file_path) and file_score >= 1.0
    if file_was_matched:
        if n_files <= 1:
            total += 0.45
        elif n_files <= 3:
            total += 0.22
        elif n_files >= 6:
            total -= 0.15

    if not (wants_recent and overlap >= 0.25):
        return total

    # Recent-context questions: favour records tied to few files and few
    # evidence items.
    if n_files <= 2:
        total += 0.2
    elif n_files >= 6:
        total -= 0.12
    if n_evidence <= 2:
        total += 0.12
    elif n_evidence >= 6:
        total -= 0.08
    return total
|