agent-memory-engine 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_memory/__init__.py +33 -0
- agent_memory/cli.py +142 -0
- agent_memory/client.py +355 -0
- agent_memory/config.py +28 -0
- agent_memory/controller/__init__.py +15 -0
- agent_memory/controller/conflict.py +95 -0
- agent_memory/controller/consolidation.py +136 -0
- agent_memory/controller/forgetting.py +29 -0
- agent_memory/controller/router.py +62 -0
- agent_memory/controller/trust.py +31 -0
- agent_memory/embedding/__init__.py +5 -0
- agent_memory/embedding/base.py +11 -0
- agent_memory/embedding/local_provider.py +38 -0
- agent_memory/embedding/openai_provider.py +11 -0
- agent_memory/extraction/__init__.py +5 -0
- agent_memory/extraction/entity_extractor.py +13 -0
- agent_memory/extraction/pipeline.py +123 -0
- agent_memory/extraction/prompts.py +40 -0
- agent_memory/governance/__init__.py +6 -0
- agent_memory/governance/audit.py +14 -0
- agent_memory/governance/export.py +72 -0
- agent_memory/governance/health.py +40 -0
- agent_memory/interfaces/__init__.py +14 -0
- agent_memory/interfaces/mcp_server.py +128 -0
- agent_memory/interfaces/rest_api.py +71 -0
- agent_memory/llm/__init__.py +5 -0
- agent_memory/llm/base.py +23 -0
- agent_memory/llm/ollama_client.py +64 -0
- agent_memory/llm/openai_client.py +94 -0
- agent_memory/models.py +149 -0
- agent_memory/storage/__init__.py +4 -0
- agent_memory/storage/base.py +59 -0
- agent_memory/storage/schema.sql +125 -0
- agent_memory/storage/sqlite_backend.py +762 -0
- agent_memory_engine-0.1.0.dist-info/METADATA +228 -0
- agent_memory_engine-0.1.0.dist-info/RECORD +39 -0
- agent_memory_engine-0.1.0.dist-info/WHEEL +4 -0
- agent_memory_engine-0.1.0.dist-info/entry_points.txt +2 -0
- agent_memory_engine-0.1.0.dist-info/licenses/LICENSE +22 -0
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections import defaultdict
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from math import sqrt
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from agent_memory.extraction.prompts import CONSOLIDATION_PROMPT
|
|
9
|
+
from agent_memory.llm.base import LLMClient
|
|
10
|
+
from agent_memory.models import MemoryDraft, MemoryItem, MemoryType
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# JSON Schema passed to the LLM client when it is asked to merge memories.
# All four properties are required and unknown keys are rejected.
CONSOLIDATION_SCHEMA: dict[str, Any] = dict(
    type="object",
    properties=dict(
        content=dict(type="string"),
        memory_type=dict(type="string", enum=["semantic", "episodic", "procedural"]),
        importance=dict(type="number"),
        tags=dict(type="array", items=dict(type="string")),
    ),
    required=["content", "memory_type", "importance", "tags"],
    additionalProperties=False,
)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dataclass(slots=True)
|
|
27
|
+
class ConsolidationPlanner:
|
|
28
|
+
similarity_threshold: float = 0.9
|
|
29
|
+
recency_window_days: float = 45.0
|
|
30
|
+
|
|
31
|
+
def group_by_entities(self, memories: list[MemoryItem]) -> dict[str, list[MemoryItem]]:
|
|
32
|
+
grouped: dict[str, list[MemoryItem]] = defaultdict(list)
|
|
33
|
+
for memory in memories:
|
|
34
|
+
if not memory.entity_refs:
|
|
35
|
+
continue
|
|
36
|
+
grouped[memory.entity_refs[0].lower()].append(memory)
|
|
37
|
+
return {key: value for key, value in grouped.items() if len(value) > 1}
|
|
38
|
+
|
|
39
|
+
def find_merge_groups(self, memories: list[MemoryItem]) -> list[list[MemoryItem]]:
|
|
40
|
+
groups: list[list[MemoryItem]] = []
|
|
41
|
+
for entity_memories in self.group_by_entities(memories).values():
|
|
42
|
+
ordered = sorted(entity_memories, key=lambda item: item.created_at)
|
|
43
|
+
current_group: list[MemoryItem] = []
|
|
44
|
+
for memory in ordered:
|
|
45
|
+
if not current_group:
|
|
46
|
+
current_group = [memory]
|
|
47
|
+
continue
|
|
48
|
+
if self._should_merge(current_group[-1], memory):
|
|
49
|
+
current_group.append(memory)
|
|
50
|
+
else:
|
|
51
|
+
if len(current_group) > 1:
|
|
52
|
+
groups.append(current_group)
|
|
53
|
+
current_group = [memory]
|
|
54
|
+
if len(current_group) > 1:
|
|
55
|
+
groups.append(current_group)
|
|
56
|
+
return groups
|
|
57
|
+
|
|
58
|
+
def create_merged_draft(
|
|
59
|
+
self,
|
|
60
|
+
memories: list[MemoryItem],
|
|
61
|
+
*,
|
|
62
|
+
source_id: str,
|
|
63
|
+
llm_client: LLMClient | None = None,
|
|
64
|
+
) -> MemoryDraft:
|
|
65
|
+
if llm_client is not None:
|
|
66
|
+
merged = self._create_merged_draft_with_llm(memories, source_id=source_id, llm_client=llm_client)
|
|
67
|
+
if merged is not None:
|
|
68
|
+
return merged
|
|
69
|
+
return self._create_merged_draft_heuristic(memories, source_id=source_id)
|
|
70
|
+
|
|
71
|
+
def _should_merge(self, left: MemoryItem, right: MemoryItem) -> bool:
|
|
72
|
+
age_gap_days = abs((right.created_at - left.created_at).total_seconds()) / 86400.0
|
|
73
|
+
if age_gap_days > self.recency_window_days:
|
|
74
|
+
return False
|
|
75
|
+
if not set(entity.lower() for entity in left.entity_refs) & set(entity.lower() for entity in right.entity_refs):
|
|
76
|
+
return False
|
|
77
|
+
return self._cosine_similarity(left.embedding, right.embedding) >= self.similarity_threshold
|
|
78
|
+
|
|
79
|
+
def _create_merged_draft_with_llm(
|
|
80
|
+
self,
|
|
81
|
+
memories: list[MemoryItem],
|
|
82
|
+
*,
|
|
83
|
+
source_id: str,
|
|
84
|
+
llm_client: LLMClient,
|
|
85
|
+
) -> MemoryDraft | None:
|
|
86
|
+
prompt = "Merge these overlapping memories into one durable memory:\n" + "\n".join(
|
|
87
|
+
f"- {memory.content}" for memory in memories
|
|
88
|
+
)
|
|
89
|
+
response = llm_client.generate_json(
|
|
90
|
+
prompt=prompt,
|
|
91
|
+
schema=CONSOLIDATION_SCHEMA,
|
|
92
|
+
schema_name="memory_consolidation",
|
|
93
|
+
system_prompt=CONSOLIDATION_PROMPT,
|
|
94
|
+
)
|
|
95
|
+
content = str(response.get("content", "")).strip()
|
|
96
|
+
if not content:
|
|
97
|
+
return None
|
|
98
|
+
return MemoryDraft(
|
|
99
|
+
content=content,
|
|
100
|
+
memory_type=MemoryType(str(response.get("memory_type", "semantic"))),
|
|
101
|
+
importance=float(response.get("importance", 0.7)),
|
|
102
|
+
trust_score=max(memory.trust_score for memory in memories),
|
|
103
|
+
source_id=source_id,
|
|
104
|
+
entity_refs=sorted({entity for memory in memories for entity in memory.entity_refs}),
|
|
105
|
+
tags=sorted({tag for memory in memories for tag in memory.tags} | set(response.get("tags", [])) | {"consolidated"}),
|
|
106
|
+
)
|
|
107
|
+
|
|
108
|
+
def _create_merged_draft_heuristic(self, memories: list[MemoryItem], *, source_id: str) -> MemoryDraft:
|
|
109
|
+
ordered = sorted(
|
|
110
|
+
memories,
|
|
111
|
+
key=lambda item: (item.importance, item.trust_score, item.created_at),
|
|
112
|
+
reverse=True,
|
|
113
|
+
)
|
|
114
|
+
anchor = ordered[0]
|
|
115
|
+
return MemoryDraft(
|
|
116
|
+
content=anchor.content,
|
|
117
|
+
memory_type=anchor.memory_type,
|
|
118
|
+
importance=max(memory.importance for memory in memories),
|
|
119
|
+
trust_score=max(memory.trust_score for memory in memories),
|
|
120
|
+
source_id=source_id,
|
|
121
|
+
entity_refs=sorted({entity for memory in memories for entity in memory.entity_refs}),
|
|
122
|
+
tags=sorted({tag for memory in memories for tag in memory.tags} | {"consolidated"}),
|
|
123
|
+
)
|
|
124
|
+
|
|
125
|
+
def _cosine_similarity(self, left: list[float], right: list[float]) -> float:
|
|
126
|
+
if not left or not right:
|
|
127
|
+
return 0.0
|
|
128
|
+
size = min(len(left), len(right))
|
|
129
|
+
left_trimmed = left[:size]
|
|
130
|
+
right_trimmed = right[:size]
|
|
131
|
+
numerator = sum(a * b for a, b in zip(left_trimmed, right_trimmed, strict=False))
|
|
132
|
+
left_norm = sqrt(sum(a * a for a in left_trimmed))
|
|
133
|
+
right_norm = sqrt(sum(b * b for b in right_trimmed))
|
|
134
|
+
if left_norm == 0 or right_norm == 0:
|
|
135
|
+
return 0.0
|
|
136
|
+
return numerator / (left_norm * right_norm)
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from math import exp, log
|
|
5
|
+
|
|
6
|
+
from agent_memory.models import MemoryItem, MemoryLayer
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass(slots=True)
class ForgettingPolicy:
    """Score how strongly a memory is retained and pick the layer it belongs in."""

    # Exponent applied to the age of memories that are not in the long-term layer.
    short_term_beta: float = 1.2
    # Exponent applied to the age of long-term memories.
    long_term_beta: float = 0.8
    # Strength at or above which a memory is placed in the long-term layer.
    promote_threshold: float = 0.7
    # Strength at or below which a memory is placed in the short-term layer.
    demote_threshold: float = 0.3

    def effective_strength(self, memory: MemoryItem, age_days: float) -> float:
        """Return importance * trust * access boost * temporal decay.

        The access boost is ``1 + ln(1 + access_count)`` and the decay is
        ``exp(-decay_rate * age_days ** beta)``, with beta chosen by layer.
        A negative ``age_days`` is treated as zero.
        """
        access_boost = 1 + log(1 + max(memory.access_count, 0))
        beta = self.long_term_beta if memory.layer is MemoryLayer.LONG_TERM else self.short_term_beta
        # A negative base with a fractional exponent evaluates to a complex
        # number, which math.exp() rejects with TypeError; clamp the age first.
        non_negative_age = max(age_days, 0.0)
        temporal_decay = exp(-memory.decay_rate * (non_negative_age**beta))
        return memory.importance * memory.trust_score * access_boost * temporal_decay

    def next_layer(self, memory: MemoryItem, age_days: float) -> MemoryLayer:
        """Return the layer the memory should occupy given its current strength.

        Strength between the two thresholds leaves the memory where it is.
        """
        strength = self.effective_strength(memory, age_days=age_days)
        if strength >= self.promote_threshold:
            return MemoryLayer.LONG_TERM
        if strength <= self.demote_threshold:
            return MemoryLayer.SHORT_TERM
        return memory.layer
|
|
29
|
+
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections import defaultdict
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
import re
|
|
6
|
+
|
|
7
|
+
from agent_memory.models import QueryIntent, RetrievalPlan
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# Marker phrases (Chinese and English) for each query intent. The list is
# ordered: IntentRouter.classify returns the first intent whose markers match.
INTENT_PATTERNS: list[tuple[QueryIntent, tuple[str, ...]]] = [
    (
        QueryIntent.CAUSAL,
        ("为什么", "为何", "导致", "cause", "caused", "why"),
    ),
    (
        QueryIntent.TEMPORAL,
        ("上周", "最近", "之前", "刚才", "when", "recent", "before"),
    ),
    (
        QueryIntent.PROCEDURAL,
        ("如何", "怎么", "步骤", "how to", "how do", "step"),
    ),
    (
        QueryIntent.EXPLORATORY,
        ("关于", "all about", "everything about", "related to"),
    ),
    (
        QueryIntent.FACTUAL,
        ("什么是", "谁是", "what is", "who is", "which"),
    ),
]
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass(slots=True)
class IntentRouter:
    """Map a free-text query to an intent and a matching retrieval plan."""

    def classify(self, query: str) -> QueryIntent:
        """Return the first intent whose marker occurs in the lower-cased query.

        Matching is plain substring containment; queries with no marker are
        classified as ``QueryIntent.GENERAL``.
        """
        lowered = query.lower()
        for candidate, markers in INTENT_PATTERNS:
            for marker in markers:
                if marker in lowered:
                    return candidate
        return QueryIntent.GENERAL

    def plan(self, query: str) -> RetrievalPlan:
        """Build the retrieval plan for the query's classified intent."""
        intent = self.classify(query)

        # Default plan; also used for GENERAL and any intent not listed below.
        strategies = ["semantic", "full_text"]
        filters = None

        if intent is QueryIntent.FACTUAL:
            strategies = ["semantic", "entity", "full_text"]
        elif intent is QueryIntent.TEMPORAL:
            filters = {"sort": "recency"}
        elif intent is QueryIntent.CAUSAL:
            strategies = ["semantic", "full_text", "causal_trace"]
        elif intent is QueryIntent.EXPLORATORY:
            strategies = ["entity", "semantic", "full_text"]
        elif intent is QueryIntent.PROCEDURAL:
            filters = {"memory_type": "procedural"}

        if filters is None:
            # Leave `filters` to RetrievalPlan's own default.
            return RetrievalPlan(intent=intent, strategies=strategies)
        return RetrievalPlan(intent=intent, strategies=strategies, filters=filters)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def reciprocal_rank_fusion(rankings: dict[str, list[str]], k: int = 60) -> dict[str, float]:
    """Fuse several ranked id lists into one score table.

    Each id receives ``1 / (k + rank)`` from every ranking it appears in, with
    ranks starting at 1. The result maps id to summed score, ordered from the
    highest score to the lowest.
    """
    fused: dict[str, float] = {}
    for ranking in rankings.values():
        position = 0
        for doc_id in ranking:
            position += 1
            fused[doc_id] = fused.get(doc_id, 0.0) + 1.0 / (k + position)
    best_first = sorted(fused.items(), key=lambda entry: entry[1], reverse=True)
    return dict(best_first)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def strip_intent_markers(query: str) -> str:
    """Replace known intent phrases in the query with a space and trim the ends.

    Matching is case-insensitive. Interior whitespace left behind by a
    replacement is not collapsed.
    """
    without_markers = re.sub(
        r"(为什么|为何|导致|what is|who is|how to|how do|all about|everything about)",
        " ",
        query,
        flags=re.IGNORECASE,
    )
    return without_markers.strip()
|
|
62
|
+
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@dataclass(slots=True)
|
|
7
|
+
class TrustScorer:
|
|
8
|
+
recency_weight: float = 0.15
|
|
9
|
+
corroboration_weight: float = 0.15
|
|
10
|
+
contradiction_weight: float = 0.2
|
|
11
|
+
source_weight: float = 0.5
|
|
12
|
+
|
|
13
|
+
def score(
|
|
14
|
+
self,
|
|
15
|
+
*,
|
|
16
|
+
source_reliability: float,
|
|
17
|
+
corroboration_count: int = 0,
|
|
18
|
+
contradiction_count: int = 0,
|
|
19
|
+
age_days: float = 0.0,
|
|
20
|
+
) -> float:
|
|
21
|
+
recency_bonus = max(0.0, 1.0 - min(age_days, 90.0) / 90.0)
|
|
22
|
+
corroboration_bonus = min(corroboration_count, 5) / 5.0
|
|
23
|
+
contradiction_penalty = min(contradiction_count, 5) / 5.0
|
|
24
|
+
raw_score = (
|
|
25
|
+
source_reliability * self.source_weight
|
|
26
|
+
+ recency_bonus * self.recency_weight
|
|
27
|
+
+ corroboration_bonus * self.corroboration_weight
|
|
28
|
+
- contradiction_penalty * self.contradiction_weight
|
|
29
|
+
)
|
|
30
|
+
return max(0.0, min(1.0, raw_score))
|
|
31
|
+
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class LocalEmbeddingProvider:
    """Embed text with sentence-transformers, or with a hash-based stand-in.

    When ``sentence_transformers`` cannot be imported, ``embed`` returns
    deterministic vectors derived from SHA-256 digests of the text.
    """

    def __init__(self, dimension: int = 384, model_name: str = "sentence-transformers/all-MiniLM-L6-v2") -> None:
        """Store the fallback vector length and the model to load lazily."""
        self.dimension = dimension
        self.model_name = model_name
        self._model = None
        # Set once the optional dependency is found to be missing, so the
        # failing import is not retried on every embed() call.
        self._import_failed = False

    def embed(self, texts: list[str]) -> list[list[float]]:
        """Return one vector of plain Python floats per input text."""
        if not texts:
            # Nothing to embed; avoid loading the model for an empty batch.
            return []
        model = self._load_model()
        if model is None:
            return [self._hash_embed(text) for text in texts]
        encoded = model.encode(texts, normalize_embeddings=True)
        # Convert each component so callers get built-in floats, as annotated,
        # rather than the encoder's own scalar type.
        return [[float(component) for component in vector] for vector in encoded]

    def _load_model(self):
        """Return the cached model, loading it on first use; ``None`` if unavailable."""
        if self._model is not None:
            return self._model
        if self._import_failed:
            return None
        try:
            from sentence_transformers import SentenceTransformer
        except ImportError:
            self._import_failed = True
            return None
        self._model = SentenceTransformer(self.model_name)
        return self._model

    def _hash_embed(self, text: str) -> list[float]:
        """Return ``self.dimension`` values in ``[-1, 1]`` derived from SHA-256.

        Each digest byte maps to one component; when the digest is used up it
        is re-hashed to produce further bytes.
        """
        digest = hashlib.sha256(text.encode("utf-8")).digest()
        values: list[float] = []
        while len(values) < self.dimension:
            for byte in digest:
                values.append((byte / 255.0) * 2 - 1)
                if len(values) == self.dimension:
                    break
            digest = hashlib.sha256(digest).digest()
        return values
|
|
38
|
+
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class OpenAIEmbeddingProvider:
    """Placeholder embedding provider; ``embed`` always raises RuntimeError."""

    def __init__(self, model: str = "text-embedding-3-small", dimension: int = 1536) -> None:
        """Record the model name and the vector dimension."""
        self.model, self.dimension = model, dimension

    def embed(self, texts: list[str]) -> list[list[float]]:
        """Raise RuntimeError: no API call is wired in."""
        message = "Install the OpenAI SDK and wire API calls before using OpenAIEmbeddingProvider."
        raise RuntimeError(message)
|
|
11
|
+
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class EntityExtractor:
    """Pull candidate entity names out of free text with three regex passes."""

    def extract(self, text: str) -> list[str]:
        """Return the sorted, de-duplicated entity candidates found in ``text``.

        Candidates are hashtag bodies (without the ``#``), words starting with
        an ASCII capital letter, and runs of 2 to 8 CJK ideographs.
        """
        candidates: set[str] = set()
        for pattern in (
            r"#([\w-]+)",
            r"\b[A-Z][A-Za-z0-9_-]{1,}\b",
            r"[\u4e00-\u9fff]{2,8}",
        ):
            candidates |= set(re.findall(pattern, text))
        trimmed = [candidate.strip() for candidate in candidates]
        return sorted(name for name in trimmed if name)
|
|
13
|
+
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
from agent_memory.extraction.entity_extractor import EntityExtractor
|
|
7
|
+
from agent_memory.extraction.prompts import EXTRACT_FACTS_PROMPT
|
|
8
|
+
from agent_memory.llm.base import LLMClient
|
|
9
|
+
from agent_memory.models import ConversationTurn, MemoryDraft, MemoryType
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# Phrases (Chinese and English) that raise a sentence's score in
# ConversationMemoryPipeline._sentence_score.
PREFERENCE_MARKERS = (
    "喜欢",
    "偏好",
    "需要",
    "负责",
    "正在做",
    "prefer",
    "need",
    "working on",
    "building",
    "goal",
    "目标",
    "because",
    "因为",
)

# Acknowledgement / filler phrases; heuristic extraction skips sentences
# that contain one of them.
IGNORE_PATTERNS = (
    "谢谢",
    "收到",
    "好的",
    "ok",
    "okay",
    "got it",
    "明白",
    "哈哈",
    "lol",
)

# JSON Schema for the LLM extraction reply: an object holding a `memories`
# array whose items carry exactly the five listed fields.
MEMORY_EXTRACTION_SCHEMA: dict[str, Any] = dict(
    type="object",
    properties=dict(
        memories=dict(
            type="array",
            items=dict(
                type="object",
                properties=dict(
                    content=dict(type="string"),
                    memory_type=dict(type="string", enum=["semantic", "episodic", "procedural"]),
                    importance=dict(type="number"),
                    trust_score=dict(type="number"),
                    tags=dict(type="array", items=dict(type="string")),
                ),
                required=["content", "memory_type", "importance", "trust_score", "tags"],
                additionalProperties=False,
            ),
        ),
    ),
    required=["memories"],
    additionalProperties=False,
)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class ConversationMemoryPipeline:
    """Turn conversation turns into memory drafts, via an LLM or heuristics."""

    def __init__(self, entity_extractor: EntityExtractor | None = None, llm_client: LLMClient | None = None) -> None:
        """Store the collaborators; a default EntityExtractor is created if none is given."""
        self.entity_extractor = entity_extractor or EntityExtractor()
        self.llm_client = llm_client

    def extract(self, turns: list[ConversationTurn], source_id: str) -> list[MemoryDraft]:
        """Return memory drafts for the conversation.

        The LLM path is tried first when a client is configured. If it raises
        or produces no drafts, the heuristic path is used instead.
        """
        if self.llm_client is not None:
            try:
                drafts = self._extract_with_llm(turns=turns, source_id=source_id)
            except Exception:
                # Deliberate best-effort: any LLM failure falls back to the
                # heuristics, but leave a trace so outages are diagnosable.
                import logging

                logging.getLogger(__name__).warning(
                    "LLM memory extraction failed; falling back to heuristics",
                    exc_info=True,
                )
            else:
                if drafts:
                    return drafts
        return self._extract_heuristically(turns=turns, source_id=source_id)

    def _extract_with_llm(self, turns: list[ConversationTurn], source_id: str) -> list[MemoryDraft]:
        """Ask the LLM for memories and convert each non-empty item into a draft."""
        transcript = "\n".join(f"{turn.role}: {turn.content}" for turn in turns)
        response = self.llm_client.generate_json(
            prompt=f"Conversation:\n{transcript}",
            schema=MEMORY_EXTRACTION_SCHEMA,
            schema_name="memory_extraction",
            system_prompt=EXTRACT_FACTS_PROMPT,
        )
        drafts: list[MemoryDraft] = []
        for item in response.get("memories", []):
            content = str(item.get("content", "")).strip()
            if not content:
                continue
            drafts.append(
                MemoryDraft(
                    content=content,
                    memory_type=MemoryType(str(item.get("memory_type", "semantic"))),
                    importance=float(item.get("importance", 0.5)),
                    trust_score=float(item.get("trust_score", 0.7)),
                    source_id=source_id,
                    entity_refs=self.entity_extractor.extract(content),
                    tags=list(item.get("tags", [])),
                )
            )
        return drafts

    def _extract_heuristically(self, turns: list[ConversationTurn], source_id: str) -> list[MemoryDraft]:
        """Build drafts from user sentences that score at least 1.

        Only turns whose role is ``"user"`` are considered. Sentences shorter
        than 6 characters, sentences containing an ignore phrase, and exact
        duplicates are skipped.
        """
        drafts: list[MemoryDraft] = []
        seen_contents: set[str] = set()
        for turn in turns:
            if turn.role != "user":
                continue
            # Split on CJK and ASCII sentence terminators. The fullwidth "!"
            # and "?" are written as escapes so they survive text normalisation.
            segments = re.split(r"[。\uff01\uff1f!?.]", turn.content)
            for sentence in (segment.strip() for segment in segments):
                if len(sentence) < 6:
                    continue
                normalized = sentence.lower()
                if self._contains_phrase(normalized, IGNORE_PATTERNS):
                    continue
                score = self._sentence_score(normalized)
                if score < 1:
                    continue
                if sentence in seen_contents:
                    continue
                seen_contents.add(sentence)
                drafts.append(
                    MemoryDraft(
                        content=sentence,
                        memory_type=MemoryType.PROCEDURAL if self._looks_procedural(normalized) else MemoryType.SEMANTIC,
                        importance=min(1.0, 0.4 + 0.15 * score),
                        trust_score=0.75,
                        source_id=source_id,
                        entity_refs=self.entity_extractor.extract(content=sentence)
                        if False
                        else self.entity_extractor.extract(sentence),
                        tags=["conversation", turn.role, "heuristic"],
                    )
                )
        return drafts

    @staticmethod
    def _contains_phrase(text: str, phrases: tuple[str, ...]) -> bool:
        """Return True if ``text`` contains any of ``phrases`` as a standalone phrase.

        ASCII phrases must not be adjacent to an ASCII letter or digit, so
        "ok" matches "ok, thanks" but not "book" or "token". Non-ASCII phrases
        are matched as plain substrings. ``text`` is expected to be lower-cased.
        """
        for phrase in phrases:
            if phrase.isascii():
                if re.search(rf"(?<![a-z0-9]){re.escape(phrase)}(?![a-z0-9])", text):
                    return True
            elif phrase in text:
                return True
        return False

    def _sentence_score(self, normalized_sentence: str) -> int:
        """Score a lower-cased sentence from 0 to 3.

        One point each for: a preference marker, a habit/reason keyword, and
        a length above 32 characters.
        """
        score = 0
        if any(marker in normalized_sentence for marker in PREFERENCE_MARKERS):
            score += 1
        if any(keyword in normalized_sentence for keyword in ("always", "usually", "habit", "习惯", "偏好", "因为", "reason")):
            score += 1
        if len(normalized_sentence) > 32:
            score += 1
        return score

    def _looks_procedural(self, normalized_sentence: str) -> bool:
        """Return True if the sentence contains a how-to or sequencing marker."""
        return any(marker in normalized_sentence for marker in ("how to", "步骤", "流程", "先", "然后", "最后"))
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# System prompt for extracting durable memories from a conversation transcript.
EXTRACT_FACTS_PROMPT = """
You extract durable long-term memories from conversations for an agent memory engine.

Only keep information that is likely to matter across future sessions.
Prioritize:
- stable preferences
- identity or role information
- long-running goals
- durable procedural knowledge
- causal explanations for important decisions

Ignore:
- chit-chat
- acknowledgements
- one-off small talk
- transient details unless they explain an important choice

Return JSON only.
"""

# System prompt for judging how two candidate memories relate to each other.
CONFLICT_JUDGE_PROMPT = """
You compare two candidate memories and decide whether they:
- contradict
- supersede
- support each other
- should both be kept
Return a short rationale and a resolution label.
"""

# System prompt for merging overlapping memories into one canonical memory.
CONSOLIDATION_PROMPT = """
You merge semantically overlapping memories into one durable summary.

Produce one canonical memory that:
- preserves the most useful stable facts
- keeps causal rationale when important
- avoids duplicates
- uses concise wording

Return JSON only.
"""
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
from agent_memory.governance.audit import AuditLogReader
|
|
2
|
+
from agent_memory.governance.export import MemoryExporter, MemoryImporter
|
|
3
|
+
from agent_memory.governance.health import MemoryHealthMonitor
|
|
4
|
+
|
|
5
|
+
__all__ = ["AuditLogReader", "MemoryExporter", "MemoryHealthMonitor", "MemoryImporter"]
|
|
6
|
+
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
|
|
5
|
+
from agent_memory.storage.sqlite_backend import SQLiteBackend
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@dataclass(slots=True)
|
|
9
|
+
class AuditLogReader:
|
|
10
|
+
backend: SQLiteBackend
|
|
11
|
+
|
|
12
|
+
def recent(self, limit: int = 50) -> list[dict[str, object]]:
|
|
13
|
+
return self.backend.get_audit_events(limit=limit)
|
|
14
|
+
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import asdict, dataclass
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from enum import Enum
|
|
6
|
+
import json
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from agent_memory.models import MemoryItem, MemoryLayer, MemoryType, RelationEdge, RelationType
|
|
10
|
+
from agent_memory.storage.sqlite_backend import SQLiteBackend
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _serialize_value(value: object) -> object:
|
|
14
|
+
if isinstance(value, datetime):
|
|
15
|
+
return value.isoformat()
|
|
16
|
+
if isinstance(value, Enum):
|
|
17
|
+
return value.value
|
|
18
|
+
if isinstance(value, list):
|
|
19
|
+
return [_serialize_value(item) for item in value]
|
|
20
|
+
return value
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@dataclass(slots=True)
class MemoryExporter:
    """Dump the backend's memories and relations to a JSON Lines file."""

    backend: SQLiteBackend

    def export_jsonl(self, path: str) -> int:
        """Write one JSON object per line to ``path`` and return the memory count.

        Memories (deleted ones included) are written first as
        ``{"type": "memory", ...}`` records, then relations as
        ``{"type": "relation", ...}`` records. Relations are not counted in
        the return value.
        """
        exported = 0
        with Path(path).open("w", encoding="utf-8") as handle:

            def emit(kind: str, record: object) -> None:
                # Flatten the dataclass and make every field JSON-friendly.
                payload = {field: _serialize_value(raw) for field, raw in asdict(record).items()}
                line = json.dumps({"type": kind, "payload": payload}, ensure_ascii=False)
                handle.write(line + "\n")

            for memory in self.backend.list_memories(include_deleted=True):
                emit("memory", memory)
                exported += 1
            for relation in self.backend.list_relations():
                emit("relation", relation)
        return exported
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@dataclass(slots=True)
|
|
42
|
+
class MemoryImporter:
|
|
43
|
+
backend: SQLiteBackend
|
|
44
|
+
|
|
45
|
+
def import_jsonl(self, path: str) -> int:
|
|
46
|
+
source = Path(path)
|
|
47
|
+
count = 0
|
|
48
|
+
memories: list[MemoryItem] = []
|
|
49
|
+
relations: list[RelationEdge] = []
|
|
50
|
+
with source.open("r", encoding="utf-8") as handle:
|
|
51
|
+
for line in handle:
|
|
52
|
+
entry = json.loads(line)
|
|
53
|
+
if entry["type"] == "memory":
|
|
54
|
+
payload = entry["payload"]
|
|
55
|
+
for key in ("created_at", "last_accessed", "valid_from", "valid_until", "deleted_at"):
|
|
56
|
+
if payload.get(key):
|
|
57
|
+
payload[key] = datetime.fromisoformat(payload[key])
|
|
58
|
+
payload["memory_type"] = MemoryType(payload["memory_type"])
|
|
59
|
+
payload["layer"] = MemoryLayer(payload["layer"])
|
|
60
|
+
memories.append(MemoryItem(**payload))
|
|
61
|
+
elif entry["type"] == "relation":
|
|
62
|
+
payload = entry["payload"]
|
|
63
|
+
payload["created_at"] = datetime.fromisoformat(payload["created_at"])
|
|
64
|
+
payload["relation_type"] = RelationType(payload["relation_type"])
|
|
65
|
+
relations.append(RelationEdge(**payload))
|
|
66
|
+
for memory in sorted(memories, key=lambda item: (item.causal_parent_id is not None, item.created_at)):
|
|
67
|
+
if self.backend.get_memory(memory.id) is None:
|
|
68
|
+
self.backend.add_memory(memory)
|
|
69
|
+
count += 1
|
|
70
|
+
for edge in relations:
|
|
71
|
+
self.backend.add_relation(edge)
|
|
72
|
+
return count
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from agent_memory.models import HealthReport
|
|
7
|
+
from agent_memory.storage.sqlite_backend import SQLiteBackend
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass(slots=True)
class MemoryHealthMonitor:
    """Summarise the backend's health snapshot into a HealthReport with advice."""

    backend: SQLiteBackend

    def generate(self) -> HealthReport:
        """Build a HealthReport from ``backend.health_snapshot()``.

        Suggestions are added when the stale ratio reaches 0.3, the orphan
        ratio reaches 0.2, or any unresolved conflict exists. The database
        size is 0 for an in-memory database or a missing file.
        """
        snapshot = self.backend.health_snapshot()

        checks = (
            (snapshot["stale_ratio"] >= 0.3, "30%+ memories are stale; run a forgetting cycle."),
            (snapshot["orphan_ratio"] >= 0.2, "Orphan ratio is high; consolidate or attach relation edges."),
            (snapshot["unresolved_conflicts"] > 0, "Resolve contradiction edges to improve trust calibration."),
        )
        suggestions: list[str] = [advice for triggered, advice in checks if triggered]

        database_size = 0
        location = self.backend.database_path
        if location != ":memory:":
            db_file = Path(location)
            database_size = db_file.stat().st_size if db_file.exists() else 0

        return HealthReport(
            total_memories=int(snapshot["total_memories"]),
            stale_ratio=float(snapshot["stale_ratio"]),
            orphan_ratio=float(snapshot["orphan_ratio"]),
            unresolved_conflicts=int(snapshot["unresolved_conflicts"]),
            average_trust_score=float(snapshot["average_trust_score"]),
            database_size_bytes=database_size,
            audit_events=int(snapshot["audit_events"]),
            suggestions=suggestions,
        )
|
|
40
|
+
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from importlib import import_module
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
__all__ = ["create_mcp_server", "create_rest_app"]
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def __getattr__(name: str) -> Any:
    """Resolve the package's factory functions lazily on first attribute access.

    The submodule that defines the requested factory is imported only when
    the name is looked up. Any other name raises AttributeError.
    """
    lazy_modules = {
        "create_mcp_server": "agent_memory.interfaces.mcp_server",
        "create_rest_app": "agent_memory.interfaces.rest_api",
    }
    module_path = lazy_modules.get(name)
    if module_path is None:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
    # Each factory carries the same name as the attribute being requested.
    return getattr(import_module(module_path), name)
|