memorytrace 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- engram/__init__.py +8 -0
- engram/__main__.py +6 -0
- engram/cli/__init__.py +1 -0
- engram/cli/app.py +291 -0
- engram/cli/formatters.py +90 -0
- engram/cli/simple.py +267 -0
- engram/config.py +72 -0
- engram/engine.py +612 -0
- engram/exceptions.py +41 -0
- engram/extraction/__init__.py +6 -0
- engram/extraction/base.py +20 -0
- engram/extraction/llm_extractor.py +197 -0
- engram/extraction/ner/__init__.py +7 -0
- engram/extraction/ner/cjk.py +63 -0
- engram/extraction/ner/english.py +109 -0
- engram/extraction/ner/korean.py +106 -0
- engram/extraction/regex_extractor.py +188 -0
- engram/integrations/__init__.py +1 -0
- engram/integrations/mcp_server.py +213 -0
- engram/integrations/sdk.py +194 -0
- engram/models/__init__.py +19 -0
- engram/models/entity.py +72 -0
- engram/models/fact.py +58 -0
- engram/models/quality.py +61 -0
- engram/models/relation.py +26 -0
- engram/models/search.py +96 -0
- engram/models/session.py +53 -0
- engram/models/source.py +73 -0
- engram/quality/__init__.py +8 -0
- engram/quality/confidence.py +38 -0
- engram/quality/conflict.py +79 -0
- engram/quality/decay.py +28 -0
- engram/quality/gate.py +120 -0
- engram/quality/pii.py +80 -0
- engram/search/__init__.py +13 -0
- engram/search/base.py +20 -0
- engram/search/fts5_search.py +210 -0
- engram/search/hybrid.py +99 -0
- engram/search/semantic.py +186 -0
- engram/search/tokenizer.py +85 -0
- engram/session/__init__.py +6 -0
- engram/session/context.py +87 -0
- engram/session/manager.py +152 -0
- engram/session/working_memory.py +57 -0
- engram/storage/__init__.py +6 -0
- engram/storage/base.py +63 -0
- engram/storage/markdown_export.py +144 -0
- engram/storage/migrations.py +30 -0
- engram/storage/sqlite_store.py +615 -0
- memorytrace-0.1.0.dist-info/METADATA +138 -0
- memorytrace-0.1.0.dist-info/RECORD +54 -0
- memorytrace-0.1.0.dist-info/WHEEL +4 -0
- memorytrace-0.1.0.dist-info/entry_points.txt +3 -0
- memorytrace-0.1.0.dist-info/licenses/LICENSE +21 -0
engram/config.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
"""Engram configuration."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Optional
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass
class EngramConfig:
    """Central configuration for the Engram memory system.

    Every field has a default, so ``EngramConfig()`` is usable as-is.
    Use :meth:`from_dict` to overlay values loaded from a plain mapping.
    """

    # Storage
    base_dir: Path = field(default_factory=lambda: Path.home() / ".engram")
    db_name: str = "memory.db"
    enable_markdown_export: bool = True
    export_dir_name: str = "readable"

    # Search
    search_backend: str = "fts5"  # "fts5" | "semantic" | "hybrid"
    max_search_results: int = 20
    default_token_budget: int = 500
    fts5_tokenizer: str = "unicode61 remove_diacritics 2"

    # Extraction
    extractor_backend: str = "regex"  # "regex" | "llm"
    llm_provider: Optional[str] = None  # "anthropic" | "openai"
    llm_model: Optional[str] = None

    # Quality
    min_confidence: float = 0.3
    auto_resolve_threshold: float = 0.3  # trust diff for auto-supersede
    enable_pii_detection: bool = True

    # Session
    default_agent_id: str = "default"

    # Maintenance
    thin_page_threshold: int = 300  # bytes
    decay_days: int = 90
    compact_days: int = 180

    @property
    def db_path(self) -> Path:
        """Return the database file location: ``base_dir / db_name``."""
        return self.base_dir / self.db_name

    @property
    def export_dir(self) -> Path:
        """Return the Markdown export directory: ``base_dir / export_dir_name``."""
        return self.base_dir / self.export_dir_name

    def ensure_dirs(self) -> None:
        """Create required directories if they don't exist.

        ``base_dir`` is always created; ``export_dir`` only when
        ``enable_markdown_export`` is true.
        """
        self.base_dir.mkdir(parents=True, exist_ok=True)
        if self.enable_markdown_export:
            self.export_dir.mkdir(parents=True, exist_ok=True)

    @classmethod
    def from_dict(cls, data: dict) -> EngramConfig:
        """Build a config from a mapping, keeping defaults for absent keys.

        Args:
            data: Mapping of field name to value. Keys that are not
                configuration fields are ignored. ``base_dir`` is
                user-expanded and resolved to an absolute path; every
                other value is assigned as given, without validation.

        Returns:
            A new ``EngramConfig`` with the supplied overrides applied.
        """
        config = cls()
        if "base_dir" in data:
            config.base_dir = Path(data["base_dir"]).expanduser().resolve()
        for key in (
            "db_name", "enable_markdown_export", "export_dir_name",
            "search_backend", "max_search_results", "default_token_budget",
            # Previously missing, so a configured tokenizer was silently dropped.
            # NOTE(review): confirm how the search layer consumes this string
            # before loading it from untrusted input.
            "fts5_tokenizer",
            "extractor_backend", "llm_provider", "llm_model",
            "min_confidence", "auto_resolve_threshold", "enable_pii_detection",
            "default_agent_id", "thin_page_threshold", "decay_days", "compact_days",
        ):
            if key in data:
                setattr(config, key, data[key])
        return config
|
engram/engine.py
ADDED
|
@@ -0,0 +1,612 @@
|
|
|
1
|
+
"""MemoryEngine — the main orchestrator that composes all modules.
|
|
2
|
+
|
|
3
|
+
This is the single public entry point for the Engram memory system.
|
|
4
|
+
No print() calls — all output is via structured return values.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import copy
|
|
10
|
+
import json
|
|
11
|
+
from dataclasses import dataclass, field
|
|
12
|
+
from typing import Optional
|
|
13
|
+
|
|
14
|
+
from engram.config import EngramConfig
|
|
15
|
+
from engram.exceptions import EntityAlreadyExistsError, EntityNotFoundError
|
|
16
|
+
from engram.models.entity import Entity, Tier
|
|
17
|
+
from engram.models.fact import Fact, FactStatus
|
|
18
|
+
from engram.models.quality import Action, ValidationResult
|
|
19
|
+
from engram.models.relation import Relation
|
|
20
|
+
from engram.models.search import SearchOptions, SearchResult
|
|
21
|
+
from engram.models.session import Session
|
|
22
|
+
from engram.models.source import Source, SourceType
|
|
23
|
+
from engram.quality.gate import QualityGate
|
|
24
|
+
from engram.search.fts5_search import FTS5Search
|
|
25
|
+
from engram.session.context import build_session_context
|
|
26
|
+
from engram.session.manager import SessionManager
|
|
27
|
+
from engram.session.working_memory import WorkingMemory
|
|
28
|
+
from engram.storage.sqlite_store import SQLiteStorage
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@dataclass
class StoreResult:
    """Outcome of a store operation, grouped by what happened to each item."""

    entities_created: list[Entity] = field(default_factory=list)
    entities_updated: list[Entity] = field(default_factory=list)
    facts_added: list[Fact] = field(default_factory=list)
    facts_quarantined: list[Fact] = field(default_factory=list)
    facts_conflicted: list[Fact] = field(default_factory=list)
    relations_added: list[Relation] = field(default_factory=list)
    validations: list[ValidationResult] = field(default_factory=list)

    def to_dict(self) -> dict:
        """Return a JSON-friendly summary of this result.

        Entities, added facts and relations are listed item by item;
        quarantined and conflicted facts are reported as counts only.
        ``validations`` is not included.
        """

        def entity_rows(entities):
            return [{"name": item.name, "type": item.entity_type} for item in entities]

        summary: dict = {}
        summary["entities_created"] = entity_rows(self.entities_created)
        summary["entities_updated"] = entity_rows(self.entities_updated)
        summary["facts_added"] = [
            {"subject": item.subject, "predicate": item.predicate, "object": item.object}
            for item in self.facts_added
        ]
        summary["facts_quarantined"] = len(self.facts_quarantined)
        summary["facts_conflicted"] = len(self.facts_conflicted)
        summary["relations_added"] = [
            {"type": item.relation_type} for item in self.relations_added
        ]
        return summary

    def to_json(self) -> str:
        """Serialize :meth:`to_dict` as indented JSON, keeping non-ASCII text."""
        payload = self.to_dict()
        return json.dumps(payload, ensure_ascii=False, indent=2)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class MemoryEngine:
|
|
62
|
+
"""Main entry point. Composes storage, search, extraction, quality, sessions.
|
|
63
|
+
|
|
64
|
+
Design principles:
|
|
65
|
+
- No print() — all output via structured return values
|
|
66
|
+
- All writes go through quality gate
|
|
67
|
+
- Session tracking for every operation
|
|
68
|
+
"""
|
|
69
|
+
|
|
70
|
+
def __init__(self, config: Optional[EngramConfig] = None):
    """Wire up storage, search, quality gate and session tracking.

    Args:
        config: Settings to use; a default ``EngramConfig`` is built
            when omitted. Its directories are created before any
            component is opened.
    """
    if not config:
        config = EngramConfig()
    self.config = config
    self.config.ensure_dirs()

    # Core components; storage and search are both given the same db path.
    database = self.config.db_path
    self.storage = SQLiteStorage(database)
    self.storage.initialize()
    self.search_engine = FTS5Search(database)
    self.quality_gate = QualityGate(self.config, self.storage)
    self.session_mgr = SessionManager(self.storage)

    # Markdown exporter is only built when exporting is enabled.
    self._md_exporter = None
    if self.config.enable_markdown_export:
        from engram.storage.markdown_export import MarkdownExporter

        self._md_exporter = MarkdownExporter(self.config.export_dir)

    # Built on first access of the `extractor` property.
    self._extractor = None

    # One WorkingMemory per session, keyed by session id.
    self._working_memories: dict[str, WorkingMemory] = {}
|
|
92
|
+
|
|
93
|
+
@property
def extractor(self):
    """Return the extractor, building and caching it on first access."""
    cached = self._extractor
    if cached is None:
        cached = self._extractor = self._build_extractor()
    return cached
|
|
98
|
+
|
|
99
|
+
def close(self) -> None:
    """Close the storage and search connections.

    The search engine is closed even when closing storage raises, so a
    failure in one does not leak the other. The storage error, if any,
    still propagates to the caller.
    """
    try:
        self.storage.close()
    finally:
        self.search_engine.close()
|
|
103
|
+
|
|
104
|
+
# ── Read Operations ──
|
|
105
|
+
|
|
106
|
+
def search(
    self,
    query: str,
    options: Optional[SearchOptions] = None,
    session_id: Optional[str] = None,
) -> SearchResult:
    """Run a search and favour entities active in the session's working memory.

    Args:
        query: Search text. It overwrites ``options.query`` when
            ``options`` is supplied (the passed object is mutated).
        options: Search options; built from ``query`` when omitted.
        session_id: Session whose working memory should influence ranking.

    Returns:
        The search engine's result. When the session has active
        entities and there are hits, matching hits have their relevance
        score multiplied by 1.5, hits are re-sorted by descending score,
        and the query is recorded in working memory.
    """
    if options is not None:
        options.query = query
    else:
        options = SearchOptions(query=query)
    found = self.search_engine.search(options)

    if not session_id:
        return found
    memory = self._working_memories.get(session_id)
    if not (memory and memory.active_entities and found.hits):
        return found

    # Session context awareness: boost what the session is already working with.
    for candidate in found.hits:
        if candidate.entity.id in memory.active_entities:
            candidate.relevance_score *= 1.5
    found.hits.sort(key=lambda h: h.relevance_score, reverse=True)
    memory.add_query(query)
    return found
|
|
129
|
+
|
|
130
|
+
def get_entity(self, name: str) -> Optional[Entity]:
    """Fetch an entity by name and register the access.

    Returns whatever storage returns for ``name``; when that is a
    truthy entity, its access bookkeeping is updated via
    ``_touch_entity`` first.
    """
    found = self.storage.get_entity_by_name(name)
    if not found:
        return found
    self._touch_entity(found)
    return found
|
|
136
|
+
|
|
137
|
+
def _touch_entity(self, entity: Entity) -> None:
    """Increment access_count without triggering FTS reindex.

    Writes the counter and timestamp straight to the ``entities`` row
    through the storage connection, commits, then mirrors the change on
    the in-memory ``entity``.

    Args:
        entity: Entity whose ``access_count`` and ``last_accessed``
            are updated in place.
    """
    from datetime import datetime

    # Take the timestamp once so the stored value and the in-memory
    # value are identical (previously two separate now() calls).
    accessed_at = datetime.now()
    conn = self.storage._conn
    conn.execute(
        "UPDATE entities SET access_count = access_count + 1, last_accessed = ? WHERE id = ?",
        (accessed_at.isoformat(), entity.id),
    )
    conn.commit()
    entity.access_count += 1
    entity.last_accessed = accessed_at
|
|
147
|
+
|
|
148
|
+
def get_entity_by_id(self, entity_id: str) -> Optional[Entity]:
    """Return the entity storage holds for ``entity_id``.

    Unlike ``get_entity``, this lookup does not update access bookkeeping.
    """
    store = self.storage
    return store.get_entity(entity_id)
|
|
151
|
+
|
|
152
|
+
def get_facts(self, entity_name: str, current_only: bool = True) -> list[Fact]:
    """Return the facts stored for the named entity.

    Args:
        entity_name: Name used to look the entity up.
        current_only: When true, ask storage for current facts only;
            otherwise for all facts of the entity.

    Returns:
        The facts from storage, or an empty list when no entity has that name.
    """
    owner = self.storage.get_entity_by_name(entity_name)
    if not owner:
        return []
    fetch = self.storage.get_current_facts if current_only else self.storage.get_facts
    return fetch(owner.id)
|
|
160
|
+
|
|
161
|
+
def list_entities(
    self,
    tier: Optional[Tier] = None,
    entity_type: Optional[str] = None,
    limit: int = 100,
) -> list[Entity]:
    """List entities from storage, forwarding the optional filters.

    Args:
        tier: Restrict to this tier when given.
        entity_type: Restrict to this entity type when given.
        limit: Maximum number of entities requested from storage.
    """
    filters = {"tier": tier, "entity_type": entity_type, "limit": limit}
    return self.storage.list_entities(**filters)
|
|
169
|
+
|
|
170
|
+
def get_relations(self, entity_name: str) -> list[Relation]:
    """Return the relations of the named entity, or ``[]`` if it is unknown."""
    subject = self.storage.get_entity_by_name(entity_name)
    return self.storage.get_relations(subject.id) if subject else []
|
|
176
|
+
|
|
177
|
+
# ── Write Operations (through quality gate) ──
|
|
178
|
+
|
|
179
|
+
def store(
    self,
    text: str,
    source: Optional[Source] = None,
    session_id: Optional[str] = None,
) -> StoreResult:
    """Extract entities, facts, and relations from text and store them.

    All facts pass through the quality gate before storage.

    Args:
        text: Input to extract from. Empty or whitespace-only text
            yields an empty result without touching storage.
        source: Provenance attached to each fact (a shallow copy per
            fact); defaults to a ``USER_INPUT`` source.
        session_id: When given, entity and accepted-fact changes are
            recorded on that session and its working memory is updated.

    Returns:
        A ``StoreResult`` listing created/updated entities, facts by
        gate outcome, stored relations and every validation result.
    """
    if not text or not text.strip():
        return StoreResult()

    source = source or Source(type=SourceType.USER_INPUT)
    result = StoreResult()
    ext = self.extractor

    # Steps 1-2: extract entities, then create them or map them onto stored ones.
    all_entities: list[Entity] = []
    for entity in ext.extract_entities(text):
        existing = self.storage.get_entity_by_name(entity.name)
        if not existing:
            try:
                self.storage.create_entity(entity)
            except EntityAlreadyExistsError:
                # TOCTOU: another writer created it between our check and create.
                existing = self.storage.get_entity_by_name(entity.name)
                if not existing:
                    # Still not retrievable; keep the extracted entity so
                    # fact/relation extraction can refer to it.
                    all_entities.append(entity)
                    continue
            else:
                result.entities_created.append(entity)
                all_entities.append(entity)
                if session_id:
                    self.session_mgr.record_entity_modified(session_id, entity.id)
                continue

        # Entity already stored, whether found up front or after losing the
        # create race. Both paths now record the modification on the session;
        # the race path used to skip it.
        result.entities_updated.append(existing)
        entity.id = existing.id
        all_entities.append(existing)
        if session_id:
            self.session_mgr.record_entity_modified(session_id, existing.id)

    # Step 3: extract and validate facts (reindexing is deferred to step 3.6).
    entities_to_reindex: set[str] = set()
    for fact in ext.extract_facts(text, all_entities):
        fact_source = copy.copy(source)
        if fact_source.session_id is None and session_id:
            fact_source.session_id = session_id
        fact.source = fact_source

        # NOTE(review): extraction_method is fixed to "regex" even when the
        # configured extractor backend is "llm" — confirm this is intended.
        validation = self.quality_gate.validate(fact, extraction_method="regex")
        result.validations.append(validation)

        action = validation.action
        if action == Action.ACCEPT:
            bucket = result.facts_added
        elif action == Action.QUARANTINE:
            bucket = result.facts_quarantined
        elif action == Action.FLAG_CONFLICT:
            bucket = result.facts_conflicted
        else:
            continue  # REJECT (or any other action): nothing is stored

        self.storage.add_fact(fact, reindex=False)
        bucket.append(fact)
        entities_to_reindex.add(fact.entity_id)

        if action == Action.ACCEPT and session_id:
            self.session_mgr.record_fact_added(session_id, fact.id)
        if action == Action.FLAG_CONFLICT:
            for conflict in validation.conflicts:
                prior = conflict.existing_fact
                self.storage.add_conflict({
                    "id": conflict.conflict_id,
                    "existing_fact_id": prior.id if prior else None,
                    "new_fact_id": fact.id,
                    "conflict_type": conflict.conflict_type,
                    "suggested_resolution": conflict.suggested_resolution,
                })

    # Step 3.5: sync entity state from accepted facts. Only empty role and
    # location slots are filled; existing values are never overwritten.
    entity_by_id: dict = {}
    for candidate in all_entities:
        entity_by_id.setdefault(candidate.id, candidate)  # first occurrence wins
    for fact in result.facts_added:
        entity = entity_by_id.get(fact.entity_id)
        if not entity:
            continue
        if fact.predicate == "role" and not entity.state.role:
            entity.state.role = fact.object
        elif fact.predicate == "location" and not entity.state.location:
            entity.state.location = fact.object
        else:
            continue
        self.storage.update_entity(entity)

    # Step 3.6: deferred reindex (once per entity, not per fact).
    for eid in entities_to_reindex:
        entity = self.storage.get_entity(eid)
        if entity:
            self.storage._reindex_entity(entity)
    if entities_to_reindex:
        self.storage._conn.commit()

    # Step 4: extract and store relations (these do not pass the quality gate).
    for relation in ext.extract_relations(text, all_entities):
        self.storage.add_relation(relation)
        result.relations_added.append(relation)

    # Step 4.5: mark every involved entity as active in working memory.
    wm = self._working_memories.get(session_id) if session_id else None
    if wm:
        for entity in all_entities:
            wm.touch_entity(entity.id)

    # Step 5: refresh the Markdown page of each created or updated entity.
    if self._md_exporter:
        for entity in result.entities_created + result.entities_updated:
            e = self.storage.get_entity_by_name(entity.name) or entity
            facts = self.storage.get_facts(e.id)
            rels = self.storage.get_relations(e.id)
            self._md_exporter.export_entity(e, facts, rels)

    return result
|
|
309
|
+
|
|
310
|
+
def add_fact(
    self,
    entity_name: str,
    fact_text: str,
    predicate: str = "attribute",
    source: Optional[Source] = None,
    session_id: Optional[str] = None,
) -> ValidationResult:
    """Validate one manually supplied fact and store it unless rejected.

    Args:
        entity_name: Name of the entity the fact belongs to.
        fact_text: Used as both the fact's object and its raw text.
        predicate: Predicate to file the fact under.
        source: Provenance; defaults to a ``USER_INPUT`` source.
        session_id: When given, the new fact and the touched entity
            are recorded on that session.

    Returns:
        The quality gate's validation result for the fact.

    Raises:
        EntityNotFoundError: If no entity is named ``entity_name``.
    """
    target = self.storage.get_entity_by_name(entity_name)
    if not target:
        raise EntityNotFoundError(f"Entity '{entity_name}' not found")

    if not source:
        source = Source(type=SourceType.USER_INPUT)
    new_fact = Fact(
        entity_id=target.id,
        subject=target.name,
        predicate=predicate,
        object=fact_text,
        raw_text=fact_text,
        source=source,
    )
    verdict = self.quality_gate.validate(new_fact, extraction_method="manual")

    storable = (Action.ACCEPT, Action.QUARANTINE, Action.FLAG_CONFLICT)
    if verdict.action not in storable:
        return verdict

    self.storage.add_fact(new_fact)
    if session_id:
        self.session_mgr.record_fact_added(session_id, new_fact.id)
        self.session_mgr.record_entity_modified(session_id, target.id)

    # A conflicting fact is stored too, but each conflict is queued for resolution.
    if verdict.action == Action.FLAG_CONFLICT:
        for clash in verdict.conflicts:
            prior = clash.existing_fact
            self.storage.add_conflict({
                "id": clash.conflict_id,
                "existing_fact_id": prior.id if prior else None,
                "new_fact_id": new_fact.id,
                "conflict_type": clash.conflict_type,
                "suggested_resolution": clash.suggested_resolution,
            })

    return verdict
|
|
352
|
+
|
|
353
|
+
def create_entity(
    self,
    name: str,
    entity_type: str = "person",
    tier: Tier = Tier.RECALL,
    summary: str = "",
) -> Entity:
    """Build an entity from the given attributes, persist it, and return it.

    Any error raised by storage (for example on a duplicate) propagates
    to the caller.
    """
    attributes = dict(name=name, entity_type=entity_type, tier=tier, summary=summary)
    created = Entity(**attributes)
    self.storage.create_entity(created)
    return created
|
|
369
|
+
|
|
370
|
+
def merge_entities(self, primary_name: str, secondary_name: str) -> Entity:
    """Fold the secondary entity into the primary one and delete the secondary.

    Facts and both ends of relations are re-pointed at the primary, the
    secondary's name and aliases become aliases of the primary, and the
    secondary's role/affiliation/location fill any gaps in the primary.

    Returns:
        The updated primary entity. If both names resolve to the same
        entity it is returned unchanged.

    Raises:
        EntityNotFoundError: If either name does not resolve to an entity.
    """
    primary = self.storage.get_entity_by_name(primary_name)
    secondary = self.storage.get_entity_by_name(secondary_name)
    if not primary:
        raise EntityNotFoundError(f"Entity '{primary_name}' not found")
    if not secondary:
        raise EntityNotFoundError(f"Entity '{secondary_name}' not found")
    if primary.id == secondary.id:
        return primary

    # Re-point facts first, then the outgoing and incoming sides of relations.
    conn = self.storage._conn
    reassignments = (
        (
            "UPDATE facts SET entity_id = ?, subject = ? WHERE entity_id = ?",
            (primary.id, primary.name, secondary.id),
        ),
        (
            "UPDATE relations SET from_entity_id = ? WHERE from_entity_id = ?",
            (primary.id, secondary.id),
        ),
        (
            "UPDATE relations SET to_entity_id = ? WHERE to_entity_id = ?",
            (primary.id, secondary.id),
        ),
    )
    for statement, params in reassignments:
        conn.execute(statement, params)

    # The secondary's name comes first, then its aliases; duplicates are skipped.
    for alias in [secondary.name, *secondary.aliases]:
        if alias not in primary.aliases:
            primary.aliases.append(alias)

    # The secondary only fills state slots the primary leaves empty.
    for slot in ("role", "affiliation", "location"):
        inherited = getattr(secondary.state, slot)
        if not getattr(primary.state, slot) and inherited:
            setattr(primary.state, slot, inherited)

    self.storage.update_entity(primary)
    self.storage.delete_entity(secondary.id)
    return primary
|
|
412
|
+
|
|
413
|
+
def resolve_conflict(self, conflict_id: str, resolution: str, resolved_by: str = "user") -> None:
    """Mark a conflict resolved and retract the fact that lost.

    Args:
        conflict_id: Id of the conflict to resolve.
        resolution: ``"accept_new"`` retracts the existing fact and
            points its ``superseded_by`` at the new one; ``"keep_old"``
            retracts the new fact. Other values only mark the conflict
            resolved.
        resolved_by: Recorded with the resolution.
    """
    # Capture the conflict's fact ids before resolving, while it is still pending.
    match = next(
        (entry for entry in self.storage.get_pending_conflicts() if entry["id"] == conflict_id),
        None,
    )

    self.storage.resolve_conflict(conflict_id, resolution, resolved_by)

    if not match:
        return  # not pending: nothing more is known about the facts involved

    conn = self.storage._conn
    existing_id = match.get("existing_fact_id")
    new_id = match.get("new_fact_id")

    if resolution == "accept_new" and existing_id:
        # Only touch the old fact if it still exists.
        row = conn.execute(
            "SELECT id FROM facts WHERE id = ?", (existing_id,)
        ).fetchone()
        if row:
            conn.execute(
                "UPDATE facts SET status = 'retracted', superseded_by = ? WHERE id = ?",
                (new_id, existing_id),
            )
            conn.commit()
    elif resolution == "keep_old" and new_id:
        conn.execute(
            "UPDATE facts SET status = 'retracted' WHERE id = ?",
            (new_id,),
        )
        conn.commit()
|
|
443
|
+
|
|
444
|
+
# ── Session Operations ──
|
|
445
|
+
|
|
446
|
+
def start_session(self, agent_id: str = "default") -> Session:
    """Open a session for ``agent_id`` and give it a fresh working memory."""
    started = self.session_mgr.start_session(agent_id)
    memory = WorkingMemory(started)
    self._working_memories[started.session_id] = memory
    return started
|
|
451
|
+
|
|
452
|
+
def end_session(self, session_id: str, summary: Optional[str] = None) -> Session:
    """End a session. Runs light maintenance on modified entities.

    The session's working memory is discarded, the session is closed
    through the session manager, and up to the first ten entities it
    modified are compacted when they hold more than 15 current facts.

    Maintenance is best-effort: a failure for one entity is logged and
    never prevents the session from ending.

    Args:
        session_id: Session to end.
        summary: Optional summary passed to the session manager.

    Returns:
        The ended session as returned by the session manager.
    """
    import logging

    self._working_memories.pop(session_id, None)
    session = self.session_mgr.end_session(session_id, summary)

    if session.entities_modified:
        for eid in session.entities_modified[:10]:
            try:
                current_facts = self.storage.get_current_facts(eid)
                entity = self.storage.get_entity(eid)
                if entity and len(current_facts) > 15:
                    self.compact_entity(entity.name)
            except Exception:
                # Deliberately not re-raised, but no longer silent.
                logging.getLogger(__name__).warning(
                    "Session-end compaction failed for entity %s", eid, exc_info=True
                )

    return session
|
|
469
|
+
|
|
470
|
+
def get_session_context(self, agent_id: str) -> dict:
    """Return the context built by ``build_session_context`` for this agent."""
    storage = self.storage
    return build_session_context(storage, agent_id)
|
|
473
|
+
|
|
474
|
+
# ── Maintenance ──
|
|
475
|
+
|
|
476
|
+
def health_check(self) -> dict:
    """Summarize entity/fact counts and unresolved conflicts.

    ``status`` is ``"healthy"`` only when no conflicts are pending,
    otherwise ``"needs_attention"``.
    """
    unresolved = len(self.storage.get_pending_conflicts())
    report = {
        "entity_count": self.storage.entity_count(),
        "fact_count": self.storage.fact_count(),
        "pending_conflicts": unresolved,
    }
    report["status"] = "needs_attention" if unresolved else "healthy"
    return report
|
|
485
|
+
|
|
486
|
+
def export_markdown(self) -> int:
    """Export every entity to Markdown and return the exporter's total count.

    An exporter is created on demand (and kept) when none was set up at
    construction time. Entities are read from storage in pages of 500.
    """
    exporter = self._md_exporter
    if not exporter:
        from engram.storage.markdown_export import MarkdownExporter

        exporter = self._md_exporter = MarkdownExporter(self.config.export_dir)

    page_size = 500
    exported = 0
    start = 0
    while True:
        page = self.storage.list_entities(limit=page_size, offset=start)
        if not page:
            return exported
        bundle = [
            (item, self.storage.get_facts(item.id), self.storage.get_relations(item.id))
            for item in page
        ]
        exported += exporter.export_all(bundle)
        start += page_size
|
|
507
|
+
|
|
508
|
+
def reindex(self) -> int:
    """Rebuild the search index via storage and return the value it reports."""
    rebuilt = self.storage.reindex_all()
    return rebuilt
|
|
511
|
+
|
|
512
|
+
def compact_entity(self, entity_name: str) -> dict:
    """Compact an entity's facts: keep best per predicate, update summary.

    For every predicate with several current facts, the fact with the
    highest confidence (ties broken by the latest ``created_at``) is
    kept and the others are superseded by it. The entity summary is
    then rebuilt from the raw text of up to five remaining current facts.

    Args:
        entity_name: Name of the entity to compact.

    Returns:
        A dict with ``entity``, ``facts_kept`` (one per predicate),
        ``facts_superseded`` and ``new_summary``.

    Raises:
        EntityNotFoundError: If no entity is named ``entity_name``.
    """
    from collections import defaultdict

    entity = self.storage.get_entity_by_name(entity_name)
    if not entity:
        raise EntityNotFoundError(f"Entity '{entity_name}' not found")

    # Group the entity's current facts by predicate.
    by_predicate: dict[str, list[Fact]] = defaultdict(list)
    for fact in self.storage.get_facts(entity.id):
        if fact.is_current:
            by_predicate[fact.predicate].append(fact)

    superseded = 0
    for facts in by_predicate.values():
        if len(facts) < 2:
            continue
        # Sort by confidence desc, then recency.
        ranked = sorted(
            facts,
            key=lambda f: (f.confidence, f.created_at.isoformat()),
            reverse=True,
        )
        winner = ranked[0]
        for loser in ranked[1:]:
            loser.supersede(winner.id)
            self.storage.update_fact(loser)
            superseded += 1

    # Rebuild the summary from what is still current. Trailing periods are
    # stripped before joining so sentences that already end in "." do not
    # yield "A.. B."; empty or missing raw text is skipped.
    sentences = []
    for fact in self.storage.get_current_facts(entity.id)[:5]:
        text = (fact.raw_text or "").strip().rstrip(".").rstrip()
        if text:
            sentences.append(text)
    if sentences:
        entity.summary = ". ".join(sentences) + "."
        self.storage.update_entity(entity)

    return {
        "entity": entity_name,
        # Every predicate group keeps exactly one fact.
        "facts_kept": len(by_predicate),
        "facts_superseded": superseded,
        "new_summary": entity.summary,
    }
|
|
562
|
+
|
|
563
|
+
def maintenance(self) -> dict:
    """Run maintenance: expire stale facts, compact heavy entities, auto-resolve conflicts.

    Steps, over at most 10000 entities:
      1. Facts reported stale by ``find_stale_facts`` (using
         ``config.decay_days``) are marked ``EXPIRED``.
      2. Entities with more than 20 current facts are compacted.
      3. Pending conflicts whose suggested resolution is ``"supersede"``
         are resolved as ``"accept_new"``.

    Returns:
        Counters ``stale_facts``, ``compacted_entities`` and
        ``cleaned_sessions``. No session cleanup is performed here, so
        ``cleaned_sessions`` is always 0.
    """
    from engram.quality.decay import find_stale_facts

    results: dict = {"stale_facts": 0, "compacted_entities": 0, "cleaned_sessions": 0}

    # 1. Find and expire stale facts.
    entities = self.storage.list_entities(limit=10000)
    for entity in entities:
        current = self.storage.get_current_facts(entity.id)
        for fact in find_stale_facts(current, days=self.config.decay_days):
            fact.status = FactStatus.EXPIRED
            self.storage.update_fact(fact)
            results["stale_facts"] += 1

    # 2. Compact entities with too many current facts (re-read after step 1,
    #    since expiring facts changes the count).
    for entity in entities:
        if len(self.storage.get_current_facts(entity.id)) > 20:  # compaction threshold
            self.compact_entity(entity.name)
            results["compacted_entities"] += 1

    # 3. Auto-resolve conflicts that already suggest superseding the old fact.
    #    Going through self.resolve_conflict (not storage directly) also
    #    retracts the losing fact, as a manual resolution would.
    #    NOTE(review): no conflict-age filter is applied here, although an
    #    earlier comment mentioned "> 30 days" — confirm whether one is wanted.
    for conflict in self.storage.get_pending_conflicts():
        if conflict.get("suggested_resolution") == "supersede":
            self.resolve_conflict(conflict["id"], "accept_new", "auto_maintenance")

    return results
|
|
597
|
+
|
|
598
|
+
# ── Private ──
|
|
599
|
+
|
|
600
|
+
def _build_extractor(self):
    """Create the extractor selected by ``config.extractor_backend``.

    ``"llm"`` yields an ``LLMExtractor`` (provider defaults to
    ``"anthropic"``). If importing or constructing it raises
    ``ImportError``, or for any other backend value, a
    ``RegexExtractor`` is returned instead.
    """
    if self.config.extractor_backend == "llm":
        provider = self.config.llm_provider or "anthropic"
        try:
            from engram.extraction.llm_extractor import LLMExtractor

            llm = LLMExtractor(provider=provider, model=self.config.llm_model)
        except ImportError:
            pass  # Fall back to regex
        else:
            return llm

    from engram.extraction.regex_extractor import RegexExtractor

    return RegexExtractor()
|