memorytrace-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54):
  1. engram/__init__.py +8 -0
  2. engram/__main__.py +6 -0
  3. engram/cli/__init__.py +1 -0
  4. engram/cli/app.py +291 -0
  5. engram/cli/formatters.py +90 -0
  6. engram/cli/simple.py +267 -0
  7. engram/config.py +72 -0
  8. engram/engine.py +612 -0
  9. engram/exceptions.py +41 -0
  10. engram/extraction/__init__.py +6 -0
  11. engram/extraction/base.py +20 -0
  12. engram/extraction/llm_extractor.py +197 -0
  13. engram/extraction/ner/__init__.py +7 -0
  14. engram/extraction/ner/cjk.py +63 -0
  15. engram/extraction/ner/english.py +109 -0
  16. engram/extraction/ner/korean.py +106 -0
  17. engram/extraction/regex_extractor.py +188 -0
  18. engram/integrations/__init__.py +1 -0
  19. engram/integrations/mcp_server.py +213 -0
  20. engram/integrations/sdk.py +194 -0
  21. engram/models/__init__.py +19 -0
  22. engram/models/entity.py +72 -0
  23. engram/models/fact.py +58 -0
  24. engram/models/quality.py +61 -0
  25. engram/models/relation.py +26 -0
  26. engram/models/search.py +96 -0
  27. engram/models/session.py +53 -0
  28. engram/models/source.py +73 -0
  29. engram/quality/__init__.py +8 -0
  30. engram/quality/confidence.py +38 -0
  31. engram/quality/conflict.py +79 -0
  32. engram/quality/decay.py +28 -0
  33. engram/quality/gate.py +120 -0
  34. engram/quality/pii.py +80 -0
  35. engram/search/__init__.py +13 -0
  36. engram/search/base.py +20 -0
  37. engram/search/fts5_search.py +210 -0
  38. engram/search/hybrid.py +99 -0
  39. engram/search/semantic.py +186 -0
  40. engram/search/tokenizer.py +85 -0
  41. engram/session/__init__.py +6 -0
  42. engram/session/context.py +87 -0
  43. engram/session/manager.py +152 -0
  44. engram/session/working_memory.py +57 -0
  45. engram/storage/__init__.py +6 -0
  46. engram/storage/base.py +63 -0
  47. engram/storage/markdown_export.py +144 -0
  48. engram/storage/migrations.py +30 -0
  49. engram/storage/sqlite_store.py +615 -0
  50. memorytrace-0.1.0.dist-info/METADATA +138 -0
  51. memorytrace-0.1.0.dist-info/RECORD +54 -0
  52. memorytrace-0.1.0.dist-info/WHEEL +4 -0
  53. memorytrace-0.1.0.dist-info/entry_points.txt +3 -0
  54. memorytrace-0.1.0.dist-info/licenses/LICENSE +21 -0
engram/config.py ADDED
@@ -0,0 +1,72 @@
1
+ """Engram configuration."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from pathlib import Path
7
+ from typing import Optional
8
+
9
+
10
@dataclass
class EngramConfig:
    """Central configuration for the Engram memory system.

    Every field has a default, so ``EngramConfig()`` is usable as-is.
    Use :meth:`from_dict` to build an instance from a plain mapping.
    """

    # Storage
    base_dir: Path = field(default_factory=lambda: Path.home() / ".engram")
    db_name: str = "memory.db"
    enable_markdown_export: bool = True
    export_dir_name: str = "readable"

    # Search
    search_backend: str = "fts5"  # "fts5" | "semantic" | "hybrid"
    max_search_results: int = 20
    default_token_budget: int = 500
    fts5_tokenizer: str = "unicode61 remove_diacritics 2"

    # Extraction
    extractor_backend: str = "regex"  # "regex" | "llm"
    llm_provider: Optional[str] = None  # "anthropic" | "openai"
    llm_model: Optional[str] = None

    # Quality
    min_confidence: float = 0.3
    auto_resolve_threshold: float = 0.3  # trust diff for auto-supersede
    enable_pii_detection: bool = True

    # Session
    default_agent_id: str = "default"

    # Maintenance
    thin_page_threshold: int = 300  # bytes
    decay_days: int = 90
    compact_days: int = 180

    @property
    def db_path(self) -> Path:
        """Full path of the database file: ``base_dir / db_name``."""
        return self.base_dir / self.db_name

    @property
    def export_dir(self) -> Path:
        """Full path of the export directory: ``base_dir / export_dir_name``."""
        return self.base_dir / self.export_dir_name

    def ensure_dirs(self) -> None:
        """Create required directories if they don't exist.

        The export directory is only created when ``enable_markdown_export``
        is true.
        """
        self.base_dir.mkdir(parents=True, exist_ok=True)
        if self.enable_markdown_export:
            self.export_dir.mkdir(parents=True, exist_ok=True)

    @classmethod
    def from_dict(cls, data: dict) -> "EngramConfig":
        """Build a config from a mapping, starting from the defaults.

        Args:
            data: Mapping of field name to value. Keys that are not
                configuration fields are ignored. ``base_dir`` is converted
                to a ``Path`` with ``~`` expanded and then resolved.

        Returns:
            A new ``EngramConfig`` with the supplied overrides applied.
        """
        from dataclasses import fields

        config = cls()
        if "base_dir" in data:
            config.base_dir = Path(data["base_dir"]).expanduser().resolve()
        # Iterate the declared fields rather than a hand-maintained key list,
        # so a newly added option (e.g. fts5_tokenizer) can never be
        # silently dropped.
        for spec in fields(cls):
            if spec.name != "base_dir" and spec.name in data:
                setattr(config, spec.name, data[spec.name])
        return config
engram/engine.py ADDED
@@ -0,0 +1,612 @@
1
+ """MemoryEngine — the main orchestrator that composes all modules.
2
+
3
+ This is the single public entry point for the Engram memory system.
4
+ No print() calls — all output is via structured return values.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import copy
10
+ import json
11
+ from dataclasses import dataclass, field
12
+ from typing import Optional
13
+
14
+ from engram.config import EngramConfig
15
+ from engram.exceptions import EntityAlreadyExistsError, EntityNotFoundError
16
+ from engram.models.entity import Entity, Tier
17
+ from engram.models.fact import Fact, FactStatus
18
+ from engram.models.quality import Action, ValidationResult
19
+ from engram.models.relation import Relation
20
+ from engram.models.search import SearchOptions, SearchResult
21
+ from engram.models.session import Session
22
+ from engram.models.source import Source, SourceType
23
+ from engram.quality.gate import QualityGate
24
+ from engram.search.fts5_search import FTS5Search
25
+ from engram.session.context import build_session_context
26
+ from engram.session.manager import SessionManager
27
+ from engram.session.working_memory import WorkingMemory
28
+ from engram.storage.sqlite_store import SQLiteStorage
29
+
30
+
31
@dataclass
class StoreResult:
    """Outcome of one store operation, grouped by what happened to each item."""

    entities_created: list[Entity] = field(default_factory=list)
    entities_updated: list[Entity] = field(default_factory=list)
    facts_added: list[Fact] = field(default_factory=list)
    facts_quarantined: list[Fact] = field(default_factory=list)
    facts_conflicted: list[Fact] = field(default_factory=list)
    relations_added: list[Relation] = field(default_factory=list)
    validations: list[ValidationResult] = field(default_factory=list)

    def to_dict(self) -> dict:
        """Return a plain-dict summary of the result.

        Entities are reduced to name/type, added facts to their
        subject/predicate/object triple, and relations to their type.
        Quarantined and conflicted facts are reported as counts only;
        ``validations`` is not included.
        """

        def entity_view(item):
            return {"name": item.name, "type": item.entity_type}

        def fact_view(item):
            return {
                "subject": item.subject,
                "predicate": item.predicate,
                "object": item.object,
            }

        summary = {}
        summary["entities_created"] = list(map(entity_view, self.entities_created))
        summary["entities_updated"] = list(map(entity_view, self.entities_updated))
        summary["facts_added"] = list(map(fact_view, self.facts_added))
        summary["facts_quarantined"] = len(self.facts_quarantined)
        summary["facts_conflicted"] = len(self.facts_conflicted)
        summary["relations_added"] = [
            {"type": rel.relation_type} for rel in self.relations_added
        ]
        return summary

    def to_json(self) -> str:
        """Serialize :meth:`to_dict` as indented JSON, keeping non-ASCII text."""
        payload = self.to_dict()
        return json.dumps(payload, ensure_ascii=False, indent=2)
59
+
60
+
61
+ class MemoryEngine:
62
+ """Main entry point. Composes storage, search, extraction, quality, sessions.
63
+
64
+ Design principles:
65
+ - No print() — all output via structured return values
66
+ - All writes go through quality gate
67
+ - Session tracking for every operation
68
+ """
69
+
70
def __init__(self, config: Optional[EngramConfig] = None):
    """Wire up storage, search, quality gate and session tracking.

    Args:
        config: Configuration to use; a default ``EngramConfig`` is created
            when omitted. Its directories are created via ``ensure_dirs()``.
    """
    if not config:
        config = EngramConfig()
    self.config = config
    self.config.ensure_dirs()

    # Core components: storage and search both open the same database path.
    database = self.config.db_path
    self.storage = SQLiteStorage(database)
    self.storage.initialize()
    self.search_engine = FTS5Search(self.config.db_path)
    self.quality_gate = QualityGate(self.config, self.storage)
    self.session_mgr = SessionManager(self.storage)

    # Markdown exporter exists only when export is enabled in the config.
    self._md_exporter = None
    if self.config.enable_markdown_export:
        from engram.storage.markdown_export import MarkdownExporter

        export_target = self.config.export_dir
        self._md_exporter = MarkdownExporter(export_target)

    # Extractor is built on first access through the ``extractor`` property.
    self._extractor = None

    # One WorkingMemory per active session, keyed by session id.
    self._working_memories: dict[str, WorkingMemory] = {}
92
+
93
@property
def extractor(self):
    """Return the extractor, building and caching it on first access."""
    cached = self._extractor
    if cached is not None:
        return cached
    self._extractor = self._build_extractor()
    return self._extractor
98
+
99
def close(self) -> None:
    """Close the storage and search connections.

    The search engine is closed even when closing storage raises, so a
    failure in one does not leak the other. The storage error, if any,
    still propagates to the caller.
    """
    try:
        self.storage.close()
    finally:
        self.search_engine.close()
103
+
104
+ # ── Read Operations ──
105
+
106
def search(
    self,
    query: str,
    options: Optional[SearchOptions] = None,
    session_id: Optional[str] = None,
) -> SearchResult:
    """Search memory and boost entities active in the session's working memory.

    Args:
        query: Search text. It always overrides ``options.query``.
        options: Optional search options; note that the passed object has
            its ``query`` attribute overwritten in place.
        session_id: When it names a session with working memory, hits whose
            entity is active in that session get their relevance score
            multiplied by 1.5 and the hits are re-sorted.

    Returns:
        The result object produced by the search engine (hits possibly
        re-scored and re-ordered).
    """
    if options is None:
        options = SearchOptions(query=query)
    else:
        options.query = query
    result = self.search_engine.search(options)

    wm = self._working_memories.get(session_id) if session_id else None
    if not wm:
        return result

    # Boost entities currently in working memory (session context awareness).
    active = wm.active_entities
    if active and result.hits:
        for hit in result.hits:
            if hit.entity.id in active:
                hit.relevance_score *= 1.5
        result.hits.sort(key=lambda h: h.relevance_score, reverse=True)

    # Record every query made in the session, not only those that produced
    # boostable hits; otherwise empty searches vanish from the history.
    wm.add_query(query)
    return result
129
+
130
def get_entity(self, name: str) -> Optional[Entity]:
    """Fetch an entity by name and mark it as accessed.

    Returns whatever storage returned for ``name``; when that is truthy the
    entity is first passed to ``_touch_entity``.
    """
    found = self.storage.get_entity_by_name(name)
    if not found:
        return found
    self._touch_entity(found)
    return found
136
+
137
def _touch_entity(self, entity: Entity) -> None:
    """Increment access_count without triggering FTS reindex.

    Updates the ``entities`` row directly, commits, and mirrors the change
    onto the passed ``entity`` object. A single timestamp is used for both
    so the stored ``last_accessed`` and the in-memory one are identical.
    """
    from datetime import datetime

    accessed_at = datetime.now()
    conn = self.storage._conn
    conn.execute(
        "UPDATE entities SET access_count = access_count + 1, last_accessed = ? WHERE id = ?",
        (accessed_at.isoformat(), entity.id),
    )
    conn.commit()
    entity.access_count += 1
    entity.last_accessed = accessed_at
147
+
148
def get_entity_by_id(self, entity_id: str) -> Optional[Entity]:
    """Return the entity storage holds under ``entity_id`` (no access tracking)."""
    store = self.storage
    return store.get_entity(entity_id)
151
+
152
def get_facts(self, entity_name: str, current_only: bool = True) -> list[Fact]:
    """Return the facts stored for the named entity.

    An unknown entity yields an empty list. With ``current_only`` the
    storage's current-facts query is used, otherwise all facts are returned.
    """
    store = self.storage
    owner = store.get_entity_by_name(entity_name)
    if not owner:
        return []
    fetch = store.get_current_facts if current_only else store.get_facts
    return fetch(owner.id)
160
+
161
def list_entities(
    self,
    tier: Optional[Tier] = None,
    entity_type: Optional[str] = None,
    limit: int = 100,
) -> list[Entity]:
    """List entities, forwarding the optional tier/type filters and limit to storage."""
    filters = {"tier": tier, "entity_type": entity_type, "limit": limit}
    return self.storage.list_entities(**filters)
169
+
170
def get_relations(self, entity_name: str) -> list[Relation]:
    """Return the relations stored for the named entity, or ``[]`` if it is unknown."""
    store = self.storage
    owner = store.get_entity_by_name(entity_name)
    return store.get_relations(owner.id) if owner else []
176
+
177
+ # ── Write Operations (through quality gate) ──
178
+
179
def store(
    self,
    text: str,
    source: Optional[Source] = None,
    session_id: Optional[str] = None,
) -> StoreResult:
    """Extract entities, facts, and relations from text and store them.

    All facts pass through the quality gate before storage. Rejected facts
    are dropped; accepted, quarantined and conflict-flagged facts are all
    written, and conflicts are additionally recorded via ``add_conflict``.

    Args:
        text: Raw input. Empty or whitespace-only text yields an empty result.
        source: Provenance attached (as a shallow copy) to every extracted
            fact; defaults to a ``USER_INPUT`` source.
        session_id: When given, modified entities and accepted facts are
            recorded on the session, and the session's working memory (if
            any) is updated.

    Returns:
        A ``StoreResult`` describing what was created, updated or flagged.
    """
    if not text or not text.strip():
        return StoreResult()

    source = source or Source(type=SourceType.USER_INPUT)
    result = StoreResult()
    ext = self.extractor

    # Steps 1-2: extract entities, then create or match each one in storage.
    all_entities: list[Entity] = []
    for entity in ext.extract_entities(text):
        existing = self.storage.get_entity_by_name(entity.name)
        created = False
        if not existing:
            try:
                self.storage.create_entity(entity)
                created = True
            except EntityAlreadyExistsError:
                # TOCTOU: another writer created it between check and create.
                existing = self.storage.get_entity_by_name(entity.name)

        if created:
            result.entities_created.append(entity)
            stored = entity
        elif existing:
            result.entities_updated.append(existing)
            entity.id = existing.id
            stored = existing
        else:
            # Creation failed and the lookup still found nothing: keep the
            # extracted entity for fact extraction, but report nothing.
            all_entities.append(entity)
            continue

        all_entities.append(stored)
        # Recorded for every path, including the race-recovery one.
        if session_id:
            self.session_mgr.record_entity_modified(session_id, stored.id)

    # Step 3: extract and validate facts (search reindex is deferred).
    entities_to_reindex: set[str] = set()
    for fact in ext.extract_facts(text, all_entities):
        fact_source = copy.copy(source)
        if fact_source.session_id is None and session_id:
            fact_source.session_id = session_id
        fact.source = fact_source

        # NOTE(review): the method label is hard-coded to "regex" even when
        # an LLM extractor is configured — confirm what the gate expects.
        validation = self.quality_gate.validate(fact, extraction_method="regex")
        result.validations.append(validation)

        action = validation.action
        if action == Action.ACCEPT:
            bucket = result.facts_added
        elif action == Action.QUARANTINE:
            bucket = result.facts_quarantined
        elif action == Action.FLAG_CONFLICT:
            bucket = result.facts_conflicted
        else:
            continue  # rejected (or unrecognized) facts are not stored

        self.storage.add_fact(fact, reindex=False)
        bucket.append(fact)
        entities_to_reindex.add(fact.entity_id)

        if action == Action.ACCEPT:
            if session_id:
                self.session_mgr.record_fact_added(session_id, fact.id)
        elif action == Action.FLAG_CONFLICT:
            for conflict in validation.conflicts:
                self.storage.add_conflict({
                    "id": conflict.conflict_id,
                    "existing_fact_id": conflict.existing_fact.id if conflict.existing_fact else None,
                    "new_fact_id": fact.id,
                    "conflict_type": conflict.conflict_type,
                    "suggested_resolution": conflict.suggested_resolution,
                })

    # Step 3.5: fill empty role/location state from accepted facts.
    # First occurrence wins, matching a front-to-back scan of all_entities.
    entity_by_id = {}
    for candidate in all_entities:
        entity_by_id.setdefault(candidate.id, candidate)
    for fact in result.facts_added:
        entity = entity_by_id.get(fact.entity_id)
        if not entity:
            continue
        state_updated = False
        if fact.predicate == "role" and not entity.state.role:
            entity.state.role = fact.object
            state_updated = True
        elif fact.predicate == "location" and not entity.state.location:
            entity.state.location = fact.object
            state_updated = True
        if state_updated:
            self.storage.update_entity(entity)

    # Step 3.6: deferred reindex (once per entity, not per fact).
    for eid in entities_to_reindex:
        entity = self.storage.get_entity(eid)
        if entity:
            self.storage._reindex_entity(entity)
    if entities_to_reindex:
        self.storage._conn.commit()

    # Step 4: extract and store relations.
    for relation in ext.extract_relations(text, all_entities):
        self.storage.add_relation(relation)
        result.relations_added.append(relation)

    # Step 4.5: mark every touched entity in the session's working memory.
    wm = self._working_memories.get(session_id) if session_id else None
    if wm:
        for entity in all_entities:
            wm.touch_entity(entity.id)

    # Step 5: export to Markdown if enabled, once per distinct entity name
    # (the same entity can be extracted more than once from a text).
    if self._md_exporter:
        exported_names = set()
        for entity in result.entities_created + result.entities_updated:
            if entity.name in exported_names:
                continue
            exported_names.add(entity.name)
            e = self.storage.get_entity_by_name(entity.name) or entity
            facts = self.storage.get_facts(e.id)
            rels = self.storage.get_relations(e.id)
            self._md_exporter.export_entity(e, facts, rels)

    return result
309
+
310
def add_fact(
    self,
    entity_name: str,
    fact_text: str,
    predicate: str = "attribute",
    source: Optional[Source] = None,
    session_id: Optional[str] = None,
) -> ValidationResult:
    """Attach one manually supplied fact to an existing entity.

    The fact is validated by the quality gate with the "manual" extraction
    method. It is stored for ACCEPT, QUARANTINE and FLAG_CONFLICT outcomes;
    conflicts are additionally recorded in storage.

    Raises:
        EntityNotFoundError: If no entity named ``entity_name`` exists.

    Returns:
        The validation result from the quality gate.
    """
    target = self.storage.get_entity_by_name(entity_name)
    if not target:
        raise EntityNotFoundError(f"Entity '{entity_name}' not found")

    if not source:
        source = Source(type=SourceType.USER_INPUT)
    new_fact = Fact(
        entity_id=target.id,
        subject=target.name,
        predicate=predicate,
        object=fact_text,
        raw_text=fact_text,
        source=source,
    )

    verdict = self.quality_gate.validate(new_fact, extraction_method="manual")
    storable = (Action.ACCEPT, Action.QUARANTINE, Action.FLAG_CONFLICT)
    if verdict.action not in storable:
        return verdict

    self.storage.add_fact(new_fact)
    if session_id:
        self.session_mgr.record_fact_added(session_id, new_fact.id)
        self.session_mgr.record_entity_modified(session_id, target.id)

    if verdict.action == Action.FLAG_CONFLICT:
        for clash in verdict.conflicts:
            prior = clash.existing_fact
            record = {
                "id": clash.conflict_id,
                "existing_fact_id": prior.id if prior else None,
                "new_fact_id": new_fact.id,
                "conflict_type": clash.conflict_type,
                "suggested_resolution": clash.suggested_resolution,
            }
            self.storage.add_conflict(record)

    return verdict
352
+
353
def create_entity(
    self,
    name: str,
    entity_type: str = "person",
    tier: Tier = Tier.RECALL,
    summary: str = "",
) -> Entity:
    """Build an entity from the given attributes, persist it, and return it."""
    attributes = {
        "name": name,
        "entity_type": entity_type,
        "tier": tier,
        "summary": summary,
    }
    created = Entity(**attributes)
    self.storage.create_entity(created)
    return created
369
+
370
def merge_entities(self, primary_name: str, secondary_name: str) -> Entity:
    """Merge secondary entity into primary. All facts, relations, aliases transfer.

    Facts and both ends of relations are re-pointed at the primary, the
    secondary's name and aliases become aliases of the primary, and empty
    role/affiliation/location state on the primary is filled from the
    secondary. The secondary entity is then deleted.

    Args:
        primary_name: Name of the entity that survives the merge.
        secondary_name: Name of the entity that is absorbed and deleted.

    Returns:
        The updated primary entity. If both names resolve to the same
        entity, it is returned unchanged.

    Raises:
        EntityNotFoundError: If either name does not resolve to an entity.
    """
    primary = self.storage.get_entity_by_name(primary_name)
    secondary = self.storage.get_entity_by_name(secondary_name)
    if not primary:
        raise EntityNotFoundError(f"Entity '{primary_name}' not found")
    if not secondary:
        raise EntityNotFoundError(f"Entity '{secondary_name}' not found")
    if primary.id == secondary.id:
        return primary

    conn = self.storage._conn
    # Transfer facts
    conn.execute(
        "UPDATE facts SET entity_id = ?, subject = ? WHERE entity_id = ?",
        (primary.id, primary.name, secondary.id),
    )
    # Transfer relations (both directions)
    conn.execute(
        "UPDATE relations SET from_entity_id = ? WHERE from_entity_id = ?",
        (primary.id, secondary.id),
    )
    conn.execute(
        "UPDATE relations SET to_entity_id = ? WHERE to_entity_id = ?",
        (primary.id, secondary.id),
    )

    # Add aliases, never listing the primary's own name as its alias.
    for alias in [secondary.name, *secondary.aliases]:
        if alias != primary.name and alias not in primary.aliases:
            primary.aliases.append(alias)

    # Merge state (secondary fills gaps only).
    for attr in ("role", "affiliation", "location"):
        donor_value = getattr(secondary.state, attr)
        if not getattr(primary.state, attr) and donor_value:
            setattr(primary.state, attr, donor_value)

    self.storage.update_entity(primary)
    self.storage.delete_entity(secondary.id)
    return primary
412
+
413
def resolve_conflict(self, conflict_id: str, resolution: str, resolved_by: str = "user") -> None:
    """Mark a conflict resolved and retract the fact that lost.

    The conflict is looked up among the pending ones before storage marks it
    resolved. For "accept_new" the existing fact (if it still exists) is
    retracted and pointed at the new fact; for "keep_old" the new fact is
    retracted. If the conflict is not pending, only the storage call runs.
    """
    # Capture the conflict details before it leaves the pending list.
    matched = next(
        (item for item in self.storage.get_pending_conflicts() if item["id"] == conflict_id),
        None,
    )

    self.storage.resolve_conflict(conflict_id, resolution, resolved_by)

    if not matched:
        return

    if resolution == "accept_new" and matched.get("existing_fact_id"):
        loser_id = matched["existing_fact_id"]
        still_there = self.storage._conn.execute(
            "SELECT id FROM facts WHERE id = ?", (loser_id,)
        ).fetchone()
        if still_there:
            self.storage._conn.execute(
                "UPDATE facts SET status = 'retracted', superseded_by = ? WHERE id = ?",
                (matched.get("new_fact_id"), loser_id),
            )
            self.storage._conn.commit()
    elif resolution == "keep_old" and matched.get("new_fact_id"):
        self.storage._conn.execute(
            "UPDATE facts SET status = 'retracted' WHERE id = ?",
            (matched["new_fact_id"],),
        )
        self.storage._conn.commit()
443
+
444
+ # ── Session Operations ──
445
+
446
def start_session(self, agent_id: str = "default") -> Session:
    """Open a session for ``agent_id`` and give it a fresh working memory."""
    opened = self.session_mgr.start_session(agent_id)
    memory = WorkingMemory(opened)
    self._working_memories[opened.session_id] = memory
    return opened
451
+
452
def end_session(self, session_id: str, summary: Optional[str] = None) -> Session:
    """End a session. Runs light maintenance on modified entities.

    The session's working memory is discarded, the session is closed via
    the session manager, and up to the first 10 entities modified during
    the session are compacted when they hold more than 15 current facts.

    Maintenance is best-effort: a failure for one entity is logged and does
    not prevent the session from ending or other entities from compacting.

    Returns:
        The ended session as returned by the session manager.
    """
    import logging

    self._working_memories.pop(session_id, None)
    session = self.session_mgr.end_session(session_id, summary)

    # Light maintenance: compact entities that were heavily modified this session.
    if session.entities_modified:
        for eid in session.entities_modified[:10]:
            try:
                current_facts = self.storage.get_current_facts(eid)
                entity = self.storage.get_entity(eid)
                if entity and len(current_facts) > 15:
                    self.compact_entity(entity.name)
            except Exception:
                # Don't fail session end for maintenance errors, but leave
                # a trace instead of swallowing them silently.
                logging.getLogger(__name__).warning(
                    "Post-session compaction failed for entity %s", eid, exc_info=True
                )

    return session
469
+
470
def get_session_context(self, agent_id: str) -> dict:
    """Build the start-of-session context for ``agent_id`` from storage."""
    context = build_session_context(self.storage, agent_id)
    return context
473
+
474
+ # ── Maintenance ──
475
+
476
def health_check(self) -> dict:
    """Report entity/fact counts and whether any conflicts await resolution.

    ``status`` is "healthy" when there are no pending conflicts and
    "needs_attention" otherwise.
    """
    store = self.storage
    open_conflicts = len(store.get_pending_conflicts())
    report = {
        "entity_count": store.entity_count(),
        "fact_count": store.fact_count(),
        "pending_conflicts": open_conflicts,
    }
    report["status"] = "needs_attention" if open_conflicts else "healthy"
    return report
485
+
486
def export_markdown(self) -> int:
    """Export every entity to Markdown and return the exporter's total count.

    An exporter is created on demand when none is configured. Entities are
    read from storage in pages of 500 until an empty page is returned.
    """
    if not self._md_exporter:
        from engram.storage.markdown_export import MarkdownExporter

        self._md_exporter = MarkdownExporter(self.config.export_dir)

    page_size = 500
    exported = 0
    start = 0
    while True:
        page = self.storage.list_entities(limit=page_size, offset=start)
        if not page:
            return exported
        bundle = []
        for item in page:
            item_facts = self.storage.get_facts(item.id)
            item_rels = self.storage.get_relations(item.id)
            bundle.append((item, item_facts, item_rels))
        exported += self._md_exporter.export_all(bundle)
        start += page_size
507
+
508
def reindex(self) -> int:
    """Rebuild the search index via storage and return its result."""
    rebuilt = self.storage.reindex_all()
    return rebuilt
511
+
512
def compact_entity(self, entity_name: str) -> dict:
    """Compact an entity's facts: keep best per predicate, update summary.

    For each predicate among the entity's current facts, the fact with the
    highest confidence (ties broken by most recent ``created_at``) is kept
    and every other one is superseded by it. The entity summary is then
    regenerated from up to five non-empty ``raw_text`` values taken from
    the first ten remaining current facts.

    Args:
        entity_name: Name of the entity to compact.

    Returns:
        A dict with ``entity``, ``facts_kept``, ``facts_superseded`` and
        ``new_summary``.

    Raises:
        EntityNotFoundError: If no entity named ``entity_name`` exists.
    """
    from collections import defaultdict

    entity = self.storage.get_entity_by_name(entity_name)
    if not entity:
        raise EntityNotFoundError(f"Entity '{entity_name}' not found")

    # Group the current facts by predicate.
    by_predicate: dict[str, list[Fact]] = defaultdict(list)
    for fact in self.storage.get_facts(entity.id):
        if fact.is_current:
            by_predicate[fact.predicate].append(fact)

    superseded = 0
    for facts in by_predicate.values():
        if len(facts) < 2:
            continue
        # Sort by confidence desc, then recency; the head is the winner.
        best, *losers = sorted(
            facts,
            key=lambda f: (f.confidence, f.created_at.isoformat()),
            reverse=True,
        )
        for loser in losers:
            loser.supersede(best.id)
            self.storage.update_fact(loser)
            superseded += 1
    kept = len(by_predicate)  # exactly one survivor per predicate

    # Auto-generate summary from remaining current facts. Trailing periods
    # are stripped before joining so sentences are not rendered as "foo..",
    # and empty or missing raw_text is skipped.
    sentences = []
    for fact in self.storage.get_current_facts(entity.id)[:10]:
        sentence = (fact.raw_text or "").strip().rstrip(".").rstrip()
        if sentence:
            sentences.append(sentence)
        if len(sentences) == 5:
            break
    if sentences:
        entity.summary = ". ".join(sentences) + "."
        self.storage.update_entity(entity)

    return {
        "entity": entity_name,
        "facts_kept": kept,
        "facts_superseded": superseded,
        "new_summary": entity.summary,
    }
562
+
563
def maintenance(self) -> dict:
    """Run maintenance: expire stale facts, compact heavy entities, auto-resolve conflicts.

    Steps, in order:
      1. For up to 10000 entities, current facts reported stale by
         ``find_stale_facts`` (using ``config.decay_days``) are marked
         ``EXPIRED``.
      2. Entities with more than 20 current facts are compacted.
      3. Pending conflicts whose suggested resolution is "supersede" are
         resolved as "accept_new" through :meth:`resolve_conflict`, so the
         losing fact's status is updated as well. No age filter is applied.

    Returns:
        Counters ``stale_facts`` and ``compacted_entities``. The
        ``cleaned_sessions`` key is kept for compatibility and is always 0,
        since no session cleanup is performed here.
    """
    from engram.quality.decay import find_stale_facts

    results: dict = {"stale_facts": 0, "compacted_entities": 0, "cleaned_sessions": 0}

    # 1. Find and flag stale facts
    entities = self.storage.list_entities(limit=10000)
    for entity in entities:
        facts = self.storage.get_current_facts(entity.id)
        for stale_fact in find_stale_facts(facts, days=self.config.decay_days):
            stale_fact.status = FactStatus.EXPIRED
            self.storage.update_fact(stale_fact)
            results["stale_facts"] += 1

    # 2. Compact entities with too many current facts
    for entity in entities:
        current = self.storage.get_current_facts(entity.id)
        if len(current) > 20:  # Threshold for compaction
            self.compact_entity(entity.name)
            results["compacted_entities"] += 1

    # 3. Auto-resolve conflicts that already carry a "supersede" suggestion.
    # Going through self.resolve_conflict (not storage directly) ensures the
    # superseded fact is retracted, not just the conflict row closed.
    for conflict in self.storage.get_pending_conflicts():
        if conflict.get("suggested_resolution") == "supersede":
            self.resolve_conflict(conflict["id"], "accept_new", "auto_maintenance")

    return results
597
+
598
+ # ── Private ──
599
+
600
def _build_extractor(self):
    """Instantiate the extractor selected by ``config.extractor_backend``.

    The "llm" backend is attempted first when configured (provider defaults
    to "anthropic"); an ImportError while loading or constructing it falls
    back to the regex extractor, which is also the default.
    """
    wants_llm = self.config.extractor_backend == "llm"
    if wants_llm:
        try:
            from engram.extraction.llm_extractor import LLMExtractor

            provider = self.config.llm_provider or "anthropic"
            return LLMExtractor(provider=provider, model=self.config.llm_model)
        except ImportError:
            pass  # Fall back to regex

    from engram.extraction.regex_extractor import RegexExtractor

    return RegexExtractor()