memorytrace 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. engram/__init__.py +8 -0
  2. engram/__main__.py +6 -0
  3. engram/cli/__init__.py +1 -0
  4. engram/cli/app.py +291 -0
  5. engram/cli/formatters.py +90 -0
  6. engram/cli/simple.py +267 -0
  7. engram/config.py +72 -0
  8. engram/engine.py +612 -0
  9. engram/exceptions.py +41 -0
  10. engram/extraction/__init__.py +6 -0
  11. engram/extraction/base.py +20 -0
  12. engram/extraction/llm_extractor.py +197 -0
  13. engram/extraction/ner/__init__.py +7 -0
  14. engram/extraction/ner/cjk.py +63 -0
  15. engram/extraction/ner/english.py +109 -0
  16. engram/extraction/ner/korean.py +106 -0
  17. engram/extraction/regex_extractor.py +188 -0
  18. engram/integrations/__init__.py +1 -0
  19. engram/integrations/mcp_server.py +213 -0
  20. engram/integrations/sdk.py +194 -0
  21. engram/models/__init__.py +19 -0
  22. engram/models/entity.py +72 -0
  23. engram/models/fact.py +58 -0
  24. engram/models/quality.py +61 -0
  25. engram/models/relation.py +26 -0
  26. engram/models/search.py +96 -0
  27. engram/models/session.py +53 -0
  28. engram/models/source.py +73 -0
  29. engram/quality/__init__.py +8 -0
  30. engram/quality/confidence.py +38 -0
  31. engram/quality/conflict.py +79 -0
  32. engram/quality/decay.py +28 -0
  33. engram/quality/gate.py +120 -0
  34. engram/quality/pii.py +80 -0
  35. engram/search/__init__.py +13 -0
  36. engram/search/base.py +20 -0
  37. engram/search/fts5_search.py +210 -0
  38. engram/search/hybrid.py +99 -0
  39. engram/search/semantic.py +186 -0
  40. engram/search/tokenizer.py +85 -0
  41. engram/session/__init__.py +6 -0
  42. engram/session/context.py +87 -0
  43. engram/session/manager.py +152 -0
  44. engram/session/working_memory.py +57 -0
  45. engram/storage/__init__.py +6 -0
  46. engram/storage/base.py +63 -0
  47. engram/storage/markdown_export.py +144 -0
  48. engram/storage/migrations.py +30 -0
  49. engram/storage/sqlite_store.py +615 -0
  50. memorytrace-0.1.0.dist-info/METADATA +138 -0
  51. memorytrace-0.1.0.dist-info/RECORD +54 -0
  52. memorytrace-0.1.0.dist-info/WHEEL +4 -0
  53. memorytrace-0.1.0.dist-info/entry_points.txt +3 -0
  54. memorytrace-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,188 @@
1
+ """Rule-based extractor — NER + fact extraction + relation extraction."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import copy
6
+ import re
7
+ from typing import Optional
8
+
9
+ from engram.models.entity import Entity
10
+ from engram.models.fact import Fact
11
+ from engram.models.relation import Relation
12
+ from engram.models.source import Source
13
+ from engram.extraction.ner.english import extract_english_entities
14
+ from engram.extraction.ner.korean import extract_korean_entities
15
+ from engram.extraction.ner.cjk import extract_cjk_entities
16
+
17
+
18
+ # Fact extraction patterns: "Subject <verb> Object"
19
_FACT_PATTERNS = [
    # English: "X is/was the Y of Z"
    #   group(1): subject — two or more capitalized words (each [A-Z][a-z]+).
    #   The verb alternation is non-capturing, so the verb itself is not
    #   available to callers through a group.
    #   group(2): object — 1..200 characters, matched lazily, ending at the
    #   first '.', ',', ';' or end of line (re.MULTILINE makes '$' match at
    #   every line end). An optional leading "the " is skipped.
    re.compile(
        r'([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)\s+'
        r'(?:is|was|serves?\s+as|became|joined|founded|leads?|runs?|heads?)\s+'
        r'(?:the\s+)?(.{1,200}?)(?:\.|,|;|$)',
        re.MULTILINE,
    ),
    # English: "X, the Y of Z"
    #   group(1): subject — same capitalized-name shape as above.
    #   group(2): object — one word character followed by up to 200 word or
    #   whitespace characters (so up to 201 characters in total), matched
    #   lazily up to the first '.', ',', ';' or end of line.
    re.compile(
        r'([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+),\s+'
        r'(?:the\s+)?(\w[\w\s]{0,200}?)(?:\.|,|;|$)',
        re.MULTILINE,
    ),
]
34
+
35
+ # Relation patterns
36
_RELATION_PATTERNS = [
    # "X is the CEO of Y" → X -[CEO_OF]-> Y
    #   group(1): subject name, group(2): role keyword, group(3): target name.
    #
    # Case-insensitivity is scoped with (?i:...) to the verb, article, role,
    # preposition and the first letter of the target. The subject group stays
    # case-sensitive on purpose: with a pattern-wide re.IGNORECASE the
    # "capitalized words" subject also matched lowercase words, so the leftmost
    # match swallowed preceding text ("we heard Simon Kim is ..." captured
    # "heard Simon Kim") and the subject could no longer be grounded.
    re.compile(
        r'([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)\s+'
        r'(?i:is|was)\s+(?i:the\s+)?'
        r'((?i:CEO|CTO|CFO|COO|founder|co-founder|president|director|head|member|partner))\s+'
        r'(?i:of|at)\s+'
        r'((?i:[A-Z])[\w\s]*?)(?:\.|,|;|$)',
        re.MULTILINE,
    ),
    # "X works at/for Y"
    #   group(1): subject name, group(2): target name. The verb ("works at",
    #   "works for", "joined", "left") is not captured; callers that need it
    #   must read the text between the two groups.
    re.compile(
        r'([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)\s+'
        r'(?:works?\s+(?:at|for)|joined|left)\s+'
        r'([A-Z][\w\s]*?)(?:\.|,|;|$)',
        re.MULTILINE,
    ),
]
54
+
55
+ # Predicate normalization
56
# Lower-case job-title / role words used to recognise "role" predicates.
_ROLE_KEYWORDS = frozenset((
    # C-suite
    "ceo", "cto", "cfo", "coo",
    # founding and governance roles
    "founder", "co-founder", "president", "director",
    "head", "member", "partner",
    # staff roles
    "manager", "lead", "engineer", "scientist", "analyst",
))
61
+
62
+
63
class RegexExtractor:
    """Rule-based entity, fact, and relation extraction.

    Entities come from the English, Korean and CJK NER helpers and are
    de-duplicated by lower-cased name. Facts and relations are found with the
    module-level regex patterns and are only emitted when their subject (and,
    for relations, their target) matches one of the supplied entities.
    """

    # Maximum number of characters kept for a fact's object text.
    _MAX_OBJECT_LEN = 200

    # Role keywords matched as whole words, optionally pluralised. A plain
    # substring test misfires on words that merely contain a keyword
    # ("ahead", "headquarters", "remember", "misleading"). "\b" treats "-" as
    # a boundary, so "vice-president" and "co-founder" are still recognised.
    _ROLE_PATTERN = re.compile(
        r"\b(?:"
        + "|".join(
            re.escape(keyword)
            for keyword in sorted(_ROLE_KEYWORDS, key=lambda k: (-len(k), k))
        )
        + r")s?\b"
    )

    def __init__(self, default_source: Optional[Source] = None):
        """Create an extractor.

        Args:
            default_source: Source attached (as a shallow copy) to every
                extracted fact. A fresh ``Source()`` is used when omitted.
        """
        self.default_source = default_source or Source()

    def extract_entities(self, text: str) -> list[Entity]:
        """Extract named entities from text across all supported languages.

        The extractors run in a fixed order (English, Korean, CJK). When two
        results share the same lower-cased name, only the first one is kept.
        """
        entities: list[Entity] = []
        seen: set[str] = set()
        extractors = (
            extract_english_entities,
            extract_korean_entities,
            extract_cjk_entities,
        )

        for extract in extractors:
            for candidate in extract(text):
                key = candidate.name.lower()
                if key in seen:
                    continue
                seen.add(key)
                entities.append(candidate)

        return entities

    def extract_facts(self, text: str, entities: list[Entity]) -> list[Fact]:
        """Extract facts from text, grounded against known entities.

        Args:
            text: Text to scan with every pattern in ``_FACT_PATTERNS``.
            entities: Known entities; a match is kept only if its subject
                equals one of their names (case-insensitively).

        Returns:
            One ``Fact`` per grounded match, in pattern order then text order.
        """
        facts: list[Fact] = []
        by_name = {e.name.lower(): e for e in entities}

        for pattern in _FACT_PATTERNS:
            for m in pattern.finditer(text):
                # Ground the subject to a known entity; skip unknown subjects.
                entity = by_name.get(m.group(1).strip().lower())
                if entity is None:
                    continue

                object_text = m.group(2).strip()
                predicate = self._classify_predicate(object_text)

                facts.append(Fact(
                    entity_id=entity.id,
                    subject=entity.name,
                    predicate=predicate,
                    # Cap overly long objects.
                    object=object_text[: self._MAX_OBJECT_LEN],
                    raw_text=m.group(0).strip(),
                    # Copy so facts do not share one mutable Source instance.
                    source=copy.copy(self.default_source),
                    confidence=0.5,  # Will be recomputed by quality gate
                ))

        return facts

    def extract_relations(self, text: str, entities: list[Entity]) -> list[Relation]:
        """Extract directed relations between entities.

        Three-group patterns ("X is ROLE of Y") yield ``"<ROLE>_OF"``.
        Two-group patterns yield ``"WORKS_AT"``, except when the matched verb
        is "left", which yields ``"LEFT"`` so that a departure is not recorded
        as a current affiliation.

        A relation is emitted only when both ends match a supplied entity
        name (case-insensitively).
        """
        relations: list[Relation] = []
        by_name = {e.name.lower(): e for e in entities}

        for pattern in _RELATION_PATTERNS:
            for m in pattern.finditer(text):
                groups = m.groups()
                if len(groups) == 3:
                    # Pattern: X is ROLE of Y
                    subject, role, obj = (g.strip() for g in groups)
                    relation_type = f"{role.upper()}_OF"
                elif len(groups) == 2:
                    # Pattern: X works at / joined / left Y
                    subject, obj = (g.strip() for g in groups)
                    # The verb is not captured; recover it from the text
                    # between the subject and the target.
                    verb = text[m.end(1):m.start(2)].strip().lower()
                    relation_type = "LEFT" if verb == "left" else "WORKS_AT"
                else:
                    continue

                # Ground both ends to known entities.
                from_entity = by_name.get(subject.lower())
                to_entity = by_name.get(obj.lower())
                if from_entity and to_entity:
                    relations.append(Relation(
                        from_entity_id=from_entity.id,
                        to_entity_id=to_entity.id,
                        relation_type=relation_type,
                    ))

        return relations

    def _classify_predicate(self, object_text: str) -> str:
        """Determine the predicate type from the object text.

        Returns ``"role"``, ``"action"``, ``"location"``, ``"investment"`` or
        ``"attribute"`` (the fallback). Role keywords are matched as whole
        words; the remaining categories use substring markers.
        """
        lower = object_text.lower()
        if RegexExtractor._ROLE_PATTERN.search(lower):
            return "role"
        if any(w in lower for w in ("founded", "started", "created", "launched")):
            return "action"
        if any(w in lower for w in ("based in", "located", "lives in", "moved to")):
            return "location"
        if any(w in lower for w in ("invested", "fund", "raised", "capital")):
            return "investment"
        return "attribute"
@@ -0,0 +1 @@
1
+ """Integrations for Engram (MCP, SDK)."""
@@ -0,0 +1,213 @@
1
+ """MCP server for Engram — Claude Code / Codex integration.
2
+
3
+ Requires: pip install engram[mcp]
4
+
5
+ Usage:
6
+ python -m engram.integrations.mcp_server
7
+
8
+ Or via CLI:
9
+ engram serve
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import atexit
15
+ import json
16
+ import sys
17
+ from typing import Optional
18
+
19
+ from engram.engine import MemoryEngine
20
+ from engram.models.search import SearchOptions
21
+ from engram.models.source import Source, SourceType
22
+
23
+ # Lazy engine instance (created on first tool call)
24
_engine: Optional[MemoryEngine] = None


def _get_engine() -> MemoryEngine:
    """Return the module-wide ``MemoryEngine``, creating it on first use.

    On creation the engine's ``close`` method is registered with ``atexit``.
    """
    global _engine
    if _engine is not None:
        return _engine

    _engine = MemoryEngine()
    atexit.register(_engine.close)
    return _engine
33
+
34
+
35
def _safe_source_type(value: str) -> SourceType:
    """Convert *value* to a ``SourceType``.

    Values the enum rejects with ``ValueError`` map to
    ``SourceType.AGENT_INFERENCE`` instead of propagating the error.
    """
    try:
        parsed = SourceType(value)
    except ValueError:
        parsed = SourceType.AGENT_INFERENCE
    return parsed
41
+
42
+
43
def run_server(transport: str = "stdio", port: int = 8080) -> None:
    """Start the MCP server.

    Registers the memory tools on a ``FastMCP`` application and serves them
    over stdio. The decorator-based ``tool()`` API and the blocking ``run()``
    entry point belong to ``FastMCP``; the low-level ``mcp.server.Server``
    offers neither, and ``stdio_server`` is an async context manager rather
    than a coroutine that accepts an app.

    Args:
        transport: Transport name. Only ``"stdio"`` is supported.
        port: Currently unused; kept for interface compatibility.

    Exits the process with status 1 if the ``mcp`` package is missing or the
    transport is not supported.
    """
    try:
        from mcp.server.fastmcp import FastMCP
    except ImportError:
        print("MCP package not installed. Run: pip install engram[mcp]", file=sys.stderr)
        sys.exit(1)

    if transport != "stdio":
        print(f"Transport '{transport}' not yet supported.", file=sys.stderr)
        sys.exit(1)

    app = FastMCP("engram")

    # NOTE: the tool docstrings below are surfaced to clients as tool
    # descriptions, so they are part of the server's observable behavior.

    @app.tool()
    async def memory_search(
        query: str,
        max_results: int = 5,
        max_tokens: int = 500,
        min_confidence: float = 0.0,
    ) -> str:
        """Search persistent memory for relevant context about people, organizations, or past decisions.

        Use when the user mentions someone/something discussed before,
        or references "last time", "before", "we discussed".

        Args:
            query: Natural language search query
            max_results: Maximum entities to return
            max_tokens: Token budget for response
            min_confidence: Minimum fact confidence (0.0-1.0)
        """
        engine = _get_engine()
        options = SearchOptions(
            query=query,
            max_results=max_results,
            max_tokens=max_tokens,
            min_confidence=min_confidence,
        )
        result = engine.search(query, options)
        return result.to_agent_context(max_tokens=max_tokens)

    @app.tool()
    async def memory_store(
        content: str,
        source_type: str = "agent_inference",
        confidence: float = 0.7,
    ) -> str:
        """Store new factual information in persistent memory.

        Use when new facts about people/orgs/projects are shared.
        Do NOT store: greetings, opinions, or speculative statements.

        Args:
            content: Factual content to store
            source_type: One of: direct_speech, document, api, web, agent_inference, user_input
            confidence: How confident (0.0-1.0)
        """
        engine = _get_engine()
        source = Source(
            type=_safe_source_type(source_type),
            confidence=confidence,
            channel="mcp",
        )
        result = engine.store(content, source=source)
        return json.dumps(result.to_dict(), ensure_ascii=False)

    @app.tool()
    async def memory_get_entity(name: str) -> str:
        """Get all known information about a specific entity.

        Args:
            name: Entity name to look up
        """
        engine = _get_engine()
        entity = engine.get_entity(name)
        if not entity:
            return f"No entity found with name '{name}'."
        facts = engine.get_facts(name)
        lines = [
            f"Name: {entity.name}",
            f"Type: {entity.entity_type}",
            f"Tier: {entity.tier.value}",
        ]
        if entity.summary:
            lines.append(f"Summary: {entity.summary}")
        if entity.state.role:
            lines.append(f"Role: {entity.state.role}")
        if entity.state.affiliation:
            lines.append(f"Affiliation: {entity.state.affiliation}")
        if facts:
            lines.append(f"Facts ({len(facts)}):")
            # Only the first ten facts are listed to keep the reply short.
            for f in facts[:10]:
                lines.append(f"  - {f.raw_text} [{f.confidence:.0%}]")
        return "\n".join(lines)

    @app.tool()
    async def memory_list_entities(
        entity_type: str = "",
        tier: str = "",
        limit: int = 20,
    ) -> str:
        """List known entities with optional filters.

        Args:
            entity_type: Filter by type (person, organization, project, concept)
            tier: Filter by tier (core, recall, archival)
            limit: Max entries to return
        """
        engine = _get_engine()
        from engram.models.entity import Tier

        tier_filter = None
        if tier:
            try:
                tier_filter = Tier(tier)
            except ValueError:
                # Report a bad filter to the caller instead of failing the tool call.
                valid = ", ".join(t.value for t in Tier)
                return f"Unknown tier '{tier}'. Expected one of: {valid}."

        entities = engine.list_entities(
            tier=tier_filter,
            entity_type=entity_type or None,
            limit=limit,
        )
        if not entities:
            return "No entities found."
        return "\n".join(
            f"- {e.name} ({e.entity_type}, {e.tier.value})" for e in entities
        )

    @app.tool()
    async def memory_session_start(agent_id: str = "claude-code") -> str:
        """Start a new memory session. Call at the beginning of a conversation.

        Args:
            agent_id: Identifier for the calling agent
        """
        engine = _get_engine()
        session = engine.start_session(agent_id)
        context = engine.get_session_context(agent_id)
        return json.dumps({
            "session_id": session.session_id,
            "entity_count": context["entity_count"],
            "previous_summary": context.get("previous_summary", ""),
        }, ensure_ascii=False)

    @app.tool()
    async def memory_session_end(session_id: str, summary: str = "") -> str:
        """End the current memory session.

        Args:
            session_id: Session ID to end
            summary: Brief summary of what was discussed
        """
        engine = _get_engine()
        session = engine.end_session(session_id, summary=summary or None)
        return f"Session ended. Duration: {session.duration_minutes or 0}min."

    @app.tool()
    async def memory_resolve_conflict(conflict_id: str, resolution: str) -> str:
        """Resolve a data conflict in memory.

        Args:
            conflict_id: Conflict ID to resolve
            resolution: One of: accept_new, keep_old, merge
        """
        engine = _get_engine()
        engine.resolve_conflict(conflict_id, resolution)
        return f"Conflict {conflict_id[:8]} resolved: {resolution}"

    # Run server (blocks until the stdio stream is closed).
    app.run(transport="stdio")
210
+
211
+
212
# Script entry point: lets the module be launched with
# `python -m engram.integrations.mcp_server`, using run_server()'s defaults.
if __name__ == "__main__":
    run_server()
@@ -0,0 +1,194 @@
1
+ """Programmatic Python SDK — clean wrapper around MemoryEngine.
2
+
3
+ Usage:
4
+ from engram.integrations.sdk import EngramSDK
5
+
6
+ sdk = EngramSDK()
7
+ sdk.store("Simon Kim is the CEO of Hashed.")
8
+ results = sdk.search("CEO")
9
+ sdk.close()
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from typing import Optional
15
+
16
+ from engram.config import EngramConfig
17
+ from engram.engine import MemoryEngine, StoreResult
18
+ from engram.models.entity import Entity, Tier
19
+ from engram.models.quality import ValidationResult
20
+ from engram.models.search import SearchOptions, SearchResult
21
+ from engram.models.session import Session
22
+ from engram.models.source import Source, SourceType
23
+
24
+
25
class EngramSDK:
    """High-level SDK for programmatic access to Engram.

    Provides a clean API without CLI concerns.
    All methods return structured objects — no print().

    The SDK tracks at most one open session at a time. ``store`` and
    ``add_fact`` attach that session's id to what they write. The instance
    can be used as a context manager; leaving the ``with`` block calls
    ``close()``.
    """

    def __init__(self, config: Optional[EngramConfig] = None):
        """Create the underlying ``MemoryEngine`` from *config* (may be None)."""
        self.engine = MemoryEngine(config)
        self._current_session: Optional[Session] = None

    def close(self) -> None:
        """End the open session (if any) and close the engine.

        The engine is closed even if ending the session raises.
        """
        try:
            if self._current_session:
                self.end_session()
        finally:
            self.engine.close()

    def __enter__(self) -> EngramSDK:
        return self

    def __exit__(self, *args) -> None:
        self.close()

    # ── Session ──

    def start_session(self, agent_id: str = "sdk") -> Session:
        """Start a session for *agent_id* and make it the current one.

        A session that is still open is ended first. Previously it was simply
        overwritten, so it could never be ended through this SDK.
        """
        if self._current_session is not None:
            self.end_session()
        self._current_session = self.engine.start_session(agent_id)
        return self._current_session

    def end_session(self, summary: Optional[str] = None) -> Optional[Session]:
        """End the current session and return it, or return None if none is open."""
        if self._current_session is None:
            return None
        session = self.engine.end_session(self._current_session.session_id, summary)
        self._current_session = None
        return session

    @property
    def session_id(self) -> Optional[str]:
        """Id of the current session, or None when no session is open."""
        return self._current_session.session_id if self._current_session else None

    # ── Store & Retrieve ──

    def store(
        self,
        text: str,
        source_type: str = "user_input",
        confidence: float = 1.0,
        author: str = "",
    ) -> StoreResult:
        """Store free text through the engine, tagged with an "sdk" source.

        Raises:
            ValueError: if *source_type* is not a valid ``SourceType`` value.
        """
        source = Source(
            type=SourceType(source_type),
            confidence=confidence,
            author=author,
            channel="sdk",
        )
        return self.engine.store(text, source=source, session_id=self.session_id)

    def search(
        self,
        query: str,
        max_results: int = 10,
        max_tokens: int = 500,
        min_confidence: float = 0.0,
    ) -> SearchResult:
        """Run an engine search for *query* with the given limits."""
        options = SearchOptions(
            query=query,
            max_results=max_results,
            max_tokens=max_tokens,
            min_confidence=min_confidence,
        )
        return self.engine.search(query, options)

    def get_entity(self, name: str) -> Optional[Entity]:
        """Return the engine's lookup result for the entity named *name*."""
        return self.engine.get_entity(name)

    def get_facts(self, entity_name: str) -> list:
        """Return the engine's facts for *entity_name*."""
        return self.engine.get_facts(entity_name)

    def add_fact(
        self,
        entity_name: str,
        fact_text: str,
        predicate: str = "attribute",
        source_type: str = "user_input",
        confidence: float = 1.0,
    ) -> ValidationResult:
        """Add a single fact to an entity, tagged with an "sdk" source.

        Raises:
            ValueError: if *source_type* is not a valid ``SourceType`` value.
        """
        source = Source(type=SourceType(source_type), confidence=confidence, channel="sdk")
        return self.engine.add_fact(
            entity_name, fact_text, predicate=predicate,
            source=source, session_id=self.session_id,
        )

    def create_entity(
        self,
        name: str,
        entity_type: str = "person",
        tier: str = "recall",
        summary: str = "",
    ) -> Entity:
        """Create an entity.

        Raises:
            ValueError: if *tier* is not a valid ``Tier`` value.
        """
        return self.engine.create_entity(name, entity_type, Tier(tier), summary)

    def list_entities(self, entity_type: str = "", tier: str = "", limit: int = 100) -> list[Entity]:
        """List entities; empty-string filters mean "no filter".

        Raises:
            ValueError: if *tier* is non-empty and not a valid ``Tier`` value.
        """
        return self.engine.list_entities(
            tier=Tier(tier) if tier else None,
            entity_type=entity_type or None,
            limit=limit,
        )

    # ── Context ──

    def get_context(self, agent_id: str = "sdk") -> dict:
        """Return the engine's session context for *agent_id*."""
        return self.engine.get_session_context(agent_id)

    def health(self) -> dict:
        """Return the engine's health-check report."""
        return self.engine.health_check()

    # ── Tool Schemas (for OpenAI function calling) ──

    @staticmethod
    def get_tool_schemas() -> list[dict]:
        """Return OpenAI-format function calling schemas.

        Covers three tools: ``memory_search``, ``memory_store`` and
        ``memory_get_entity``.
        """
        return [
            {
                "type": "function",
                "function": {
                    "name": "memory_search",
                    "description": "Search persistent memory for relevant context",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "query": {"type": "string", "description": "Search query"},
                            "max_results": {"type": "integer", "default": 5},
                            "max_tokens": {"type": "integer", "default": 500},
                            # search() accepts this filter, so advertise it too.
                            "min_confidence": {"type": "number", "default": 0.0},
                        },
                        "required": ["query"],
                    },
                },
            },
            {
                "type": "function",
                "function": {
                    "name": "memory_store",
                    "description": "Store new factual information in memory",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "content": {"type": "string", "description": "Content to store"},
                            "source_type": {"type": "string", "default": "agent_inference"},
                            "confidence": {"type": "number", "default": 0.7},
                        },
                        "required": ["content"],
                    },
                },
            },
            {
                "type": "function",
                "function": {
                    "name": "memory_get_entity",
                    "description": "Get all info about a specific entity",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "name": {"type": "string", "description": "Entity name"},
                        },
                        "required": ["name"],
                    },
                },
            },
        ]
@@ -0,0 +1,19 @@
1
+ """Engram data models — pure dataclasses with no business logic."""
2
+
3
+ from engram.models.entity import Entity, EntityState, Tier
4
+ from engram.models.fact import Fact, FactStatus
5
+ from engram.models.source import Source, SourceType
6
+ from engram.models.relation import Relation
7
+ from engram.models.session import Session, SessionEvent
8
+ from engram.models.search import SearchResult, SearchHit, SearchOptions
9
+ from engram.models.quality import ValidationResult, ConflictInfo, PIIMatch, Action
10
+
11
# Public API of the models package: the names re-exported from the
# submodules imported above, one line per submodule.
__all__ = [
    "Entity", "EntityState", "Tier",
    "Fact", "FactStatus",
    "Source", "SourceType",
    "Relation",
    "Session", "SessionEvent",
    "SearchResult", "SearchHit", "SearchOptions",
    "ValidationResult", "ConflictInfo", "PIIMatch", "Action",
]
@@ -0,0 +1,72 @@
1
+ """Entity data models."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import uuid
6
+ from dataclasses import dataclass, field
7
+ from datetime import datetime
8
+ from enum import Enum
9
+ from typing import Optional
10
+
11
+
12
class Tier(str, Enum):
    """Tier label of an entity.

    Mixing in ``str`` makes each member compare equal to its string value
    and lets ``Tier("core")`` look a member up from that value.
    """

    # NOTE(review): the names suggest a hot-to-cold ordering
    # (core > recall > archival); confirm against the code that uses tiers.
    CORE = "core"
    RECALL = "recall"
    ARCHIVAL = "archival"
16
+
17
+
18
@dataclass
class EntityState:
    """Structured key-value attributes of an entity.

    The four named attributes are optional strings; anything else goes in
    the free-form ``custom`` mapping.
    """

    role: Optional[str] = None
    affiliation: Optional[str] = None
    location: Optional[str] = None
    email: Optional[str] = None
    custom: dict[str, str] = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Return a plain dict containing only the attributes that are set.

        Named attributes equal to None and an empty ``custom`` mapping are
        omitted. ``custom`` is copied, so mutating the result does not alter
        this state object.
        """
        result: dict = {}
        for key in ("role", "affiliation", "location", "email"):
            value = getattr(self, key)
            if value is not None:
                result[key] = value
        if self.custom:
            result["custom"] = dict(self.custom)
        return result

    @classmethod
    def from_dict(cls, data: dict) -> EntityState:
        """Build a state from a dict such as the one produced by ``to_dict``.

        Missing keys become None. A missing or null ``custom`` entry becomes
        an empty dict. ``custom`` is copied so the new state does not share
        a mutable mapping with *data*.
        """
        return cls(
            role=data.get("role"),
            affiliation=data.get("affiliation"),
            location=data.get("location"),
            email=data.get("email"),
            custom=dict(data.get("custom") or {}),
        )
51
+
52
+
53
@dataclass
class Entity:
    """A tracked entity: person, organization, project, or concept."""

    # Unique id; defaults to a freshly generated UUID4 rendered as a string.
    id: str = field(default_factory=lambda: str(uuid.uuid4()))
    name: str = ""
    # Free-form type label; defaults to "person".
    entity_type: str = "person"
    # Structured attributes; each entity gets its own EntityState instance.
    state: EntityState = field(default_factory=EntityState)
    tier: Tier = Tier.RECALL
    summary: str = ""
    aliases: list[str] = field(default_factory=list)
    # Both timestamps default to datetime.now() (naive, no tzinfo). They are
    # produced by two separate calls, so on a new instance they may differ by
    # a few microseconds.
    created_at: datetime = field(default_factory=datetime.now)
    updated_at: datetime = field(default_factory=datetime.now)
    # Access bookkeeping, maintained by touch().
    access_count: int = 0
    last_accessed: Optional[datetime] = None

    def touch(self) -> None:
        """Record an access.

        Increments ``access_count`` and sets ``last_accessed`` to the current
        naive local time. ``updated_at`` is left unchanged.
        """
        self.access_count += 1
        self.last_accessed = datetime.now()