memorytrace 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. engram/__init__.py +8 -0
  2. engram/__main__.py +6 -0
  3. engram/cli/__init__.py +1 -0
  4. engram/cli/app.py +291 -0
  5. engram/cli/formatters.py +90 -0
  6. engram/cli/simple.py +267 -0
  7. engram/config.py +72 -0
  8. engram/engine.py +612 -0
  9. engram/exceptions.py +41 -0
  10. engram/extraction/__init__.py +6 -0
  11. engram/extraction/base.py +20 -0
  12. engram/extraction/llm_extractor.py +197 -0
  13. engram/extraction/ner/__init__.py +7 -0
  14. engram/extraction/ner/cjk.py +63 -0
  15. engram/extraction/ner/english.py +109 -0
  16. engram/extraction/ner/korean.py +106 -0
  17. engram/extraction/regex_extractor.py +188 -0
  18. engram/integrations/__init__.py +1 -0
  19. engram/integrations/mcp_server.py +213 -0
  20. engram/integrations/sdk.py +194 -0
  21. engram/models/__init__.py +19 -0
  22. engram/models/entity.py +72 -0
  23. engram/models/fact.py +58 -0
  24. engram/models/quality.py +61 -0
  25. engram/models/relation.py +26 -0
  26. engram/models/search.py +96 -0
  27. engram/models/session.py +53 -0
  28. engram/models/source.py +73 -0
  29. engram/quality/__init__.py +8 -0
  30. engram/quality/confidence.py +38 -0
  31. engram/quality/conflict.py +79 -0
  32. engram/quality/decay.py +28 -0
  33. engram/quality/gate.py +120 -0
  34. engram/quality/pii.py +80 -0
  35. engram/search/__init__.py +13 -0
  36. engram/search/base.py +20 -0
  37. engram/search/fts5_search.py +210 -0
  38. engram/search/hybrid.py +99 -0
  39. engram/search/semantic.py +186 -0
  40. engram/search/tokenizer.py +85 -0
  41. engram/session/__init__.py +6 -0
  42. engram/session/context.py +87 -0
  43. engram/session/manager.py +152 -0
  44. engram/session/working_memory.py +57 -0
  45. engram/storage/__init__.py +6 -0
  46. engram/storage/base.py +63 -0
  47. engram/storage/markdown_export.py +144 -0
  48. engram/storage/migrations.py +30 -0
  49. engram/storage/sqlite_store.py +615 -0
  50. memorytrace-0.1.0.dist-info/METADATA +138 -0
  51. memorytrace-0.1.0.dist-info/RECORD +54 -0
  52. memorytrace-0.1.0.dist-info/WHEEL +4 -0
  53. memorytrace-0.1.0.dist-info/entry_points.txt +3 -0
  54. memorytrace-0.1.0.dist-info/licenses/LICENSE +21 -0
engram/models/fact.py ADDED
@@ -0,0 +1,58 @@
1
+ """Fact data models with temporal dimension."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import uuid
6
+ from dataclasses import dataclass, field
7
+ from datetime import datetime
8
+ from enum import Enum
9
+ from typing import Optional
10
+
11
+ from engram.models.source import Source
12
+
13
+
14
class FactStatus(str, Enum):
    """Lifecycle state attached to a fact.

    The enum mixes in ``str``, so each member compares equal to its raw
    string value (e.g. ``FactStatus.VERIFIED == "verified"``).
    """

    UNVERIFIED = "unverified"
    VERIFIED = "verified"
    CONFLICTED = "conflicted"
    EXPIRED = "expired"
    RETRACTED = "retracted"
20
+
21
+
22
@dataclass
class Fact:
    """One statement about an entity, carrying provenance and a validity window.

    ``confidence`` is clamped into ``[0.0, 1.0]`` right after construction.
    """

    id: str = field(default_factory=lambda: str(uuid.uuid4()))
    entity_id: str = ""
    subject: str = ""
    predicate: str = ""
    object: str = ""
    raw_text: str = ""
    source: Source = field(default_factory=Source)
    confidence: float = 0.5
    status: FactStatus = FactStatus.UNVERIFIED
    valid_from: Optional[datetime] = None
    valid_to: Optional[datetime] = None  # None means the fact is still open-ended
    superseded_by: Optional[str] = None  # id of the replacing fact, if any
    created_at: datetime = field(default_factory=datetime.now)

    def __post_init__(self) -> None:
        # Cap from above first, then from below, so the value lands in [0, 1].
        capped = min(1.0, self.confidence)
        self.confidence = max(0.0, capped)

    @property
    def is_current(self) -> bool:
        """True while the fact has no end date, no successor, and a live status."""
        open_ended = self.valid_to is None and self.superseded_by is None
        dead_statuses = (FactStatus.EXPIRED, FactStatus.RETRACTED)
        return open_ended and self.status not in dead_statuses

    def supersede(self, new_fact_id: str) -> None:
        """Close this fact in favour of ``new_fact_id``.

        Records the successor id, stamps ``valid_to`` with the current local
        time, and moves the status to ``EXPIRED``.
        """
        self.status = FactStatus.EXPIRED
        self.valid_to = datetime.now()
        self.superseded_by = new_fact_id
@@ -0,0 +1,61 @@
1
+ """Quality gate data models."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from enum import Enum
7
+ from typing import Optional
8
+
9
+ from engram.models.fact import Fact
10
+
11
+
12
class Action(str, Enum):
    """Outcome a quality-gate validation can assign to a fact.

    Mixes in ``str`` so members compare equal to their raw string values.
    """

    ACCEPT = "accept"
    QUARANTINE = "quarantine"
    FLAG_CONFLICT = "flag_conflict"
    REJECT = "reject"
17
+
18
+
19
@dataclass
class ConflictInfo:
    """Describes one clash between a stored fact and an incoming fact."""

    conflict_id: str = ""
    # The two facts involved; both default to None until populated.
    existing_fact: Optional[Fact] = None
    new_fact: Optional[Fact] = None
    # Free-form labels. Values mentioned by the original author:
    #   conflict_type:        value_change, contradiction
    #   suggested_resolution: supersede, manual_review, keep_old
    conflict_type: str = ""
    suggested_resolution: str = ""
28
+
29
+
30
@dataclass
class PIIMatch:
    """One occurrence of personally identifiable information found in text."""

    # Category label, e.g. credit_card, ssn, api_key, email, phone, rrn
    pii_type: str = ""
    # Span of the match — presumably offsets into the scanned text; confirm
    # against the detector that produces these.
    start: int = 0
    end: int = 0
    # The matched text as it appeared before masking.
    original: str = ""
38
+
39
+
40
@dataclass
class ValidationResult:
    """Verdict produced by the quality gate for a single fact."""

    action: Action = Action.ACCEPT
    reason: str = ""
    confidence: float = 0.0
    conflicts: list[ConflictInfo] = field(default_factory=list)
    pii_matches: list[PIIMatch] = field(default_factory=list)
    masked_text: Optional[str] = None  # text after PII masking, when masking happened

    def _action_is(self, expected: Action) -> bool:
        # Equality (not identity) so a raw string action still matches.
        return self.action == expected

    @property
    def is_accepted(self) -> bool:
        """True when the verdict is ``Action.ACCEPT``."""
        return self._action_is(Action.ACCEPT)

    @property
    def is_quarantined(self) -> bool:
        """True when the verdict is ``Action.QUARANTINE``."""
        return self._action_is(Action.QUARANTINE)

    @property
    def has_conflicts(self) -> bool:
        """True when the verdict is ``Action.FLAG_CONFLICT``.

        This looks at ``action`` only, not at whether ``conflicts`` is non-empty.
        """
        return self._action_is(Action.FLAG_CONFLICT)
@@ -0,0 +1,26 @@
1
+ """Relation model for directed entity graph edges."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import uuid
6
+ from dataclasses import dataclass, field
7
+ from datetime import datetime
8
+ from typing import Optional
9
+
10
+
11
@dataclass
class Relation:
    """Directed edge from one entity to another."""

    id: str = field(default_factory=lambda: str(uuid.uuid4()))
    from_entity_id: str = ""
    to_entity_id: str = ""
    # Edge label such as CEO_OF, INVESTED_IN, WORKS_WITH, MEMBER_OF, ...
    relation_type: str = ""
    metadata: dict = field(default_factory=dict)
    valid_from: Optional[datetime] = None
    valid_to: Optional[datetime] = None  # left as None while the edge still holds
    created_at: datetime = field(default_factory=datetime.now)

    @property
    def is_current(self) -> bool:
        """True as long as no end date has been recorded."""
        has_end_date = self.valid_to is not None
        return not has_end_date
@@ -0,0 +1,96 @@
1
+ """Search-related data models."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from dataclasses import dataclass, field
7
+ from datetime import datetime
8
+ from typing import Optional
9
+
10
+ from engram.models.entity import Entity
11
+ from engram.models.fact import Fact
12
+
13
+
14
@dataclass
class SearchOptions:
    """Parameters that shape a memory search."""

    query: str = ""
    max_results: int = 10
    max_tokens: int = 500  # token budget for the context handed to an agent
    min_confidence: float = 0.0
    # Optional filters; empty lists are the defaults.
    entity_types: list[str] = field(default_factory=list)
    tiers: list[str] = field(default_factory=list)
    # Optional date bounds; None is the default for both.
    date_from: Optional[datetime] = None
    date_to: Optional[datetime] = None
    include_archived: bool = False
27
+
28
+
29
@dataclass
class SearchHit:
    """One matched entity together with its supporting data."""

    entity: Entity  # required; the only field without a default
    facts: list[Fact] = field(default_factory=list)
    relevance_score: float = 0.0
    snippet: str = ""
    token_count: int = 0
38
+
39
+
40
@dataclass
class SearchResult:
    """Complete search response."""

    query: str = ""
    hits: list[SearchHit] = field(default_factory=list)
    total_count: int = 0
    search_time_ms: float = 0.0
    total_tokens: int = 0

    @staticmethod
    def _format_hit(hit: SearchHit) -> str:
        """Render one hit as a single compact line for LLM consumption."""
        entity = hit.entity
        entry = f"[{entity.name}] ({entity.entity_type})"
        if entity.state.role:
            entry += f" Role: {entity.state.role}"
        if entity.state.affiliation:
            entry += f" @ {entity.state.affiliation}"
        if hit.snippet:
            entry += f" | {hit.snippet}"
        return entry

    def to_agent_context(self, max_tokens: int = 500) -> str:
        """Serialize to compact text for LLM context injection.

        Hits are rendered one per line, in order, until the next one would
        exceed ``max_tokens``. Tokens are estimated as ``len(text) // 4``.

        If even the first hit does not fit, it is truncated to the budget
        instead of being dropped: previously that case returned an empty
        string even though results existed.

        Args:
            max_tokens: Approximate token budget for the whole output.

        Returns:
            Newline-joined entries, or a "No results found" message when
            there are no hits.
        """
        if not self.hits:
            return f"No results found for '{self.query}'."

        parts: list[str] = []
        tokens_used = 0
        for hit in self.hits:
            entry = self._format_hit(hit)
            # Rough token estimate: 1 token ≈ 4 chars
            entry_tokens = len(entry) // 4
            if tokens_used + entry_tokens > max_tokens:
                if not parts and max_tokens > 0:
                    # Keep a budget-sized prefix of the top hit rather than
                    # handing the agent nothing at all.
                    parts.append(entry[: max_tokens * 4])
                break
            parts.append(entry)
            tokens_used += entry_tokens

        return "\n".join(parts)

    def to_dict(self) -> dict:
        """Structured dict for MCP/SDK responses.

        Contains ``query``, ``total_count``, ``search_time_ms`` and one
        entry per hit with the entity name/type, score, snippet and facts.
        """
        return {
            "query": self.query,
            "total_count": self.total_count,
            "search_time_ms": self.search_time_ms,
            "hits": [
                {
                    "entity_name": h.entity.name,
                    "entity_type": h.entity.entity_type,
                    "relevance_score": h.relevance_score,
                    "snippet": h.snippet,
                    "facts": [
                        {"text": f.raw_text, "confidence": f.confidence, "status": f.status.value}
                        for f in h.facts
                    ],
                }
                for h in self.hits
            ],
        }

    def to_json(self, indent: int = 2) -> str:
        """Return ``to_dict()`` as JSON text, keeping non-ASCII characters as-is."""
        return json.dumps(self.to_dict(), ensure_ascii=False, indent=indent)
@@ -0,0 +1,53 @@
1
+ """Session and session event models."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import uuid
6
+ from dataclasses import dataclass, field
7
+ from datetime import datetime
8
+ from typing import Optional
9
+
10
+
11
@dataclass
class Session:
    """One conversation between a user and an AI agent."""

    session_id: str = field(default_factory=lambda: str(uuid.uuid4()))
    agent_id: str = "default"
    started_at: datetime = field(default_factory=datetime.now)
    ended_at: Optional[datetime] = None  # stays None while the session is open
    parent_session_id: Optional[str] = None
    # Lists of string ids; all start out empty.
    entities_accessed: list[str] = field(default_factory=list)
    entities_modified: list[str] = field(default_factory=list)
    facts_added: list[str] = field(default_factory=list)
    summary: Optional[str] = None
    metadata: dict = field(default_factory=dict)

    @property
    def is_active(self) -> bool:
        """True until an end time has been recorded."""
        return self.ended_at is None

    @property
    def duration_seconds(self) -> Optional[float]:
        """Elapsed seconds between start and end, or None while still active."""
        if self.is_active:
            return None
        elapsed = self.ended_at - self.started_at
        return elapsed.total_seconds()

    @property
    def duration_minutes(self) -> Optional[float]:
        """Duration in minutes rounded to one decimal, or None while still active."""
        seconds = self.duration_seconds
        return None if seconds is None else round(seconds / 60, 1)
42
+
43
+
44
@dataclass
class SessionEvent:
    """A single auditable action recorded during a session."""

    # None by default; the original comment says the DB assigns it (auto-increment).
    id: Optional[int] = None
    session_id: str = ""
    # Kind of action: search, read, write, update, delete
    event_type: str = ""
    # What the action touched: an entity_id or a query string
    target: str = ""
    detail: dict = field(default_factory=dict)
    timestamp: datetime = field(default_factory=datetime.now)
@@ -0,0 +1,73 @@
1
+ """Source and provenance models."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from datetime import datetime
7
+ from enum import Enum
8
+ from typing import Optional
9
+
10
+
11
class SourceType(str, Enum):
    """Kind of channel a piece of information came from.

    Mixes in ``str`` so members compare equal to their raw string values.
    """

    DIRECT_SPEECH = "direct_speech"
    DOCUMENT = "document"
    API = "api"
    WEB = "web"
    AGENT_INFERENCE = "agent_inference"
    USER_INPUT = "user_input"
18
+
19
+
20
# Baseline trust assigned to each source type, before the submitter's own
# confidence is factored in. Values are fractions in [0, 1].
_SOURCE_BASE_TRUST: dict[SourceType, float] = {
    SourceType.DIRECT_SPEECH: 0.95,
    SourceType.DOCUMENT: 0.85,
    SourceType.USER_INPUT: 0.90,
    SourceType.API: 0.80,
    SourceType.WEB: 0.60,
    SourceType.AGENT_INFERENCE: 0.40,
}
29
+
30
+
31
@dataclass
class Source:
    """Provenance metadata for a fact.

    ``confidence`` is clamped into ``[0.0, 1.0]`` right after construction.
    """

    type: SourceType = SourceType.USER_INPUT
    author: str = ""
    channel: str = ""  # meeting, email, slack, cli, mcp
    timestamp: datetime = field(default_factory=datetime.now)
    confidence: float = 1.0  # submitter's own confidence (0.0–1.0)
    url: Optional[str] = None
    session_id: Optional[str] = None

    def __post_init__(self) -> None:
        self.confidence = max(0.0, min(1.0, self.confidence))

    @property
    def trust_score(self) -> float:
        """Combined trust: base score for source type × submitter confidence.

        Source types missing from the base table fall back to 0.5.
        """
        base = _SOURCE_BASE_TRUST.get(self.type, 0.5)
        return base * self.confidence

    def to_dict(self) -> dict:
        """Return a JSON-friendly dict; the timestamp is ISO-8601 text."""
        return {
            "type": self.type.value,
            "author": self.author,
            "channel": self.channel,
            "timestamp": self.timestamp.isoformat(),
            "confidence": self.confidence,
            "url": self.url,
            "session_id": self.session_id,
        }

    @classmethod
    def from_dict(cls, data: dict) -> Source:
        """Build a ``Source`` from a dict shaped like ``to_dict()`` output.

        Missing keys fall back to the field defaults. An explicit ``None``
        for ``type`` or ``confidence`` is treated the same as a missing key,
        matching how a null/empty ``timestamp`` was already handled; before,
        those nulls raised ``ValueError`` / ``TypeError``.

        Raises:
            ValueError: if ``type`` is not a known source type or the
                timestamp is not valid ISO-8601 text.
        """
        raw_type = data.get("type")
        raw_confidence = data.get("confidence")
        raw_timestamp = data.get("timestamp")
        return cls(
            type=SourceType("user_input" if raw_type is None else raw_type),
            author=data.get("author", ""),
            channel=data.get("channel", ""),
            timestamp=datetime.fromisoformat(raw_timestamp) if raw_timestamp else datetime.now(),
            confidence=1.0 if raw_confidence is None else raw_confidence,
            url=data.get("url"),
            session_id=data.get("session_id"),
        )
@@ -0,0 +1,8 @@
1
+ """Quality gate system for Engram."""
2
+
3
+ from engram.quality.gate import QualityGate
4
+ from engram.quality.confidence import compute_confidence
5
+ from engram.quality.conflict import ConflictDetector
6
+ from engram.quality.pii import PIIDetector
7
+
8
+ __all__ = ["QualityGate", "compute_confidence", "ConflictDetector", "PIIDetector"]
@@ -0,0 +1,38 @@
1
+ """Confidence scoring for facts based on source type and extraction method."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from engram.models.source import SourceType
6
+
7
# Starting trust level for each source type; the remaining factors in the
# confidence computation scale this value.
_SOURCE_BASE: dict[SourceType, float] = {
    SourceType.DIRECT_SPEECH: 0.95,
    SourceType.USER_INPUT: 0.90,
    SourceType.DOCUMENT: 0.85,
    SourceType.API: 0.80,
    SourceType.WEB: 0.60,
    SourceType.AGENT_INFERENCE: 0.40,
}
16
+
17
# Scaling factor applied according to how the fact was extracted.
_EXTRACTION_MULT: dict[str, float] = dict(
    regex=0.7,
    llm=1.0,
    manual=1.0,
)
23
+
24
+
25
def compute_confidence(
    source_type: SourceType,
    source_confidence: float,
    extraction_method: str = "manual",
) -> float:
    """Derive the final confidence score of a fact.

    The score is ``base_trust(source_type) × source_confidence ×
    extraction_multiplier``. ``source_confidence`` is clamped to
    ``[0.0, 1.0]`` before use, the product is rounded to four decimals,
    and the result is clamped to ``[0.0, 1.0]`` again.

    Unknown source types use a base of 0.5; unknown extraction methods use
    a multiplier of 0.7.
    """

    def _unit_interval(value: float) -> float:
        # Same max/min nesting as everywhere else in the package.
        return max(0.0, min(1.0, value))

    base_trust = _SOURCE_BASE.get(source_type, 0.5)
    multiplier = _EXTRACTION_MULT.get(extraction_method, 0.7)
    score = base_trust * _unit_interval(source_confidence) * multiplier
    return _unit_interval(round(score, 4))
@@ -0,0 +1,79 @@
1
+ """Conflict detection — finds when new facts contradict existing ones."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import uuid
6
+
7
+ from engram.models.fact import Fact
8
+ from engram.models.quality import ConflictInfo
9
+ from engram.storage.base import StorageBackend
10
+
11
+
12
class ConflictDetector:
    """Detects when a new fact contradicts existing facts for the same entity+predicate."""

    def __init__(self, storage: StorageBackend):
        # Only ``storage.get_current_facts(entity_id)`` is used by this class.
        self.storage = storage

    def find_conflicts(self, new_fact: Fact) -> list[ConflictInfo]:
        """Check if new_fact conflicts with any current facts.

        A conflict occurs when a current fact of the same entity has the
        same predicate but a different (normalized) object value. The fact
        carrying ``new_fact``'s own id is skipped.

        Returns:
            One ``ConflictInfo`` per conflicting stored fact, each with a
            fresh ``conflict_id``, ``conflict_type="value_change"`` and a
            suggested resolution.
        """
        existing = self.storage.get_current_facts(new_fact.entity_id)
        conflicts: list[ConflictInfo] = []

        for old in existing:
            if old.id == new_fact.id:
                continue
            if not self._same_predicate(new_fact, old):
                continue
            if self._same_object(new_fact, old):
                continue

            # Same predicate, different value: report it.
            conflicts.append(ConflictInfo(
                conflict_id=str(uuid.uuid4()),
                existing_fact=old,
                new_fact=new_fact,
                conflict_type="value_change",
                suggested_resolution=self._suggest_resolution(old, new_fact),
            ))

        return conflicts

    def is_duplicate(self, new_fact: Fact) -> bool:
        """Check if an identical fact already exists (same entity+predicate+object)."""
        existing = self.storage.get_current_facts(new_fact.entity_id)
        return any(
            self._same_predicate(new_fact, old) and self._same_object(new_fact, old)
            for old in existing
        )

    def _same_predicate(self, a: Fact, b: Fact) -> bool:
        """Check if two facts refer to the same predicate (case-insensitive)."""
        return a.predicate.strip().lower() == b.predicate.strip().lower()

    @staticmethod
    def _normalize_object(value: str) -> str:
        """Lower-case ``value``, drop English articles, and collapse whitespace.

        Removing an article from the middle of a phrase used to leave a
        doubled space ("ceo of  company"), so otherwise-equal values
        compared as different. Whitespace runs are now collapsed to a single
        space. If stripping articles would leave nothing (the value *is* an
        article-like token such as "A"), the un-stripped text is kept so
        distinct values do not all collapse to the empty string.
        """
        import re

        collapsed = " ".join(value.lower().split())
        without_articles = " ".join(re.sub(r"\b(the|a|an)\b", "", collapsed).split())
        return without_articles or collapsed

    def _same_object(self, a: Fact, b: Fact) -> bool:
        """Check if two facts have the same value (case-insensitive, normalized)."""
        return self._normalize_object(a.object) == self._normalize_object(b.object)

    def _suggest_resolution(self, old: Fact, new: Fact) -> str:
        """Suggest how to resolve a conflict based on source trust scores.

        Returns ``"supersede"`` when the new source is more than 0.3 more
        trusted, ``"keep_old"`` when it is more than 0.3 less trusted, and
        ``"manual_review"`` otherwise.
        """
        trust_diff = new.source.trust_score - old.source.trust_score
        if trust_diff > 0.3:
            return "supersede"
        if trust_diff < -0.3:
            return "keep_old"
        return "manual_review"
@@ -0,0 +1,28 @@
1
+ """Time-based decay — identifies stale facts that need review."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from datetime import datetime, timedelta
6
+ from typing import Optional
7
+
8
+ from engram.models.fact import Fact
9
+
10
+
11
def find_stale_facts(
    facts: list[Fact],
    days: int = 90,
    reference_time: Optional[datetime] = None,
) -> list[Fact]:
    """Pick out current facts that were created more than ``days`` ago.

    Args:
        facts: Candidate facts to inspect.
        days: Age, in days, beyond which a fact counts as stale.
        reference_time: Moment to measure age from; a falsy value means
            the current local time.

    Returns:
        The stale facts, in their original order. Facts that are no longer
        current are never included.
    """
    anchor = reference_time or datetime.now()
    threshold = anchor - timedelta(days=days)

    stale: list[Fact] = []
    for fact in facts:
        if not fact.is_current:
            continue
        if fact.created_at < threshold:
            stale.append(fact)
    return stale
engram/quality/gate.py ADDED
@@ -0,0 +1,120 @@
1
+ """Quality gate — pipeline orchestrator for write validation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from engram.config import EngramConfig
6
+ from engram.models.fact import Fact, FactStatus
7
+ from engram.models.quality import Action, ConflictInfo, ValidationResult
8
+ from engram.quality.confidence import compute_confidence
9
+ from engram.quality.conflict import ConflictDetector
10
+ from engram.quality.pii import PIIDetector
11
+ from engram.storage.base import StorageBackend
12
+
13
+
14
class QualityGate:
    """Pipeline: confidence → PII masking → conflict detection → accept/reject/quarantine.

    Intended to sit in front of storage so that writes are validated first.
    """

    def __init__(self, config: EngramConfig, storage: StorageBackend):
        self.config = config
        self.conflict_detector = ConflictDetector(storage)
        self.pii_detector = PIIDetector()
        self.min_confidence = config.min_confidence
        # NOTE(review): stored but not consulted anywhere in this class.
        self.auto_resolve_threshold = config.auto_resolve_threshold
        self.enable_pii = config.enable_pii_detection

    def validate(
        self,
        fact: Fact,
        extraction_method: str = "manual",
    ) -> ValidationResult:
        """Run the full quality gate pipeline on a fact.

        The fact is modified in place: ``confidence`` is overwritten with
        the computed score, PII in ``raw_text``/``subject``/``object`` is
        masked when PII detection is enabled, and ``status`` is set on
        quarantine or unresolved conflict.

        Returns ValidationResult with action:
        - ACCEPT: fact is good, store it
        - QUARANTINE: low confidence, store as unverified
        - FLAG_CONFLICT: conflicts with existing data
        - REJECT: an identical current fact already exists
        """
        # Step 1: Compute confidence
        computed_conf = compute_confidence(
            source_type=fact.source.type,
            source_confidence=fact.source.confidence,
            extraction_method=extraction_method,
        )
        fact.confidence = computed_conf

        # Step 2: PII detection and masking
        pii_matches, masked_text = self._mask_pii(fact)

        def _result(action, reason, conflicts=None):
            # All outcomes share confidence and PII details.
            return ValidationResult(
                action=action,
                reason=reason,
                confidence=computed_conf,
                conflicts=conflicts or [],
                pii_matches=pii_matches,
                masked_text=masked_text,
            )

        # Step 3: Confidence threshold check
        if computed_conf < self.min_confidence:
            fact.status = FactStatus.UNVERIFIED
            return _result(
                Action.QUARANTINE,
                f"Confidence {computed_conf:.2f} below threshold {self.min_confidence:.2f}",
            )

        # Duplicate and conflict checks need an entity to compare against.
        if fact.entity_id:
            # Step 3.5: Duplicate detection
            if self.conflict_detector.is_duplicate(fact):
                return _result(Action.REJECT, "Duplicate fact already exists")

            # Step 4: Conflict detection
            conflicts = self.conflict_detector.find_conflicts(fact)
            if conflicts and not self._try_auto_resolve(conflicts):
                fact.status = FactStatus.CONFLICTED
                return _result(
                    Action.FLAG_CONFLICT,
                    f"Conflicts with {len(conflicts)} existing fact(s)",
                    conflicts,
                )

        # Step 5: Accept
        return _result(Action.ACCEPT, "Passed all quality checks")

    def _mask_pii(self, fact: Fact) -> tuple[list, str | None]:
        """Mask PII on ``fact`` in place and report what was found in ``raw_text``.

        Subject and object are checked whenever PII detection is enabled.
        Previously they were only checked when ``raw_text`` was non-empty,
        so a fact with no raw text could keep PII in its object or subject.

        Returns:
            ``(pii_matches, masked_text)`` where ``pii_matches`` is the
            scan result for ``raw_text`` (``[]`` if not scanned) and
            ``masked_text`` is the masked raw text, or None when nothing
            in ``raw_text`` was masked.
        """
        pii_matches: list = []
        masked_text = None
        if not self.enable_pii:
            return pii_matches, masked_text

        if fact.raw_text:
            pii_matches = self.pii_detector.scan(fact.raw_text)
            if pii_matches:
                masked_text = self.pii_detector.mask(fact.raw_text, pii_matches)
                fact.raw_text = masked_text

        # Also mask subject/object if they contain PII
        if self.pii_detector.has_pii(fact.object):
            fact.object = self.pii_detector.mask(fact.object)
        if self.pii_detector.has_pii(fact.subject):
            fact.subject = self.pii_detector.mask(fact.subject)

        return pii_matches, masked_text

    def _try_auto_resolve(self, conflicts: list[ConflictInfo]) -> bool:
        """Report whether every conflict can be auto-resolved.

        A conflict counts as resolvable only when its suggested resolution
        is ``"supersede"``. This method only answers the question; it does
        not modify or supersede any fact itself.
        """
        return all(c.suggested_resolution == "supersede" for c in conflicts)