mcal-ai 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mcal/mcal.py ADDED
@@ -0,0 +1,1697 @@
1
+ """
2
+ MCAL: Memory-Context Alignment Layer
3
+
4
+ Main interface providing a standalone memory architecture for AI agents.
5
+
6
+ Architecture:
7
+ ┌─────────────────────────────────────────────┐
8
+ │ MCAL │
9
+ │ ┌─────────────────────────────────────┐ │
10
+ │ │ UnifiedExtractor (Single-pass) │ │
11
+ │ │ - Entities, Goals, Decisions │ │
12
+ │ │ - Relationships, Embeddings │ │
13
+ │ └─────────────────────────────────────┘ │
14
+ │ │ │
15
+ │ ▼ │
16
+ │ ┌─────────────────────────────────────┐ │
17
+ │ │ UnifiedGraph + VectorIndex │ │
18
+ │ │ (Graph storage + semantic search) │ │
19
+ │ └─────────────────────────────────────┘ │
20
+ └─────────────────────────────────────────────┘
21
+
22
+ Usage:
23
+ from mcal import MCAL
24
+
25
+ # Initialize (no external memory service required)
26
+ mcal = MCAL(
27
+ openai_api_key="..." # or anthropic_api_key, or use bedrock
28
+ )
29
+
30
+ # Add conversation (extracts facts + intents + reasoning)
31
+ result = await mcal.add(
32
+ messages=[
33
+ {"role": "user", "content": "Let's build a fraud detection system"},
34
+ {"role": "assistant", "content": "Great! What's your data source?"}
35
+ ],
36
+ user_id="user_123"
37
+ )
38
+
39
+ # Search with goal-awareness
40
+ context = await mcal.search(
41
+ query="What database should we use?",
42
+ user_id="user_123"
43
+ )
44
+
45
+ # Get assembled context for LLM
46
+ prompt_context = await mcal.get_context(
47
+ query="What's our next step?",
48
+ user_id="user_123",
49
+ max_tokens=4000
50
+ )
51
+ """
52
+
53
+ from __future__ import annotations
54
+
55
+ import asyncio
56
+ import logging
57
+ import time
58
+ from typing import Any, Optional, AsyncIterator
59
+ from dataclasses import dataclass, field
60
+
61
+ from anthropic import Anthropic
62
+ from openai import OpenAI, RateLimitError as OpenAIRateLimitError, APIError as OpenAIAPIError
63
+
64
+ from pathlib import Path
65
+
66
+ from .backends import MemoryBackend, MemoryEntry # MemoryEntry still needed for compatibility
67
+ from .core.models import (
68
+ IntentGraph,
69
+ IntentNode,
70
+ IntentType,
71
+ IntentStatus,
72
+ DecisionTrail,
73
+ Turn,
74
+ Session,
75
+ RetrievalConfig,
76
+ )
77
+ from .core.intent_tracker import IntentTracker
78
+ from .core.reasoning_store import ReasoningStore
79
+ from .core.goal_retriever import ContextAssembler
80
+ from .core.storage import MCALStorage
81
+ from .core.extraction_cache import ExtractionCache, CacheStats
82
+ from .core.unified_extractor import (
83
+ UnifiedExtractor,
84
+ UnifiedGraph,
85
+ NodeType,
86
+ EdgeType,
87
+ graph_to_memories,
88
+ memories_to_context_string,
89
+ )
90
+ from .core.retry import (
91
+ llm_retry,
92
+ classify_http_error,
93
+ LLMRateLimitError,
94
+ LLMServerError,
95
+ )
96
+ from .providers.bedrock import BedrockProvider
97
+ from .core.streaming import (
98
+ StreamEvent,
99
+ StreamEventType,
100
+ ExtractionPhase,
101
+ StreamProgress,
102
+ event_started,
103
+ event_phase_started,
104
+ event_phase_complete,
105
+ event_fact_extracted,
106
+ event_intent_extracted,
107
+ event_decision_extracted,
108
+ event_cache_hit,
109
+ event_error,
110
+ event_complete,
111
+ )
112
+
113
+ logger = logging.getLogger(__name__)
114
+
115
+
116
+ # =============================================================================
117
+ # LLM Client Wrapper
118
+ # =============================================================================
119
+
120
+ class AnthropicClient:
121
+ """Wrapper around Anthropic client for MCAL components."""
122
+
123
+ def __init__(self, api_key: str, model: str = "claude-sonnet-4-20250514"):
124
+ self.client = Anthropic(api_key=api_key)
125
+ self.model = model
126
+
127
+ @llm_retry(max_attempts=3, min_wait=1.0, max_wait=10.0)
128
+ async def complete(self, prompt: str, system: Optional[str] = None) -> str:
129
+ """
130
+ Generate completion with automatic retry on transient failures.
131
+
132
+ Retries on rate limits and server errors with exponential backoff.
133
+ """
134
+ messages = [{"role": "user", "content": prompt}]
135
+
136
+ try:
137
+ response = self.client.messages.create(
138
+ model=self.model,
139
+ max_tokens=4096,
140
+ system=system or "You are a helpful assistant.",
141
+ messages=messages
142
+ )
143
+ return response.content[0].text
144
+ except Exception as e:
145
+ # Anthropic SDK raises specific exceptions we can classify
146
+ error_str = str(e).lower()
147
+ if 'rate' in error_str or '429' in error_str:
148
+ raise LLMRateLimitError(f"Anthropic rate limit: {e}")
149
+ elif '500' in error_str or '502' in error_str or '503' in error_str or '504' in error_str:
150
+ raise LLMServerError(f"Anthropic server error: {e}")
151
+ raise
152
+
153
+
154
+ class OpenAIClient:
155
+ """Wrapper around OpenAI client for MCAL components."""
156
+
157
+ def __init__(self, api_key: str, model: str = "gpt-4o"):
158
+ self.client = OpenAI(api_key=api_key)
159
+ self.model = model
160
+ # Token tracking
161
+ self.total_prompt_tokens = 0
162
+ self.total_completion_tokens = 0
163
+
164
+ @llm_retry(max_attempts=3, min_wait=1.0, max_wait=10.0)
165
+ async def complete(self, prompt: str, system: Optional[str] = None) -> str:
166
+ """
167
+ Generate completion with automatic retry on transient failures.
168
+
169
+ Retries on rate limits (429) and server errors (5xx) with exponential backoff.
170
+ """
171
+ messages = [{"role": "user", "content": prompt}]
172
+ if system:
173
+ messages.insert(0, {"role": "system", "content": system})
174
+
175
+ try:
176
+ response = self.client.chat.completions.create(
177
+ model=self.model,
178
+ max_tokens=4096,
179
+ messages=messages
180
+ )
181
+
182
+ # Track token usage
183
+ if response.usage:
184
+ self.total_prompt_tokens += response.usage.prompt_tokens
185
+ self.total_completion_tokens += response.usage.completion_tokens
186
+
187
+ return response.choices[0].message.content
188
+ except OpenAIRateLimitError as e:
189
+ raise LLMRateLimitError(f"OpenAI rate limit: {e}")
190
+ except OpenAIAPIError as e:
191
+ # Check for server errors
192
+ if hasattr(e, 'status_code') and e.status_code in (500, 502, 503, 504):
193
+ raise LLMServerError(f"OpenAI server error ({e.status_code}): {e}")
194
+ raise
195
+
196
+ def get_token_usage(self) -> dict:
197
+ """Get cumulative token usage."""
198
+ return {
199
+ "prompt_tokens": self.total_prompt_tokens,
200
+ "completion_tokens": self.total_completion_tokens,
201
+ "total_tokens": self.total_prompt_tokens + self.total_completion_tokens
202
+ }
203
+
204
+ def reset_token_usage(self):
205
+ """Reset token counters."""
206
+ self.total_prompt_tokens = 0
207
+ self.total_completion_tokens = 0
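The counters above make simple per-run cost accounting possible. An illustrative use of the two accessors defined on this class (values are placeholders):

    client = OpenAIClient(api_key="sk-...", model="gpt-4o")
    # ... after one or more `await client.complete(...)` calls ...
    usage = client.get_token_usage()
    print(f"{usage['total_tokens']} tokens "
          f"({usage['prompt_tokens']} prompt / {usage['completion_tokens']} completion)")
    client.reset_token_usage()  # start a fresh accounting window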
208
+
209
+ class BedrockProviderWrapper:
210
+ """Wrapper around Bedrock provider for MCAL components."""
211
+
212
+ def __init__(self, model: str = "llama-3.3-70b", region: str = "us-east-1"):
213
+ self.provider = BedrockProvider(model=model, region=region)
214
+ self.model = model
215
+
216
+ async def complete(self, prompt: str, system: Optional[str] = None) -> str:
217
+ """Generate completion."""
218
+ messages = []
219
+
220
+ # This wrapper sends only user-role messages - merge the system prompt into the user prompt
221
+ if system:
222
+ combined_prompt = f"{system}\n\n{prompt}"
223
+ messages = [{"role": "user", "content": combined_prompt}]
224
+ else:
225
+ messages = [{"role": "user", "content": prompt}]
226
+
227
+ return await self.provider.generate(messages)
228
+
229
+
230
+ class TieredBedrockProvider:
231
+ """
232
+ Tiered Bedrock provider that routes to fast/smart models based on task complexity.
233
+
234
+ Fast model (8B): Simple tasks like intent detection, graph updates
235
+ Smart model (70B): Complex tasks like decision extraction, reasoning
236
+ """
237
+
238
+ def __init__(
239
+ self,
240
+ fast_model: str = "llama-3.1-8b",
241
+ smart_model: str = "llama-3.3-70b",
242
+ region: str = "us-east-1"
243
+ ):
244
+ self.fast_provider = BedrockProvider(model=fast_model, region=region)
245
+ self.smart_provider = BedrockProvider(model=smart_model, region=region)
246
+ self.fast_model = fast_model
247
+ self.smart_model = smart_model
248
+
249
+ # Track usage for metrics
250
+ self.fast_calls = 0
251
+ self.smart_calls = 0
252
+
253
+ async def complete(
254
+ self,
255
+ prompt: str,
256
+ system: Optional[str] = None,
257
+ use_smart: bool = False
258
+ ) -> str:
259
+ """
260
+ Generate completion using appropriate model tier.
261
+
262
+ Args:
263
+ prompt: The prompt to send
264
+ system: System prompt (merged with user prompt)
265
+ use_smart: Force use of smart model (for complex tasks)
266
+ """
267
+ # Build messages
268
+ if system:
269
+ combined_prompt = f"{system}\n\n{prompt}"
270
+ messages = [{"role": "user", "content": combined_prompt}]
271
+ else:
272
+ messages = [{"role": "user", "content": prompt}]
273
+
274
+ # Route to appropriate model
275
+ if use_smart:
276
+ self.smart_calls += 1
277
+ logger.debug(f"Using SMART model ({self.smart_model})")
278
+ return await self.smart_provider.generate(messages)
279
+ else:
280
+ self.fast_calls += 1
281
+ logger.debug(f"Using FAST model ({self.fast_model})")
282
+ return await self.fast_provider.generate(messages)
283
+
284
+ def get_usage_stats(self) -> dict:
285
+ """Get model usage statistics."""
286
+ total = self.fast_calls + self.smart_calls
287
+ return {
288
+ "fast_calls": self.fast_calls,
289
+ "smart_calls": self.smart_calls,
290
+ "total_calls": total,
291
+ "fast_ratio": self.fast_calls / total if total > 0 else 0,
292
+ }
293
+
294
+ def get_token_usage(self) -> dict:
295
+ """Get total token usage from both providers."""
296
+ fast_usage = self.fast_provider.get_token_usage()
297
+ smart_usage = self.smart_provider.get_token_usage()
298
+ return {
299
+ "prompt_tokens": fast_usage["prompt_tokens"] + smart_usage["prompt_tokens"],
300
+ "completion_tokens": fast_usage["completion_tokens"] + smart_usage["completion_tokens"],
301
+ "total_tokens": fast_usage["total_tokens"] + smart_usage["total_tokens"],
302
+ "fast_tokens": fast_usage["total_tokens"],
303
+ "smart_tokens": smart_usage["total_tokens"],
304
+ }
305
+
306
+ def reset_token_usage(self):
307
+ """Reset token counters on both providers."""
308
+ self.fast_provider.reset_token_usage()
309
+ self.smart_provider.reset_token_usage()
310
+
311
+
312
+ class TieredLLMAdapter:
313
+ """
314
+ Adapter that wraps TieredBedrockProvider and pins use_smart for a specific task type.
315
+
316
+ Used to pass different "views" of the tiered provider to different components:
317
+ - IntentTracker gets a "fast" view (use_smart=False)
318
+ - ReasoningStore gets a "smart" view (use_smart=True)
319
+ """
320
+
321
+ def __init__(self, provider: TieredBedrockProvider, use_smart: bool):
322
+ self._provider = provider
323
+ self._use_smart = use_smart
324
+
325
+ async def complete(self, prompt: str, system: Optional[str] = None) -> str:
326
+ """Generate completion using the fixed model tier."""
327
+ return await self._provider.complete(prompt, system, use_smart=self._use_smart)
328
+
329
+
330
+
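Taken together, the tiered provider and the adapter give each component a fixed-tier view of a single shared provider. A usage sketch (requires AWS Bedrock credentials in the environment; prompts are illustrative):

    import asyncio

    async def demo_tiered_routing() -> None:
        provider = TieredBedrockProvider(fast_model="llama-3.1-8b", smart_model="llama-3.3-70b")
        fast_view = TieredLLMAdapter(provider, use_smart=False)   # cheap tier, e.g. intent detection
        smart_view = TieredLLMAdapter(provider, use_smart=True)   # smart tier, e.g. decision extraction

        await fast_view.complete("Classify this user intent: 'add auth to the API'")
        await smart_view.complete("Extract the decision and rationale from the transcript above.")

        print(provider.get_usage_stats())
        # e.g. {'fast_calls': 1, 'smart_calls': 1, 'total_calls': 2, 'fast_ratio': 0.5}

    # asyncio.run(demo_tiered_routing())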
331
+ # =============================================================================
332
+ # MCAL Result Types
333
+ # =============================================================================
334
+
335
+ @dataclass
336
+ class TimingMetrics:
337
+ """Timing metrics for performance monitoring."""
338
+ total_ms: int = 0
339
+ facts_ms: int = 0
340
+ intents_ms: int = 0
341
+ decisions_ms: int = 0
342
+ parallel_savings_ms: int = 0 # Time saved by parallelization
343
+
344
+
345
+ @dataclass
346
+ class AddResult:
347
+ """Result from adding messages to MCAL."""
348
+ # From Mem0
349
+ facts: list[MemoryEntry] = field(default_factory=list)
350
+
351
+ # From MCAL (novel)
352
+ intent_graph: Optional[IntentGraph] = None
353
+ decisions: list[DecisionTrail] = field(default_factory=list)
354
+
355
+ # From Unified Extractor (Issue #19)
356
+ unified_graph: Optional[UnifiedGraph] = None
357
+
358
+ # Performance metrics
359
+ timing: Optional[TimingMetrics] = None
360
+
361
+ @property
362
+ def summary(self) -> dict:
363
+ """Quick summary of what was extracted."""
364
+ summary = {
365
+ "facts_count": len(self.facts),
366
+ "intents_count": len(self.intent_graph.nodes) if self.intent_graph else 0,
367
+ "decisions_count": len(self.decisions),
368
+ }
369
+ # Add unified graph stats if present
370
+ if self.unified_graph:
371
+ graph_summary = self.unified_graph.summary()
372
+ summary["unified_graph"] = {
373
+ "nodes": graph_summary["total_nodes"],
374
+ "edges": graph_summary["total_edges"],
375
+ "decisions": graph_summary["decisions"],
376
+ "goals": graph_summary["goals"],
377
+ "actions": graph_summary["actions"],
378
+ }
379
+ if self.timing:
380
+ summary["timing_ms"] = {
381
+ "total": self.timing.total_ms,
382
+ "facts": self.timing.facts_ms,
383
+ "intents": self.timing.intents_ms,
384
+ "decisions": self.timing.decisions_ms,
385
+ "parallel_savings": self.timing.parallel_savings_ms,
386
+ }
387
+ return summary
388
+
389
+
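For orientation, a typical `summary` dict on the unified-extractor path (keys come from the property above; values are illustrative):

    {
        "facts_count": 12,
        "intents_count": 0,    # unified path stores goals in the graph, not intent_graph
        "decisions_count": 0,
        "unified_graph": {"nodes": 12, "edges": 9, "decisions": 2, "goals": 3, "actions": 4},
        "timing_ms": {"total": 1840, "facts": 1840, "intents": 0, "decisions": 0, "parallel_savings": 0},
    }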
390
+ @dataclass
391
+ class SearchResult:
392
+ """Result from searching MCAL."""
393
+ # From Mem0 (re-ranked)
394
+ memories: list[MemoryEntry] = field(default_factory=list)
395
+
396
+ # From MCAL (enriched)
397
+ active_goals: list[IntentNode] = field(default_factory=list)
398
+ relevant_decisions: list[DecisionTrail] = field(default_factory=list)
399
+
400
+ # Assembled context
401
+ context: Optional[str] = None
402
+
403
+ # =============================================================================
404
+ # Main MCAL Class
405
+ # =============================================================================
406
+
407
+ class MCAL:
408
+ """
409
+ Memory-Context Alignment Layer.
410
+
411
+ Standalone memory layer providing reasoning preservation and goal-aware retrieval.
412
+
413
+ Three Pillars:
414
+ 1. Intent Graph Preservation - Track goal hierarchies
415
+ 2. Reasoning Chain Storage - Store WHY decisions were made
416
+ 3. Goal-Aware Retrieval - Retrieve based on objectives, not just similarity
417
+ """
418
+
419
+ def __init__(
420
+ self,
421
+ anthropic_api_key: Optional[str] = None,
422
+ openai_api_key: Optional[str] = None,
423
+ model: Optional[str] = None,
424
+ llm_provider: str = "openai",
425
+ bedrock_model: str = "llama-3.3-70b",
426
+ bedrock_region: str = "us-east-1",
427
+ storage_path: Optional[str] = None,
428
+ enable_persistence: bool = True,
429
+ enable_tiered_models: bool = False,
430
+ bedrock_fast_model: str = "llama-3.1-8b",
431
+ bedrock_smart_model: str = "llama-3.3-70b",
432
+ enable_extraction_cache: bool = False,
433
+ cache_ttl_seconds: int = 86400,
434
+ # Deprecated parameters (Issue #53 - kept for backward compatibility)
435
+ mem0_config: Optional[dict] = None,
436
+ mem0_api_key: Optional[str] = None,
437
+ use_standalone_backend: bool = False,
438
+ use_unified_extractor: bool = True,
439
+ ):
440
+ """
441
+ Initialize MCAL.
442
+
443
+ Args:
444
+ anthropic_api_key: Anthropic API key for LLM calls (optional)
445
+ openai_api_key: OpenAI API key for LLM calls (optional)
446
+ model: Model to use for extraction (defaults based on provider)
447
+ llm_provider: LLM provider to use ("openai", "anthropic", or "bedrock")
448
+ bedrock_model: Bedrock model to use (default: llama-3.3-70b)
449
+ bedrock_region: AWS region for Bedrock (default: us-east-1)
450
+ storage_path: Path for persistent storage (default: ~/.mcal)
451
+ enable_persistence: Enable cross-session persistence (default: True)
452
+ enable_tiered_models: Use fast/smart model routing (bedrock only)
453
+ bedrock_fast_model: Fast model for simple tasks (default: llama-3.1-8b)
454
+ bedrock_smart_model: Smart model for complex tasks (default: llama-3.3-70b)
455
+ enable_extraction_cache: Enable caching of extracted state (Issue #9)
456
+ cache_ttl_seconds: Time-to-live for cache entries (default: 24h)
457
+
458
+ Deprecated Args (Issue #53 - will be removed in v1.0):
459
+ mem0_config: Ignored - MCAL is now standalone
460
+ mem0_api_key: Ignored - MCAL is now standalone
461
+ use_standalone_backend: Ignored - always standalone
462
+ use_unified_extractor: Ignored - always uses unified extractor
463
+ """
464
+ self._enable_tiered_models = enable_tiered_models
465
+ self._enable_extraction_cache = enable_extraction_cache
466
+ self._use_unified_extractor = True # Always use unified extractor (Issue #53)
467
+
468
+ # Issue #53: Deprecation warnings for removed parameters
469
+ import warnings
470
+ if mem0_config is not None:
471
+ warnings.warn(
472
+ "mem0_config is deprecated and ignored. MCAL v1.0 is fully standalone.",
473
+ DeprecationWarning,
474
+ stacklevel=2
475
+ )
476
+ if mem0_api_key is not None:
477
+ warnings.warn(
478
+ "mem0_api_key is deprecated and ignored. MCAL v1.0 is fully standalone.",
479
+ DeprecationWarning,
480
+ stacklevel=2
481
+ )
482
+ if use_standalone_backend:
483
+ warnings.warn(
484
+ "use_standalone_backend is deprecated and ignored. MCAL is always standalone.",
485
+ DeprecationWarning,
486
+ stacklevel=2
487
+ )
488
+ if not use_unified_extractor:
489
+ warnings.warn(
490
+ "use_unified_extractor=False is deprecated. Legacy 3-pillar extraction "
491
+ "has been removed. MCAL now always uses unified extraction.",
492
+ DeprecationWarning,
493
+ stacklevel=2
494
+ )
495
+
496
+ # Initialize extraction cache (Issue #9)
497
+ if enable_extraction_cache:
498
+ cache_path = None
499
+ if storage_path:
500
+ cache_path = Path(storage_path) / "extraction_cache.json"
501
+ elif enable_persistence:
502
+ cache_path = Path.home() / ".mcal" / "extraction_cache.json"
503
+ self._extraction_cache = ExtractionCache(
504
+ persist_path=cache_path,
505
+ ttl_seconds=cache_ttl_seconds
506
+ )
507
+ logger.info(f"Extraction cache enabled (TTL: {cache_ttl_seconds}s)")
508
+ else:
509
+ self._extraction_cache = None
510
+
511
+ # Initialize LLM client based on provider
512
+ if llm_provider == "openai":
513
+ if not openai_api_key:
514
+ raise ValueError("openai_api_key required when llm_provider='openai'")
515
+ model = model or "gpt-4o"
516
+ self._llm = OpenAIClient(api_key=openai_api_key, model=model)
517
+ elif llm_provider == "anthropic":
518
+ if not anthropic_api_key:
519
+ raise ValueError("anthropic_api_key required when llm_provider='anthropic'")
520
+ model = model or "claude-sonnet-4-20250514"
521
+ self._llm = AnthropicClient(api_key=anthropic_api_key, model=model)
522
+ elif llm_provider == "bedrock":
523
+ # Bedrock uses AWS credentials from environment
524
+ if enable_tiered_models:
525
+ self._llm = TieredBedrockProvider(
526
+ fast_model=bedrock_fast_model,
527
+ smart_model=bedrock_smart_model,
528
+ region=bedrock_region
529
+ )
530
+ logger.info(f"Using tiered Bedrock: fast={bedrock_fast_model}, smart={bedrock_smart_model}")
531
+ else:
532
+ self._llm = BedrockProviderWrapper(
533
+ model=bedrock_model,
534
+ region=bedrock_region
535
+ )
536
+ else:
537
+ raise ValueError(f"Unknown llm_provider: {llm_provider}. Use 'openai', 'anthropic', or 'bedrock'")
538
+
539
+ # Issue #53: No memory backend needed - MCAL is standalone
540
+ # UnifiedGraph handles all storage and search
541
+
542
+ # Initialize MCAL components (for legacy 3-pillar mode compatibility)
543
+ # With tiered models, use fast LLM for intents, smart LLM for decisions
544
+ if enable_tiered_models and isinstance(self._llm, TieredBedrockProvider):
545
+ intent_llm = TieredLLMAdapter(self._llm, use_smart=False) # Fast model for intents
546
+ decision_llm = TieredLLMAdapter(self._llm, use_smart=True) # Smart model for decisions
547
+ self._intent_tracker = IntentTracker(intent_llm)
548
+ self._reasoning_store = ReasoningStore(decision_llm)
549
+ logger.info("Tiered model routing: intents→fast, decisions→smart")
550
+ else:
551
+ self._intent_tracker = IntentTracker(self._llm)
552
+ self._reasoning_store = ReasoningStore(self._llm)
553
+ self._context_assembler = ContextAssembler()
554
+
555
+ # Initialize unified extractor (Issue #19 - 52x token reduction)
556
+ if self._use_unified_extractor:  # always True (Issue #53)
557
+ self._unified_extractor = UnifiedExtractor(self._llm)
558
+ self._user_graphs: dict[str, UnifiedGraph] = {} # Per-user unified graphs
559
+ logger.info("Unified extractor enabled (single-pass extraction)")
560
+ else:
561
+ self._unified_extractor = None
562
+ self._user_graphs = {}
563
+
564
+ # Initialize persistent storage
565
+ self._enable_persistence = enable_persistence
566
+ if enable_persistence:
567
+ storage_base = Path(storage_path) if storage_path else None
568
+ self._storage = MCALStorage(base_path=storage_base)
569
+ logger.info(f"Persistence enabled at {self._storage.base_path}")
570
+ else:
571
+ self._storage = None
572
+ logger.info("Persistence disabled (in-memory only)")
573
+
574
+ # Session management (in-memory cache)
575
+ self._sessions: dict[str, Session] = {}
576
+ self._user_intents: dict[str, IntentGraph] = {}
577
+
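For example, a cache- and persistence-enabled configuration using the flags documented above (path and TTL are illustrative):

    mcal = MCAL(
        openai_api_key="...",
        enable_extraction_cache=True,   # Issue #9: reuse extraction for already-seen messages
        cache_ttl_seconds=3600,         # 1 hour instead of the 24h default
        storage_path="/var/lib/mcal",   # graphs + extraction_cache.json live here
    )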
578
+ # Issue #53: _extract_facts_async removed - no longer needed with unified extractor
579
+
580
+ async def _extract_intents_async(
581
+ self,
582
+ turns: list[Turn],
583
+ user_id: str,
584
+ session_id: Optional[str]
585
+ ) -> IntentGraph:
586
+ """Extract or update intent graph."""
587
+ # Load existing graph from persistent storage if not in memory
588
+ if user_id not in self._user_intents and self._enable_persistence and self._storage:
589
+ stored_graph = self._storage.load_intent_graph(user_id)
590
+ if stored_graph:
591
+ self._user_intents[user_id] = stored_graph
592
+ logger.info(f"Loaded existing intent graph for user {user_id} from storage")
593
+
594
+ # Get or create user's intent graph
595
+ if user_id in self._user_intents:
596
+ # Update existing graph
597
+ for turn in turns:
598
+ self._user_intents[user_id] = await self._intent_tracker.update_intent(
599
+ turn, self._user_intents[user_id]
600
+ )
601
+ else:
602
+ # Create new graph
603
+ self._user_intents[user_id] = await self._intent_tracker.extract_intents(
604
+ turns, session_id=session_id
605
+ )
606
+
607
+ return self._user_intents[user_id]
608
+
609
+ async def _extract_decisions_async(
610
+ self,
611
+ turns: list[Turn],
612
+ user_id: str,
613
+ intent_graph: Optional[IntentGraph]
614
+ ) -> list[DecisionTrail]:
615
+ """Extract decisions with active goals context."""
616
+ # Load existing decisions from persistent storage
617
+ existing_decisions = []
618
+ if self._enable_persistence and self._storage:
619
+ existing_decisions = self._storage.load_decisions(user_id)
620
+ if existing_decisions:
621
+ logger.info(f"Loaded {len(existing_decisions)} existing decisions for user {user_id}")
622
+
623
+ # Get active goals for context
624
+ goal_ids = None
625
+ active_goals_context = None
626
+ if intent_graph:
627
+ active_goals = intent_graph.get_active_goals()
628
+ goal_ids = [g.id for g in active_goals]
629
+
630
+ # Build rich goals context for decision extraction
631
+ if active_goals:
632
+ goals_lines = []
633
+ for i, goal in enumerate(active_goals, 1):
634
+ goal_type = goal.type.name if hasattr(goal.type, 'name') else str(goal.type)
635
+ goals_lines.append(f"{i}. [{goal_type}] {goal.content}")
636
+ active_goals_context = "\n".join(goals_lines)
637
+ logger.debug(f"Active goals context: {len(active_goals)} goals")
638
+
639
+ # Extract new decisions with full context (goals + prior decisions)
640
+ reconciled_decisions = await self._reasoning_store.extract_decisions(
641
+ turns,
642
+ goal_ids=goal_ids,
643
+ existing_decisions=existing_decisions,
644
+ active_goals_context=active_goals_context
645
+ )
646
+
647
+ return reconciled_decisions
648
+
649
+ async def add(
650
+ self,
651
+ messages: list[dict],
652
+ user_id: str,
653
+ session_id: Optional[str] = None,
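The `@llm_retry` decorator used above is imported from `.core.retry`, which this diff does not include. A minimal compatible sketch, assuming jittered exponential backoff over the two transient error classes the clients raise (`LLMRateLimitError` and `LLMServerError` from the same module):

    import asyncio
    import functools
    import random

    def llm_retry(max_attempts: int = 3, min_wait: float = 1.0, max_wait: float = 10.0):
        """Retry an async LLM call on transient errors with capped exponential backoff."""
        def decorator(fn):
            @functools.wraps(fn)
            async def wrapper(*args, **kwargs):
                for attempt in range(1, max_attempts + 1):
                    try:
                        return await fn(*args, **kwargs)
                    except (LLMRateLimitError, LLMServerError):
                        if attempt == max_attempts:
                            raise  # retries exhausted - surface the original error
                        delay = min(max_wait, min_wait * 2 ** (attempt - 1))
                        await asyncio.sleep(delay * random.uniform(0.8, 1.2))  # jitter
            return wrapper
        return decorator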
654
+ extract_intents: bool = True,
655
+ extract_decisions: bool = True,
656
+ ) -> AddResult:
657
+ """
658
+ Add messages to memory with full MCAL processing.
659
+
660
+ This is the core method. By default it runs the unified single-pass
+ extractor; the legacy pipeline retained below:
+ 1. Extracts facts (in parallel with intent extraction)
+ 2. Extracts intent graph (MCAL Pillar 1)
+ 3. Extracts decision trails (MCAL Pillar 2)
664
+
665
+ Performance optimization (Issue #7):
666
+ - Fact extraction and intent extraction run in PARALLEL
667
+ - Decision extraction runs after intents (needs active goals context)
668
+
669
+ Args:
670
+ messages: List of message dicts [{"role": "user", "content": "..."}]
671
+ user_id: User identifier
672
+ session_id: Optional session identifier
673
+ extract_intents: Whether to extract intent graph
674
+ extract_decisions: Whether to extract decision trails
675
+
676
+ Returns:
677
+ AddResult with facts, intents, decisions, and timing metrics
678
+ """
679
+ total_start = time.time()
680
+ timing = TimingMetrics()
681
+ result = AddResult()
682
+
683
+ # Issue #2: Validate messages before processing
684
+ valid_messages = []
685
+ for i, msg in enumerate(messages):
686
+ if not isinstance(msg, dict):
687
+ logger.warning(f"Skipping message {i}: not a dict")
688
+ continue
689
+ if not msg.get("content"):
690
+ logger.warning(f"Skipping message {i}: missing or empty content")
691
+ continue
692
+ if not msg.get("role"):
693
+ logger.warning(f"Skipping message {i}: missing role")
694
+ continue
695
+ valid_messages.append(msg)
696
+
697
+ if not valid_messages:
698
+ logger.error("No valid messages to process")
699
+ return result
700
+
701
+ # =========================================================================
702
+ # Issue #19: UNIFIED EXTRACTOR PATH (52x token reduction)
703
+ # Single-pass extraction replacing 6-pass system
704
+ # =========================================================================
705
+ if self._use_unified_extractor and self._unified_extractor:
706
+ logger.info("Using unified extractor (single-pass)")
707
+ unified_start = time.time()
708
+
709
+ # Check if user has existing graph (for delta extraction)
710
+ existing_graph = self._user_graphs.get(user_id)
711
+
712
+ # Issue #25: Load from disk if not in memory
713
+ if not existing_graph and self._enable_persistence and self._storage:
714
+ existing_graph = self._storage.load_unified_graph(user_id)
715
+ if existing_graph:
716
+ self._user_graphs[user_id] = existing_graph
717
+ logger.info(f"Loaded unified graph for {user_id} from storage")
718
+
719
+ if existing_graph:
720
+ # Delta extraction - only process new information
721
+ logger.debug("Using delta extraction for existing user")
722
+ unified_graph = await self._unified_extractor.extract_delta(
723
+ valid_messages, existing_graph
724
+ )
725
+ else:
726
+ # Full extraction for new user
727
+ unified_graph = await self._unified_extractor.extract(valid_messages)
728
+
729
+ # Store graph for user (in memory)
730
+ self._user_graphs[user_id] = unified_graph
731
+ result.unified_graph = unified_graph
732
+
733
+ # Issue #25: Persist to disk
734
+ if self._enable_persistence and self._storage:
735
+ self._storage.save_unified_graph(user_id, unified_graph)
736
+ logger.debug(f"Saved unified graph for {user_id} to storage")
737
+
738
+ # Map to compatible structures for backward compatibility
739
+ # Convert graph nodes to memory entries (facts)
740
+ result.facts = [
741
+ MemoryEntry(
742
+ id=node.id,
743
+ content=f"{node.type.name}: {node.label}",
744
+ metadata={"type": node.type.value, **node.attrs}
745
+ )
746
+ for node in unified_graph.nodes.values()
747
+ ]
748
+
749
+ # Extract decisions count from graph
750
+ graph_summary = unified_graph.summary()
751
+
752
+ timing.total_ms = int((time.time() - unified_start) * 1000)
753
+ timing.facts_ms = timing.total_ms # All in one pass
754
+ timing.intents_ms = 0
755
+ timing.decisions_ms = 0
756
+ result.timing = timing
757
+
758
+ logger.info(
759
+ f"Unified extraction complete: {graph_summary['total_nodes']} nodes, "
760
+ f"{graph_summary['total_edges']} edges, {graph_summary['decisions']} decisions "
761
+ f"in {timing.total_ms}ms"
762
+ )
763
+
764
+ return result
765
+
766
+ # =========================================================================
767
+ # Issue #9: CHECK EXTRACTION CACHE
768
+ # For returning users, skip extraction if messages already processed
769
+ # =========================================================================
770
+ cached_state = None
771
+ messages_to_process = valid_messages
772
+ cache_hit_type = "miss"
773
+
774
+ if self._enable_extraction_cache and self._extraction_cache:
775
+ cached_state, messages_to_process = self._extraction_cache.get_state(
776
+ user_id, valid_messages
777
+ )
778
+
779
+ if cached_state and not messages_to_process:
780
+ # FULL CACHE HIT - return cached results immediately
781
+ cache_hit_type = "full"
782
+ logger.info(f"CACHE HIT: Returning cached results for user {user_id}")
783
+
784
+ # Restore from cache
785
+ if cached_state.intent_graph_data:
786
+ result.intent_graph = self._storage._deserialize_intent_graph(
787
+ cached_state.intent_graph_data
788
+ ) if self._storage else None
789
+ # Also restore to memory
790
+ if result.intent_graph:
791
+ self._user_intents[user_id] = result.intent_graph
792
+
793
+ result.decisions = self._deserialize_decisions(cached_state.decisions_data)
794
+
795
+ # Timing reflects cache lookup (near-zero LLM time)
796
+ timing.total_ms = int((time.time() - total_start) * 1000)
797
+ timing.facts_ms = 0
798
+ timing.intents_ms = 0
799
+ timing.decisions_ms = 0
800
+ timing.parallel_savings_ms = cached_state.extraction_time_ms # Saved this much
801
+ result.timing = timing
802
+
803
+ logger.info(f"Cache hit saved ~{cached_state.extraction_time_ms}ms of extraction time")
804
+ return result
805
+
806
+ elif cached_state:
807
+ # PARTIAL CACHE HIT - process only new messages
808
+ cache_hit_type = "partial"
809
+ logger.info(
810
+ f"PARTIAL CACHE HIT: Processing {len(messages_to_process)} new messages "
811
+ f"(cached {cached_state.messages_processed})"
812
+ )
813
+
814
+ # Restore cached intent graph to memory for incremental update
815
+ if cached_state.intent_graph_data and self._storage:
816
+ cached_graph = self._storage._deserialize_intent_graph(
817
+ cached_state.intent_graph_data
818
+ )
819
+ if cached_graph:
820
+ self._user_intents[user_id] = cached_graph
821
+
822
+ # Convert messages to Turn objects for MCAL processing
823
+ turns_to_process = [
824
+ Turn(
825
+ role=msg.get("role", "user"),
826
+ content=msg.get("content", ""),
827
+ session_id=session_id
828
+ )
829
+ for msg in messages_to_process
830
+ ]
831
+
832
+ # All turns (for decision context)
833
+ all_turns = [
834
+ Turn(
835
+ role=msg.get("role", "user"),
836
+ content=msg.get("content", ""),
837
+ session_id=session_id
838
+ )
839
+ for msg in valid_messages
840
+ ]
841
+
842
+ # =========================================================================
843
+ # PHASE 1: PARALLEL EXTRACTION (Facts + Intents)
844
+ # Issue #7: Run independent extractions in parallel
845
+ # =========================================================================
846
+ logger.debug("Phase 1: Parallel extraction (facts + intents)...")
847
+ phase1_start = time.time()
848
+
849
+ # Prepare tasks
850
+ tasks = []
851
+ task_names = []
852
+
853
+ # Always extract facts (for new messages only if partial cache hit)
854
+ facts_start = time.time()
855
+ # _extract_facts_async was removed (Issue #53); guard so this legacy path cannot crash
+ _extract_facts = getattr(self, "_extract_facts_async", None)
+ if _extract_facts:
+ tasks.append(_extract_facts(messages_to_process, user_id))
+ task_names.append("facts")
857
+
858
+ # Optionally extract intents (in parallel with facts)
859
+ # For partial cache hit, this will incrementally update the cached graph
860
+ if extract_intents:
861
+ intents_start = time.time()
862
+ tasks.append(self._extract_intents_async(turns_to_process, user_id, session_id))
863
+ task_names.append("intents")
864
+
865
+ # Run in parallel
866
+ results = await asyncio.gather(*tasks, return_exceptions=True)
867
+
868
+ # Process results
869
+ for i, (name, res) in enumerate(zip(task_names, results)):
870
+ if isinstance(res, Exception):
871
+ logger.error(f"Error in {name} extraction: {res}")
872
+ continue
873
+
874
+ if name == "facts":
875
+ result.facts = res
876
+ timing.facts_ms = int((time.time() - facts_start) * 1000)
877
+ logger.info(f"Mem0 extracted {len(result.facts)} facts in {timing.facts_ms}ms")
878
+ elif name == "intents":
879
+ result.intent_graph = res
880
+ timing.intents_ms = int((time.time() - intents_start) * 1000)
881
+ logger.info(f"Intent graph has {len(result.intent_graph.nodes)} nodes in {timing.intents_ms}ms")
882
+
883
+ # Persist updated graph to storage
884
+ if self._enable_persistence and self._storage:
885
+ self._storage.save_intent_graph(user_id, result.intent_graph)
886
+ logger.debug(f"Saved intent graph for user {user_id} to storage")
887
+
888
+ phase1_duration = int((time.time() - phase1_start) * 1000)
889
+
890
+ # Calculate parallel savings (what sequential would have taken)
891
+ sequential_time = timing.facts_ms + timing.intents_ms
892
+ timing.parallel_savings_ms = sequential_time - phase1_duration
893
+ logger.info(f"Phase 1 complete: {phase1_duration}ms (saved {timing.parallel_savings_ms}ms via parallelization)")
894
+
895
+ # =========================================================================
896
+ # PHASE 2: SEQUENTIAL EXTRACTION (Decisions - needs intent graph)
897
+ # =========================================================================
898
+ if extract_decisions:
899
+ logger.debug("Phase 2: Decision extraction (requires intent graph)...")
900
+ decisions_start = time.time()
901
+
902
+ # For partial cache hit, we still need full context for decision extraction
903
+ # but ReasoningStore handles existing_decisions merging
904
+ reconciled_decisions = await self._extract_decisions_async(
905
+ all_turns, user_id, result.intent_graph
906
+ )
907
+
908
+ result.decisions = reconciled_decisions
909
+ timing.decisions_ms = int((time.time() - decisions_start) * 1000)
910
+
911
+ if self._enable_persistence and self._storage:
912
+ self._storage.save_decisions(user_id, result.decisions)
913
+ logger.debug(f"Saved {len(result.decisions)} total decisions for user {user_id}")
914
+
915
+ logger.info(f"Extracted {len(result.decisions)} decisions in {timing.decisions_ms}ms")
916
+
917
+ # =========================================================================
918
+ # Issue #9: UPDATE EXTRACTION CACHE
919
+ # =========================================================================
920
+ if self._enable_extraction_cache and self._extraction_cache:
921
+ extraction_time = timing.facts_ms + timing.intents_ms + timing.decisions_ms
922
+
923
+ # Serialize intent graph for cache
924
+ intent_graph_data = None
925
+ if result.intent_graph and self._storage:
926
+ intent_graph_data = self._storage._serialize_intent_graph(result.intent_graph)
927
+
928
+ # Serialize decisions for cache
929
+ decisions_data = self._serialize_decisions(result.decisions)
930
+
931
+ self._extraction_cache.update_state(
932
+ user_id=user_id,
933
+ messages=valid_messages,
934
+ intent_graph_data=intent_graph_data,
935
+ decisions_data=decisions_data,
936
+ extraction_time_ms=extraction_time
937
+ )
938
+
939
+ logger.info(f"Cache updated for user {user_id} ({cache_hit_type} -> updated)")
940
+
941
+ # Final timing
942
+ timing.total_ms = int((time.time() - total_start) * 1000)
943
+ result.timing = timing
944
+
945
+ logger.info(f"Total add() time: {timing.total_ms}ms "
946
+ f"(facts={timing.facts_ms}ms, intents={timing.intents_ms}ms, "
947
+ f"decisions={timing.decisions_ms}ms, parallel_savings={timing.parallel_savings_ms}ms)")
948
+
949
+ return result
950
+
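A minimal end-to-end call of `add()` that reads back the metrics (a sketch; assumes OPENAI_API_KEY is set and the messages are illustrative):

    import asyncio
    import os

    async def demo_add() -> None:
        mcal = MCAL(openai_api_key=os.environ["OPENAI_API_KEY"])
        result = await mcal.add(
            messages=[
                {"role": "user", "content": "We picked Postgres over DynamoDB for transactions."},
                {"role": "assistant", "content": "Noted - ACID guarantees were the deciding factor."},
            ],
            user_id="user_123",
        )
        print(result.summary)                  # counts plus unified-graph stats
        print(f"{result.timing.total_ms} ms")  # wall-clock extraction time

    asyncio.run(demo_add())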
951
+ async def add_stream(
952
+ self,
953
+ messages: list[dict],
954
+ user_id: str,
955
+ session_id: Optional[str] = None,
956
+ extract_intents: bool = True,
957
+ extract_decisions: bool = True,
958
+ ) -> AsyncIterator[StreamEvent]:
959
+ """
960
+ Add messages to memory with streaming progress updates.
961
+
962
+ Issue #10: Streaming Response API
963
+
964
+ This is the streaming version of add() that yields partial results
965
+ as they become available, improving perceived latency.
966
+
967
+ Instead of waiting 22s for complete results, clients see:
968
+ - STARTED event immediately
969
+ - Each intent as it's extracted
970
+ - Each decision as it's extracted
971
+ - COMPLETE event with final AddResult
972
+
973
+ Usage:
974
+ async for event in mcal.add_stream(messages, user_id):
975
+ if event.type == StreamEventType.INTENT_EXTRACTED:
976
+ display_intent(event.data)
977
+ elif event.type == StreamEventType.COMPLETE:
978
+ final_result = event.data
979
+
980
+ Args:
981
+ messages: List of message dicts [{"role": "user", "content": "..."}]
982
+ user_id: User identifier
983
+ session_id: Optional session identifier
984
+ extract_intents: Whether to extract intent graph
985
+ extract_decisions: Whether to extract decision trails
986
+
987
+ Yields:
988
+ StreamEvent objects with type, data, and optional progress
989
+ """
990
+ total_start = time.time()
991
+ timing = TimingMetrics()
992
+ result = AddResult()
993
+
994
+ # Signal extraction start
995
+ yield event_started()
996
+
997
+ # Validate messages
998
+ valid_messages = []
999
+ for i, msg in enumerate(messages):
1000
+ if not isinstance(msg, dict):
1001
+ yield event_error(f"Skipping message {i}: not a dict")
1002
+ continue
1003
+ if not msg.get("content"):
1004
+ yield event_error(f"Skipping message {i}: missing or empty content")
1005
+ continue
1006
+ if not msg.get("role"):
1007
+ yield event_error(f"Skipping message {i}: missing role")
1008
+ continue
1009
+ valid_messages.append(msg)
1010
+
1011
+ if not valid_messages:
1012
+ yield event_error("No valid messages to process")
1013
+ yield event_complete(result)
1014
+ return
1015
+
1016
+ # =========================================================================
1017
+ # CHECK EXTRACTION CACHE
1018
+ # =========================================================================
1019
+ cached_state = None
1020
+ messages_to_process = valid_messages
1021
+
1022
+ if self._enable_extraction_cache and self._extraction_cache:
1023
+ cached_state, messages_to_process = self._extraction_cache.get_state(
1024
+ user_id, valid_messages
1025
+ )
1026
+
1027
+ if cached_state and not messages_to_process:
1028
+ # FULL CACHE HIT
1029
+ yield event_cache_hit(
1030
+ hit_type="full",
1031
+ messages_cached=cached_state.messages_processed,
1032
+ messages_to_process=0,
1033
+ saved_time_ms=cached_state.extraction_time_ms
1034
+ )
1035
+
1036
+ # Restore from cache
1037
+ if cached_state.intent_graph_data:
1038
+ result.intent_graph = self._storage._deserialize_intent_graph(
1039
+ cached_state.intent_graph_data
1040
+ ) if self._storage else None
1041
+ if result.intent_graph:
1042
+ self._user_intents[user_id] = result.intent_graph
1043
+
1044
+ result.decisions = self._deserialize_decisions(cached_state.decisions_data)
1045
+
1046
+ timing.total_ms = int((time.time() - total_start) * 1000)
1047
+ timing.parallel_savings_ms = cached_state.extraction_time_ms
1048
+ result.timing = timing
1049
+
1050
+ yield event_complete(result)
1051
+ return
1052
+
1053
+ elif cached_state:
1054
+ # PARTIAL CACHE HIT
1055
+ yield event_cache_hit(
1056
+ hit_type="partial",
1057
+ messages_cached=cached_state.messages_processed,
1058
+ messages_to_process=len(messages_to_process),
1059
+ saved_time_ms=0 # Will calculate after extraction
1060
+ )
1061
+
1062
+ # Restore cached intent graph
1063
+ if cached_state.intent_graph_data and self._storage:
1064
+ cached_graph = self._storage._deserialize_intent_graph(
1065
+ cached_state.intent_graph_data
1066
+ )
1067
+ if cached_graph:
1068
+ self._user_intents[user_id] = cached_graph
1069
+
1070
+ # Convert to Turn objects
1071
+ turns_to_process = [
1072
+ Turn(
1073
+ role=msg.get("role", "user"),
1074
+ content=msg.get("content", ""),
1075
+ session_id=session_id
1076
+ )
1077
+ for msg in messages_to_process
1078
+ ]
1079
+
1080
+ all_turns = [
1081
+ Turn(
1082
+ role=msg.get("role", "user"),
1083
+ content=msg.get("content", ""),
1084
+ session_id=session_id
1085
+ )
1086
+ for msg in valid_messages
1087
+ ]
1088
+
1089
+ # =========================================================================
1090
+ # PHASE 1: FACTS EXTRACTION (with streaming)
1091
+ # =========================================================================
1092
+ yield event_phase_started(ExtractionPhase.FACTS, "Extracting facts...")
1093
+ facts_start = time.time()
1094
+
1095
+ try:
1096
+ # _extract_facts_async was removed (Issue #53); degrade gracefully in streaming mode
+ _extract_facts = getattr(self, "_extract_facts_async", None)
+ result.facts = await _extract_facts(messages_to_process, user_id) if _extract_facts else []
1097
+ timing.facts_ms = int((time.time() - facts_start) * 1000)
1098
+
1099
+ # Yield each fact as it's extracted
1100
+ for i, fact in enumerate(result.facts):
1101
+ yield event_fact_extracted(
1102
+ {"id": fact.id, "content": fact.content, "score": fact.score},
1103
+ progress=StreamProgress(
1104
+ phase=ExtractionPhase.FACTS,
1105
+ current=i + 1,
1106
+ total=len(result.facts),
1107
+ elapsed_ms=int((time.time() - facts_start) * 1000)
1108
+ )
1109
+ )
1110
+
1111
+ yield event_phase_complete(ExtractionPhase.FACTS, len(result.facts), timing.facts_ms)
1112
+ except Exception as e:
1113
+ yield event_error(f"Facts extraction failed: {e}")
1114
+ timing.facts_ms = int((time.time() - facts_start) * 1000)
1115
+
1116
+ # =========================================================================
1117
+ # PHASE 2: INTENTS EXTRACTION (with streaming)
1118
+ # =========================================================================
1119
+ if extract_intents:
1120
+ yield event_phase_started(ExtractionPhase.INTENTS, "Extracting intent graph...")
1121
+ intents_start = time.time()
1122
+
1123
+ try:
1124
+ # Track nodes before extraction for delta detection
1125
+ prev_nodes = set(self._user_intents.get(user_id, IntentGraph()).nodes.keys())
1126
+
1127
+ result.intent_graph = await self._extract_intents_async(
1128
+ turns_to_process, user_id, session_id
1129
+ )
1130
+ timing.intents_ms = int((time.time() - intents_start) * 1000)
1131
+
1132
+ # Yield each intent node (new ones first, then all for completeness)
1133
+ new_nodes = [
1134
+ n for n_id, n in result.intent_graph.nodes.items()
1135
+ if n_id not in prev_nodes
1136
+ ]
1137
+
1138
+ for i, node in enumerate(new_nodes):
1139
+ yield event_intent_extracted(
1140
+ node,
1141
+ progress=StreamProgress(
1142
+ phase=ExtractionPhase.INTENTS,
1143
+ current=i + 1,
1144
+ total=len(new_nodes),
1145
+ elapsed_ms=int((time.time() - intents_start) * 1000),
1146
+ message=f"New intent: {node.type.value}"
1147
+ )
1148
+ )
1149
+
1150
+ # Persist
1151
+ if self._enable_persistence and self._storage:
1152
+ self._storage.save_intent_graph(user_id, result.intent_graph)
1153
+
1154
+ yield event_phase_complete(
1155
+ ExtractionPhase.INTENTS,
1156
+ len(result.intent_graph.nodes),
1157
+ timing.intents_ms
1158
+ )
1159
+ except Exception as e:
1160
+ yield event_error(f"Intent extraction failed: {e}")
1161
+ timing.intents_ms = int((time.time() - intents_start) * 1000)
1162
+
1163
+ # =========================================================================
1164
+ # PHASE 3: DECISIONS EXTRACTION (with streaming)
1165
+ # =========================================================================
1166
+ if extract_decisions:
1167
+ yield event_phase_started(ExtractionPhase.DECISIONS, "Extracting decisions...")
1168
+ decisions_start = time.time()
1169
+
1170
+ try:
1171
+ # Load existing for delta detection
1172
+ existing_ids = set()
1173
+ if self._enable_persistence and self._storage:
1174
+ existing = self._storage.load_decisions(user_id)
1175
+ existing_ids = {d.id for d in existing}
1176
+
1177
+ reconciled_decisions = await self._extract_decisions_async(
1178
+ all_turns, user_id, result.intent_graph
1179
+ )
1180
+ result.decisions = reconciled_decisions
1181
+ timing.decisions_ms = int((time.time() - decisions_start) * 1000)
1182
+
1183
+ # Yield each decision (highlight new ones)
1184
+ for i, decision in enumerate(result.decisions):
1185
+ is_new = decision.id not in existing_ids
1186
+ yield event_decision_extracted(
1187
+ decision,
1188
+ progress=StreamProgress(
1189
+ phase=ExtractionPhase.DECISIONS,
1190
+ current=i + 1,
1191
+ total=len(result.decisions),
1192
+ elapsed_ms=int((time.time() - decisions_start) * 1000),
1193
+ message="New decision" if is_new else "Existing decision"
1194
+ )
1195
+ )
1196
+
1197
+ # Persist
1198
+ if self._enable_persistence and self._storage:
1199
+ self._storage.save_decisions(user_id, result.decisions)
1200
+
1201
+ yield event_phase_complete(
1202
+ ExtractionPhase.DECISIONS,
1203
+ len(result.decisions),
1204
+ timing.decisions_ms
1205
+ )
1206
+ except Exception as e:
1207
+ yield event_error(f"Decision extraction failed: {e}")
1208
+ timing.decisions_ms = int((time.time() - decisions_start) * 1000)
1209
+
1210
+ # =========================================================================
1211
+ # UPDATE CACHE
1212
+ # =========================================================================
1213
+ if self._enable_extraction_cache and self._extraction_cache:
1214
+ extraction_time = timing.facts_ms + timing.intents_ms + timing.decisions_ms
1215
+
1216
+ intent_graph_data = None
1217
+ if result.intent_graph and self._storage:
1218
+ intent_graph_data = self._storage._serialize_intent_graph(result.intent_graph)
1219
+
1220
+ decisions_data = self._serialize_decisions(result.decisions)
1221
+
1222
+ self._extraction_cache.update_state(
1223
+ user_id=user_id,
1224
+ messages=valid_messages,
1225
+ intent_graph_data=intent_graph_data,
1226
+ decisions_data=decisions_data,
1227
+ extraction_time_ms=extraction_time
1228
+ )
1229
+
1230
+ # Final timing
1231
+ timing.total_ms = int((time.time() - total_start) * 1000)
1232
+ timing.parallel_savings_ms = 0 # Sequential in streaming mode
1233
+ result.timing = timing
1234
+
1235
+ # Signal completion with full result
1236
+ yield event_complete(result)
1237
+
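One way to consume the stream end-to-end, mirroring the usage in the docstring above. The `ERROR` and `DECISION_EXTRACTED` member names are assumed from the `event_error` / `event_decision_extracted` helper names; only `INTENT_EXTRACTED` and `COMPLETE` appear verbatim in the docstring:

    async def consume_stream(mcal: MCAL, messages: list[dict], user_id: str) -> AddResult:
        final = AddResult()
        async for event in mcal.add_stream(messages, user_id=user_id):
            if event.type == StreamEventType.INTENT_EXTRACTED:
                print("intent:", event.data)
            elif event.type == StreamEventType.DECISION_EXTRACTED:
                print("decision:", event.data)
            elif event.type == StreamEventType.ERROR:
                print("warning:", event.data)
            elif event.type == StreamEventType.COMPLETE:
                final = event.data  # the final AddResult
        return final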
1238
+ async def search(
1239
+ self,
1240
+ query: str,
1241
+ user_id: str,
1242
+ limit: int = 10,
1243
+ include_goals: bool = True,
1244
+ include_decisions: bool = True,
1245
+ retrieval_config: Optional[RetrievalConfig] = None,
1246
+ ) -> SearchResult:
1247
+ """
1248
+ Search with goal-aware retrieval.
1249
+
1250
+ This enhances plain similarity search with:
1251
+ 1. Goal-aware re-ranking (MCAL Pillar 3)
1252
+ 2. Relevant decision attachment
1253
+ 3. Active goal context
1254
+
1255
+ When using unified extractor mode, searches the unified graph
1256
+ directly; there is no separate memory backend.
1257
+
1258
+ Args:
1259
+ query: Search query
1260
+ user_id: User identifier
1261
+ limit: Maximum results
1262
+ include_goals: Include active goals in result
1263
+ include_decisions: Include relevant decisions in result
1264
+ retrieval_config: Optional retrieval configuration
1265
+
1266
+ Returns:
1267
+ SearchResult with memories, goals, decisions, and assembled context
1268
+ """
1269
+ result = SearchResult()
1270
+
1271
+ # Check if user has a unified graph (Issue #26 fix)
1272
+ unified_graph = self._user_graphs.get(user_id) if self._user_graphs else None
1273
+
1274
+ if unified_graph:
1275
+ # Search unified graph directly
1276
+ logger.debug("Searching unified graph...")
1277
+ graph_results = unified_graph.search(query, limit=limit * 2)
1278
+
1279
+ # Convert graph results to MemoryEntry format
1280
+ result.memories = [
1281
+ MemoryEntry(
1282
+ id=r["id"],
1283
+ content=r["content"],
1284
+ metadata={"type": r["type"], **r.get("attributes", {})},
1285
+ score=r["score"]
1286
+ )
1287
+ for r in graph_results
1288
+ ]
1289
+
1290
+ # Get active goals from unified graph
1291
+ if include_goals:
1292
+ active_goal_dicts = unified_graph.get_active_goals()
1293
+ # Convert to IntentNode format for compatibility
1294
+ result.active_goals = [
1295
+ IntentNode(
1296
+ id=g["id"],
1297
+ type=self._goal_type_to_intent_type(g.get("goal_type", "goal")),
1298
+ content=g["content"],
1299
+ status=self._status_to_intent_status(g.get("status", "active"))
1300
+ )
1301
+ for g in active_goal_dicts
1302
+ ]
1303
+
1304
+ # Get decisions from unified graph
1305
+ if include_decisions:
1306
+ all_decisions = unified_graph.get_all_decisions_with_detail()
1307
+ # Filter to relevant decisions (keyword match - any query word)
1308
+ query_words = set(query.lower().split())
1309
+
1310
+ def decision_matches(d: dict) -> bool:
1311
+ """Check if decision matches any query word."""
1312
+ decision_lower = d["decision"].lower()
1313
+ # Check if any query word appears in decision
1314
+ if any(w in decision_lower for w in query_words):
1315
+ return True
1316
+ # Check rationale
1317
+ rationale = d.get("rationale", "") or ""
1318
+ if any(w in rationale.lower() for w in query_words):
1319
+ return True
1320
+ # Check reasons
1321
+ for r in d.get("reasons", []):
1322
+ claim = (r.get("claim", "") or "").lower()
1323
+ if any(w in claim for w in query_words):
1324
+ return True
1325
+ return False
1326
+
1327
+ result.relevant_decisions = [
1328
+ DecisionTrail(
1329
+ id=d["id"],
1330
+ decision=d["decision"],
1331
+ rationale=d.get("rationale", ""),
1332
+ alternatives=[], # Could expand if needed
1333
+ context=d.get("context", "")
1334
+ )
1335
+ for d in all_decisions
1336
+ if decision_matches(d)
1337
+ ][:5] # Limit to top 5 relevant decisions
1338
+ else:
1339
+ # Issue #53: No fallback needed - MCAL is always standalone
1340
+ # If no unified graph, return empty results
1341
+ logger.debug("No unified graph found for user")
1342
+ result.memories = []
1343
+ result.active_goals = []
1344
+ result.relevant_decisions = []
1345
+
1346
+ # Assemble context
1347
+ result.context = self._context_assembler.assemble(
1348
+ retrieved=[], # We'll pass memories differently
1349
+ active_goals=result.active_goals,
1350
+ decisions=result.relevant_decisions,
1351
+ include_goals=include_goals,
1352
+ include_decisions=include_decisions
1353
+ )
1354
+
1355
+ # Add memories to context
1356
+ if result.memories:
1357
+ memory_lines = ["\n### RELEVANT MEMORIES ###"]
1358
+ for mem in result.memories:
1359
+ score_str = f" (relevance: {mem.score:.2f})" if mem.score else ""
1360
+ memory_lines.append(f"- {mem.content}{score_str}")
1361
+ result.context += "\n".join(memory_lines)
1362
+
1363
+ return result
1364
+
1365
+ async def get_context(
1366
+ self,
1367
+ query: str,
1368
+ user_id: str,
1369
+ max_tokens: int = 4000,
1370
+ include_goals: bool = True,
1371
+ include_decisions: bool = True,
1372
+ include_reasoning: bool = True,
1373
+ ) -> str:
1374
+ """
1375
+ Get assembled context for LLM prompt.
1376
+
1377
+ Convenience method that searches and formats context.
1378
+
1379
+ Args:
1380
+ query: The query/task at hand
1381
+ user_id: User identifier
1382
+ max_tokens: Maximum tokens for context
1383
+ include_goals: Include active goals
1384
+ include_decisions: Include relevant decisions
1385
+ include_reasoning: Include decision rationale
1386
+
1387
+ Returns:
1388
+ Formatted context string ready for LLM prompt
1389
+ """
1390
+ # Search with all enhancements
1391
+ search_result = await self.search(
1392
+ query=query,
1393
+ user_id=user_id,
1394
+ include_goals=include_goals,
1395
+ include_decisions=include_decisions,
1396
+ )
1397
+
1398
+ # Build context sections
1399
+ sections = []
1400
+
1401
+ # Active goals
1402
+ if include_goals and search_result.active_goals:
1403
+ goals_text = "### ACTIVE GOALS ###\n"
1404
+ for goal in search_result.active_goals:
1405
+ status = "🔵" if goal.status.value == "active" else "⏳"
1406
+ goals_text += f"{status} {goal.content}\n"
1407
+ sections.append(goals_text)
1408
+
1409
+ # Relevant decisions with reasoning
1410
+ if include_decisions and search_result.relevant_decisions:
1411
+ decisions_text = "### KEY DECISIONS ###\n"
1412
+ for decision in search_result.relevant_decisions[:5]:
1413
+ decisions_text += f"DECISION: {decision.decision}\n"
1414
+ if include_reasoning:
1415
+ decisions_text += f" Rationale: {decision.rationale}\n"
1416
+ if decision.alternatives:
1417
+ alts = ", ".join(a.option for a in decision.alternatives[:3])
1418
+ decisions_text += f" Alternatives considered: {alts}\n"
1419
+ decisions_text += "\n"
1420
+ sections.append(decisions_text)
1421
+
1422
+ # Memories
1423
+ if search_result.memories:
1424
+ memories_text = "### RELEVANT CONTEXT ###\n"
1425
+ for mem in search_result.memories[:10]:
1426
+ memories_text += f"- {mem.content}\n"
1427
+ sections.append(memories_text)
1428
+
1429
+ return "\n".join(sections)
1430
+
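The assembled string drops straight into a prompt. A sketch (the downstream model call is elided; MCAL only assembles the context):

    async def answer_with_memory(mcal: MCAL, question: str, user_id: str) -> str:
        context = await mcal.get_context(query=question, user_id=user_id, max_tokens=4000)
        prompt = f"{context}\n\n### QUESTION ###\n{question}"
        # hand `prompt` to any chat model
        return prompt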
1431
+ def get_active_goals(self, user_id: str) -> list[IntentNode]:
1432
+ """Get active goals for a user."""
1433
+ if user_id in self._user_intents:
1434
+ return self._user_intents[user_id].get_active_goals()
1435
+ return []
1436
+
1437
+ def get_intent_graph(self, user_id: str) -> Optional[IntentGraph]:
1438
+ """Get full intent graph for a user."""
1439
+ return self._user_intents.get(user_id)
1440
+
1441
+ def get_decisions(
1442
+ self,
1443
+ user_id: str,
1444
+ goal_id: Optional[str] = None,
1445
+ include_invalidated: bool = False
1446
+ ) -> list[DecisionTrail]:
1447
+ """Get decisions, optionally filtered by goal."""
1448
+ if goal_id:
1449
+ return self._reasoning_store.get_decisions_for_goal(
1450
+ goal_id, include_invalidated=include_invalidated
1451
+ )
1452
+ return self._reasoning_store.get_valid_decisions()
1453
+
1454
+ async def _rerank_with_goals(
1455
+ self,
1456
+ memories: list[MemoryEntry],
1457
+ query: str,
1458
+ active_goals: list[IntentNode],
1459
+ ) -> list[MemoryEntry]:
1460
+ """
1461
+ Re-rank memories based on goal alignment.
1462
+
1463
+ This is MCAL Pillar 3: Goal-Aware Retrieval.
1464
+
1465
+ Instead of pure similarity, we boost memories that
1466
+ help achieve the user's active goals.
1467
+ """
1468
+ if not active_goals or not memories:
1469
+ return memories
1470
+
1471
+ # For now, simple heuristic-based re-ranking
1472
+ # TODO: Implement LLM-based goal alignment scoring
1473
+
1474
+ goal_keywords = set()
1475
+ for goal in active_goals:
1476
+ # Extract keywords from goal content
1477
+ words = goal.content.lower().split()
1478
+ goal_keywords.update(w for w in words if len(w) > 3)
1479
+
1480
+ # Score each memory
1481
+ scored = []
1482
+ for mem in memories:
1483
+ base_score = mem.score or 0.5
1484
+
1485
+ # Boost if memory content relates to active goals
1486
+ mem_words = set(mem.content.lower().split())
1487
+ overlap = len(goal_keywords & mem_words)
1488
+ goal_boost = min(0.3, overlap * 0.1) # Max 0.3 boost
1489
+
1490
+ final_score = base_score + goal_boost
1491
+ mem.score = final_score
1492
+ scored.append(mem)
1493
+
1494
+ # Sort by final score
1495
+ scored.sort(key=lambda m: m.score or 0, reverse=True)
1496
+
1497
+ return scored
1498
+
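Concretely, the boost above works out as follows (scores illustrative):

    # base_score=0.62, keyword overlap=2 -> goal_boost=min(0.3, 2*0.1)=0.2 -> final=0.82
    # base_score=0.50, keyword overlap=5 -> goal_boost=min(0.3, 5*0.1)=0.3 -> final=0.80 (capped)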
1499
+ def _get_relevant_decisions(
1500
+ self,
1501
+ query: str,
1502
+ active_goals: list[IntentNode]
1503
+ ) -> list[DecisionTrail]:
1504
+ """Get decisions relevant to the query and active goals."""
1505
+ relevant = []
1506
+
1507
+ # Get decisions for active goals
1508
+ for goal in active_goals:
1509
+ goal_decisions = self._reasoning_store.get_decisions_for_goal(goal.id)
1510
+ relevant.extend(goal_decisions)
1511
+
1512
+ # Also check for keyword matches in query
1513
+ query_lower = query.lower()
1514
+ for decision in self._reasoning_store.get_valid_decisions():
1515
+ if decision not in relevant:
1516
+ if any(word in decision.decision.lower() for word in query_lower.split() if len(word) > 3):
1517
+ relevant.append(decision)
1518
+
1519
+ # Deduplicate and limit
1520
+ seen = set()
1521
+ unique = []
1522
+ for d in relevant:
1523
+ if d.id not in seen:
1524
+ seen.add(d.id)
1525
+ unique.append(d)
1526
+
1527
+ return unique[:10]
1528
+
1529
+ def clear_user_data(self, user_id: str) -> bool:
1530
+ """Clear all data for a user."""
1531
+ # Clear unified graph data
1532
+ if user_id in self._user_graphs:
1533
+ del self._user_graphs[user_id]
1534
+
1535
+ # Clear from persistent storage
1536
+ if self._enable_persistence and self._storage:
1537
+ self._storage.delete_unified_graph(user_id)
1538
+
1539
+ # Clear MCAL data
1540
+ if user_id in self._user_intents:
1541
+ del self._user_intents[user_id]
1542
+
1543
+ if user_id in self._sessions:
1544
+ del self._sessions[user_id]
1545
+
1546
+ # Clear extraction cache (Issue #9)
1547
+ if self._enable_extraction_cache and self._extraction_cache:
1548
+ self._extraction_cache.invalidate(user_id)
1549
+
1550
+ return True
1551
+
1552
+ def get_tiered_model_stats(self) -> Optional[dict]:
1553
+ """
1554
+ Get tiered model usage statistics (Issue #8).
1555
+
1556
+ Returns:
1557
+ Dict with fast_calls, smart_calls, total, fast_ratio
1558
+ or None if tiered models not enabled
1559
+ """
1560
+ if self._enable_tiered_models and isinstance(self._llm, TieredBedrockProvider):
1561
+ return self._llm.get_usage_stats()
1562
+ return None
1563
+
1564
+ def get_cache_stats(self) -> Optional[dict]:
1565
+ """
1566
+ Get extraction cache statistics (Issue #9).
1567
+
1568
+ Returns:
1569
+ Dict with hits, misses, partial_hits, hit_rate, etc.
1570
+ or None if cache not enabled
1571
+ """
1572
+ if self._enable_extraction_cache and self._extraction_cache:
1573
+ return self._extraction_cache.get_stats().to_dict()
1574
+ return None
1575
+
1576
+ def invalidate_cache(self, user_id: Optional[str] = None) -> int:
1577
+ """
1578
+ Invalidate extraction cache.
1579
+
1580
+ Args:
1581
+ user_id: Specific user to invalidate, or None for all
1582
+
1583
+ Returns:
1584
+ Number of entries invalidated
1585
+ """
1586
+ if not self._enable_extraction_cache or not self._extraction_cache:
1587
+ return 0
1588
+
1589
+ if user_id:
1590
+ return 1 if self._extraction_cache.invalidate(user_id) else 0
1591
+ else:
1592
+ return self._extraction_cache.clear()
1593
+
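The two stats getters above combine naturally into a periodic health log. A sketch using only methods defined on this class:

    def log_mcal_health(mcal: MCAL) -> None:
        """Log cache and model-routing stats when the corresponding features are enabled."""
        cache_stats = mcal.get_cache_stats()        # None unless enable_extraction_cache=True
        if cache_stats:
            logger.info("extraction cache: %s", cache_stats)
        tier_stats = mcal.get_tiered_model_stats()  # None unless enable_tiered_models=True
        if tier_stats:
            logger.info("model routing: fast_ratio=%.2f", tier_stats["fast_ratio"])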
1594
+ def _serialize_decisions(self, decisions: list[DecisionTrail]) -> list[dict]:
1595
+ """Serialize decisions for cache storage."""
1596
+ result = []
1597
+ for d in decisions:
1598
+ result.append({
1599
+ "id": d.id,
1600
+ "decision": d.decision,
1601
+ "context": d.context,
1602
+ "rationale": d.rationale,
1603
+ "confidence": d.confidence,
1604
+ "related_goals": d.related_goals,
1605
+ "dependencies": d.dependencies,
1606
+ "invalidated_by": d.invalidated_by,
1607
+ "turn_id": d.turn_id,
1608
+ "alternatives": [
1609
+ {"option": a.option, "pros": a.pros, "cons": a.cons, "rejection_reason": a.rejection_reason}
1610
+ for a in (d.alternatives or [])
1611
+ ],
1612
+ "evidence": [
1613
+ {"claim": e.claim, "source": e.source.value if e.source else "inferred", "turn_id": e.turn_id, "confidence": e.confidence}
1614
+ for e in (d.evidence or [])
1615
+ ],
1616
+ "trade_offs": [
1617
+ {"gained": t.gained, "sacrificed": t.sacrificed, "justification": t.justification}
1618
+ for t in (d.trade_offs or [])
1619
+ ],
1620
+ })
1621
+ return result
1622
+
1623
+ def _deserialize_decisions(self, data: list[dict]) -> list[DecisionTrail]:
1624
+ """Deserialize decisions from cache storage."""
1625
+ from .core.models import Alternative, Evidence, EvidenceSource, TradeOff
1626
+
1627
+ result = []
1628
+ for d in data:
1629
+ alternatives = [
1630
+ Alternative(
1631
+ option=a.get("option", ""),
1632
+ pros=a.get("pros", []),
1633
+ cons=a.get("cons", []),
1634
+ rejection_reason=a.get("rejection_reason")
1635
+ )
1636
+ for a in d.get("alternatives", [])
1637
+ ]
1638
+
1639
+ evidence = []
1640
+ for e in d.get("evidence", []):
1641
+ try:
1642
+ source = EvidenceSource(e.get("source", "inferred"))
1643
+ except ValueError:
1644
+ source = EvidenceSource.INFERRED
1645
+ evidence.append(Evidence(
1646
+ claim=e.get("claim", ""),
1647
+ source=source,
1648
+ turn_id=e.get("turn_id"),
1649
+ confidence=e.get("confidence", 0.8)
1650
+ ))
1651
+
1652
+ trade_offs = [
1653
+ TradeOff(
1654
+ gained=t.get("gained", ""),
1655
+ sacrificed=t.get("sacrificed", ""),
1656
+ justification=t.get("justification")
1657
+ )
1658
+ for t in d.get("trade_offs", [])
1659
+ ]
1660
+
1661
+ result.append(DecisionTrail(
1662
+ id=d.get("id", ""),
1663
+ decision=d.get("decision", ""),
1664
+ context=d.get("context", ""),
1665
+ rationale=d.get("rationale", ""),
1666
+ confidence=d.get("confidence", 0.5),
1667
+ related_goals=d.get("related_goals", []),
1668
+ dependencies=d.get("dependencies", []),
1669
+ invalidated_by=d.get("invalidated_by"),
1670
+ turn_id=d.get("turn_id"),
1671
+ alternatives=alternatives,
1672
+ evidence=evidence,
1673
+ trade_offs=trade_offs,
1674
+ ))
1675
+
1676
+ return result
1677
+
1678
+ def _goal_type_to_intent_type(self, goal_type: str) -> IntentType:
1679
+ """Convert unified graph goal_type string to IntentType enum."""
1680
+ mapping = {
1681
+ "mission": IntentType.MISSION,
1682
+ "goal": IntentType.GOAL,
1683
+ "task": IntentType.TASK,
1684
+ "decision": IntentType.DECISION,
1685
+ }
1686
+ return mapping.get(goal_type.lower(), IntentType.GOAL)
1687
+
1688
+ def _status_to_intent_status(self, status: str) -> IntentStatus:
1689
+ """Convert unified graph status string to IntentStatus enum."""
1690
+ mapping = {
1691
+ "active": IntentStatus.ACTIVE,
1692
+ "completed": IntentStatus.COMPLETED,
1693
+ "abandoned": IntentStatus.ABANDONED,
1694
+ "pending": IntentStatus.PENDING,
1695
+ "blocked": IntentStatus.BLOCKED,
1696
+ }
1697
+ return mapping.get(status.lower(), IntentStatus.ACTIVE)