hindsight-api 0.0.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. hindsight_api/__init__.py +38 -0
  2. hindsight_api/api/__init__.py +105 -0
  3. hindsight_api/api/http.py +1872 -0
  4. hindsight_api/api/mcp.py +157 -0
  5. hindsight_api/engine/__init__.py +47 -0
  6. hindsight_api/engine/cross_encoder.py +97 -0
  7. hindsight_api/engine/db_utils.py +93 -0
  8. hindsight_api/engine/embeddings.py +113 -0
  9. hindsight_api/engine/entity_resolver.py +575 -0
  10. hindsight_api/engine/llm_wrapper.py +269 -0
  11. hindsight_api/engine/memory_engine.py +3095 -0
  12. hindsight_api/engine/query_analyzer.py +519 -0
  13. hindsight_api/engine/response_models.py +222 -0
  14. hindsight_api/engine/retain/__init__.py +50 -0
  15. hindsight_api/engine/retain/bank_utils.py +423 -0
  16. hindsight_api/engine/retain/chunk_storage.py +82 -0
  17. hindsight_api/engine/retain/deduplication.py +104 -0
  18. hindsight_api/engine/retain/embedding_processing.py +62 -0
  19. hindsight_api/engine/retain/embedding_utils.py +54 -0
  20. hindsight_api/engine/retain/entity_processing.py +90 -0
  21. hindsight_api/engine/retain/fact_extraction.py +1027 -0
  22. hindsight_api/engine/retain/fact_storage.py +176 -0
  23. hindsight_api/engine/retain/link_creation.py +121 -0
  24. hindsight_api/engine/retain/link_utils.py +651 -0
  25. hindsight_api/engine/retain/orchestrator.py +405 -0
  26. hindsight_api/engine/retain/types.py +206 -0
  27. hindsight_api/engine/search/__init__.py +15 -0
  28. hindsight_api/engine/search/fusion.py +122 -0
  29. hindsight_api/engine/search/observation_utils.py +132 -0
  30. hindsight_api/engine/search/reranking.py +103 -0
  31. hindsight_api/engine/search/retrieval.py +503 -0
  32. hindsight_api/engine/search/scoring.py +161 -0
  33. hindsight_api/engine/search/temporal_extraction.py +64 -0
  34. hindsight_api/engine/search/think_utils.py +255 -0
  35. hindsight_api/engine/search/trace.py +215 -0
  36. hindsight_api/engine/search/tracer.py +447 -0
  37. hindsight_api/engine/search/types.py +160 -0
  38. hindsight_api/engine/task_backend.py +223 -0
  39. hindsight_api/engine/utils.py +203 -0
  40. hindsight_api/metrics.py +227 -0
  41. hindsight_api/migrations.py +163 -0
  42. hindsight_api/models.py +309 -0
  43. hindsight_api/pg0.py +425 -0
  44. hindsight_api/web/__init__.py +12 -0
  45. hindsight_api/web/server.py +143 -0
  46. hindsight_api-0.0.13.dist-info/METADATA +41 -0
  47. hindsight_api-0.0.13.dist-info/RECORD +48 -0
  48. hindsight_api-0.0.13.dist-info/WHEEL +4 -0
@@ -0,0 +1,3095 @@
1
+ """
2
+ Memory Engine for Memory Banks.
3
+
4
+ This implements a sophisticated memory architecture that combines:
5
+ 1. Temporal links: Memories connected by time proximity
6
+ 2. Semantic links: Memories connected by meaning/similarity
7
+ 3. Entity links: Memories connected by shared entities (PERSON, ORG, etc.)
8
+ 4. Spreading activation: Search through the graph with activation decay
9
+ 5. Dynamic weighting: Recency and frequency-based importance
10
+ """
11
+ import json
12
+ import os
13
+ from datetime import datetime, timedelta, timezone
14
+ from typing import Any, Dict, List, Optional, Tuple, Union, TypedDict
15
+ import asyncpg
16
+ import asyncio
17
+ from .embeddings import Embeddings, SentenceTransformersEmbeddings
18
+ from .cross_encoder import CrossEncoderModel
19
+ import time
20
+ import numpy as np
21
+ import uuid
22
+ import logging
23
+ from pydantic import BaseModel, Field
24
+
25
+
26
+ class RetainContentDict(TypedDict, total=False):
27
+ """Type definition for content items in retain_batch_async.
28
+
29
+ Fields:
30
+ content: Text content to store (required)
31
+ context: Context about the content (optional)
32
+ event_date: When the content occurred (optional, defaults to now)
33
+ metadata: Custom key-value metadata (optional)
34
+ document_id: Document ID for this content item (optional)
35
+ """
36
+ content: str # Required
37
+ context: str
38
+ event_date: datetime
39
+ metadata: Dict[str, str]
40
+ document_id: str
41
+
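For illustration, a single content item conforming to RetainContentDict could look like this (values invented):

example_item: RetainContentDict = {
    "content": "Alice moved to Berlin.",                      # required
    "context": "chat with Alice",                             # optional
    "event_date": datetime(2024, 6, 1, tzinfo=timezone.utc),  # optional, defaults to now
    "metadata": {"source": "chat"},                           # optional
    "document_id": "doc-42",                                  # optional
}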
42
+ from .query_analyzer import QueryAnalyzer
43
+ from .search.scoring import (
44
+ calculate_recency_weight,
45
+ calculate_frequency_weight,
46
+ )
47
+ from .entity_resolver import EntityResolver
48
+ from .retain import embedding_utils, bank_utils
49
+ from .search import think_utils, observation_utils
50
+ from .llm_wrapper import LLMConfig
51
+ from .response_models import RecallResult as RecallResultModel, ReflectResult, MemoryFact, EntityState, EntityObservation
52
+ from .task_backend import TaskBackend, AsyncIOQueueBackend
53
+ from .search.reranking import CrossEncoderReranker
54
+ from ..pg0 import EmbeddedPostgres
55
+ from enum import Enum
56
+
57
+
58
+ class Budget(str, Enum):
59
+ """Budget levels for recall/reflect operations."""
60
+ LOW = "low"
61
+ MID = "mid"
62
+ HIGH = "high"
63
+
64
+
65
+ def utcnow():
66
+ """Get current UTC time with timezone info."""
67
+ return datetime.now(timezone.utc)
68
+
69
+
70
+ # Logger for memory system
71
+ logger = logging.getLogger(__name__)
72
+
73
+ from .db_utils import acquire_with_retry, retry_with_backoff
74
+
75
+ import tiktoken
76
+ from dateutil import parser as date_parser
77
+
78
+ # Cache tiktoken encoding for token budget filtering (module-level singleton)
79
+ _TIKTOKEN_ENCODING = None
80
+
81
+ def _get_tiktoken_encoding():
82
+ """Get cached tiktoken encoding (cl100k_base for GPT-4/3.5)."""
83
+ global _TIKTOKEN_ENCODING
84
+ if _TIKTOKEN_ENCODING is None:
85
+ _TIKTOKEN_ENCODING = tiktoken.get_encoding("cl100k_base")
86
+ return _TIKTOKEN_ENCODING
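A minimal sketch of how the cached encoding can be used for token counting (illustrative only):

encoding = _get_tiktoken_encoding()
num_tokens = len(encoding.encode("Alice works at Google"))  # same counting used by the token-budget filter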
87
+
88
+
89
+ class MemoryEngine:
90
+ """
91
+ Advanced memory system using temporal and semantic linking with PostgreSQL.
92
+
93
+ This class provides:
94
+ - Embedding generation for semantic search
95
+ - Entity, temporal, and semantic link creation
96
+ - Think operations for formulating answers with opinions
97
+ - Bank profile and personality management
98
+ """
99
+
100
+ def __init__(
101
+ self,
102
+ db_url: str,
103
+ memory_llm_provider: str,
104
+ memory_llm_api_key: str,
105
+ memory_llm_model: str,
106
+ memory_llm_base_url: Optional[str] = None,
107
+ embeddings: Optional[Embeddings] = None,
108
+ cross_encoder: Optional[CrossEncoderModel] = None,
109
+ query_analyzer: Optional[QueryAnalyzer] = None,
110
+ pool_min_size: int = 5,
111
+ pool_max_size: int = 100,
112
+ task_backend: Optional[TaskBackend] = None,
113
+ ):
114
+ """
115
+ Initialize the temporal + semantic memory system.
116
+
117
+ Args:
118
+ db_url: PostgreSQL connection URL (postgresql://user:pass@host:port/dbname), or "pg0" to start an embedded PostgreSQL instance. Required.
119
+ memory_llm_provider: LLM provider for memory operations: "openai", "groq", or "ollama". Required.
120
+ memory_llm_api_key: API key for the LLM provider. Required.
121
+ memory_llm_model: Model name to use for all memory operations (put/think/opinions). Required.
122
+ memory_llm_base_url: Base URL for the LLM API. Optional. Defaults based on provider:
123
+ - groq: https://api.groq.com/openai/v1
124
+ - ollama: http://localhost:11434/v1
125
+ embeddings: Embeddings implementation to use. If not provided, uses SentenceTransformersEmbeddings
126
+ cross_encoder: Cross-encoder model for reranking. If not provided, uses default when cross-encoder reranker is selected
127
+ query_analyzer: Query analyzer implementation to use. If not provided, uses DateparserQueryAnalyzer
128
+ pool_min_size: Minimum number of connections in the pool (default: 5)
129
+ pool_max_size: Maximum number of connections in the pool (default: 100)
130
+ Increase for parallel think/search operations (e.g., 200-300 for 100+ parallel thinks)
131
+ task_backend: Custom task backend for async task execution. If not provided, uses AsyncIOQueueBackend
132
+ """
133
+ if not db_url:
134
+ raise ValueError("Database url is required")
135
+ # Track pg0 instance (if used)
136
+ self._pg0: Optional[EmbeddedPostgres] = None
137
+
138
+ # Initialize PostgreSQL connection URL
139
+ # The actual URL will be set during initialize() after starting the server
140
+ self._use_pg0 = db_url == "pg0"
141
+ self.db_url = db_url if not self._use_pg0 else None
142
+
143
+
144
+ # Set default base URL if not provided
145
+ if memory_llm_base_url is None:
146
+ if memory_llm_provider.lower() == "groq":
147
+ memory_llm_base_url = "https://api.groq.com/openai/v1"
148
+ elif memory_llm_provider.lower() == "ollama":
149
+ memory_llm_base_url = "http://localhost:11434/v1"
150
+ else:
151
+ memory_llm_base_url = ""
152
+
153
+ # Connection pool (will be created in initialize())
154
+ self._pool = None
155
+ self._initialized = False
156
+ self._pool_min_size = pool_min_size
157
+ self._pool_max_size = pool_max_size
158
+
159
+ # Initialize entity resolver (will be created in initialize())
160
+ self.entity_resolver = None
161
+
162
+ # Initialize embeddings
163
+ if embeddings is not None:
164
+ self.embeddings = embeddings
165
+ else:
166
+ self.embeddings = SentenceTransformersEmbeddings("BAAI/bge-small-en-v1.5")
167
+
168
+ # Initialize query analyzer
169
+ if query_analyzer is not None:
170
+ self.query_analyzer = query_analyzer
171
+ else:
172
+ from .query_analyzer import DateparserQueryAnalyzer
173
+ self.query_analyzer = DateparserQueryAnalyzer()
174
+
175
+ # Initialize LLM configuration
176
+ self._llm_config = LLMConfig(
177
+ provider=memory_llm_provider,
178
+ api_key=memory_llm_api_key,
179
+ base_url=memory_llm_base_url,
180
+ model=memory_llm_model,
181
+ )
182
+
183
+ # Store client and model for convenience (deprecated: use _llm_config.call() instead)
184
+ self._llm_client = self._llm_config._client
185
+ self._llm_model = self._llm_config.model
186
+
187
+ # Initialize cross-encoder reranker (cached for performance)
188
+ self._cross_encoder_reranker = CrossEncoderReranker(cross_encoder=cross_encoder)
189
+
190
+ # Initialize task backend
191
+ self._task_backend = task_backend or AsyncIOQueueBackend(
192
+ batch_size=100,
193
+ batch_interval=1.0
194
+ )
195
+
196
+ # Backpressure mechanism: limit concurrent searches to prevent overwhelming the database
197
+ # Limit concurrent searches to prevent connection pool exhaustion
198
+ # Each search can use 2-4 connections, so with 10 concurrent searches
199
+ # we use ~20-40 connections max, staying well within pool limits
200
+ self._search_semaphore = asyncio.Semaphore(10)
201
+
202
+ # Backpressure for put operations: limit concurrent puts to prevent database contention
203
+ # Each put_batch holds a connection for the entire transaction, so we limit to 5
204
+ # concurrent puts to avoid connection pool exhaustion and reduce write contention
205
+ self._put_semaphore = asyncio.Semaphore(5)
206
+
207
+ # Initialize the encoding eagerly so the first request does not pay the load cost
208
+ _get_tiktoken_encoding()
209
+
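A hedged construction sketch, assuming an OpenAI-compatible deployment; the connection string, API key, and model name are placeholders, and "pg0" would instead start the embedded PostgreSQL:

engine = MemoryEngine(
    db_url="postgresql://user:pass@localhost:5432/hindsight",  # or "pg0" for embedded Postgres
    memory_llm_provider="openai",
    memory_llm_api_key="sk-placeholder",
    memory_llm_model="model-name-placeholder",
)
await engine.initialize()  # must run inside an async context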
210
+ async def _handle_access_count_update(self, task_dict: Dict[str, Any]):
211
+ """
212
+ Handler for access count update tasks.
213
+
214
+ Args:
215
+ task_dict: Dict with 'node_ids' key containing list of node IDs to update
216
+ """
217
+ node_ids = task_dict.get('node_ids', [])
218
+ if not node_ids:
219
+ return
220
+
221
+ pool = await self._get_pool()
222
+ try:
223
+ # Convert string UUIDs to UUID type for faster matching
224
+ uuid_list = [uuid.UUID(nid) for nid in node_ids]
225
+ async with acquire_with_retry(pool) as conn:
226
+ await conn.execute(
227
+ "UPDATE memory_units SET access_count = access_count + 1 WHERE id = ANY($1::uuid[])",
228
+ uuid_list
229
+ )
230
+ except Exception as e:
231
+ logger.error(f"Access count handler: Error updating access counts: {e}")
232
+
233
+ async def _handle_batch_retain(self, task_dict: Dict[str, Any]):
234
+ """
235
+ Handler for batch retain tasks.
236
+
237
+ Args:
238
+ task_dict: Dict with 'bank_id', 'contents'
239
+ """
240
+ try:
241
+ bank_id = task_dict.get('bank_id')
242
+ contents = task_dict.get('contents', [])
243
+
244
+ logger.info(f"[BATCH_RETAIN_TASK] Starting background batch retain for bank_id={bank_id}, {len(contents)} items")
245
+
246
+ await self.retain_batch_async(
247
+ bank_id=bank_id,
248
+ contents=contents
249
+ )
250
+
251
+ logger.info(f"[BATCH_RETAIN_TASK] Completed background batch retain for bank_id={bank_id}")
252
+ except Exception as e:
253
+ logger.error(f"Batch retain handler: Error processing batch retain: {e}")
254
+ import traceback
255
+ traceback.print_exc()
256
+
257
+ async def execute_task(self, task_dict: Dict[str, Any]):
258
+ """
259
+ Execute a task by routing it to the appropriate handler.
260
+
261
+ This method is called by the task backend to execute tasks.
262
+ It receives a plain dict that can be serialized and sent over the network.
263
+
264
+ Args:
265
+ task_dict: Task dictionary with 'type' key and other payload data
266
+ Example: {'type': 'access_count_update', 'node_ids': [...]}
267
+ """
268
+ task_type = task_dict.get('type')
269
+ operation_id = task_dict.get('operation_id')
270
+ retry_count = task_dict.get('retry_count', 0)
271
+ max_retries = 3
272
+
273
+ # Check if operation was cancelled (only for tasks with operation_id)
274
+ if operation_id:
275
+ try:
276
+ pool = await self._get_pool()
277
+ async with acquire_with_retry(pool) as conn:
278
+ result = await conn.fetchrow(
279
+ "SELECT id FROM async_operations WHERE id = $1",
280
+ uuid.UUID(operation_id)
281
+ )
282
+ if not result:
283
+ # Operation was cancelled, skip processing
284
+ logger.info(f"Skipping cancelled operation: {operation_id}")
285
+ return
286
+ except Exception as e:
287
+ logger.error(f"Failed to check operation status {operation_id}: {e}")
288
+ # Continue with processing if we can't check status
289
+
290
+ try:
291
+ if task_type == 'access_count_update':
292
+ await self._handle_access_count_update(task_dict)
293
+ elif task_type == 'reinforce_opinion':
294
+ await self._handle_reinforce_opinion(task_dict)
295
+ elif task_type == 'form_opinion':
296
+ await self._handle_form_opinion(task_dict)
297
+ elif task_type == 'batch_put':
298
+ await self._handle_batch_retain(task_dict)
299
+ elif task_type == 'regenerate_observations':
300
+ await self._handle_regenerate_observations(task_dict)
301
+ else:
302
+ logger.error(f"Unknown task type: {task_type}")
303
+ # Don't retry unknown task types
304
+ if operation_id:
305
+ await self._delete_operation_record(operation_id)
306
+ return
307
+
308
+ # Task succeeded - delete operation record
309
+ if operation_id:
310
+ await self._delete_operation_record(operation_id)
311
+
312
+ except Exception as e:
313
+ # Task failed - check if we should retry
314
+ logger.error(f"Task execution failed (attempt {retry_count + 1}/{max_retries + 1}): {task_type}, error: {e}")
315
+ import traceback
316
+ error_traceback = traceback.format_exc()
317
+ traceback.print_exc()
318
+
319
+ if retry_count < max_retries:
320
+ # Reschedule with incremented retry count
321
+ task_dict['retry_count'] = retry_count + 1
322
+ logger.info(f"Rescheduling task {task_type} (retry {retry_count + 1}/{max_retries})")
323
+ await self._task_backend.submit_task(task_dict)
324
+ else:
325
+ # Max retries exceeded - mark operation as failed
326
+ logger.error(f"Max retries exceeded for task {task_type}, marking as failed")
327
+ if operation_id:
328
+ await self._mark_operation_failed(operation_id, str(e), error_traceback)
329
+
330
+ async def _delete_operation_record(self, operation_id: str):
331
+ """Helper to delete an operation record from the database."""
332
+ try:
333
+ pool = await self._get_pool()
334
+ async with acquire_with_retry(pool) as conn:
335
+ await conn.execute(
336
+ "DELETE FROM async_operations WHERE id = $1",
337
+ uuid.UUID(operation_id)
338
+ )
339
+ except Exception as e:
340
+ logger.error(f"Failed to delete async operation record {operation_id}: {e}")
341
+
342
+ async def _mark_operation_failed(self, operation_id: str, error_message: str, error_traceback: str):
343
+ """Helper to mark an operation as failed in the database."""
344
+ try:
345
+ pool = await self._get_pool()
346
+ # Truncate error message to avoid extremely long strings
347
+ full_error = f"{error_message}\n\nTraceback:\n{error_traceback}"
348
+ truncated_error = full_error[:5000] if len(full_error) > 5000 else full_error
349
+
350
+ async with acquire_with_retry(pool) as conn:
351
+ await conn.execute(
352
+ """
353
+ UPDATE async_operations
354
+ SET status = 'failed', error_message = $2
355
+ WHERE id = $1
356
+ """,
357
+ uuid.UUID(operation_id),
358
+ truncated_error
359
+ )
360
+ logger.info(f"Marked async operation as failed: {operation_id}")
361
+ except Exception as e:
362
+ logger.error(f"Failed to mark operation as failed {operation_id}: {e}")
363
+
364
+ async def initialize(self):
365
+ """Initialize the connection pool, models, and background workers.
366
+
367
+ Loads models (embeddings, cross-encoder) in parallel with pg0 startup
368
+ for faster overall initialization.
369
+ """
370
+ if self._initialized:
371
+ return
372
+
373
+ import concurrent.futures
374
+
375
+ # Run model loading in thread pool (CPU-bound) in parallel with pg0 startup
376
+ loop = asyncio.get_event_loop()
377
+
378
+ async def start_pg0():
379
+ """Start pg0 if configured."""
380
+ if self._use_pg0:
381
+ self._pg0 = EmbeddedPostgres()
382
+ self.db_url = await self._pg0.ensure_running()
383
+
384
+ def load_embeddings():
385
+ """Load embedding model (CPU-bound)."""
386
+ self.embeddings.load()
387
+
388
+ def load_cross_encoder():
389
+ """Load cross-encoder model (CPU-bound)."""
390
+ self._cross_encoder_reranker.cross_encoder.load()
391
+
392
+ def load_query_analyzer():
393
+ """Load query analyzer model (CPU-bound)."""
394
+ self.query_analyzer.load()
395
+
396
+ # Run pg0 and all model loads in parallel
397
+ # pg0 is async (IO-bound), models are sync (CPU-bound in thread pool)
398
+ # Use 3 workers to load all models concurrently
399
+ with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
400
+ # Start all tasks
401
+ pg0_task = asyncio.create_task(start_pg0())
402
+ embeddings_future = loop.run_in_executor(executor, load_embeddings)
403
+ cross_encoder_future = loop.run_in_executor(executor, load_cross_encoder)
404
+ query_analyzer_future = loop.run_in_executor(executor, load_query_analyzer)
405
+
406
+ # Wait for all to complete
407
+ await asyncio.gather(
408
+ pg0_task, embeddings_future, cross_encoder_future, query_analyzer_future
409
+ )
410
+
411
+ logger.info(f"Connecting to PostgreSQL at {self.db_url}")
412
+
413
+ # Create connection pool
414
+ # For read-heavy workloads with many parallel think/search operations,
415
+ # we need a larger pool. Read operations don't need strong isolation.
416
+ self._pool = await asyncpg.create_pool(
417
+ self.db_url,
418
+ min_size=self._pool_min_size,
419
+ max_size=self._pool_max_size,
420
+ command_timeout=60,
421
+ statement_cache_size=0, # Disable prepared statement cache
422
+ timeout=30, # Connection acquisition timeout (seconds)
423
+ )
424
+
425
+ # Initialize entity resolver with pool
426
+ self.entity_resolver = EntityResolver(self._pool)
427
+
428
+ # Set executor for task backend and initialize
429
+ self._task_backend.set_executor(self.execute_task)
430
+ await self._task_backend.initialize()
431
+
432
+ self._initialized = True
433
+ logger.info("Memory system initialized (pool and task backend started)")
434
+
435
+ async def _get_pool(self) -> asyncpg.Pool:
436
+ """Get the connection pool (must call initialize() first)."""
437
+ if not self._initialized:
438
+ await self.initialize()
439
+ return self._pool
440
+
441
+ async def _acquire_connection(self):
442
+ """
443
+ Acquire a connection from the pool with retry logic.
444
+
445
+ Returns an async context manager that yields a connection.
446
+ Retries on transient connection errors with exponential backoff.
447
+ """
448
+ pool = await self._get_pool()
449
+
450
+ async def acquire():
451
+ return await pool.acquire()
452
+
453
+ return await retry_with_backoff(acquire)
454
+
455
+ async def health_check(self) -> dict:
456
+ """
457
+ Perform a health check by querying the database.
458
+
459
+ Returns:
460
+ dict with status and optional error message
461
+ """
462
+ try:
463
+ pool = await self._get_pool()
464
+ async with pool.acquire() as conn:
465
+ result = await conn.fetchval("SELECT 1")
466
+ if result == 1:
467
+ return {"status": "healthy", "database": "connected"}
468
+ else:
469
+ return {"status": "unhealthy", "database": "unexpected response"}
470
+ except Exception as e:
471
+ return {"status": "unhealthy", "database": "error", "error": str(e)}
472
+
473
+ async def close(self):
474
+ """Close the connection pool and shutdown background workers."""
475
+ logger.info("close() started")
476
+
477
+ # Shutdown task backend
478
+ await self._task_backend.shutdown()
479
+
480
+ # Close pool
481
+ if self._pool is not None:
482
+ self._pool.terminate()
483
+ self._pool = None
484
+
485
+ self._initialized = False
486
+
487
+ # Stop pg0 if we started it
488
+ if self._pg0 is not None:
489
+ logger.info("Stopping pg0...")
490
+ await self._pg0.stop()
491
+ self._pg0 = None
492
+ logger.info("pg0 stopped")
493
+
494
+
495
+ async def wait_for_background_tasks(self):
496
+ """
497
+ Wait for all pending background tasks to complete.
498
+
499
+ This is useful in tests to ensure background tasks (like opinion reinforcement)
500
+ complete before making assertions.
501
+ """
502
+ if hasattr(self._task_backend, 'wait_for_pending_tasks'):
503
+ await self._task_backend.wait_for_pending_tasks()
504
+
505
+ def _format_readable_date(self, dt: datetime) -> str:
506
+ """
507
+ Format a datetime into a readable string for temporal matching.
508
+
509
+ Examples:
510
+ - June 2024
511
+ - January 15, 2024
512
+ - December 2023
513
+
514
+ This helps queries like "camping in June" match facts that happened in June.
515
+
516
+ Args:
517
+ dt: datetime object to format
518
+
519
+ Returns:
520
+ Readable date string
521
+ """
522
+ # Format as "Month Year" for most cases
523
+ # Could be extended to include day for very specific dates if needed
524
+ month_name = dt.strftime("%B") # Full month name (e.g., "June")
525
+ year = dt.strftime("%Y") # Year (e.g., "2024")
526
+
527
+ # For now, use "Month Year" format
528
+ # Could check if day is significant (not 1st or 15th) and include it
529
+ return f"{month_name} {year}"
530
+
531
+ async def _find_duplicate_facts_batch(
532
+ self,
533
+ conn,
534
+ bank_id: str,
535
+ texts: List[str],
536
+ embeddings: List[List[float]],
537
+ event_date: datetime,
538
+ time_window_hours: int = 24,
539
+ similarity_threshold: float = 0.95
540
+ ) -> List[bool]:
541
+ """
542
+ Check which facts are duplicates using semantic similarity + temporal window.
543
+
544
+ For each new fact, checks if a semantically similar fact already exists
545
+ within the time window. Uses pgvector cosine similarity for efficiency.
546
+
547
+ Args:
548
+ conn: Database connection
549
+ bank_id: Bank identifier
550
+ texts: List of fact texts to check
551
+ embeddings: Corresponding embeddings
552
+ event_date: Event date for temporal filtering
553
+ time_window_hours: Hours before/after event_date to search (default: 24)
554
+ similarity_threshold: Minimum cosine similarity to consider duplicate (default: 0.95)
555
+
556
+ Returns:
557
+ List of booleans - True if fact is a duplicate (should skip), False if new
558
+ """
559
+ if not texts:
560
+ return []
561
+
562
+ # Handle edge cases where event_date is at datetime boundaries
563
+ try:
564
+ time_lower = event_date - timedelta(hours=time_window_hours)
565
+ except OverflowError:
566
+ time_lower = datetime.min
567
+ try:
568
+ time_upper = event_date + timedelta(hours=time_window_hours)
569
+ except OverflowError:
570
+ time_upper = datetime.max
571
+
572
+ # Fetch ALL existing facts in time window ONCE (much faster than N queries)
573
+ import time as time_mod
574
+ fetch_start = time_mod.time()
575
+ existing_facts = await conn.fetch(
576
+ """
577
+ SELECT id, text, embedding
578
+ FROM memory_units
579
+ WHERE bank_id = $1
580
+ AND event_date BETWEEN $2 AND $3
581
+ """,
582
+ bank_id, time_lower, time_upper
583
+ )
584
+
585
+ # If no existing facts, nothing is duplicate
586
+ if not existing_facts:
587
+ return [False] * len(texts)
588
+
589
+ # Compute similarities in Python (vectorized with numpy)
590
+ import numpy as np
591
+ is_duplicate = []
592
+
593
+ # Convert existing embeddings to numpy for faster computation
594
+ embedding_arrays = []
595
+ for row in existing_facts:
596
+ raw_emb = row['embedding']
597
+ # Handle different pgvector formats
598
+ if isinstance(raw_emb, str):
599
+ # Parse string format: "[1.0, 2.0, ...]"
600
+ import json
601
+ emb = np.array(json.loads(raw_emb), dtype=np.float32)
602
+ elif isinstance(raw_emb, (list, tuple)):
603
+ emb = np.array(raw_emb, dtype=np.float32)
604
+ else:
605
+ # Try direct conversion
606
+ emb = np.array(raw_emb, dtype=np.float32)
607
+ embedding_arrays.append(emb)
608
+
609
+ if not embedding_arrays:
610
+ existing_embeddings = np.array([])
611
+ elif len(embedding_arrays) == 1:
612
+ # Single embedding: reshape to (1, dim)
613
+ existing_embeddings = embedding_arrays[0].reshape(1, -1)
614
+ else:
615
+ # Multiple embeddings: vstack
616
+ existing_embeddings = np.vstack(embedding_arrays)
617
+
618
+ comp_start = time_mod.time()
619
+ for embedding in embeddings:
620
+ # Compute cosine similarity with all existing facts
621
+ emb_array = np.array(embedding)
622
+ # Cosine similarity = 1 - cosine distance
623
+ # For normalized vectors: cosine_sim = dot product
624
+ similarities = np.dot(existing_embeddings, emb_array)
625
+
626
+ # Check if any existing fact is too similar
627
+ max_similarity = np.max(similarities) if len(similarities) > 0 else 0
628
+ is_duplicate.append(max_similarity > similarity_threshold)
629
+
630
+
631
+ return is_duplicate
632
+
633
+ def retain(
634
+ self,
635
+ bank_id: str,
636
+ content: str,
637
+ context: str = "",
638
+ event_date: Optional[datetime] = None,
639
+ ) -> List[str]:
640
+ """
641
+ Store content as memory units (synchronous wrapper).
642
+
643
+ This is a synchronous wrapper around retain_async() for convenience.
644
+ For best performance, use retain_async() directly.
645
+
646
+ Args:
647
+ bank_id: Unique identifier for the bank
648
+ content: Text content to store
649
+ context: Context about when/why this memory was formed
650
+ event_date: When the event occurred (defaults to now)
651
+
652
+ Returns:
653
+ List of created unit IDs
654
+ """
655
+ # Run async version synchronously
656
+ return asyncio.run(self.retain_async(bank_id, content, context, event_date))
657
+
658
+ async def retain_async(
659
+ self,
660
+ bank_id: str,
661
+ content: str,
662
+ context: str = "",
663
+ event_date: Optional[datetime] = None,
664
+ document_id: Optional[str] = None,
665
+ fact_type_override: Optional[str] = None,
666
+ confidence_score: Optional[float] = None,
667
+ ) -> List[str]:
668
+ """
669
+ Store content as memory units with temporal and semantic links (ASYNC version).
670
+
671
+ This is a convenience wrapper around retain_batch_async for a single content item.
672
+
673
+ Args:
674
+ bank_id: Unique identifier for the bank
675
+ content: Text content to store
676
+ context: Context about when/why this memory was formed
677
+ event_date: When the event occurred (defaults to now)
678
+ document_id: Optional document ID for tracking (always upserts if document already exists)
679
+ fact_type_override: Override fact type ('world', 'bank', 'opinion')
680
+ confidence_score: Confidence score for opinions (0.0 to 1.0)
681
+
682
+ Returns:
683
+ List of created unit IDs
684
+ """
685
+ # Build content dict
686
+ content_dict: RetainContentDict = {
687
+ "content": content,
688
+ "context": context,
689
+ "event_date": event_date
690
+ }
691
+ if document_id:
692
+ content_dict["document_id"] = document_id
693
+
694
+ # Use retain_batch_async with a single item (avoids code duplication)
695
+ result = await self.retain_batch_async(
696
+ bank_id=bank_id,
697
+ contents=[content_dict],
698
+ fact_type_override=fact_type_override,
699
+ confidence_score=confidence_score
700
+ )
701
+
702
+ # Return the first (and only) list of unit IDs
703
+ return result[0] if result else []
704
+
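Hedged usage sketch for the single-item wrapper (identifiers invented); it delegates to retain_batch_async internally:

unit_ids = await engine.retain_async(
    bank_id="user123",
    content="Alice works at Google",
    context="onboarding chat",
    document_id="doc-1",
)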
705
+ async def retain_batch_async(
706
+ self,
707
+ bank_id: str,
708
+ contents: List[RetainContentDict],
709
+ document_id: Optional[str] = None,
710
+ fact_type_override: Optional[str] = None,
711
+ confidence_score: Optional[float] = None,
712
+ ) -> List[List[str]]:
713
+ """
714
+ Store multiple content items as memory units in ONE batch operation.
715
+
716
+ This is MUCH more efficient than calling retain_async multiple times:
717
+ - Extracts facts from all contents in parallel
718
+ - Generates ALL embeddings in ONE batch
719
+ - Does ALL database operations in ONE transaction
720
+ - Automatically chunks large batches to prevent timeouts
721
+
722
+ Args:
723
+ bank_id: Unique identifier for the bank
724
+ contents: List of dicts with keys:
725
+ - "content" (required): Text content to store
726
+ - "context" (optional): Context about the memory
727
+ - "event_date" (optional): When the event occurred
728
+ - "document_id" (optional): Document ID for this specific content item
729
+ document_id: **DEPRECATED** - Use "document_id" key in each content dict instead.
730
+ Applies the same document_id to ALL content items that don't specify their own.
731
+ fact_type_override: Override fact type for all facts ('world', 'bank', 'opinion')
732
+ confidence_score: Confidence score for opinions (0.0 to 1.0)
733
+
734
+ Returns:
735
+ List of lists of unit IDs (one list per content item)
736
+
737
+ Example (new style - per-content document_id):
738
+ unit_ids = await memory.retain_batch_async(
739
+ bank_id="user123",
740
+ contents=[
741
+ {"content": "Alice works at Google", "document_id": "doc1"},
742
+ {"content": "Bob loves Python", "document_id": "doc2"},
743
+ {"content": "More about Alice", "document_id": "doc1"},
744
+ ]
745
+ )
746
+ # Returns: [["unit-id-1"], ["unit-id-2"], ["unit-id-3"]]
747
+
748
+ Example (deprecated style - batch-level document_id):
749
+ unit_ids = await memory.retain_batch_async(
750
+ bank_id="user123",
751
+ contents=[
752
+ {"content": "Alice works at Google"},
753
+ {"content": "Bob loves Python"},
754
+ ],
755
+ document_id="meeting-2024-01-15"
756
+ )
757
+ # Returns: [["unit-id-1"], ["unit-id-2"]]
758
+ """
759
+ start_time = time.time()
760
+
761
+ if not contents:
762
+ return []
763
+
764
+ # Apply batch-level document_id to contents that don't have their own (backwards compatibility)
765
+ if document_id:
766
+ for item in contents:
767
+ if "document_id" not in item:
768
+ item["document_id"] = document_id
769
+
770
+ # Auto-chunk large batches by character count to avoid timeouts and memory issues
771
+ # Calculate total character count
772
+ total_chars = sum(len(item.get("content", "")) for item in contents)
773
+
774
+ CHARS_PER_BATCH = 600_000
775
+
776
+ if total_chars > CHARS_PER_BATCH:
777
+ # Split into smaller batches based on character count
778
+ logger.info(f"Large batch detected ({total_chars:,} chars from {len(contents)} items). Splitting into sub-batches of ~{CHARS_PER_BATCH:,} chars each...")
779
+
780
+ sub_batches = []
781
+ current_batch = []
782
+ current_batch_chars = 0
783
+
784
+ for item in contents:
785
+ item_chars = len(item.get("content", ""))
786
+
787
+ # If adding this item would exceed the limit, start a new batch
788
+ # (unless current batch is empty - then we must include it even if it's large)
789
+ if current_batch and current_batch_chars + item_chars > CHARS_PER_BATCH:
790
+ sub_batches.append(current_batch)
791
+ current_batch = [item]
792
+ current_batch_chars = item_chars
793
+ else:
794
+ current_batch.append(item)
795
+ current_batch_chars += item_chars
796
+
797
+ # Add the last batch
798
+ if current_batch:
799
+ sub_batches.append(current_batch)
800
+
801
+ logger.info(f"Split into {len(sub_batches)} sub-batches: {[len(b) for b in sub_batches]} items each")
802
+
803
+ # Process each sub-batch using internal method (skip chunking check)
804
+ all_results = []
805
+ for i, sub_batch in enumerate(sub_batches, 1):
806
+ sub_batch_chars = sum(len(item.get("content", "")) for item in sub_batch)
807
+ logger.info(f"Processing sub-batch {i}/{len(sub_batches)}: {len(sub_batch)} items, {sub_batch_chars:,} chars")
808
+
809
+ sub_results = await self._retain_batch_async_internal(
810
+ bank_id=bank_id,
811
+ contents=sub_batch,
812
+ document_id=document_id,
813
+ is_first_batch=i == 1, # Only upsert on first batch
814
+ fact_type_override=fact_type_override,
815
+ confidence_score=confidence_score
816
+ )
817
+ all_results.extend(sub_results)
818
+
819
+ total_time = time.time() - start_time
820
+ logger.info(f"RETAIN_BATCH_ASYNC (chunked) COMPLETE: {len(all_results)} results from {len(contents)} contents in {total_time:.3f}s")
821
+ return all_results
822
+
823
+ # Small batch - use internal method directly
824
+ return await self._retain_batch_async_internal(
825
+ bank_id=bank_id,
826
+ contents=contents,
827
+ document_id=document_id,
828
+ is_first_batch=True,
829
+ fact_type_override=fact_type_override,
830
+ confidence_score=confidence_score
831
+ )
832
+
833
+ async def _retain_batch_async_internal(
834
+ self,
835
+ bank_id: str,
836
+ contents: List[RetainContentDict],
837
+ document_id: Optional[str] = None,
838
+ is_first_batch: bool = True,
839
+ fact_type_override: Optional[str] = None,
840
+ confidence_score: Optional[float] = None,
841
+ ) -> List[List[str]]:
842
+ """
843
+ Internal method for batch processing without chunking logic.
844
+
845
+ Assumes contents are already appropriately sized (at or below the per-batch character limit).
846
+ Called by retain_batch_async after chunking large batches.
847
+
848
+ Uses semaphore for backpressure to limit concurrent retains.
849
+
850
+ Args:
851
+ bank_id: Unique identifier for the bank
852
+ contents: List of dicts with content, context, event_date
853
+ document_id: Optional document ID (always upserts if exists)
854
+ is_first_batch: Whether this is the first batch (for chunked operations, only delete on first batch)
855
+ fact_type_override: Override fact type for all facts
856
+ confidence_score: Confidence score for opinions
857
+ """
858
+ # Backpressure: limit concurrent retains to prevent database contention
859
+ async with self._put_semaphore:
860
+ # Use the new modular orchestrator
861
+ from .retain import orchestrator
862
+
863
+ pool = await self._get_pool()
864
+ return await orchestrator.retain_batch(
865
+ pool=pool,
866
+ embeddings_model=self.embeddings,
867
+ llm_config=self._llm_config,
868
+ entity_resolver=self.entity_resolver,
869
+ task_backend=self._task_backend,
870
+ format_date_fn=self._format_readable_date,
871
+ duplicate_checker_fn=self._find_duplicate_facts_batch,
872
+ regenerate_observations_fn=self._regenerate_observations_sync,
873
+ bank_id=bank_id,
874
+ contents_dicts=contents,
875
+ document_id=document_id,
876
+ is_first_batch=is_first_batch,
877
+ fact_type_override=fact_type_override,
878
+ confidence_score=confidence_score
879
+ )
880
+
881
+ def recall(
882
+ self,
883
+ bank_id: str,
884
+ query: str,
885
+ fact_type: str,
886
+ budget: Budget = Budget.MID,
887
+ max_tokens: int = 4096,
888
+ enable_trace: bool = False,
889
+ ) -> RecallResultModel:
890
+ """
891
+ Recall memories using 4-way parallel retrieval (synchronous wrapper).
892
+
893
+ This is a synchronous wrapper around recall_async() for convenience.
894
+ For best performance, use recall_async() directly.
895
+
896
+ Args:
897
+ bank_id: bank ID to recall for
898
+ query: Recall query
899
+ fact_type: Required filter for fact type ('world', 'bank', or 'opinion')
900
+ budget: Budget level for graph traversal (low=100, mid=300, high=600 units)
901
+ max_tokens: Maximum tokens to return (counts only 'text' field, default 4096)
902
+ enable_trace: If True, returns detailed trace object
903
+
904
+ Returns:
905
+ RecallResultModel containing the results and optional trace
906
+ """
907
+ # Run async version synchronously
908
+ return asyncio.run(self.recall_async(
909
+ bank_id, query, [fact_type], budget, max_tokens, enable_trace
910
+ ))
911
+
912
+ async def recall_async(
913
+ self,
914
+ bank_id: str,
915
+ query: str,
916
+ fact_type: List[str],
917
+ budget: Budget = Budget.MID,
918
+ max_tokens: int = 4096,
919
+ enable_trace: bool = False,
920
+ question_date: Optional[datetime] = None,
921
+ include_entities: bool = False,
922
+ max_entity_tokens: int = 1024,
923
+ include_chunks: bool = False,
924
+ max_chunk_tokens: int = 8192,
925
+ ) -> RecallResultModel:
926
+ """
927
+ Recall memories using N*4-way parallel retrieval (N fact types × 4 retrieval methods).
928
+
929
+ This implements the core RECALL operation:
930
+ 1. Retrieval: For each fact type, run 4 parallel retrievals (semantic vector, BM25 keyword, graph activation, temporal graph)
931
+ 2. Merge: Combine using Reciprocal Rank Fusion (RRF)
932
+ 3. Rerank: Score using selected reranker (heuristic or cross-encoder)
933
+ 4. Diversify: Apply MMR for diversity
934
+ 5. Token Filter: Return results up to max_tokens budget
935
+
936
+ Args:
937
+ bank_id: bank ID to recall for
938
+ query: Recall query
939
+ fact_type: List of fact types to recall (e.g., ['world', 'bank'])
940
+ budget: Budget level for graph traversal (low=100, mid=300, high=600 units)
941
+ max_tokens: Maximum tokens to return (counts only 'text' field, default 4096)
942
+ Results are returned until token budget is reached, stopping before
943
+ including a fact that would exceed the limit
944
+ enable_trace: Whether to return trace for debugging (deprecated)
945
+ question_date: Optional date when question was asked (for temporal filtering)
946
+ include_entities: Whether to include entity observations in the response
947
+ max_entity_tokens: Maximum tokens for entity observations (default 1024)
948
+ include_chunks: Whether to include raw chunks in the response
949
+ max_chunk_tokens: Maximum tokens for chunks (default 8192)
950
+
951
+ Returns:
952
+ RecallResultModel containing:
953
+ - results: List of MemoryFact objects
954
+ - trace: Optional trace information for debugging
955
+ - entities: Optional dict of entity states (if include_entities=True)
956
+ - chunks: Optional dict of chunks (if include_chunks=True)
957
+ """
958
+ # Map budget enum to thinking_budget number
959
+ budget_mapping = {
960
+ Budget.LOW: 100,
961
+ Budget.MID: 300,
962
+ Budget.HIGH: 600
963
+ }
964
+ thinking_budget = budget_mapping[budget]
965
+
966
+ # Backpressure: limit concurrent recalls to prevent overwhelming the database
967
+ async with self._search_semaphore:
968
+ # Retry loop for connection errors
969
+ max_retries = 3
970
+ for attempt in range(max_retries + 1):
971
+ try:
972
+ return await self._search_with_retries(
973
+ bank_id, query, fact_type, thinking_budget, max_tokens, enable_trace, question_date,
974
+ include_entities, max_entity_tokens, include_chunks, max_chunk_tokens
975
+ )
976
+ except Exception as e:
977
+ # Check if it's a connection error
978
+ is_connection_error = (
979
+ isinstance(e, asyncpg.TooManyConnectionsError) or
980
+ isinstance(e, asyncpg.CannotConnectNowError) or
981
+ (isinstance(e, asyncpg.PostgresError) and 'connection' in str(e).lower())
982
+ )
983
+
984
+ if is_connection_error and attempt < max_retries:
985
+ # Wait with exponential backoff before retry
986
+ wait_time = 0.5 * (2 ** attempt) # 0.5s, 1s, 2s
987
+ logger.warning(
988
+ f"Connection error on search attempt {attempt + 1}/{max_retries + 1}: {str(e)}. "
989
+ f"Retrying in {wait_time:.1f}s..."
990
+ )
991
+ await asyncio.sleep(wait_time)
992
+ else:
993
+ # Not a connection error or out of retries - raise
994
+ raise
995
+ raise Exception("Exceeded maximum retries for search due to connection errors.")
996
+
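Hedged usage sketch of recall_async; the result fields follow the RecallResult model referenced above:

result = await engine.recall_async(
    bank_id="user123",
    query="Where does Alice work?",
    fact_type=["world"],
    budget=Budget.MID,
    max_tokens=2048,
)
for fact in result.results:  # results is a list of MemoryFact objects
    print(fact.text)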
997
+ async def _search_with_retries(
998
+ self,
999
+ bank_id: str,
1000
+ query: str,
1001
+ fact_type: List[str],
1002
+ thinking_budget: int,
1003
+ max_tokens: int,
1004
+ enable_trace: bool,
1005
+ question_date: Optional[datetime] = None,
1006
+ include_entities: bool = False,
1007
+ max_entity_tokens: int = 500,
1008
+ include_chunks: bool = False,
1009
+ max_chunk_tokens: int = 8192,
1010
+ ) -> RecallResultModel:
1011
+ """
1012
+ Search implementation with modular retrieval and reranking.
1013
+
1014
+ Architecture:
1015
+ 1. Retrieval: 4-way parallel (semantic, keyword, graph, temporal graph)
1016
+ 2. Merge: RRF to combine ranked lists
1017
+ 3. Reranking: Pluggable strategy (heuristic or cross-encoder)
1018
+ 4. Diversity: MMR with λ=0.5
1019
+ 5. Token Filter: Limit results to max_tokens budget
1020
+
1021
+ Args:
1022
+ bank_id: Bank identifier
1023
+ query: Search query
1024
+ fact_type: List of fact types to search
1025
+ thinking_budget: Nodes to explore in graph traversal
1026
+ max_tokens: Maximum tokens to return (counts only 'text' field)
1027
+ enable_trace: Whether to return search trace (deprecated)
1028
+ include_entities: Whether to include entity observations
1029
+ max_entity_tokens: Maximum tokens for entity observations
1030
+ include_chunks: Whether to include raw chunks
1031
+ max_chunk_tokens: Maximum tokens for chunks
1032
+
1033
+ Returns:
1034
+ RecallResultModel with results, trace, optional entities, and optional chunks
1035
+ """
1036
+ # Initialize tracer if requested
1037
+ from .search.tracer import SearchTracer
1038
+ tracer = SearchTracer(query, thinking_budget, max_tokens) if enable_trace else None
1039
+ if tracer:
1040
+ tracer.start()
1041
+
1042
+ pool = await self._get_pool()
1043
+ search_start = time.time()
1044
+
1045
+ # Buffer logs for clean output in concurrent scenarios
1046
+ search_id = f"{bank_id[:8]}-{int(time.time() * 1000) % 100000}"
1047
+ log_buffer = []
1048
+ log_buffer.append(f"[SEARCH {search_id}] Query: '{query[:50]}...' (budget={thinking_budget}, max_tokens={max_tokens})")
1049
+
1050
+ try:
1051
+ # Step 1: Generate query embedding (for semantic search)
1052
+ step_start = time.time()
1053
+ query_embedding = embedding_utils.generate_embedding(self.embeddings, query)
1054
+ step_duration = time.time() - step_start
1055
+ log_buffer.append(f" [1] Generate query embedding: {step_duration:.3f}s")
1056
+
1057
+ if tracer:
1058
+ tracer.record_query_embedding(query_embedding)
1059
+ tracer.add_phase_metric("generate_query_embedding", step_duration)
1060
+
1061
+ # Step 2: N*4-Way Parallel Retrieval (N fact types × 4 retrieval methods)
1062
+ step_start = time.time()
1063
+ query_embedding_str = str(query_embedding)
1064
+
1065
+ from .search.retrieval import retrieve_parallel
1066
+
1067
+ # Track each retrieval start time
1068
+ retrieval_start = time.time()
1069
+
1070
+ # Run retrieval for each fact type in parallel
1071
+ retrieval_tasks = [
1072
+ retrieve_parallel(
1073
+ pool, query, query_embedding_str, bank_id, ft, thinking_budget,
1074
+ question_date, self.query_analyzer
1075
+ )
1076
+ for ft in fact_type
1077
+ ]
1078
+ all_retrievals = await asyncio.gather(*retrieval_tasks)
1079
+
1080
+ # Combine all results from all fact types and aggregate timings
1081
+ semantic_results = []
1082
+ bm25_results = []
1083
+ graph_results = []
1084
+ temporal_results = []
1085
+ aggregated_timings = {"semantic": 0.0, "bm25": 0.0, "graph": 0.0, "temporal": 0.0}
1086
+
1087
+ detected_temporal_constraint = None
1088
+ for idx, (ft_semantic, ft_bm25, ft_graph, ft_temporal, ft_timings, ft_temporal_constraint) in enumerate(all_retrievals):
1089
+ # Log fact types in this retrieval batch
1090
+ ft_name = fact_type[idx] if idx < len(fact_type) else "unknown"
1091
+ logger.debug(f"[SEARCH {search_id}] Fact type '{ft_name}': semantic={len(ft_semantic)}, bm25={len(ft_bm25)}, graph={len(ft_graph)}, temporal={len(ft_temporal) if ft_temporal else 0}")
1092
+
1093
+ semantic_results.extend(ft_semantic)
1094
+ bm25_results.extend(ft_bm25)
1095
+ graph_results.extend(ft_graph)
1096
+ if ft_temporal:
1097
+ temporal_results.extend(ft_temporal)
1098
+ # Track max timing for each method (since they run in parallel across fact types)
1099
+ for method, duration in ft_timings.items():
1100
+ aggregated_timings[method] = max(aggregated_timings[method], duration)
1101
+ # Capture temporal constraint (same across all fact types)
1102
+ if ft_temporal_constraint:
1103
+ detected_temporal_constraint = ft_temporal_constraint
1104
+
1105
+ # If no temporal results from any fact type, set to None
1106
+ if not temporal_results:
1107
+ temporal_results = None
1108
+
1109
+ # Sort combined results by score (descending) so higher-scored results
1110
+ # get better ranks in the trace, regardless of fact type
1111
+ semantic_results.sort(key=lambda r: r.similarity if hasattr(r, 'similarity') else 0, reverse=True)
1112
+ bm25_results.sort(key=lambda r: r.bm25_score if hasattr(r, 'bm25_score') else 0, reverse=True)
1113
+ graph_results.sort(key=lambda r: r.activation if hasattr(r, 'activation') else 0, reverse=True)
1114
+ if temporal_results:
1115
+ temporal_results.sort(key=lambda r: r.combined_score if hasattr(r, 'combined_score') else 0, reverse=True)
1116
+
1117
+ retrieval_duration = time.time() - retrieval_start
1118
+
1119
+ step_duration = time.time() - step_start
1120
+ total_retrievals = len(fact_type) * (4 if temporal_results else 3)
1121
+ # Format per-method timings
1122
+ timing_parts = [
1123
+ f"semantic={len(semantic_results)}({aggregated_timings['semantic']:.3f}s)",
1124
+ f"bm25={len(bm25_results)}({aggregated_timings['bm25']:.3f}s)",
1125
+ f"graph={len(graph_results)}({aggregated_timings['graph']:.3f}s)"
1126
+ ]
1127
+ temporal_info = ""
1128
+ if detected_temporal_constraint:
1129
+ start_dt, end_dt = detected_temporal_constraint
1130
+ temporal_count = len(temporal_results) if temporal_results else 0
1131
+ timing_parts.append(f"temporal={temporal_count}({aggregated_timings['temporal']:.3f}s)")
1132
+ temporal_info = f" | temporal_range={start_dt.strftime('%Y-%m-%d')} to {end_dt.strftime('%Y-%m-%d')}"
1133
+ log_buffer.append(f" [2] {total_retrievals}-way retrieval ({len(fact_type)} fact_types): {', '.join(timing_parts)} in {step_duration:.3f}s{temporal_info}")
1134
+
1135
+ # Record retrieval results for tracer (convert typed results to old format)
1136
+ if tracer:
1137
+ # Convert RetrievalResult to old tuple format for tracer
1138
+ def to_tuple_format(results):
1139
+ return [(r.id, r.__dict__) for r in results]
1140
+
1141
+ # Add semantic retrieval results
1142
+ tracer.add_retrieval_results(
1143
+ method_name="semantic",
1144
+ results=to_tuple_format(semantic_results),
1145
+ duration_seconds=aggregated_timings["semantic"],
1146
+ score_field="similarity",
1147
+ metadata={"limit": thinking_budget}
1148
+ )
1149
+
1150
+ # Add BM25 retrieval results
1151
+ tracer.add_retrieval_results(
1152
+ method_name="bm25",
1153
+ results=to_tuple_format(bm25_results),
1154
+ duration_seconds=aggregated_timings["bm25"],
1155
+ score_field="bm25_score",
1156
+ metadata={"limit": thinking_budget}
1157
+ )
1158
+
1159
+ # Add graph retrieval results
1160
+ tracer.add_retrieval_results(
1161
+ method_name="graph",
1162
+ results=to_tuple_format(graph_results),
1163
+ duration_seconds=aggregated_timings["graph"],
1164
+ score_field="similarity", # Graph uses similarity for activation
1165
+ metadata={"budget": thinking_budget}
1166
+ )
1167
+
1168
+ # Add temporal retrieval results if present
1169
+ if temporal_results:
1170
+ tracer.add_retrieval_results(
1171
+ method_name="temporal",
1172
+ results=to_tuple_format(temporal_results),
1173
+ duration_seconds=aggregated_timings["temporal"],
1174
+ score_field="temporal_score",
1175
+ metadata={"budget": thinking_budget}
1176
+ )
1177
+
1178
+ # Record entry points (from semantic results) for legacy graph view
1179
+ for rank, retrieval in enumerate(semantic_results[:10], start=1): # Top 10 as entry points
1180
+ tracer.add_entry_point(retrieval.id, retrieval.text, retrieval.similarity or 0.0, rank)
1181
+
1182
+ tracer.add_phase_metric("parallel_retrieval", step_duration, {
1183
+ "semantic_count": len(semantic_results),
1184
+ "bm25_count": len(bm25_results),
1185
+ "graph_count": len(graph_results),
1186
+ "temporal_count": len(temporal_results) if temporal_results else 0
1187
+ })
1188
+
1189
+ # Step 3: Merge with RRF
1190
+ step_start = time.time()
1191
+ from .search.fusion import reciprocal_rank_fusion
1192
+
1193
+ # Merge 3 or 4 result lists depending on temporal constraint
1194
+ if temporal_results:
1195
+ merged_candidates = reciprocal_rank_fusion([semantic_results, bm25_results, graph_results, temporal_results])
1196
+ else:
1197
+ merged_candidates = reciprocal_rank_fusion([semantic_results, bm25_results, graph_results])
1198
+
1199
+ step_duration = time.time() - step_start
1200
+ log_buffer.append(f" [3] RRF merge: {len(merged_candidates)} unique candidates in {step_duration:.3f}s")
1201
+
1202
+ if tracer:
1203
+ # Convert MergedCandidate to old tuple format for tracer
1204
+ tracer_merged = [(mc.id, mc.retrieval.__dict__, {"rrf_score": mc.rrf_score, **mc.source_ranks})
1205
+ for mc in merged_candidates]
1206
+ tracer.add_rrf_merged(tracer_merged)
1207
+ tracer.add_phase_metric("rrf_merge", step_duration, {"candidates_merged": len(merged_candidates)})
1208
+
1209
+ # Step 4: Rerank using cross-encoder (MergedCandidate -> ScoredResult)
1210
+ step_start = time.time()
1211
+ reranker_instance = self._cross_encoder_reranker
1212
+ log_buffer.append(f" [4] Using cross-encoder reranker")
1213
+
1214
+ # Rerank using cross-encoder
1215
+ scored_results = reranker_instance.rerank(query, merged_candidates)
1216
+
1217
+ step_duration = time.time() - step_start
1218
+ log_buffer.append(f" [4] Reranking: {len(scored_results)} candidates scored in {step_duration:.3f}s")
1219
+
1220
+ if tracer:
1221
+ # Convert to old format for tracer
1222
+ results_dict = [sr.to_dict() for sr in scored_results]
1223
+ tracer_merged = [(mc.id, mc.retrieval.__dict__, {"rrf_score": mc.rrf_score, **mc.source_ranks})
1224
+ for mc in merged_candidates]
1225
+ tracer.add_reranked(results_dict, tracer_merged)
1226
+ tracer.add_phase_metric("reranking", step_duration, {
1227
+ "reranker_type": "cross-encoder",
1228
+ "candidates_reranked": len(scored_results)
1229
+ })
1230
+
1231
+ # Step 4.5: Combine cross-encoder score with retrieval signals
1232
+ # This preserves retrieval work (RRF, temporal, recency) instead of pure cross-encoder ranking
1233
+ if scored_results:
1234
+ # Normalize RRF scores to [0, 1] range
1235
+ rrf_scores = [sr.candidate.rrf_score for sr in scored_results]
1236
+ max_rrf = max(rrf_scores) if rrf_scores else 1.0
1237
+ min_rrf = min(rrf_scores) if rrf_scores else 0.0
1238
+ rrf_range = max_rrf - min_rrf if max_rrf > min_rrf else 1.0
1239
+
1240
+ # Calculate recency based on occurred_start (more recent = higher score)
1241
+ now = utcnow()
1242
+ for sr in scored_results:
1243
+ # Normalize RRF score
1244
+ sr.rrf_normalized = (sr.candidate.rrf_score - min_rrf) / rrf_range if rrf_range > 0 else 0.5
1245
+
1246
+ # Calculate recency (decay over 365 days, minimum 0.1)
1247
+ sr.recency = 0.5 # default for missing dates
1248
+ if sr.retrieval.occurred_start:
1249
+ occurred = sr.retrieval.occurred_start
1250
+ if hasattr(occurred, 'tzinfo') and occurred.tzinfo is None:
1251
+ from datetime import timezone
1252
+ occurred = occurred.replace(tzinfo=timezone.utc)
1253
+ days_ago = (now - occurred).total_seconds() / 86400
1254
+ sr.recency = max(0.1, 1.0 - (days_ago / 365)) # Linear decay over 1 year
1255
+
1256
+ # Get temporal proximity if available (already 0-1)
1257
+ sr.temporal = sr.retrieval.temporal_proximity if sr.retrieval.temporal_proximity is not None else 0.5
1258
+
1259
+ # Weighted combination
1260
+ # Cross-encoder: 60% (semantic relevance)
1261
+ # RRF: 20% (retrieval consensus)
1262
+ # Temporal proximity: 10% (time relevance for temporal queries)
1263
+ # Recency: 10% (prefer recent facts)
1264
+ sr.combined_score = (
1265
+ 0.6 * sr.cross_encoder_score_normalized +
1266
+ 0.2 * sr.rrf_normalized +
1267
+ 0.1 * sr.temporal +
1268
+ 0.1 * sr.recency
1269
+ )
1270
+ sr.weight = sr.combined_score # Update weight for final ranking
1271
+
1272
+ # Re-sort by combined score
1273
+ scored_results.sort(key=lambda x: x.weight, reverse=True)
1274
+ log_buffer.append(f" [4.6] Combined scoring: cross_encoder(0.6) + rrf(0.2) + temporal(0.1) + recency(0.1)")
1275
+
1276
+ # Step 5: Truncate to thinking_budget * 2 for token filtering
1277
+ rerank_limit = thinking_budget * 2
1278
+ top_scored = scored_results[:rerank_limit]
1279
+ log_buffer.append(f" [5] Truncated to top {len(top_scored)} results")
1280
+
1281
+ # Step 6: Token budget filtering
1282
+ step_start = time.time()
1283
+
1284
+ # Convert to dict for token filtering (backward compatibility)
1285
+ top_dicts = [sr.to_dict() for sr in top_scored]
1286
+ filtered_dicts, total_tokens = self._filter_by_token_budget(top_dicts, max_tokens)
1287
+
1288
+ # Convert back to list of IDs and filter scored_results
1289
+ filtered_ids = {d["id"] for d in filtered_dicts}
1290
+ top_scored = [sr for sr in top_scored if sr.id in filtered_ids]
1291
+
1292
+ step_duration = time.time() - step_start
1293
+ log_buffer.append(f" [6] Token filtering: {len(top_scored)} results, {total_tokens}/{max_tokens} tokens in {step_duration:.3f}s")
1294
+
1295
+ if tracer:
1296
+ tracer.add_phase_metric("token_filtering", step_duration, {
1297
+ "results_selected": len(top_scored),
1298
+ "tokens_used": total_tokens,
1299
+ "max_tokens": max_tokens
1300
+ })
1301
+
1302
+ # Record visits for all retrieved nodes
1303
+ if tracer:
1304
+ for sr in scored_results:
1305
+ tracer.visit_node(
1306
+ node_id=sr.id,
1307
+ text=sr.retrieval.text,
1308
+ context=sr.retrieval.context or "",
1309
+ event_date=sr.retrieval.occurred_start,
1310
+ access_count=sr.retrieval.access_count,
1311
+ is_entry_point=(sr.id in [ep.node_id for ep in tracer.entry_points]),
1312
+ parent_node_id=None, # In parallel retrieval, there's no clear parent
1313
+ link_type=None,
1314
+ link_weight=None,
1315
+ activation=sr.candidate.rrf_score, # Use RRF score as activation
1316
+ semantic_similarity=sr.retrieval.similarity or 0.0,
1317
+ recency=sr.recency,
1318
+ frequency=0.0,
1319
+ final_weight=sr.weight
1320
+ )
1321
+
1322
+ # Step 8: Queue access count updates for visited nodes
1323
+ visited_ids = list(set([sr.id for sr in scored_results[:50]])) # Top 50
1324
+ if visited_ids:
1325
+ await self._task_backend.submit_task({
1326
+ 'type': 'access_count_update',
1327
+ 'node_ids': visited_ids
1328
+ })
1329
+ log_buffer.append(f" [7] Queued access count updates for {len(visited_ids)} nodes")
1330
+
1331
+ # Log fact_type distribution in results
1332
+ fact_type_counts = {}
1333
+ for sr in top_scored:
1334
+ ft = sr.retrieval.fact_type
1335
+ fact_type_counts[ft] = fact_type_counts.get(ft, 0) + 1
1336
+
1337
+ total_time = time.time() - search_start
1338
+ fact_type_summary = ", ".join([f"{ft}={count}" for ft, count in sorted(fact_type_counts.items())])
1339
+ log_buffer.append(f"[SEARCH {search_id}] Complete: {len(top_scored)} results ({fact_type_summary}) ({total_tokens} tokens) in {total_time:.3f}s")
1340
+
1341
+ # Log all buffered logs at once
1342
+ logger.info("\n" + "\n".join(log_buffer))
1343
+
1344
+ # Convert ScoredResult to dicts with ISO datetime strings
1345
+ top_results_dicts = []
1346
+ for sr in top_scored:
1347
+ result_dict = sr.to_dict()
1348
+ # Convert datetime objects to ISO strings for JSON serialization
1349
+ if result_dict.get("occurred_start"):
1350
+ occurred_start = result_dict["occurred_start"]
1351
+ result_dict["occurred_start"] = occurred_start.isoformat() if hasattr(occurred_start, 'isoformat') else occurred_start
1352
+ if result_dict.get("occurred_end"):
1353
+ occurred_end = result_dict["occurred_end"]
1354
+ result_dict["occurred_end"] = occurred_end.isoformat() if hasattr(occurred_end, 'isoformat') else occurred_end
1355
+ if result_dict.get("mentioned_at"):
1356
+ mentioned_at = result_dict["mentioned_at"]
1357
+ result_dict["mentioned_at"] = mentioned_at.isoformat() if hasattr(mentioned_at, 'isoformat') else mentioned_at
1358
+ top_results_dicts.append(result_dict)
1359
+
1360
+ # Get entities for each fact if include_entities is requested
1361
+ fact_entity_map = {} # unit_id -> list of (entity_id, entity_name)
1362
+ if include_entities and top_scored:
1363
+ unit_ids = [uuid.UUID(sr.id) for sr in top_scored]
1364
+ if unit_ids:
1365
+ async with acquire_with_retry(pool) as entity_conn:
1366
+ entity_rows = await entity_conn.fetch(
1367
+ """
1368
+ SELECT ue.unit_id, e.id as entity_id, e.canonical_name
1369
+ FROM unit_entities ue
1370
+ JOIN entities e ON ue.entity_id = e.id
1371
+ WHERE ue.unit_id = ANY($1::uuid[])
1372
+ """,
1373
+ unit_ids
1374
+ )
1375
+ for row in entity_rows:
1376
+ unit_id = str(row['unit_id'])
1377
+ if unit_id not in fact_entity_map:
1378
+ fact_entity_map[unit_id] = []
1379
+ fact_entity_map[unit_id].append({
1380
+ 'entity_id': str(row['entity_id']),
1381
+ 'canonical_name': row['canonical_name']
1382
+ })
1383
+
1384
+ # Convert results to MemoryFact objects
1385
+ memory_facts = []
1386
+ for result_dict in top_results_dicts:
1387
+ result_id = str(result_dict.get("id"))
1388
+ # Get entity names for this fact
1389
+ entity_names = None
1390
+ if include_entities and result_id in fact_entity_map:
1391
+ entity_names = [e['canonical_name'] for e in fact_entity_map[result_id]]
1392
+
1393
+ memory_facts.append(MemoryFact(
1394
+ id=result_id,
1395
+ text=result_dict.get("text"),
1396
+ fact_type=result_dict.get("fact_type", "world"),
1397
+ entities=entity_names,
1398
+ context=result_dict.get("context"),
1399
+ occurred_start=result_dict.get("occurred_start"),
1400
+ occurred_end=result_dict.get("occurred_end"),
1401
+ mentioned_at=result_dict.get("mentioned_at"),
1402
+ document_id=result_dict.get("document_id"),
1403
+ chunk_id=result_dict.get("chunk_id"),
1404
+ activation=result_dict.get("weight") # Use final weight as activation
1405
+ ))
1406
+
1407
+ # Fetch entity observations if requested
1408
+ entities_dict = None
1409
+ if include_entities and fact_entity_map:
1410
+ # Collect unique entities in order of fact relevance (preserving order from top_scored)
1411
+ # Use a list to maintain order, but track seen entities to avoid duplicates
1412
+ entities_ordered = [] # list of (entity_id, entity_name) tuples
1413
+ seen_entity_ids = set()
1414
+
1415
+ # Iterate through facts in relevance order
1416
+ for sr in top_scored:
1417
+ unit_id = sr.id
1418
+ if unit_id in fact_entity_map:
1419
+ for entity in fact_entity_map[unit_id]:
1420
+ entity_id = entity['entity_id']
1421
+ entity_name = entity['canonical_name']
1422
+ if entity_id not in seen_entity_ids:
1423
+ entities_ordered.append((entity_id, entity_name))
1424
+ seen_entity_ids.add(entity_id)
1425
+
1426
+ # Fetch observations for each entity (respect token budget, in order)
1427
+ entities_dict = {}
1428
+ total_entity_tokens = 0
1429
+ encoding = _get_tiktoken_encoding()
1430
+
1431
+ for entity_id, entity_name in entities_ordered:
1432
+ if total_entity_tokens >= max_entity_tokens:
1433
+ break
1434
+
1435
+ observations = await self.get_entity_observations(bank_id, entity_id, limit=5)
1436
+
1437
+ # Calculate tokens for this entity's observations
1438
+ entity_tokens = 0
1439
+ included_observations = []
1440
+ for obs in observations:
1441
+ obs_tokens = len(encoding.encode(obs.text))
1442
+ if total_entity_tokens + entity_tokens + obs_tokens <= max_entity_tokens:
1443
+ included_observations.append(obs)
1444
+ entity_tokens += obs_tokens
1445
+ else:
1446
+ break
1447
+
1448
+ if included_observations:
1449
+ entities_dict[entity_name] = EntityState(
1450
+ entity_id=entity_id,
1451
+ canonical_name=entity_name,
1452
+ observations=included_observations
1453
+ )
1454
+ total_entity_tokens += entity_tokens
1455
+
1456
+ # Fetch chunks if requested
1457
+ chunks_dict = None
1458
+ if include_chunks and top_scored:
1459
+ from .response_models import ChunkInfo
1460
+
1461
+ # Collect chunk_ids in order of fact relevance (preserving order from top_scored)
1462
+ # Use a list to maintain order, but track seen chunks to avoid duplicates
1463
+ chunk_ids_ordered = []
1464
+ seen_chunk_ids = set()
1465
+ for sr in top_scored:
1466
+ chunk_id = sr.retrieval.chunk_id
1467
+ if chunk_id and chunk_id not in seen_chunk_ids:
1468
+ chunk_ids_ordered.append(chunk_id)
1469
+ seen_chunk_ids.add(chunk_id)
1470
+
1471
+ if chunk_ids_ordered:
1472
+ # Fetch chunk data from database using chunk_ids (no ORDER BY to preserve input order)
1473
+ async with acquire_with_retry(pool) as conn:
1474
+ chunks_rows = await conn.fetch(
1475
+ """
1476
+ SELECT chunk_id, chunk_text, chunk_index
1477
+ FROM chunks
1478
+ WHERE chunk_id = ANY($1::text[])
1479
+ """,
1480
+ chunk_ids_ordered
1481
+ )
1482
+
1483
+ # Create a lookup dict for fast access
1484
+ chunks_lookup = {row['chunk_id']: row for row in chunks_rows}
1485
+
1486
+ # Apply token limit and build chunks_dict in the order of chunk_ids_ordered
1487
+ chunks_dict = {}
1488
+ total_chunk_tokens = 0
1489
+ encoding = _get_tiktoken_encoding()
1490
+
1491
+ for chunk_id in chunk_ids_ordered:
1492
+ if chunk_id not in chunks_lookup:
1493
+ continue
1494
+
1495
+ row = chunks_lookup[chunk_id]
1496
+ chunk_text = row['chunk_text']
1497
+ chunk_tokens = len(encoding.encode(chunk_text))
1498
+
1499
+ # Check if adding this chunk would exceed the limit
1500
+ if total_chunk_tokens + chunk_tokens > max_chunk_tokens:
1501
+ # Truncate the chunk to fit within the remaining budget
1502
+ remaining_tokens = max_chunk_tokens - total_chunk_tokens
1503
+ if remaining_tokens > 0:
1504
+ # Truncate to remaining tokens
1505
+ truncated_text = encoding.decode(encoding.encode(chunk_text)[:remaining_tokens])
1506
+ chunks_dict[chunk_id] = ChunkInfo(
1507
+ chunk_text=truncated_text,
1508
+ chunk_index=row['chunk_index'],
1509
+ truncated=True
1510
+ )
1511
+ total_chunk_tokens = max_chunk_tokens
1512
+ # Stop adding more chunks once we hit the limit
1513
+ break
1514
+ else:
1515
+ chunks_dict[chunk_id] = ChunkInfo(
1516
+ chunk_text=chunk_text,
1517
+ chunk_index=row['chunk_index'],
1518
+ truncated=False
1519
+ )
1520
+ total_chunk_tokens += chunk_tokens
1521
+
1522
+ # Finalize trace if enabled
1523
+ trace_dict = None
1524
+ if tracer:
1525
+ trace = tracer.finalize(top_results_dicts)
1526
+ trace_dict = trace.to_dict() if trace else None
1527
+
1528
+ return RecallResultModel(results=memory_facts, trace=trace_dict, entities=entities_dict, chunks=chunks_dict)
1529
+
1530
+ except Exception as e:
1531
+ log_buffer.append(f"[SEARCH {search_id}] ERROR after {time.time() - search_start:.3f}s: {str(e)}")
1532
+ logger.error("\n" + "\n".join(log_buffer))
1533
+ raise Exception(f"Failed to search memories: {str(e)}")
1534
+
1535
+ def _filter_by_token_budget(
1536
+ self,
1537
+ results: List[Dict[str, Any]],
1538
+ max_tokens: int
1539
+ ) -> Tuple[List[Dict[str, Any]], int]:
1540
+ """
1541
+ Filter results to fit within token budget.
1542
+
1543
+ Counts tokens only for the 'text' field using tiktoken (cl100k_base encoding).
1544
+ Stops before including a fact that would exceed the budget.
1545
+
1546
+ Args:
1547
+ results: List of search results
1548
+ max_tokens: Maximum tokens allowed
1549
+
1550
+ Returns:
1551
+ Tuple of (filtered_results, total_tokens_used)
1552
+ """
1553
+ encoding = _get_tiktoken_encoding()
1554
+
1555
+ filtered_results = []
1556
+ total_tokens = 0
1557
+
1558
+ for result in results:
1559
+ text = result.get("text", "")
1560
+ text_tokens = len(encoding.encode(text))
1561
+
1562
+ # Check if adding this result would exceed budget
1563
+ if total_tokens + text_tokens <= max_tokens:
1564
+ filtered_results.append(result)
1565
+ total_tokens += text_tokens
1566
+ else:
1567
+ # Stop before including a fact that would exceed limit
1568
+ break
1569
+
1570
+ return filtered_results, total_tokens
1571
+
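# Illustrative sketch (not part of the packaged module): how the greedy token
# budget in _filter_by_token_budget behaves. The whitespace split below is only a
# stand-in for the tiktoken encoding the method actually uses; the inputs are made up.
def _demo_filter_by_token_budget(results, max_tokens):
    kept, used = [], 0
    for r in results:
        cost = len(r.get("text", "").split())  # stand-in for len(encoding.encode(text))
        if used + cost > max_tokens:
            break  # stop before the first fact that would exceed the budget
        kept.append(r)
        used += cost
    return kept, used

# Example: with a 5-"token" budget only the first two facts fit.
# _demo_filter_by_token_budget(
#     [{"text": "alpha beta"}, {"text": "gamma delta"}, {"text": "epsilon zeta eta"}],
#     max_tokens=5,
# )  -> ([{"text": "alpha beta"}, {"text": "gamma delta"}], 4)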
1572
+ async def get_document(self, document_id: str, bank_id: str) -> Optional[Dict[str, Any]]:
1573
+ """
1574
+ Retrieve document metadata and statistics.
1575
+
1576
+ Args:
1577
+ document_id: Document ID to retrieve
1578
+ bank_id: bank ID that owns the document
1579
+
1580
+ Returns:
1581
+ Dictionary with document info or None if not found
1582
+ """
1583
+ pool = await self._get_pool()
1584
+ async with acquire_with_retry(pool) as conn:
1585
+ doc = await conn.fetchrow(
1586
+ """
1587
+ SELECT d.id, d.bank_id, d.original_text, d.content_hash,
1588
+ d.created_at, d.updated_at, COUNT(mu.id) as unit_count
1589
+ FROM documents d
1590
+ LEFT JOIN memory_units mu ON mu.document_id = d.id
1591
+ WHERE d.id = $1 AND d.bank_id = $2
1592
+ GROUP BY d.id, d.bank_id, d.original_text, d.content_hash, d.created_at, d.updated_at
1593
+ """,
1594
+ document_id, bank_id
1595
+ )
1596
+
1597
+ if not doc:
1598
+ return None
1599
+
1600
+ return {
1601
+ "id": doc["id"],
1602
+ "bank_id": doc["bank_id"],
1603
+ "original_text": doc["original_text"],
1604
+ "content_hash": doc["content_hash"],
1605
+ "memory_unit_count": doc["unit_count"],
1606
+ "created_at": doc["created_at"],
1607
+ "updated_at": doc["updated_at"]
1608
+ }
1609
+
1610
+ async def delete_document(self, document_id: str, bank_id: str) -> Dict[str, int]:
1611
+ """
1612
+ Delete a document and all its associated memory units and links.
1613
+
1614
+ Args:
1615
+ document_id: Document ID to delete
1616
+ bank_id: bank ID that owns the document
1617
+
1618
+ Returns:
1619
+ Dictionary with counts of deleted items
1620
+ """
1621
+ pool = await self._get_pool()
1622
+ async with acquire_with_retry(pool) as conn:
1623
+ async with conn.transaction():
1624
+ # Count units before deletion
1625
+ units_count = await conn.fetchval(
1626
+ "SELECT COUNT(*) FROM memory_units WHERE document_id = $1",
1627
+ document_id
1628
+ )
1629
+
1630
+ # Delete document (cascades to memory_units and all their links)
1631
+ deleted = await conn.fetchval(
1632
+ "DELETE FROM documents WHERE id = $1 AND bank_id = $2 RETURNING id",
1633
+ document_id, bank_id
1634
+ )
1635
+
1636
+ return {
1637
+ "document_deleted": 1 if deleted else 0,
1638
+ "memory_units_deleted": units_count if deleted else 0
1639
+ }
1640
+
1641
+ async def delete_memory_unit(self, unit_id: str) -> Dict[str, Any]:
1642
+ """
1643
+ Delete a single memory unit and all its associated links.
1644
+
1645
+ Due to CASCADE DELETE constraints, this will automatically delete:
1646
+ - All links from this unit (memory_links where from_unit_id = unit_id)
1647
+ - All links to this unit (memory_links where to_unit_id = unit_id)
1648
+ - All entity associations (unit_entities where unit_id = unit_id)
1649
+
1650
+ Args:
1651
+ unit_id: UUID of the memory unit to delete
1652
+
1653
+ Returns:
1654
+ Dictionary with deletion result
1655
+ """
1656
+ pool = await self._get_pool()
1657
+ async with acquire_with_retry(pool) as conn:
1658
+ async with conn.transaction():
1659
+ # Delete the memory unit (cascades to links and associations)
1660
+ deleted = await conn.fetchval(
1661
+ "DELETE FROM memory_units WHERE id = $1 RETURNING id",
1662
+ unit_id
1663
+ )
1664
+
1665
+ return {
1666
+ "success": deleted is not None,
1667
+ "unit_id": str(deleted) if deleted else None,
1668
+ "message": "Memory unit and all its links deleted successfully" if deleted else "Memory unit not found"
1669
+ }
1670
+
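# Illustrative usage sketch (assumes `engine` is an initialized MemoryEngine and that
# the document/unit IDs below exist; they are placeholders, not part of the package).
async def _demo_delete(engine):
    # Deleting a document cascades to its memory units and their links.
    doc_result = await engine.delete_document(document_id="doc-123", bank_id="bank-1")
    # e.g. {"document_deleted": 1, "memory_units_deleted": 42}

    # Deleting a single unit relies on the same CASCADE constraints.
    unit_result = await engine.delete_memory_unit(unit_id="00000000-0000-0000-0000-000000000000")
    # e.g. {"success": False, "unit_id": None, "message": "Memory unit not found"}
    return doc_result, unit_result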
1671
+ async def delete_bank(self, bank_id: str, fact_type: Optional[str] = None) -> Dict[str, int]:
1672
+ """
1673
+ Delete all data for a specific bank (multi-tenant cleanup).
1674
+
1675
+ This is much more efficient than dropping all tables and allows
1676
+ multiple banks to coexist in the same database.
1677
+
1678
+ Deletes (with CASCADE):
1679
+ - All memory units for this bank (optionally filtered by fact_type)
1680
+ - All entities for this bank (if deleting all memory units)
1681
+ - All associated links, unit-entity associations, and co-occurrences
1682
+
1683
+ Args:
1684
+ bank_id: bank ID to delete
1685
+ fact_type: Optional fact type filter (world, bank, opinion). If provided, only deletes memories of that type.
1686
+
1687
+ Returns:
1688
+ Dictionary with counts of deleted items
1689
+ """
1690
+ pool = await self._get_pool()
1691
+ async with acquire_with_retry(pool) as conn:
1692
+ async with conn.transaction():
1693
+ try:
1694
+ if fact_type:
1695
+ # Delete only memories of a specific fact type
1696
+ units_count = await conn.fetchval(
1697
+ "SELECT COUNT(*) FROM memory_units WHERE bank_id = $1 AND fact_type = $2",
1698
+ bank_id, fact_type
1699
+ )
1700
+ await conn.execute(
1701
+ "DELETE FROM memory_units WHERE bank_id = $1 AND fact_type = $2",
1702
+ bank_id, fact_type
1703
+ )
1704
+
1705
+ # Note: We don't delete entities when fact_type is specified,
1706
+ # as they may be referenced by other memory units
1707
+ return {
1708
+ "memory_units_deleted": units_count,
1709
+ "entities_deleted": 0
1710
+ }
1711
+ else:
1712
+ # Delete all data for the bank
1713
+ units_count = await conn.fetchval("SELECT COUNT(*) FROM memory_units WHERE bank_id = $1", bank_id)
1714
+ entities_count = await conn.fetchval("SELECT COUNT(*) FROM entities WHERE bank_id = $1", bank_id)
1715
+ documents_count = await conn.fetchval("SELECT COUNT(*) FROM documents WHERE bank_id = $1", bank_id)
1716
+
1717
+ # Delete documents (cascades to chunks)
1718
+ await conn.execute("DELETE FROM documents WHERE bank_id = $1", bank_id)
1719
+
1720
+ # Delete memory units (cascades to unit_entities, memory_links)
1721
+ await conn.execute("DELETE FROM memory_units WHERE bank_id = $1", bank_id)
1722
+
1723
+ # Delete entities (cascades to unit_entities, entity_cooccurrences, memory_links with entity_id)
1724
+ await conn.execute("DELETE FROM entities WHERE bank_id = $1", bank_id)
1725
+
1726
+ return {
1727
+ "memory_units_deleted": units_count,
1728
+ "entities_deleted": entities_count,
1729
+ "documents_deleted": documents_count
1730
+ }
1731
+
1732
+ except Exception as e:
1733
+ raise Exception(f"Failed to delete agent data: {str(e)}")
1734
+
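# Illustrative usage sketch (assumes an initialized MemoryEngine `engine`): wiping a
# whole bank versus clearing only one fact type. IDs and counts are placeholders.
async def _demo_delete_bank(engine):
    # Remove only opinion memories; entities are kept because other units may reference them.
    opinions_only = await engine.delete_bank("bank-1", fact_type="opinion")
    # e.g. {"memory_units_deleted": 7, "entities_deleted": 0}

    # Remove everything belonging to the bank (documents, units, entities).
    everything = await engine.delete_bank("bank-1")
    # e.g. {"memory_units_deleted": 120, "entities_deleted": 35, "documents_deleted": 4}
    return opinions_only, everything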
1735
+ async def get_graph_data(self, bank_id: Optional[str] = None, fact_type: Optional[str] = None):
1736
+ """
1737
+ Get graph data for visualization.
1738
+
1739
+ Args:
1740
+ bank_id: Filter by bank ID
1741
+ fact_type: Filter by fact type (world, bank, opinion)
1742
+
1743
+ Returns:
1744
+ Dict with nodes, edges, and table_rows
1745
+ """
1746
+ pool = await self._get_pool()
1747
+ async with acquire_with_retry(pool) as conn:
1748
+ # Get memory units, optionally filtered by bank_id and fact_type
1749
+ query_conditions = []
1750
+ query_params = []
1751
+ param_count = 0
1752
+
1753
+ if bank_id:
1754
+ param_count += 1
1755
+ query_conditions.append(f"bank_id = ${param_count}")
1756
+ query_params.append(bank_id)
1757
+
1758
+ if fact_type:
1759
+ param_count += 1
1760
+ query_conditions.append(f"fact_type = ${param_count}")
1761
+ query_params.append(fact_type)
1762
+
1763
+ where_clause = "WHERE " + " AND ".join(query_conditions) if query_conditions else ""
1764
+
1765
+ units = await conn.fetch(f"""
1766
+ SELECT id, text, event_date, context, occurred_start, occurred_end, mentioned_at, document_id, chunk_id, fact_type
1767
+ FROM memory_units
1768
+ {where_clause}
1769
+ ORDER BY mentioned_at DESC NULLS LAST, event_date DESC
1770
+ LIMIT 1000
1771
+ """, *query_params)
1772
+
1773
+ # Get links, filtering to only include links between units of the selected bank
1774
+ unit_ids = [row['id'] for row in units]
1775
+ if unit_ids:
1776
+ links = await conn.fetch("""
1777
+ SELECT
1778
+ ml.from_unit_id,
1779
+ ml.to_unit_id,
1780
+ ml.link_type,
1781
+ ml.weight,
1782
+ e.canonical_name as entity_name
1783
+ FROM memory_links ml
1784
+ LEFT JOIN entities e ON ml.entity_id = e.id
1785
+ WHERE ml.from_unit_id = ANY($1::uuid[]) AND ml.to_unit_id = ANY($1::uuid[])
1786
+ ORDER BY ml.link_type, ml.weight DESC
1787
+ """, unit_ids)
1788
+ else:
1789
+ links = []
1790
+
1791
+ # Get entity information
1792
+ unit_entities = await conn.fetch("""
1793
+ SELECT ue.unit_id, e.canonical_name
1794
+ FROM unit_entities ue
1795
+ JOIN entities e ON ue.entity_id = e.id
1796
+ ORDER BY ue.unit_id
1797
+ """)
1798
+
1799
+ # Build entity mapping
1800
+ entity_map = {}
1801
+ for row in unit_entities:
1802
+ unit_id = row['unit_id']
1803
+ entity_name = row['canonical_name']
1804
+ if unit_id not in entity_map:
1805
+ entity_map[unit_id] = []
1806
+ entity_map[unit_id].append(entity_name)
1807
+
1808
+ # Build nodes
1809
+ nodes = []
1810
+ for row in units:
1811
+ unit_id = row['id']
1812
+ text = row['text']
1813
+ event_date = row['event_date']
1814
+ context = row['context']
1815
+
1816
+ entities = entity_map.get(unit_id, [])
1817
+ entity_count = len(entities)
1818
+
1819
+ # Color by entity count
1820
+ if entity_count == 0:
1821
+ color = "#e0e0e0"
1822
+ elif entity_count == 1:
1823
+ color = "#90caf9"
1824
+ else:
1825
+ color = "#42a5f5"
1826
+
1827
+ nodes.append({
1828
+ "data": {
1829
+ "id": str(unit_id),
1830
+ "label": f"{text[:30]}..." if len(text) > 30 else text,
1831
+ "text": text,
1832
+ "date": event_date.isoformat() if event_date else "",
1833
+ "context": context if context else "",
1834
+ "entities": ", ".join(entities) if entities else "None",
1835
+ "color": color
1836
+ }
1837
+ })
1838
+
1839
+ # Build edges
1840
+ edges = []
1841
+ for row in links:
1842
+ from_id = str(row['from_unit_id'])
1843
+ to_id = str(row['to_unit_id'])
1844
+ link_type = row['link_type']
1845
+ weight = row['weight']
1846
+ entity_name = row['entity_name']
1847
+
1848
+ # Color by link type
1849
+ if link_type == 'temporal':
1850
+ color = "#00bcd4"
1851
+ line_style = "dashed"
1852
+ elif link_type == 'semantic':
1853
+ color = "#ff69b4"
1854
+ line_style = "solid"
1855
+ elif link_type == 'entity':
1856
+ color = "#ffd700"
1857
+ line_style = "solid"
1858
+ else:
1859
+ color = "#999999"
1860
+ line_style = "solid"
1861
+
1862
+ edges.append({
1863
+ "data": {
1864
+ "id": f"{from_id}-{to_id}-{link_type}",
1865
+ "source": from_id,
1866
+ "target": to_id,
1867
+ "linkType": link_type,
1868
+ "weight": weight,
1869
+ "entityName": entity_name if entity_name else "",
1870
+ "color": color,
1871
+ "lineStyle": line_style
1872
+ }
1873
+ })
1874
+
1875
+ # Build table rows
1876
+ table_rows = []
1877
+ for row in units:
1878
+ unit_id = row['id']
1879
+ entities = entity_map.get(unit_id, [])
1880
+
1881
+ table_rows.append({
1882
+ "id": str(unit_id),
1883
+ "text": row['text'],
1884
+ "context": row['context'] if row['context'] else "N/A",
1885
+ "occurred_start": row['occurred_start'].isoformat() if row['occurred_start'] else None,
1886
+ "occurred_end": row['occurred_end'].isoformat() if row['occurred_end'] else None,
1887
+ "mentioned_at": row['mentioned_at'].isoformat() if row['mentioned_at'] else None,
1888
+ "date": row['event_date'].strftime("%Y-%m-%d %H:%M") if row['event_date'] else "N/A", # Deprecated, kept for backwards compatibility
1889
+ "entities": ", ".join(entities) if entities else "None",
1890
+ "document_id": row['document_id'],
1891
+ "chunk_id": row['chunk_id'] if row['chunk_id'] else None,
1892
+ "fact_type": row['fact_type']
1893
+ })
1894
+
1895
+ return {
1896
+ "nodes": nodes,
1897
+ "edges": edges,
1898
+ "table_rows": table_rows,
1899
+ "total_units": len(units)
1900
+ }
1901
+
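# Illustrative sketch (assumes an initialized MemoryEngine `engine`): the nodes and
# edges returned by get_graph_data wrap their fields in a "data" dict, so a quick
# summary of a bank's graph might look like this. The bank ID is a placeholder.
from collections import Counter

async def _demo_graph_summary(engine):
    graph = await engine.get_graph_data(bank_id="bank-1", fact_type="world")
    edge_types = Counter(e["data"]["linkType"] for e in graph["edges"])
    multi_entity_nodes = [n["data"]["id"] for n in graph["nodes"] if n["data"]["color"] == "#42a5f5"]
    return {
        "total_units": graph["total_units"],
        "edges_by_type": dict(edge_types),        # e.g. {"temporal": 10, "semantic": 4, "entity": 6}
        "nodes_with_2plus_entities": len(multi_entity_nodes),
    }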
1902
+ async def list_memory_units(
1903
+ self,
1904
+ bank_id: Optional[str] = None,
1905
+ fact_type: Optional[str] = None,
1906
+ search_query: Optional[str] = None,
1907
+ limit: int = 100,
1908
+ offset: int = 0
1909
+ ):
1910
+ """
1911
+ List memory units for table view with optional full-text search.
1912
+
1913
+ Args:
1914
+ bank_id: Filter by bank ID
1915
+ fact_type: Filter by fact type (world, bank, opinion)
1916
+ search_query: Full-text search query (searches text and context fields)
1917
+ limit: Maximum number of results to return
1918
+ offset: Offset for pagination
1919
+
1920
+ Returns:
1921
+ Dict with items (list of memory units) and total count
1922
+ """
1923
+ pool = await self._get_pool()
1924
+ async with acquire_with_retry(pool) as conn:
1925
+ # Build query conditions
1926
+ query_conditions = []
1927
+ query_params = []
1928
+ param_count = 0
1929
+
1930
+ if bank_id:
1931
+ param_count += 1
1932
+ query_conditions.append(f"bank_id = ${param_count}")
1933
+ query_params.append(bank_id)
1934
+
1935
+ if fact_type:
1936
+ param_count += 1
1937
+ query_conditions.append(f"fact_type = ${param_count}")
1938
+ query_params.append(fact_type)
1939
+
1940
+ if search_query:
1941
+ # Full-text search on text and context fields using ILIKE
1942
+ param_count += 1
1943
+ query_conditions.append(f"(text ILIKE ${param_count} OR context ILIKE ${param_count})")
1944
+ query_params.append(f"%{search_query}%")
1945
+
1946
+ where_clause = "WHERE " + " AND ".join(query_conditions) if query_conditions else ""
1947
+
1948
+ # Get total count
1949
+ count_query = f"""
1950
+ SELECT COUNT(*) as total
1951
+ FROM memory_units
1952
+ {where_clause}
1953
+ """
1954
+ count_result = await conn.fetchrow(count_query, *query_params)
1955
+ total = count_result['total']
1956
+
1957
+ # Get units with limit and offset
1958
+ param_count += 1
1959
+ limit_param = f"${param_count}"
1960
+ query_params.append(limit)
1961
+
1962
+ param_count += 1
1963
+ offset_param = f"${param_count}"
1964
+ query_params.append(offset)
1965
+
1966
+ units = await conn.fetch(f"""
1967
+ SELECT id, text, event_date, context, fact_type, mentioned_at, occurred_start, occurred_end, chunk_id
1968
+ FROM memory_units
1969
+ {where_clause}
1970
+ ORDER BY mentioned_at DESC NULLS LAST, created_at DESC
1971
+ LIMIT {limit_param} OFFSET {offset_param}
1972
+ """, *query_params)
1973
+
1974
+ # Get entity information for these units
1975
+ if units:
1976
+ unit_ids = [row['id'] for row in units]
1977
+ unit_entities = await conn.fetch("""
1978
+ SELECT ue.unit_id, e.canonical_name
1979
+ FROM unit_entities ue
1980
+ JOIN entities e ON ue.entity_id = e.id
1981
+ WHERE ue.unit_id = ANY($1::uuid[])
1982
+ ORDER BY ue.unit_id
1983
+ """, unit_ids)
1984
+ else:
1985
+ unit_entities = []
1986
+
1987
+ # Build entity mapping
1988
+ entity_map = {}
1989
+ for row in unit_entities:
1990
+ unit_id = row['unit_id']
1991
+ entity_name = row['canonical_name']
1992
+ if unit_id not in entity_map:
1993
+ entity_map[unit_id] = []
1994
+ entity_map[unit_id].append(entity_name)
1995
+
1996
+ # Build result items
1997
+ items = []
1998
+ for row in units:
1999
+ unit_id = row['id']
2000
+ entities = entity_map.get(unit_id, [])
2001
+
2002
+ items.append({
2003
+ "id": str(unit_id),
2004
+ "text": row['text'],
2005
+ "context": row['context'] if row['context'] else "",
2006
+ "date": row['event_date'].isoformat() if row['event_date'] else "",
2007
+ "fact_type": row['fact_type'],
2008
+ "mentioned_at": row['mentioned_at'].isoformat() if row['mentioned_at'] else None,
2009
+ "occurred_start": row['occurred_start'].isoformat() if row['occurred_start'] else None,
2010
+ "occurred_end": row['occurred_end'].isoformat() if row['occurred_end'] else None,
2011
+ "entities": ", ".join(entities) if entities else "",
2012
+ "chunk_id": row['chunk_id'] if row['chunk_id'] else None
2013
+ })
2014
+
2015
+ return {
2016
+ "items": items,
2017
+ "total": total,
2018
+ "limit": limit,
2019
+ "offset": offset
2020
+ }
2021
+
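# Illustrative usage sketch (assumes an initialized MemoryEngine `engine`): paginating
# through memory units of one bank that mention "coffee". Values are placeholders.
async def _demo_list_units(engine):
    page = await engine.list_memory_units(
        bank_id="bank-1",
        fact_type="world",
        search_query="coffee",   # ILIKE match on text and context
        limit=25,
        offset=0,
    )
    for item in page["items"]:
        print(item["id"], item["fact_type"], item["text"][:60])
    return page["total"]  # total matches across all pages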
2022
+ async def list_documents(
2023
+ self,
2024
+ bank_id: str,
2025
+ search_query: Optional[str] = None,
2026
+ limit: int = 100,
2027
+ offset: int = 0
2028
+ ):
2029
+ """
2030
+ List documents with optional search and pagination.
2031
+
2032
+ Args:
2033
+ bank_id: bank ID (required)
2034
+ search_query: Search in document ID
2035
+ limit: Maximum number of results
2036
+ offset: Offset for pagination
2037
+
2038
+ Returns:
2039
+ Dict with items (list of documents without original_text) and total count
2040
+ """
2041
+ pool = await self._get_pool()
2042
+ async with acquire_with_retry(pool) as conn:
2043
+ # Build query conditions
2044
+ query_conditions = []
2045
+ query_params = []
2046
+ param_count = 0
2047
+
2048
+ param_count += 1
2049
+ query_conditions.append(f"bank_id = ${param_count}")
2050
+ query_params.append(bank_id)
2051
+
2052
+ if search_query:
2053
+ # Search in document ID
2054
+ param_count += 1
2055
+ query_conditions.append(f"id ILIKE ${param_count}")
2056
+ query_params.append(f"%{search_query}%")
2057
+
2058
+ where_clause = "WHERE " + " AND ".join(query_conditions) if query_conditions else ""
2059
+
2060
+ # Get total count
2061
+ count_query = f"""
2062
+ SELECT COUNT(*) as total
2063
+ FROM documents
2064
+ {where_clause}
2065
+ """
2066
+ count_result = await conn.fetchrow(count_query, *query_params)
2067
+ total = count_result['total']
2068
+
2069
+ # Get documents with limit and offset (without original_text for performance)
2070
+ param_count += 1
2071
+ limit_param = f"${param_count}"
2072
+ query_params.append(limit)
2073
+
2074
+ param_count += 1
2075
+ offset_param = f"${param_count}"
2076
+ query_params.append(offset)
2077
+
2078
+ documents = await conn.fetch(f"""
2079
+ SELECT
2080
+ id,
2081
+ bank_id,
2082
+ content_hash,
2083
+ created_at,
2084
+ updated_at,
2085
+ LENGTH(original_text) as text_length,
2086
+ retain_params
2087
+ FROM documents
2088
+ {where_clause}
2089
+ ORDER BY created_at DESC
2090
+ LIMIT {limit_param} OFFSET {offset_param}
2091
+ """, *query_params)
2092
+
2093
+ # Get memory unit count for each document
2094
+ if documents:
2095
+ doc_ids = [(row['id'], row['bank_id']) for row in documents]
2096
+
2097
+ # Create placeholders for the query
2098
+ placeholders = []
2099
+ params_for_count = []
2100
+ for i, (doc_id, bank_id_val) in enumerate(doc_ids):
2101
+ idx_doc = i * 2 + 1
2102
+ idx_agent = i * 2 + 2
2103
+ placeholders.append(f"(document_id = ${idx_doc} AND bank_id = ${idx_agent})")
2104
+ params_for_count.extend([doc_id, bank_id_val])
2105
+
2106
+ where_clause_count = " OR ".join(placeholders)
2107
+
2108
+ unit_counts = await conn.fetch(f"""
2109
+ SELECT document_id, bank_id, COUNT(*) as unit_count
2110
+ FROM memory_units
2111
+ WHERE {where_clause_count}
2112
+ GROUP BY document_id, bank_id
2113
+ """, *params_for_count)
2114
+ else:
2115
+ unit_counts = []
2116
+
2117
+ # Build count mapping
2118
+ count_map = {(row['document_id'], row['bank_id']): row['unit_count'] for row in unit_counts}
2119
+
2120
+ # Build result items
2121
+ items = []
2122
+ for row in documents:
2123
+ doc_id = row['id']
2124
+ bank_id_val = row['bank_id']
2125
+ unit_count = count_map.get((doc_id, bank_id_val), 0)
2126
+
2127
+ items.append({
2128
+ "id": doc_id,
2129
+ "bank_id": bank_id_val,
2130
+ "content_hash": row['content_hash'],
2131
+ "created_at": row['created_at'].isoformat() if row['created_at'] else "",
2132
+ "updated_at": row['updated_at'].isoformat() if row['updated_at'] else "",
2133
+ "text_length": row['text_length'] or 0,
2134
+ "memory_unit_count": unit_count,
2135
+ "retain_params": row['retain_params'] if row['retain_params'] else None
2136
+ })
2137
+
2138
+ return {
2139
+ "items": items,
2140
+ "total": total,
2141
+ "limit": limit,
2142
+ "offset": offset
2143
+ }
2144
+
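# Illustrative sketch (assumes an initialized MemoryEngine `engine`): listing documents
# returns metadata only; original_text is fetched separately via get_document.
async def _demo_list_documents(engine):
    listing = await engine.list_documents(bank_id="bank-1", search_query="meeting", limit=10)
    summaries = [
        (d["id"], d["text_length"], d["memory_unit_count"])
        for d in listing["items"]
    ]
    return listing["total"], summaries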
2145
+ async def get_document(
2146
+ self,
2147
+ document_id: str,
2148
+ bank_id: str
2149
+ ):
2150
+ """
2151
+ Get a specific document including its original_text.
2152
+
2153
+ Args:
2154
+ document_id: Document ID
2155
+ bank_id: bank ID
2156
+
2157
+ Returns:
2158
+ Dict with document details including original_text, or None if not found
2159
+ """
2160
+ pool = await self._get_pool()
2161
+ async with acquire_with_retry(pool) as conn:
2162
+ doc = await conn.fetchrow("""
2163
+ SELECT
2164
+ id,
2165
+ bank_id,
2166
+ original_text,
2167
+ content_hash,
2168
+ created_at,
2169
+ updated_at,
2170
+ retain_params
2171
+ FROM documents
2172
+ WHERE id = $1 AND bank_id = $2
2173
+ """, document_id, bank_id)
2174
+
2175
+ if not doc:
2176
+ return None
2177
+
2178
+ # Get memory unit count
2179
+ unit_count_row = await conn.fetchrow("""
2180
+ SELECT COUNT(*) as unit_count
2181
+ FROM memory_units
2182
+ WHERE document_id = $1 AND bank_id = $2
2183
+ """, document_id, bank_id)
2184
+
2185
+ return {
2186
+ "id": doc['id'],
2187
+ "bank_id": doc['bank_id'],
2188
+ "original_text": doc['original_text'],
2189
+ "content_hash": doc['content_hash'],
2190
+ "created_at": doc['created_at'].isoformat() if doc['created_at'] else "",
2191
+ "updated_at": doc['updated_at'].isoformat() if doc['updated_at'] else "",
2192
+ "memory_unit_count": unit_count_row['unit_count'] if unit_count_row else 0,
2193
+ "retain_params": doc['retain_params'] if doc['retain_params'] else None
2194
+ }
2195
+
2196
+ async def get_chunk(
2197
+ self,
2198
+ chunk_id: str
2199
+ ):
2200
+ """
2201
+ Get a specific chunk by its ID.
2202
+
2203
+ Args:
2204
+ chunk_id: Chunk ID (format: bank_id_document_id_chunk_index)
2205
+
2206
+ Returns:
2207
+ Dict with chunk details including chunk_text, or None if not found
2208
+ """
2209
+ pool = await self._get_pool()
2210
+ async with acquire_with_retry(pool) as conn:
2211
+ chunk = await conn.fetchrow("""
2212
+ SELECT
2213
+ chunk_id,
2214
+ document_id,
2215
+ bank_id,
2216
+ chunk_index,
2217
+ chunk_text,
2218
+ created_at
2219
+ FROM chunks
2220
+ WHERE chunk_id = $1
2221
+ """, chunk_id)
2222
+
2223
+ if not chunk:
2224
+ return None
2225
+
2226
+ return {
2227
+ "chunk_id": chunk['chunk_id'],
2228
+ "document_id": chunk['document_id'],
2229
+ "bank_id": chunk['bank_id'],
2230
+ "chunk_index": chunk['chunk_index'],
2231
+ "chunk_text": chunk['chunk_text'],
2232
+ "created_at": chunk['created_at'].isoformat() if chunk['created_at'] else ""
2233
+ }
2234
+
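# Illustrative sketch (assumes an initialized MemoryEngine `engine` and existing IDs;
# both IDs are placeholders): fetching a full document and one of its stored chunks.
async def _demo_fetch_document_and_chunk(engine):
    doc = await engine.get_document(document_id="doc-123", bank_id="bank-1")
    if doc is None:
        return None
    # chunk_id follows the bank_id_document_id_chunk_index format described above
    chunk = await engine.get_chunk(chunk_id="bank-1_doc-123_0")
    return doc["original_text"], (chunk["chunk_text"] if chunk else None)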
2235
+ async def _evaluate_opinion_update_async(
2236
+ self,
2237
+ opinion_text: str,
2238
+ opinion_confidence: float,
2239
+ new_event_text: str,
2240
+ entity_name: str,
2241
+ ) -> Optional[Dict[str, Any]]:
2242
+ """
2243
+ Evaluate if an opinion should be updated based on a new event.
2244
+
2245
+ Args:
2246
+ opinion_text: Current opinion text (includes reasons)
2247
+ opinion_confidence: Current confidence score (0.0-1.0)
2248
+ new_event_text: Text of the new event
2249
+ entity_name: Name of the entity this opinion is about
2250
+
2251
+ Returns:
2252
+ Dict with 'action' ('keep'|'update'), 'new_confidence', 'new_text' (if action=='update')
2253
+ or None if no changes needed
2254
+ """
2255
+ from pydantic import BaseModel, Field
2256
+
2257
+ class OpinionEvaluation(BaseModel):
2258
+ """Evaluation of whether an opinion should be updated."""
2259
+ action: str = Field(description="Action to take: 'keep' (no change) or 'update' (modify opinion)")
2260
+ reasoning: str = Field(description="Brief explanation of why this action was chosen")
2261
+ new_confidence: float = Field(description="New confidence score (0.0-1.0). Can be higher, lower, or same as before.")
2262
+ new_opinion_text: Optional[str] = Field(
2263
+ default=None,
2264
+ description="If action is 'update', the revised opinion text that acknowledges the previous view. Otherwise None."
2265
+ )
2266
+
2267
+ evaluation_prompt = f"""You are evaluating whether an existing opinion should be updated based on new information.
2268
+
2269
+ ENTITY: {entity_name}
2270
+
2271
+ EXISTING OPINION:
2272
+ {opinion_text}
2273
+ Current confidence: {opinion_confidence:.2f}
2274
+
2275
+ NEW EVENT:
2276
+ {new_event_text}
2277
+
2278
+ Evaluate whether this new event:
2279
+ 1. REINFORCES the opinion (increase confidence, keep text)
2280
+ 2. WEAKENS the opinion (decrease confidence, keep text)
2281
+ 3. CHANGES the opinion (update both text and confidence, noting "Previously I thought X, but now Y...")
2282
+ 4. IRRELEVANT (keep everything as is)
2283
+
2284
+ Guidelines:
2285
+ - Only suggest 'update' action if the new event genuinely contradicts or significantly modifies the opinion
2286
+ - If updating the text, acknowledge the previous opinion and explain the change
2287
+ - Confidence should reflect accumulated evidence (0.0 = no confidence, 1.0 = very confident)
2288
+ - Small changes in confidence are normal; large jumps should be rare"""
2289
+
2290
+ try:
2291
+ result = await self._llm_config.call(
2292
+ messages=[
2293
+ {"role": "system", "content": "You evaluate and update opinions based on new information."},
2294
+ {"role": "user", "content": evaluation_prompt}
2295
+ ],
2296
+ response_format=OpinionEvaluation,
2297
+ scope="memory_evaluate_opinion",
2298
+ temperature=0.3 # Lower temperature for more consistent evaluation
2299
+ )
2300
+
2301
+ # Only return updates if something actually changed
2302
+ if result.action == 'keep' and abs(result.new_confidence - opinion_confidence) < 0.01:
2303
+ return None
2304
+
2305
+ return {
2306
+ 'action': result.action,
2307
+ 'reasoning': result.reasoning,
2308
+ 'new_confidence': result.new_confidence,
2309
+ 'new_text': result.new_opinion_text if result.action == 'update' else None
2310
+ }
2311
+
2312
+ except Exception as e:
2313
+ logger.warning(f"Failed to evaluate opinion update: {str(e)}")
2314
+ return None
2315
+
2316
+ async def _handle_form_opinion(self, task_dict: Dict[str, Any]):
2317
+ """
2318
+ Handler for form opinion tasks.
2319
+
2320
+ Args:
2321
+ task_dict: Dict with keys: 'bank_id', 'answer_text', 'query'
2322
+ """
2323
+ bank_id = task_dict['bank_id']
2324
+ answer_text = task_dict['answer_text']
2325
+ query = task_dict['query']
2326
+
2327
+ await self._extract_and_store_opinions_async(
2328
+ bank_id=bank_id,
2329
+ answer_text=answer_text,
2330
+ query=query
2331
+ )
2332
+
2333
+ async def _handle_reinforce_opinion(self, task_dict: Dict[str, Any]):
2334
+ """
2335
+ Handler for reinforce opinion tasks.
2336
+
2337
+ Args:
2338
+ task_dict: Dict with keys: 'bank_id', 'created_unit_ids', 'unit_texts', 'unit_entities'
2339
+ """
2340
+ bank_id = task_dict['bank_id']
2341
+ created_unit_ids = task_dict['created_unit_ids']
2342
+ unit_texts = task_dict['unit_texts']
2343
+ unit_entities = task_dict['unit_entities']
2344
+
2345
+ await self._reinforce_opinions_async(
2346
+ bank_id=bank_id,
2347
+ created_unit_ids=created_unit_ids,
2348
+ unit_texts=unit_texts,
2349
+ unit_entities=unit_entities
2350
+ )
2351
+
2352
+ async def _reinforce_opinions_async(
2353
+ self,
2354
+ bank_id: str,
2355
+ created_unit_ids: List[str],
2356
+ unit_texts: List[str],
2357
+ unit_entities: List[List[Dict[str, str]]],
2358
+ ):
2359
+ """
2360
+ Background task to reinforce opinions based on newly ingested events.
2361
+
2362
+ This runs asynchronously and does not block the put operation.
2363
+
2364
+ Args:
2365
+ bank_id: bank ID
2366
+ created_unit_ids: List of newly created memory unit IDs
2367
+ unit_texts: Texts of the newly created units
2368
+ unit_entities: Entities extracted from each unit
2369
+ """
2370
+ try:
2371
+ # Extract all unique entity names from the new units
2372
+ entity_names = set()
2373
+ for entities_list in unit_entities:
2374
+ for entity in entities_list:
2375
+ # Handle both Entity objects and dicts
2376
+ if hasattr(entity, 'text'):
2377
+ entity_names.add(entity.text)
2378
+ elif isinstance(entity, dict):
2379
+ entity_names.add(entity['text'])
2380
+
2381
+ if not entity_names:
2382
+ return
2383
+
2384
+
2385
+ pool = await self._get_pool()
2386
+ async with acquire_with_retry(pool) as conn:
2387
+ # Find all opinions related to these entities
2388
+ opinions = await conn.fetch(
2389
+ """
2390
+ SELECT DISTINCT mu.id, mu.text, mu.confidence_score, e.canonical_name
2391
+ FROM memory_units mu
2392
+ JOIN unit_entities ue ON mu.id = ue.unit_id
2393
+ JOIN entities e ON ue.entity_id = e.id
2394
+ WHERE mu.bank_id = $1
2395
+ AND mu.fact_type = 'opinion'
2396
+ AND e.canonical_name = ANY($2::text[])
2397
+ """,
2398
+ bank_id,
2399
+ list(entity_names)
2400
+ )
2401
+
2402
+ if not opinions:
2403
+ return
2404
+
2405
+
2406
+ # Use cached LLM config
2407
+ if self._llm_config is None:
2408
+ logger.error("[REINFORCE] LLM config not available, skipping opinion reinforcement")
2409
+ return
2410
+
2411
+ # Evaluate each opinion against the new events
2412
+ updates_to_apply = []
2413
+ for opinion in opinions:
2414
+ opinion_id = str(opinion['id'])
2415
+ opinion_text = opinion['text']
2416
+ opinion_confidence = opinion['confidence_score']
2417
+ entity_name = opinion['canonical_name']
2418
+
2419
+ # Find all new events mentioning this entity
2420
+ relevant_events = []
2421
+ for unit_text, entities_list in zip(unit_texts, unit_entities):
2422
+ if any(e['text'] == entity_name for e in entities_list):
2423
+ relevant_events.append(unit_text)
2424
+
2425
+ if not relevant_events:
2426
+ continue
2427
+
2428
+ # Combine all relevant events
2429
+ combined_events = "\n".join(relevant_events)
2430
+
2431
+ # Evaluate if opinion should be updated
2432
+ evaluation = await self._evaluate_opinion_update_async(
2433
+ opinion_text,
2434
+ opinion_confidence,
2435
+ combined_events,
2436
+ entity_name
2437
+ )
2438
+
2439
+ if evaluation:
2440
+ updates_to_apply.append({
2441
+ 'opinion_id': opinion_id,
2442
+ 'evaluation': evaluation
2443
+ })
2444
+
2445
+ # Apply all updates in a single transaction
2446
+ if updates_to_apply:
2447
+ async with conn.transaction():
2448
+ for update in updates_to_apply:
2449
+ opinion_id = update['opinion_id']
2450
+ evaluation = update['evaluation']
2451
+
2452
+ if evaluation['action'] == 'update' and evaluation['new_text']:
2453
+ # Update both text and confidence
2454
+ await conn.execute(
2455
+ """
2456
+ UPDATE memory_units
2457
+ SET text = $1, confidence_score = $2, updated_at = NOW()
2458
+ WHERE id = $3
2459
+ """,
2460
+ evaluation['new_text'],
2461
+ evaluation['new_confidence'],
2462
+ uuid.UUID(opinion_id)
2463
+ )
2464
+ else:
2465
+ # Only update confidence
2466
+ await conn.execute(
2467
+ """
2468
+ UPDATE memory_units
2469
+ SET confidence_score = $1, updated_at = NOW()
2470
+ WHERE id = $2
2471
+ """,
2472
+ evaluation['new_confidence'],
2473
+ uuid.UUID(opinion_id)
2474
+ )
2475
+
2476
+ else:
2477
+ pass # No opinions to update
2478
+
2479
+ except Exception as e:
2480
+ logger.error(f"[REINFORCE] Error during opinion reinforcement: {str(e)}")
2481
+ import traceback
2482
+ traceback.print_exc()
2483
+
2484
+ # ==================== bank profile Methods ====================
2485
+
2486
+ async def get_bank_profile(self, bank_id: str) -> "bank_utils.BankProfile":
2487
+ """
2488
+ Get bank profile (name, personality + background).
2489
+ Auto-creates the bank with default values if it does not exist.
2490
+
2491
+ Args:
2492
+ bank_id: Bank identifier
2493
+
2494
+ Returns:
2495
+ BankProfile with name, typed PersonalityTraits, and background
2496
+ """
2497
+ pool = await self._get_pool()
2498
+ return await bank_utils.get_bank_profile(pool, bank_id)
2499
+
2500
+ async def update_bank_personality(
2501
+ self,
2502
+ bank_id: str,
2503
+ personality: Dict[str, float]
2504
+ ) -> None:
2505
+ """
2506
+ Update bank personality traits.
2507
+
2508
+ Args:
2509
+ bank_id: Bank identifier
2510
+ personality: Dict with Big Five traits + bias_strength (all 0-1)
2511
+ """
2512
+ pool = await self._get_pool()
2513
+ await bank_utils.update_bank_personality(pool, bank_id, personality)
2514
+
2515
+ async def merge_bank_background(
2516
+ self,
2517
+ bank_id: str,
2518
+ new_info: str,
2519
+ update_personality: bool = True
2520
+ ) -> dict:
2521
+ """
2522
+ Merge new background information with existing background using LLM.
2523
+ Normalizes to first person ("I") and resolves conflicts.
2524
+ Optionally infers personality traits from the merged background.
2525
+
2526
+ Args:
2527
+ bank_id: Bank identifier
2528
+ new_info: New background information to add/merge
2529
+ update_personality: If True, infer Big Five traits from background (default: True)
2530
+
2531
+ Returns:
2532
+ Dict with 'background' (str) and optionally 'personality' (dict) keys
2533
+ """
2534
+ pool = await self._get_pool()
2535
+ return await bank_utils.merge_bank_background(
2536
+ pool, self._llm_config, bank_id, new_info, update_personality
2537
+ )
2538
+
2539
+ async def list_banks(self) -> list:
2540
+ """
2541
+ List all banks in the system.
2542
+
2543
+ Returns:
2544
+ List of dicts with bank_id, name, personality, background, created_at, updated_at
2545
+ """
2546
+ pool = await self._get_pool()
2547
+ return await bank_utils.list_banks(pool)
2548
+
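# Illustrative sketch (assumes an initialized MemoryEngine `engine`): reading a bank
# profile, merging in new background, and updating personality traits. The trait key
# names shown here are assumptions; see bank_utils for the canonical schema.
async def _demo_bank_profile(engine):
    profile = await engine.get_bank_profile("bank-1")   # auto-created if missing
    merged = await engine.merge_bank_background(
        "bank-1",
        new_info="I spent five years as a field geologist.",
        update_personality=True,
    )
    # merged["background"] is the first-person merged text; merged may also carry
    # inferred Big Five traits under "personality".
    await engine.update_bank_personality("bank-1", {
        "openness": 0.8, "conscientiousness": 0.7, "extraversion": 0.4,   # assumed key names
        "agreeableness": 0.6, "neuroticism": 0.3, "bias_strength": 0.5,
    })
    return profile, merged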
2549
+ # ==================== Reflect Methods ====================
2550
+
2551
+ async def reflect_async(
2552
+ self,
2553
+ bank_id: str,
2554
+ query: str,
2555
+ budget: Budget = Budget.LOW,
2556
+ context: str = None,
2557
+ ) -> ReflectResult:
2558
+ """
2559
+ Reflect and formulate an answer using bank identity, world facts, and opinions.
2560
+
2561
+ This method:
2562
+ 1. Retrieves agent facts (bank's identity and past actions)
2563
+ 2. Retrieves world facts (general knowledge)
2564
+ 3. Retrieves existing opinions (bank's formed perspectives)
2565
+ 4. Uses LLM to formulate an answer
2566
+ 5. Extracts and stores any new opinions formed during reflection
2567
+ 6. Returns plain text answer and the facts used
2568
+
2569
+ Args:
2570
+ bank_id: bank identifier
2571
+ query: Question to answer
2572
+ budget: Budget level for memory exploration (low=100, mid=300, high=600 units)
2573
+ context: Additional context string to include in LLM prompt (not used in recall)
2574
+
2575
+ Returns:
2576
+ ReflectResult containing:
2577
+ - text: Plain text answer (no markdown)
2578
+ - based_on: Dict with 'world', 'agent', and 'opinion' fact lists (MemoryFact objects)
2579
+ - new_opinions: List of newly formed opinions
2580
+ """
2581
+ # Use cached LLM config
2582
+ if self._llm_config is None:
2583
+ raise ValueError("Memory LLM API key not set. Set HINDSIGHT_API_LLM_API_KEY environment variable.")
2584
+
2585
+ # Steps 1-3: Run multi-fact-type search (12-way retrieval: 4 methods × 3 fact types)
2586
+ search_result = await self.recall_async(
2587
+ bank_id=bank_id,
2588
+ query=query,
2589
+ budget=budget,
2590
+ max_tokens=4096,
2591
+ enable_trace=False,
2592
+ fact_type=['agent', 'world', 'opinion'],
2593
+ include_entities=True
2594
+ )
2595
+
2596
+ all_results = search_result.results
2597
+ logger.info(f"[THINK] Search returned {len(all_results)} results")
2598
+
2599
+ # Split results by fact type for structured response
2600
+ agent_results = [r for r in all_results if r.fact_type == 'bank']
2601
+ world_results = [r for r in all_results if r.fact_type == 'world']
2602
+ opinion_results = [r for r in all_results if r.fact_type == 'opinion']
2603
+
2604
+ logger.info(f"[THINK] Split results - agent: {len(agent_results)}, world: {len(world_results)}, opinion: {len(opinion_results)}")
2605
+
2606
+ # Format facts for LLM
2607
+ agent_facts_text = think_utils.format_facts_for_prompt(agent_results)
2608
+ world_facts_text = think_utils.format_facts_for_prompt(world_results)
2609
+ opinion_facts_text = think_utils.format_facts_for_prompt(opinion_results)
2610
+
2611
+ logger.info(f"[THINK] Formatted facts - agent: {len(agent_facts_text)} chars, world: {len(world_facts_text)} chars, opinion: {len(opinion_facts_text)} chars")
2612
+
2613
+ # Get bank profile (name, personality + background)
2614
+ profile = await self.get_bank_profile(bank_id)
2615
+ name = profile["name"]
2616
+ personality = profile["personality"] # Typed as PersonalityTraits
2617
+ background = profile["background"]
2618
+
2619
+ # Build the prompt
2620
+ prompt = think_utils.build_think_prompt(
2621
+ agent_facts_text=agent_facts_text,
2622
+ world_facts_text=world_facts_text,
2623
+ opinion_facts_text=opinion_facts_text,
2624
+ query=query,
2625
+ name=name,
2626
+ personality=personality,
2627
+ background=background,
2628
+ context=context,
2629
+ )
2630
+
2631
+ logger.info(f"[THINK] Full prompt length: {len(prompt)} chars")
2632
+
2633
+ system_message = think_utils.get_system_message(personality)
2634
+
2635
+ answer_text = await self._llm_config.call(
2636
+ messages=[
2637
+ {"role": "system", "content": system_message},
2638
+ {"role": "user", "content": prompt}
2639
+ ],
2640
+ scope="memory_think",
2641
+ temperature=0.9,
2642
+ max_tokens=1000
2643
+ )
2644
+
2645
+ answer_text = answer_text.strip()
2646
+
2647
+ # Submit form_opinion task for background processing
2648
+ await self._task_backend.submit_task({
2649
+ 'type': 'form_opinion',
2650
+ 'bank_id': bank_id,
2651
+ 'answer_text': answer_text,
2652
+ 'query': query
2653
+ })
2654
+
2655
+ # Return response with facts split by type
2656
+ return ReflectResult(
2657
+ text=answer_text,
2658
+ based_on={
2659
+ "world": world_results,
2660
+ "agent": agent_results,
2661
+ "opinion": opinion_results
2662
+ },
2663
+ new_opinions=[] # Opinions are being extracted asynchronously
2664
+ )
2665
+
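# Illustrative usage sketch (assumes an initialized MemoryEngine `engine`): reflecting
# on a question and inspecting which facts the answer leaned on. The query and context
# strings are placeholders; budget defaults to Budget.LOW.
async def _demo_reflect(engine):
    result = await engine.reflect_async(
        bank_id="bank-1",
        query="Should we expand into the EU market?",
        context="Quarterly planning discussion",   # extra prompt context; not used in recall
    )
    print(result.text)                              # plain-text answer
    for kind in ("world", "agent", "opinion"):
        print(kind, [fact.id for fact in result.based_on[kind]])
    return result                                   # new_opinions is filled in asynchronously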
2666
+ async def _extract_and_store_opinions_async(
2667
+ self,
2668
+ bank_id: str,
2669
+ answer_text: str,
2670
+ query: str
2671
+ ):
2672
+ """
2673
+ Background task to extract and store opinions from think response.
2674
+
2675
+ This runs asynchronously and does not block the think response.
2676
+
2677
+ Args:
2678
+ bank_id: Bank identifier
2679
+ answer_text: The generated answer text
2680
+ query: The original query
2681
+ """
2682
+ try:
2683
+ # Extract opinions from the answer
2684
+ new_opinions = await think_utils.extract_opinions_from_text(
2685
+ self._llm_config, text=answer_text, query=query
2686
+ )
2687
+
2688
+ # Store new opinions
2689
+ if new_opinions:
2690
+ from datetime import datetime, timezone
2691
+ current_time = datetime.now(timezone.utc)
2692
+ for opinion in new_opinions:
2693
+ await self.retain_async(
2694
+ bank_id=bank_id,
2695
+ content=opinion.opinion,
2696
+ context=f"formed during thinking about: {query}",
2697
+ event_date=current_time,
2698
+ fact_type_override='opinion',
2699
+ confidence_score=opinion.confidence
2700
+ )
2701
+
2702
+ except Exception as e:
2703
+ logger.warning(f"[THINK] Failed to extract/store opinions: {str(e)}")
2704
+
2705
+ async def get_entity_observations(
2706
+ self,
2707
+ bank_id: str,
2708
+ entity_id: str,
2709
+ limit: int = 10
2710
+ ) -> List[EntityObservation]:
2711
+ """
2712
+ Get observations linked to an entity.
2713
+
2714
+ Args:
2715
+ bank_id: Bank identifier
2716
+ entity_id: Entity UUID to get observations for
2717
+ limit: Maximum number of observations to return
2718
+
2719
+ Returns:
2720
+ List of EntityObservation objects
2721
+ """
2722
+ pool = await self._get_pool()
2723
+ async with acquire_with_retry(pool) as conn:
2724
+ rows = await conn.fetch(
2725
+ """
2726
+ SELECT mu.text, mu.mentioned_at
2727
+ FROM memory_units mu
2728
+ JOIN unit_entities ue ON mu.id = ue.unit_id
2729
+ WHERE mu.bank_id = $1
2730
+ AND mu.fact_type = 'observation'
2731
+ AND ue.entity_id = $2
2732
+ ORDER BY mu.mentioned_at DESC
2733
+ LIMIT $3
2734
+ """,
2735
+ bank_id, uuid.UUID(entity_id), limit
2736
+ )
2737
+
2738
+ observations = []
2739
+ for row in rows:
2740
+ mentioned_at = row['mentioned_at'].isoformat() if row['mentioned_at'] else None
2741
+ observations.append(EntityObservation(
2742
+ text=row['text'],
2743
+ mentioned_at=mentioned_at
2744
+ ))
2745
+ return observations
2746
+
2747
+ async def list_entities(
2748
+ self,
2749
+ bank_id: str,
2750
+ limit: int = 100
2751
+ ) -> List[Dict[str, Any]]:
2752
+ """
2753
+ List all entities for a bank.
2754
+
2755
+ Args:
2756
+ bank_id: Bank identifier
2757
+ limit: Maximum number of entities to return
2758
+
2759
+ Returns:
2760
+ List of entity dicts with id, canonical_name, mention_count, first_seen, last_seen
2761
+ """
2762
+ pool = await self._get_pool()
2763
+ async with acquire_with_retry(pool) as conn:
2764
+ rows = await conn.fetch(
2765
+ """
2766
+ SELECT id, canonical_name, mention_count, first_seen, last_seen, metadata
2767
+ FROM entities
2768
+ WHERE bank_id = $1
2769
+ ORDER BY mention_count DESC, last_seen DESC
2770
+ LIMIT $2
2771
+ """,
2772
+ bank_id, limit
2773
+ )
2774
+
2775
+ entities = []
2776
+ for row in rows:
2777
+ # Handle metadata - may be dict, JSON string, or None
2778
+ metadata = row['metadata']
2779
+ if metadata is None:
2780
+ metadata = {}
2781
+ elif isinstance(metadata, str):
2782
+ import json
2783
+ try:
2784
+ metadata = json.loads(metadata)
2785
+ except json.JSONDecodeError:
2786
+ metadata = {}
2787
+
2788
+ entities.append({
2789
+ 'id': str(row['id']),
2790
+ 'canonical_name': row['canonical_name'],
2791
+ 'mention_count': row['mention_count'],
2792
+ 'first_seen': row['first_seen'].isoformat() if row['first_seen'] else None,
2793
+ 'last_seen': row['last_seen'].isoformat() if row['last_seen'] else None,
2794
+ 'metadata': metadata
2795
+ })
2796
+ return entities
2797
+
2798
+ async def get_entity_state(
2799
+ self,
2800
+ bank_id: str,
2801
+ entity_id: str,
2802
+ entity_name: str,
2803
+ limit: int = 10
2804
+ ) -> EntityState:
2805
+ """
2806
+ Get the current state (mental model) of an entity.
2807
+
2808
+ Args:
2809
+ bank_id: Bank identifier
2810
+ entity_id: Entity UUID
2811
+ entity_name: Canonical name of the entity
2812
+ limit: Maximum number of observations to include
2813
+
2814
+ Returns:
2815
+ EntityState with observations
2816
+ """
2817
+ observations = await self.get_entity_observations(bank_id, entity_id, limit)
2818
+ return EntityState(
2819
+ entity_id=entity_id,
2820
+ canonical_name=entity_name,
2821
+ observations=observations
2822
+ )
2823
+
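# Illustrative sketch (assumes an initialized MemoryEngine `engine`): walking the
# most-mentioned entities of a bank and pulling their synthesized observations.
async def _demo_entity_states(engine):
    states = {}
    for entity in await engine.list_entities("bank-1", limit=5):
        state = await engine.get_entity_state(
            bank_id="bank-1",
            entity_id=entity["id"],
            entity_name=entity["canonical_name"],
            limit=10,
        )
        states[entity["canonical_name"]] = [obs.text for obs in state.observations]
    return states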
2824
+ async def regenerate_entity_observations(
2825
+ self,
2826
+ bank_id: str,
2827
+ entity_id: str,
2828
+ entity_name: str,
2829
+ version: str | None = None
2830
+ ) -> List[str]:
2831
+ """
2832
+ Regenerate observations for an entity by:
2833
+ 1. Checking version for deduplication (if provided)
2834
+ 2. Searching all facts mentioning the entity
2835
+ 3. Using LLM to synthesize observations (no personality)
2836
+ 4. Deleting old observations for this entity
2837
+ 5. Storing new observations linked to the entity
2838
+
2839
+ Args:
2840
+ bank_id: Bank identifier
2841
+ entity_id: Entity UUID
2842
+ entity_name: Canonical name of the entity
2843
+ version: Entity's last_seen timestamp when task was created (for deduplication)
2844
+
2845
+ Returns:
2846
+ List of created observation IDs
2847
+ """
2848
+ pool = await self._get_pool()
2849
+
2850
+ # Step 1: Check version for deduplication
2851
+ if version:
2852
+ async with acquire_with_retry(pool) as conn:
2853
+ current_last_seen = await conn.fetchval(
2854
+ """
2855
+ SELECT last_seen
2856
+ FROM entities
2857
+ WHERE id = $1 AND bank_id = $2
2858
+ """,
2859
+ uuid.UUID(entity_id), bank_id
2860
+ )
2861
+
2862
+ if current_last_seen and current_last_seen.isoformat() != version:
2863
+ return []
2864
+
2865
+ # Step 2: Get all facts mentioning this entity (exclude observations themselves)
2866
+ async with acquire_with_retry(pool) as conn:
2867
+ rows = await conn.fetch(
2868
+ """
2869
+ SELECT mu.id, mu.text, mu.context, mu.occurred_start, mu.fact_type
2870
+ FROM memory_units mu
2871
+ JOIN unit_entities ue ON mu.id = ue.unit_id
2872
+ WHERE mu.bank_id = $1
2873
+ AND ue.entity_id = $2
2874
+ AND mu.fact_type IN ('world', 'agent')
2875
+ ORDER BY mu.occurred_start DESC
2876
+ LIMIT 50
2877
+ """,
2878
+ bank_id, uuid.UUID(entity_id)
2879
+ )
2880
+
2881
+ if not rows:
2882
+ return []
2883
+
2884
+ # Convert to MemoryFact objects for the observation extraction
2885
+ facts = []
2886
+ for row in rows:
2887
+ occurred_start = row['occurred_start'].isoformat() if row['occurred_start'] else None
2888
+ facts.append(MemoryFact(
2889
+ id=str(row['id']),
2890
+ text=row['text'],
2891
+ fact_type=row['fact_type'],
2892
+ context=row['context'],
2893
+ occurred_start=occurred_start
2894
+ ))
2895
+
2896
+ # Step 3: Extract observations using LLM (no personality)
2897
+ observations = await observation_utils.extract_observations_from_facts(
2898
+ self._llm_config,
2899
+ entity_name,
2900
+ facts
2901
+ )
2902
+
2903
+ if not observations:
2904
+ return []
2905
+
2906
+ # Step 4: Delete old observations and insert new ones in a transaction
2907
+ async with acquire_with_retry(pool) as conn:
2908
+ async with conn.transaction():
2909
+ # Delete old observations for this entity
2910
+ await conn.execute(
2911
+ """
2912
+ DELETE FROM memory_units
2913
+ WHERE id IN (
2914
+ SELECT mu.id
2915
+ FROM memory_units mu
2916
+ JOIN unit_entities ue ON mu.id = ue.unit_id
2917
+ WHERE mu.bank_id = $1
2918
+ AND mu.fact_type = 'observation'
2919
+ AND ue.entity_id = $2
2920
+ )
2921
+ """,
2922
+ bank_id, uuid.UUID(entity_id)
2923
+ )
2924
+
2925
+ # Generate embeddings for new observations
2926
+ embeddings = await embedding_utils.generate_embeddings_batch(
2927
+ self.embeddings, observations
2928
+ )
2929
+
2930
+ # Insert new observations
2931
+ current_time = utcnow()
2932
+ created_ids = []
2933
+
2934
+ for obs_text, embedding in zip(observations, embeddings):
2935
+ result = await conn.fetchrow(
2936
+ """
2937
+ INSERT INTO memory_units (
2938
+ bank_id, text, embedding, context, event_date,
2939
+ occurred_start, occurred_end, mentioned_at,
2940
+ fact_type, access_count
2941
+ )
2942
+ VALUES ($1, $2, $3, $4, $5, $6, $7, $8, 'observation', 0)
2943
+ RETURNING id
2944
+ """,
2945
+ bank_id,
2946
+ obs_text,
2947
+ str(embedding),
2948
+ f"observation about {entity_name}",
2949
+ current_time,
2950
+ current_time,
2951
+ current_time,
2952
+ current_time
2953
+ )
2954
+ obs_id = str(result['id'])
2955
+ created_ids.append(obs_id)
2956
+
2957
+ # Link observation to entity
2958
+ await conn.execute(
2959
+ """
2960
+ INSERT INTO unit_entities (unit_id, entity_id)
2961
+ VALUES ($1, $2)
2962
+ """,
2963
+ uuid.UUID(obs_id), uuid.UUID(entity_id)
2964
+ )
2965
+
2966
+ # Single consolidated log line
2967
+ logger.info(f"[OBSERVATIONS] {entity_name}: {len(facts)} facts -> {len(created_ids)} observations")
2968
+ return created_ids
2969
+
2970
+ async def _regenerate_observations_sync(
2971
+ self,
2972
+ bank_id: str,
2973
+ entity_ids: List[str],
2974
+ min_facts: int = 5
2975
+ ) -> None:
2976
+ """
2977
+ Regenerate observations for entities synchronously (called during retain).
2978
+
2979
+ Args:
2980
+ bank_id: Bank identifier
2981
+ entity_ids: List of entity IDs to process
2982
+ min_facts: Minimum facts required to regenerate observations
2983
+ """
2984
+ if not bank_id or not entity_ids:
2985
+ return
2986
+
2987
+ pool = await self._get_pool()
2988
+ async with pool.acquire() as conn:
2989
+ for entity_id in entity_ids:
2990
+ try:
2991
+ entity_uuid = uuid.UUID(entity_id) if isinstance(entity_id, str) else entity_id
2992
+
2993
+ # Check if entity exists
2994
+ entity_exists = await conn.fetchrow(
2995
+ "SELECT canonical_name FROM entities WHERE id = $1 AND bank_id = $2",
2996
+ entity_uuid, bank_id
2997
+ )
2998
+
2999
+ if not entity_exists:
3000
+ logger.debug(f"[OBSERVATIONS] Entity {entity_id} not yet in bank {bank_id}, skipping")
3001
+ continue
3002
+
3003
+ entity_name = entity_exists['canonical_name']
3004
+
3005
+ # Count facts linked to this entity
3006
+ fact_count = await conn.fetchval(
3007
+ "SELECT COUNT(*) FROM unit_entities WHERE entity_id = $1",
3008
+ entity_uuid
3009
+ ) or 0
3010
+
3011
+ # Only regenerate if entity has enough facts
3012
+ if fact_count >= min_facts:
3013
+ await self.regenerate_entity_observations(bank_id, entity_id, entity_name, version=None)
3014
+ else:
3015
+ logger.debug(f"[OBSERVATIONS] Skipping {entity_name} ({fact_count} facts < {min_facts} threshold)")
3016
+
3017
+ except Exception as e:
3018
+ logger.error(f"[OBSERVATIONS] Error processing entity {entity_id}: {e}")
3019
+ continue
3020
+
3021
+ async def _handle_regenerate_observations(self, task_dict: Dict[str, Any]):
3022
+ """
3023
+ Handler for regenerate_observations tasks.
3024
+
3025
+ Args:
3026
+ task_dict: Dict with 'bank_id' and either:
3027
+ - 'entity_ids' (list): Process multiple entities
3028
+ - 'entity_id', 'entity_name': Process single entity (legacy)
3029
+ """
3030
+ try:
3031
+ bank_id = task_dict.get('bank_id')
3032
+
3033
+ # New format: multiple entity_ids
3034
+ if 'entity_ids' in task_dict:
3035
+ entity_ids = task_dict.get('entity_ids', [])
3036
+ min_facts = task_dict.get('min_facts', 5)
3037
+
3038
+ if not bank_id or not entity_ids:
3039
+ logger.error(f"[OBSERVATIONS] Missing required fields in task: {task_dict}")
3040
+ return
3041
+
3042
+ # Process each entity
3043
+ pool = await self._get_pool()
3044
+ async with pool.acquire() as conn:
3045
+ for entity_id in entity_ids:
3046
+ try:
3047
+ # Fetch entity name and check fact count
3048
+ import uuid as uuid_module
3049
+ entity_uuid = uuid_module.UUID(entity_id) if isinstance(entity_id, str) else entity_id
3050
+
3051
+ # First check if entity exists
3052
+ entity_exists = await conn.fetchrow(
3053
+ "SELECT canonical_name FROM entities WHERE id = $1 AND bank_id = $2",
3054
+ entity_uuid, bank_id
3055
+ )
3056
+
3057
+ if not entity_exists:
3058
+ logger.debug(f"[OBSERVATIONS] Entity {entity_id} not yet in bank {bank_id}, skipping")
3059
+ continue
3060
+
3061
+ entity_name = entity_exists['canonical_name']
3062
+
3063
+ # Count facts linked to this entity
3064
+ fact_count = await conn.fetchval(
3065
+ "SELECT COUNT(*) FROM unit_entities WHERE entity_id = $1",
3066
+ entity_uuid
3067
+ ) or 0
3068
+
3069
+ # Only regenerate if entity has enough facts
3070
+ if fact_count >= min_facts:
3071
+ await self.regenerate_entity_observations(bank_id, entity_id, entity_name, version=None)
3072
+ else:
3073
+ logger.debug(f"[OBSERVATIONS] Skipping {entity_name} ({fact_count} facts < {min_facts} threshold)")
3074
+
3075
+ except Exception as e:
3076
+ logger.error(f"[OBSERVATIONS] Error processing entity {entity_id}: {e}")
3077
+ continue
3078
+
3079
+ # Legacy format: single entity
3080
+ else:
3081
+ entity_id = task_dict.get('entity_id')
3082
+ entity_name = task_dict.get('entity_name')
3083
+ version = task_dict.get('version')
3084
+
3085
+ if not all([bank_id, entity_id, entity_name]):
3086
+ logger.error(f"[OBSERVATIONS] Missing required fields in task: {task_dict}")
3087
+ return
3088
+
3089
+ await self.regenerate_entity_observations(bank_id, entity_id, entity_name, version)
3090
+
3091
+ except Exception as e:
3092
+ logger.error(f"[OBSERVATIONS] Error regenerating observations: {e}")
3093
+ import traceback
3094
+ traceback.print_exc()
3095
+
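# Illustrative sketch (assumes an initialized MemoryEngine `engine`): the payload shape
# this handler consumes when queued through the engine's task backend. The
# 'regenerate_observations' type string is assumed from the handler name, and the
# entity UUID is a placeholder; the other keys mirror what the handler reads above.
async def _demo_queue_regeneration(engine):
    await engine._task_backend.submit_task({
        'type': 'regenerate_observations',
        'bank_id': 'bank-1',
        'entity_ids': ['9b2f6c3e-0000-0000-0000-000000000000'],
        'min_facts': 5,  # entities with fewer linked facts are skipped
    })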