hindsight-api 0.2.1__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. hindsight_api/admin/__init__.py +1 -0
  2. hindsight_api/admin/cli.py +311 -0
  3. hindsight_api/alembic/versions/f1a2b3c4d5e6_add_memory_links_composite_index.py +44 -0
  4. hindsight_api/alembic/versions/g2a3b4c5d6e7_add_tags_column.py +48 -0
  5. hindsight_api/alembic/versions/h3c4d5e6f7g8_mental_models_v4.py +112 -0
  6. hindsight_api/alembic/versions/i4d5e6f7g8h9_delete_opinions.py +41 -0
  7. hindsight_api/alembic/versions/j5e6f7g8h9i0_mental_model_versions.py +95 -0
  8. hindsight_api/alembic/versions/k6f7g8h9i0j1_add_directive_subtype.py +58 -0
  9. hindsight_api/alembic/versions/l7g8h9i0j1k2_add_worker_columns.py +109 -0
  10. hindsight_api/alembic/versions/m8h9i0j1k2l3_mental_model_id_to_text.py +41 -0
  11. hindsight_api/alembic/versions/n9i0j1k2l3m4_learnings_and_pinned_reflections.py +134 -0
  12. hindsight_api/alembic/versions/o0j1k2l3m4n5_migrate_mental_models_data.py +113 -0
  13. hindsight_api/alembic/versions/p1k2l3m4n5o6_new_knowledge_architecture.py +194 -0
  14. hindsight_api/alembic/versions/q2l3m4n5o6p7_fix_mental_model_fact_type.py +50 -0
  15. hindsight_api/alembic/versions/r3m4n5o6p7q8_add_reflect_response_to_reflections.py +47 -0
  16. hindsight_api/alembic/versions/s4n5o6p7q8r9_add_consolidated_at_to_memory_units.py +53 -0
  17. hindsight_api/alembic/versions/t5o6p7q8r9s0_rename_mental_models_to_observations.py +134 -0
  18. hindsight_api/alembic/versions/u6p7q8r9s0t1_mental_models_text_id.py +41 -0
  19. hindsight_api/alembic/versions/v7q8r9s0t1u2_add_max_tokens_to_mental_models.py +50 -0
  20. hindsight_api/api/http.py +1406 -118
  21. hindsight_api/api/mcp.py +11 -196
  22. hindsight_api/config.py +359 -27
  23. hindsight_api/engine/consolidation/__init__.py +5 -0
  24. hindsight_api/engine/consolidation/consolidator.py +859 -0
  25. hindsight_api/engine/consolidation/prompts.py +69 -0
  26. hindsight_api/engine/cross_encoder.py +706 -88
  27. hindsight_api/engine/db_budget.py +284 -0
  28. hindsight_api/engine/db_utils.py +11 -0
  29. hindsight_api/engine/directives/__init__.py +5 -0
  30. hindsight_api/engine/directives/models.py +37 -0
  31. hindsight_api/engine/embeddings.py +553 -29
  32. hindsight_api/engine/entity_resolver.py +8 -5
  33. hindsight_api/engine/interface.py +40 -17
  34. hindsight_api/engine/llm_wrapper.py +744 -68
  35. hindsight_api/engine/memory_engine.py +2505 -1017
  36. hindsight_api/engine/mental_models/__init__.py +14 -0
  37. hindsight_api/engine/mental_models/models.py +53 -0
  38. hindsight_api/engine/query_analyzer.py +4 -3
  39. hindsight_api/engine/reflect/__init__.py +18 -0
  40. hindsight_api/engine/reflect/agent.py +933 -0
  41. hindsight_api/engine/reflect/models.py +109 -0
  42. hindsight_api/engine/reflect/observations.py +186 -0
  43. hindsight_api/engine/reflect/prompts.py +483 -0
  44. hindsight_api/engine/reflect/tools.py +437 -0
  45. hindsight_api/engine/reflect/tools_schema.py +250 -0
  46. hindsight_api/engine/response_models.py +168 -4
  47. hindsight_api/engine/retain/bank_utils.py +79 -201
  48. hindsight_api/engine/retain/fact_extraction.py +424 -195
  49. hindsight_api/engine/retain/fact_storage.py +35 -12
  50. hindsight_api/engine/retain/link_utils.py +29 -24
  51. hindsight_api/engine/retain/orchestrator.py +24 -43
  52. hindsight_api/engine/retain/types.py +11 -2
  53. hindsight_api/engine/search/graph_retrieval.py +43 -14
  54. hindsight_api/engine/search/link_expansion_retrieval.py +391 -0
  55. hindsight_api/engine/search/mpfp_retrieval.py +362 -117
  56. hindsight_api/engine/search/reranking.py +2 -2
  57. hindsight_api/engine/search/retrieval.py +848 -201
  58. hindsight_api/engine/search/tags.py +172 -0
  59. hindsight_api/engine/search/think_utils.py +42 -141
  60. hindsight_api/engine/search/trace.py +12 -1
  61. hindsight_api/engine/search/tracer.py +26 -6
  62. hindsight_api/engine/search/types.py +21 -3
  63. hindsight_api/engine/task_backend.py +113 -106
  64. hindsight_api/engine/utils.py +1 -152
  65. hindsight_api/extensions/__init__.py +10 -1
  66. hindsight_api/extensions/builtin/tenant.py +5 -1
  67. hindsight_api/extensions/context.py +10 -1
  68. hindsight_api/extensions/operation_validator.py +81 -4
  69. hindsight_api/extensions/tenant.py +26 -0
  70. hindsight_api/main.py +69 -6
  71. hindsight_api/mcp_local.py +12 -53
  72. hindsight_api/mcp_tools.py +494 -0
  73. hindsight_api/metrics.py +433 -48
  74. hindsight_api/migrations.py +141 -1
  75. hindsight_api/models.py +3 -3
  76. hindsight_api/pg0.py +53 -0
  77. hindsight_api/server.py +39 -2
  78. hindsight_api/worker/__init__.py +11 -0
  79. hindsight_api/worker/main.py +296 -0
  80. hindsight_api/worker/poller.py +486 -0
  81. {hindsight_api-0.2.1.dist-info → hindsight_api-0.4.0.dist-info}/METADATA +16 -6
  82. hindsight_api-0.4.0.dist-info/RECORD +112 -0
  83. {hindsight_api-0.2.1.dist-info → hindsight_api-0.4.0.dist-info}/entry_points.txt +2 -0
  84. hindsight_api/engine/retain/observation_regeneration.py +0 -254
  85. hindsight_api/engine/search/observation_utils.py +0 -125
  86. hindsight_api/engine/search/scoring.py +0 -159
  87. hindsight_api-0.2.1.dist-info/RECORD +0 -75
  88. {hindsight_api-0.2.1.dist-info → hindsight_api-0.4.0.dist-info}/WHEEL +0 -0
@@ -11,6 +11,7 @@ This implements a sophisticated memory architecture that combines:
 
  import asyncio
  import contextvars
+ import json
  import logging
  import time
  import uuid
@@ -18,6 +19,8 @@ from datetime import UTC, datetime, timedelta
  from typing import TYPE_CHECKING, Any
 
  from ..config import get_config
+ from ..metrics import get_metrics_collector
+ from .db_budget import budgeted_operation
 
  # Context variable for current schema (async-safe, per-task isolation)
  _current_schema: contextvars.ContextVar[str] = contextvars.ContextVar("current_schema", default="public")
@@ -132,17 +135,31 @@ if TYPE_CHECKING:
 
  from enum import Enum
 
- from ..pg0 import EmbeddedPostgres
+ from ..metrics import get_metrics_collector
+ from ..pg0 import EmbeddedPostgres, parse_pg0_url
  from .entity_resolver import EntityResolver
  from .llm_wrapper import LLMConfig
  from .query_analyzer import QueryAnalyzer
- from .response_models import VALID_RECALL_FACT_TYPES, EntityObservation, EntityState, MemoryFact, ReflectResult
+ from .reflect import run_reflect_agent
+ from .reflect.tools import tool_expand, tool_recall, tool_search_mental_models, tool_search_observations
+ from .response_models import (
+ VALID_RECALL_FACT_TYPES,
+ EntityObservation,
+ EntityState,
+ LLMCallTrace,
+ MemoryFact,
+ ObservationRef,
+ ReflectResult,
+ TokenUsage,
+ ToolCallTrace,
+ )
  from .response_models import RecallResult as RecallResultModel
  from .retain import bank_utils, embedding_utils
  from .retain.types import RetainContentDict
- from .search import observation_utils, think_utils
+ from .search import think_utils
  from .search.reranking import CrossEncoderReranker
- from .task_backend import AsyncIOQueueBackend, TaskBackend
+ from .search.tags import TagsMatch
+ from .task_backend import BrokerTaskBackend, SyncTaskBackend, TaskBackend
 
 
  class Budget(str, Enum):
@@ -195,11 +212,26 @@ class MemoryEngine(MemoryEngineInterface):
  memory_llm_api_key: str | None = None,
  memory_llm_model: str | None = None,
  memory_llm_base_url: str | None = None,
+ # Per-operation LLM config (optional, falls back to memory_llm_* params)
+ retain_llm_provider: str | None = None,
+ retain_llm_api_key: str | None = None,
+ retain_llm_model: str | None = None,
+ retain_llm_base_url: str | None = None,
+ reflect_llm_provider: str | None = None,
+ reflect_llm_api_key: str | None = None,
+ reflect_llm_model: str | None = None,
+ reflect_llm_base_url: str | None = None,
+ consolidation_llm_provider: str | None = None,
+ consolidation_llm_api_key: str | None = None,
+ consolidation_llm_model: str | None = None,
+ consolidation_llm_base_url: str | None = None,
  embeddings: Embeddings | None = None,
  cross_encoder: CrossEncoderModel | None = None,
  query_analyzer: QueryAnalyzer | None = None,
- pool_min_size: int = 5,
- pool_max_size: int = 100,
+ pool_min_size: int | None = None,
+ pool_max_size: int | None = None,
+ db_command_timeout: int | None = None,
+ db_acquire_timeout: int | None = None,
  task_backend: TaskBackend | None = None,
  run_migrations: bool = True,
  operation_validator: "OperationValidatorExtension | None" = None,
@@ -220,12 +252,26 @@ class MemoryEngine(MemoryEngineInterface):
  memory_llm_api_key: API key for the LLM provider. Defaults to HINDSIGHT_API_LLM_API_KEY env var.
  memory_llm_model: Model name. Defaults to HINDSIGHT_API_LLM_MODEL env var.
  memory_llm_base_url: Base URL for the LLM API. Defaults based on provider.
+ retain_llm_provider: LLM provider for retain operations. Falls back to memory_llm_provider.
+ retain_llm_api_key: API key for retain LLM. Falls back to memory_llm_api_key.
+ retain_llm_model: Model for retain operations. Falls back to memory_llm_model.
+ retain_llm_base_url: Base URL for retain LLM. Falls back to memory_llm_base_url.
+ reflect_llm_provider: LLM provider for reflect operations. Falls back to memory_llm_provider.
+ reflect_llm_api_key: API key for reflect LLM. Falls back to memory_llm_api_key.
+ reflect_llm_model: Model for reflect operations. Falls back to memory_llm_model.
+ reflect_llm_base_url: Base URL for reflect LLM. Falls back to memory_llm_base_url.
+ consolidation_llm_provider: LLM provider for consolidation operations. Falls back to memory_llm_provider.
+ consolidation_llm_api_key: API key for consolidation LLM. Falls back to memory_llm_api_key.
+ consolidation_llm_model: Model for consolidation operations. Falls back to memory_llm_model.
+ consolidation_llm_base_url: Base URL for consolidation LLM. Falls back to memory_llm_base_url.
  embeddings: Embeddings implementation. If not provided, created from env vars.
  cross_encoder: Cross-encoder model. If not provided, created from env vars.
  query_analyzer: Query analyzer implementation. If not provided, uses DateparserQueryAnalyzer.
- pool_min_size: Minimum number of connections in the pool (default: 5)
- pool_max_size: Maximum number of connections in the pool (default: 100)
- task_backend: Custom task backend. If not provided, uses AsyncIOQueueBackend.
+ pool_min_size: Minimum number of connections in the pool. Defaults to HINDSIGHT_API_DB_POOL_MIN_SIZE.
+ pool_max_size: Maximum number of connections in the pool. Defaults to HINDSIGHT_API_DB_POOL_MAX_SIZE.
+ db_command_timeout: PostgreSQL command timeout in seconds. Defaults to HINDSIGHT_API_DB_COMMAND_TIMEOUT.
+ db_acquire_timeout: Connection acquisition timeout in seconds. Defaults to HINDSIGHT_API_DB_ACQUIRE_TIMEOUT.
+ task_backend: Custom task backend. If not provided, uses BrokerTaskBackend for distributed processing.
  run_migrations: Whether to run database migrations during initialize(). Default: True
  operation_validator: Optional extension to validate operations before execution.
  If provided, retain/recall/reflect operations will be validated.
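A minimal construction sketch for the per-operation LLM fallbacks documented above. The import path, provider value, and model names are illustrative assumptions (nothing beyond the parameter names comes from this diff); any parameter left unset falls back to the corresponding memory_llm_* value.

    from hindsight_api.engine.memory_engine import MemoryEngine  # assumed import path

    engine = MemoryEngine(
        db_url="pg0",                         # embedded Postgres instance (see the pg0 handling below)
        memory_llm_provider="ollama",         # default/fallback config; ollama needs no API key
        memory_llm_model="<default-model>",   # placeholder model name
        retain_llm_model="<stronger-model>",  # fact extraction benefits from strong structured output
        reflect_llm_model="<lighter-model>",  # reflect can use a lighter model
        # consolidation_llm_* left unset: falls back to the memory_llm_* values above
    )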
@@ -252,38 +298,21 @@ class MemoryEngine(MemoryEngineInterface):
  db_url = db_url or config.database_url
  memory_llm_provider = memory_llm_provider or config.llm_provider
  memory_llm_api_key = memory_llm_api_key or config.llm_api_key
- # Ollama doesn't require an API key
- if not memory_llm_api_key and memory_llm_provider != "ollama":
+ # Ollama and mock don't require an API key
+ if not memory_llm_api_key and memory_llm_provider not in ("ollama", "mock"):
  raise ValueError("LLM API key is required. Set HINDSIGHT_API_LLM_API_KEY environment variable.")
  memory_llm_model = memory_llm_model or config.llm_model
  memory_llm_base_url = memory_llm_base_url or config.get_llm_base_url() or None
  # Track pg0 instance (if used)
  self._pg0: EmbeddedPostgres | None = None
- self._pg0_instance_name: str | None = None
 
  # Initialize PostgreSQL connection URL
  # The actual URL will be set during initialize() after starting the server
  # Supports: "pg0" (default instance), "pg0://instance-name" (named instance), or regular postgresql:// URL
- if db_url == "pg0":
- self._use_pg0 = True
- self._pg0_instance_name = "hindsight"
- self._pg0_port = None # Use default port
- self.db_url = None
- elif db_url.startswith("pg0://"):
- self._use_pg0 = True
- # Parse instance name and optional port: pg0://instance-name or pg0://instance-name:port
- url_part = db_url[6:] # Remove "pg0://"
- if ":" in url_part:
- self._pg0_instance_name, port_str = url_part.rsplit(":", 1)
- self._pg0_port = int(port_str)
- else:
- self._pg0_instance_name = url_part or "hindsight"
- self._pg0_port = None # Use default port
+ self._use_pg0, self._pg0_instance_name, self._pg0_port = parse_pg0_url(db_url)
+ if self._use_pg0:
  self.db_url = None
  else:
- self._use_pg0 = False
- self._pg0_instance_name = None
- self._pg0_port = None
  self.db_url = db_url
 
  # Set default base URL if not provided
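The parse_pg0_url helper (added to hindsight_api/pg0.py in this release, not expanded in this diff) replaces the inline branches removed above. A sketch of the (use_pg0, instance_name, port) tuples it is expected to return, inferred from that removed logic; the instance names are placeholders:

    parse_pg0_url("pg0")                           # -> (True, "hindsight", None)   default instance
    parse_pg0_url("pg0://analytics")               # -> (True, "analytics", None)   named instance
    parse_pg0_url("pg0://analytics:55432")         # -> (True, "analytics", 55432)  named instance with explicit port
    parse_pg0_url("postgresql://user:pw@host/db")  # -> (False, None, None)         regular URL, used as-is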
@@ -298,8 +327,10 @@ class MemoryEngine(MemoryEngineInterface):
  # Connection pool (will be created in initialize())
  self._pool = None
  self._initialized = False
- self._pool_min_size = pool_min_size
- self._pool_max_size = pool_max_size
+ self._pool_min_size = pool_min_size if pool_min_size is not None else config.db_pool_min_size
+ self._pool_max_size = pool_max_size if pool_max_size is not None else config.db_pool_max_size
+ self._db_command_timeout = db_command_timeout if db_command_timeout is not None else config.db_command_timeout
+ self._db_acquire_timeout = db_acquire_timeout if db_acquire_timeout is not None else config.db_acquire_timeout
  self._run_migrations = run_migrations
 
  # Initialize entity resolver (will be created in initialize())
@@ -319,7 +350,7 @@ class MemoryEngine(MemoryEngineInterface):
 
  self.query_analyzer = DateparserQueryAnalyzer()
 
- # Initialize LLM configuration
+ # Initialize LLM configuration (default, used as fallback)
  self._llm_config = LLMConfig(
  provider=memory_llm_provider,
  api_key=memory_llm_api_key,
@@ -331,17 +362,84 @@ class MemoryEngine(MemoryEngineInterface):
  self._llm_client = self._llm_config._client
  self._llm_model = self._llm_config.model
 
+ # Initialize per-operation LLM configs (fall back to default if not specified)
+ # Retain LLM config - for fact extraction (benefits from strong structured output)
+ retain_provider = retain_llm_provider or config.retain_llm_provider or memory_llm_provider
+ retain_api_key = retain_llm_api_key or config.retain_llm_api_key or memory_llm_api_key
+ retain_model = retain_llm_model or config.retain_llm_model or memory_llm_model
+ retain_base_url = retain_llm_base_url or config.retain_llm_base_url or memory_llm_base_url
+ # Apply provider-specific base URL defaults for retain
+ if retain_base_url is None:
+ if retain_provider.lower() == "groq":
+ retain_base_url = "https://api.groq.com/openai/v1"
+ elif retain_provider.lower() == "ollama":
+ retain_base_url = "http://localhost:11434/v1"
+ else:
+ retain_base_url = ""
+
+ self._retain_llm_config = LLMConfig(
+ provider=retain_provider,
+ api_key=retain_api_key,
+ base_url=retain_base_url,
+ model=retain_model,
+ )
+
+ # Reflect LLM config - for think/observe operations (can use lighter models)
+ reflect_provider = reflect_llm_provider or config.reflect_llm_provider or memory_llm_provider
+ reflect_api_key = reflect_llm_api_key or config.reflect_llm_api_key or memory_llm_api_key
+ reflect_model = reflect_llm_model or config.reflect_llm_model or memory_llm_model
+ reflect_base_url = reflect_llm_base_url or config.reflect_llm_base_url or memory_llm_base_url
+ # Apply provider-specific base URL defaults for reflect
+ if reflect_base_url is None:
+ if reflect_provider.lower() == "groq":
+ reflect_base_url = "https://api.groq.com/openai/v1"
+ elif reflect_provider.lower() == "ollama":
+ reflect_base_url = "http://localhost:11434/v1"
+ else:
+ reflect_base_url = ""
+
+ self._reflect_llm_config = LLMConfig(
+ provider=reflect_provider,
+ api_key=reflect_api_key,
+ base_url=reflect_base_url,
+ model=reflect_model,
+ )
+
+ # Consolidation LLM config - for mental model consolidation (can use efficient models)
+ consolidation_provider = consolidation_llm_provider or config.consolidation_llm_provider or memory_llm_provider
+ consolidation_api_key = consolidation_llm_api_key or config.consolidation_llm_api_key or memory_llm_api_key
+ consolidation_model = consolidation_llm_model or config.consolidation_llm_model or memory_llm_model
+ consolidation_base_url = consolidation_llm_base_url or config.consolidation_llm_base_url or memory_llm_base_url
+ # Apply provider-specific base URL defaults for consolidation
+ if consolidation_base_url is None:
+ if consolidation_provider.lower() == "groq":
+ consolidation_base_url = "https://api.groq.com/openai/v1"
+ elif consolidation_provider.lower() == "ollama":
+ consolidation_base_url = "http://localhost:11434/v1"
+ else:
+ consolidation_base_url = ""
+
+ self._consolidation_llm_config = LLMConfig(
+ provider=consolidation_provider,
+ api_key=consolidation_api_key,
+ base_url=consolidation_base_url,
+ model=consolidation_model,
+ )
+
  # Initialize cross-encoder reranker (cached for performance)
  self._cross_encoder_reranker = CrossEncoderReranker(cross_encoder=cross_encoder)
 
  # Initialize task backend
- self._task_backend = task_backend or AsyncIOQueueBackend(batch_size=100, batch_interval=1.0)
+ # If no custom backend provided, use BrokerTaskBackend which stores tasks in PostgreSQL
+ # The pool_getter lambda will return the pool once it's initialized
+ self._task_backend = task_backend or BrokerTaskBackend(
+ pool_getter=lambda: self._pool,
+ schema_getter=get_current_schema,
+ )
 
  # Backpressure mechanism: limit concurrent searches to prevent overwhelming the database
- # Limit concurrent searches to prevent connection pool exhaustion
- # Each search can use 2-4 connections, so with 10 concurrent searches
- # we use ~20-40 connections max, staying well within pool limits
- self._search_semaphore = asyncio.Semaphore(10)
+ # Configurable via HINDSIGHT_API_RECALL_MAX_CONCURRENT (default: 50)
+ self._search_semaphore = asyncio.Semaphore(get_config().recall_max_concurrent)
 
  # Backpressure for put operations: limit concurrent puts to prevent database contention
  # Each put_batch holds a connection for the entire transaction, so we limit to 5
@@ -401,35 +499,19 @@ class MemoryEngine(MemoryEngineInterface):
  if request_context is None:
  raise AuthenticationError("RequestContext is required when tenant extension is configured")
 
+ # For internal/background operations (e.g., worker tasks), skip extension authentication
+ # if the schema has already been set by execute_task via the _schema field.
+ if request_context.internal:
+ current = _current_schema.get()
+ if current and current != "public":
+ return current
+
  # Let AuthenticationError propagate - HTTP layer will convert to 401
  tenant_context = await self._tenant_extension.authenticate(request_context)
 
  _current_schema.set(tenant_context.schema_name)
  return tenant_context.schema_name
 
- async def _handle_access_count_update(self, task_dict: dict[str, Any]):
- """
- Handler for access count update tasks.
-
- Args:
- task_dict: Dict with 'node_ids' key containing list of node IDs to update
-
- Raises:
- Exception: Any exception from database operations (propagates to execute_task for retry)
- """
- node_ids = task_dict.get("node_ids", [])
- if not node_ids:
- return
-
- pool = await self._get_pool()
- # Convert string UUIDs to UUID type for faster matching
- uuid_list = [uuid.UUID(nid) for nid in node_ids]
- async with acquire_with_retry(pool) as conn:
- await conn.execute(
- f"UPDATE {fq_table('memory_units')} SET access_count = access_count + 1 WHERE id = ANY($1::uuid[])",
- uuid_list,
- )
-
  async def _handle_batch_retain(self, task_dict: dict[str, Any]):
  """
  Handler for batch retain tasks.
@@ -450,14 +532,113 @@ class MemoryEngine(MemoryEngineInterface):
  f"[BATCH_RETAIN_TASK] Starting background batch retain for bank_id={bank_id}, {len(contents)} items"
  )
 
- # Use internal request context for background tasks
+ # Use internal request context for background tasks (skips tenant auth when schema is pre-set)
  from hindsight_api.models import RequestContext
 
- internal_context = RequestContext()
+ internal_context = RequestContext(internal=True)
  await self.retain_batch_async(bank_id=bank_id, contents=contents, request_context=internal_context)
 
  logger.info(f"[BATCH_RETAIN_TASK] Completed background batch retain for bank_id={bank_id}")
 
+ async def _handle_consolidation(self, task_dict: dict[str, Any]):
+ """
+ Handler for consolidation tasks.
+
+ Consolidates new memories into mental models for a bank.
+
+ Args:
+ task_dict: Dict with 'bank_id'
+
+ Raises:
+ ValueError: If bank_id is missing
+ Exception: Any exception from consolidation (propagates to execute_task for retry)
+ """
+ bank_id = task_dict.get("bank_id")
+ if not bank_id:
+ raise ValueError("bank_id is required for consolidation task")
+
+ from hindsight_api.models import RequestContext
+
+ from .consolidation import run_consolidation_job
+
+ internal_context = RequestContext(internal=True)
+ result = await run_consolidation_job(
+ memory_engine=self,
+ bank_id=bank_id,
+ request_context=internal_context,
+ )
+
+ logger.info(f"[CONSOLIDATION] bank={bank_id} completed: {result.get('memories_processed', 0)} processed")
+
+ async def _handle_refresh_mental_model(self, task_dict: dict[str, Any]):
+ """
+ Handler for refresh_mental_model tasks.
+
+ Re-runs the source query through reflect and updates the mental model content.
+
+ Args:
+ task_dict: Dict with 'bank_id', 'mental_model_id', 'operation_id'
+
+ Raises:
+ ValueError: If required fields are missing
+ Exception: Any exception from reflect/update (propagates to execute_task for retry)
+ """
+ bank_id = task_dict.get("bank_id")
+ mental_model_id = task_dict.get("mental_model_id")
+
+ if not bank_id or not mental_model_id:
+ raise ValueError("bank_id and mental_model_id are required for refresh_mental_model task")
+
+ logger.info(f"[REFRESH_MENTAL_MODEL_TASK] Starting for bank_id={bank_id}, mental_model_id={mental_model_id}")
+
+ from hindsight_api.models import RequestContext
+
+ internal_context = RequestContext(internal=True)
+
+ # Get the current mental model to get source_query
+ mental_model = await self.get_mental_model(bank_id, mental_model_id, request_context=internal_context)
+ if not mental_model:
+ raise ValueError(f"Mental model {mental_model_id} not found in bank {bank_id}")
+
+ source_query = mental_model["source_query"]
+
+ # Run reflect to generate new content, excluding the mental model being refreshed
+ reflect_result = await self.reflect_async(
+ bank_id=bank_id,
+ query=source_query,
+ request_context=internal_context,
+ exclude_mental_model_ids=[mental_model_id],
+ )
+
+ generated_content = reflect_result.text or "No content generated"
+
+ # Build reflect_response payload to store
+ reflect_response = {
+ "text": reflect_result.text,
+ "based_on": {
+ fact_type: [
+ {
+ "id": str(fact.id),
+ "text": fact.text,
+ "type": fact_type,
+ }
+ for fact in facts
+ ]
+ for fact_type, facts in reflect_result.based_on.items()
+ },
+ }
+
+ # Update the mental model with the generated content and reflect_response
+ await self.update_mental_model(
+ bank_id=bank_id,
+ mental_model_id=mental_model_id,
+ content=generated_content,
+ reflect_response=reflect_response,
+ request_context=internal_context,
+ )
+
+ logger.info(f"[REFRESH_MENTAL_MODEL_TASK] Completed for bank_id={bank_id}, mental_model_id={mental_model_id}")
+
  async def execute_task(self, task_dict: dict[str, Any]):
  """
  Execute a task by routing it to the appropriate handler.
@@ -467,13 +648,18 @@ class MemoryEngine(MemoryEngineInterface):
 
  Args:
  task_dict: Task dictionary with 'type' key and other payload data
- Example: {'type': 'access_count_update', 'node_ids': [...]}
+ Example: {'type': 'batch_retain', 'bank_id': '...', 'contents': [...]}
  """
  task_type = task_dict.get("type")
  operation_id = task_dict.get("operation_id")
  retry_count = task_dict.get("retry_count", 0)
  max_retries = 3
 
+ # Set schema context for multi-tenant task execution
+ schema = task_dict.pop("_schema", None)
+ if schema:
+ _current_schema.set(schema)
+
  # Check if operation was cancelled (only for tasks with operation_id)
  if operation_id:
  try:
@@ -492,16 +678,12 @@ class MemoryEngine(MemoryEngineInterface):
  # Continue with processing if we can't check status
 
  try:
- if task_type == "access_count_update":
- await self._handle_access_count_update(task_dict)
- elif task_type == "reinforce_opinion":
- await self._handle_reinforce_opinion(task_dict)
- elif task_type == "form_opinion":
- await self._handle_form_opinion(task_dict)
- elif task_type == "batch_retain":
+ if task_type == "batch_retain":
  await self._handle_batch_retain(task_dict)
- elif task_type == "regenerate_observations":
- await self._handle_regenerate_observations(task_dict)
+ elif task_type == "consolidation":
+ await self._handle_consolidation(task_dict)
+ elif task_type == "refresh_mental_model":
+ await self._handle_refresh_mental_model(task_dict)
  else:
  logger.error(f"Unknown task type: {task_type}")
  # Don't retry unknown task types
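For orientation, the payload shapes routed above, inferred from the keys each handler reads (all values are placeholders; '_schema' is popped before dispatch to set the tenant schema):

    # Shapes inferred from the handlers in this file; values are illustrative.
    batch_retain_task = {
        "type": "batch_retain",
        "bank_id": "<bank-id>",
        "contents": [{"content": "raw text to retain"}],
        "_schema": "<tenant-schema>",   # optional; sets schema context for the task
    }
    consolidation_task = {"type": "consolidation", "bank_id": "<bank-id>"}
    refresh_task = {
        "type": "refresh_mental_model",
        "bank_id": "<bank-id>",
        "mental_model_id": "<mental-model-id>",
        "operation_id": "<async-operation-uuid>",  # optional; enables status tracking and retries
    }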
@@ -509,9 +691,9 @@ class MemoryEngine(MemoryEngineInterface):
  await self._delete_operation_record(operation_id)
  return
 
- # Task succeeded - delete operation record
+ # Task succeeded - mark operation as completed
  if operation_id:
- await self._delete_operation_record(operation_id)
+ await self._mark_operation_completed(operation_id)
 
  except Exception as e:
  # Task failed - check if we should retry
@@ -557,7 +739,7 @@ class MemoryEngine(MemoryEngineInterface):
  await conn.execute(
  f"""
  UPDATE {fq_table("async_operations")}
- SET status = 'failed', error_message = $2
+ SET status = 'failed', error_message = $2, updated_at = NOW()
  WHERE operation_id = $1
  """,
  uuid.UUID(operation_id),
@@ -567,6 +749,23 @@ class MemoryEngine(MemoryEngineInterface):
  except Exception as e:
  logger.error(f"Failed to mark operation as failed {operation_id}: {e}")
 
+ async def _mark_operation_completed(self, operation_id: str):
+ """Helper to mark an operation as completed in the database."""
+ try:
+ pool = await self._get_pool()
+ async with acquire_with_retry(pool) as conn:
+ await conn.execute(
+ f"""
+ UPDATE {fq_table("async_operations")}
+ SET status = 'completed', updated_at = NOW(), completed_at = NOW()
+ WHERE operation_id = $1
+ """,
+ uuid.UUID(operation_id),
+ )
+ logger.info(f"Marked async operation as completed: {operation_id}")
+ except Exception as e:
+ logger.error(f"Failed to mark operation as completed {operation_id}: {e}")
+
  async def initialize(self):
  """Initialize the connection pool, models, and background workers.
 
@@ -618,9 +817,44 @@ class MemoryEngine(MemoryEngineInterface):
  await loop.run_in_executor(None, self.query_analyzer.load)
 
  async def verify_llm():
- """Verify LLM connection is working."""
+ """Verify LLM connections are working for all unique configs."""
  if not self._skip_llm_verification:
+ # Verify default config
  await self._llm_config.verify_connection()
+ # Verify retain config if different from default
+ retain_is_different = (
+ self._retain_llm_config.provider != self._llm_config.provider
+ or self._retain_llm_config.model != self._llm_config.model
+ )
+ if retain_is_different:
+ await self._retain_llm_config.verify_connection()
+ # Verify reflect config if different from default and retain
+ reflect_is_different = (
+ self._reflect_llm_config.provider != self._llm_config.provider
+ or self._reflect_llm_config.model != self._llm_config.model
+ ) and (
+ self._reflect_llm_config.provider != self._retain_llm_config.provider
+ or self._reflect_llm_config.model != self._retain_llm_config.model
+ )
+ if reflect_is_different:
+ await self._reflect_llm_config.verify_connection()
+ # Verify consolidation config if different from all others
+ consolidation_is_different = (
+ (
+ self._consolidation_llm_config.provider != self._llm_config.provider
+ or self._consolidation_llm_config.model != self._llm_config.model
+ )
+ and (
+ self._consolidation_llm_config.provider != self._retain_llm_config.provider
+ or self._consolidation_llm_config.model != self._retain_llm_config.model
+ )
+ and (
+ self._consolidation_llm_config.provider != self._reflect_llm_config.provider
+ or self._consolidation_llm_config.model != self._reflect_llm_config.model
+ )
+ )
+ if consolidation_is_different:
+ await self._consolidation_llm_config.verify_connection()
 
  # Build list of initialization tasks
  init_tasks = [
@@ -642,13 +876,17 @@ class MemoryEngine(MemoryEngineInterface):
 
  # Run database migrations if enabled
  if self._run_migrations:
- from ..migrations import run_migrations
+ from ..migrations import ensure_embedding_dimension, run_migrations
 
  if not self.db_url:
  raise ValueError("Database URL is required for migrations")
  logger.info("Running database migrations...")
  run_migrations(self.db_url)
 
+ # Ensure embedding column dimension matches the model's dimension
+ # This is done after migrations and after embeddings.initialize()
+ ensure_embedding_dimension(self.db_url, self.embeddings.dimension)
+
  logger.info(f"Connecting to PostgreSQL at {self.db_url}")
 
  # Create connection pool
@@ -658,9 +896,9 @@ class MemoryEngine(MemoryEngineInterface):
  self.db_url,
  min_size=self._pool_min_size,
  max_size=self._pool_max_size,
- command_timeout=60,
+ command_timeout=self._db_command_timeout,
  statement_cache_size=0, # Disable prepared statement cache
- timeout=30, # Connection acquisition timeout (seconds)
+ timeout=self._db_acquire_timeout, # Connection acquisition timeout (seconds)
  )
 
  # Initialize entity resolver with pool
@@ -743,8 +981,7 @@ class MemoryEngine(MemoryEngineInterface):
  """
  Wait for all pending background tasks to complete.
 
- This is useful in tests to ensure background tasks (like opinion reinforcement)
- complete before making assertions.
+ This is useful in tests to ensure background tasks complete before making assertions.
  """
  if hasattr(self._task_backend, "wait_for_pending_tasks"):
  await self._task_backend.wait_for_pending_tasks()
@@ -967,7 +1204,9 @@ class MemoryEngine(MemoryEngineInterface):
  document_id: str | None = None,
  fact_type_override: str | None = None,
  confidence_score: float | None = None,
- ) -> list[list[str]]:
+ document_tags: list[str] | None = None,
+ return_usage: bool = False,
+ ):
  """
  Store multiple content items as memory units in ONE batch operation.
 
@@ -988,9 +1227,11 @@ class MemoryEngine(MemoryEngineInterface):
  Applies the same document_id to ALL content items that don't specify their own.
  fact_type_override: Override fact type for all facts ('world', 'experience', 'opinion')
  confidence_score: Confidence score for opinions (0.0 to 1.0)
+ return_usage: If True, returns tuple of (unit_ids, TokenUsage). Default False for backward compatibility.
 
  Returns:
- List of lists of unit IDs (one list per content item)
+ If return_usage=False: List of lists of unit IDs (one list per content item)
+ If return_usage=True: Tuple of (unit_ids, TokenUsage)
 
  Example (new style - per-content document_id):
  unit_ids = await memory.retain_batch_async(
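A short usage sketch for the new return_usage flag and document_tags parameter (bank id, content, and tags are placeholders; request_context is passed as in the background-task call shown earlier):

    # Default behavior is unchanged; with return_usage=True the call returns (unit_ids, TokenUsage).
    unit_ids, usage = await memory.retain_batch_async(
        bank_id="<bank-id>",
        contents=[{"content": "Alice moved to Berlin in 2023."}],
        document_tags=["<team-tag>"],      # applied to every item in this batch
        return_usage=True,
        request_context=request_context,
    )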
@@ -1017,6 +1258,8 @@ class MemoryEngine(MemoryEngineInterface):
  start_time = time.time()
 
  if not contents:
+ if return_usage:
+ return [], TokenUsage()
  return []
 
  # Authenticate tenant and set schema in context (for fq_table())
@@ -1046,6 +1289,7 @@ class MemoryEngine(MemoryEngineInterface):
  # Auto-chunk large batches by character count to avoid timeouts and memory issues
  # Calculate total character count
  total_chars = sum(len(item.get("content", "")) for item in contents)
+ total_usage = TokenUsage()
 
  CHARS_PER_BATCH = 600_000
@@ -1078,7 +1322,7 @@ class MemoryEngine(MemoryEngineInterface):
 
  logger.info(f"Split into {len(sub_batches)} sub-batches: {[len(b) for b in sub_batches]} items each")
 
- # Process each sub-batch using internal method (skip chunking check)
+ # Process each sub-batch
  all_results = []
  for i, sub_batch in enumerate(sub_batches, 1):
  sub_batch_chars = sum(len(item.get("content", "")) for item in sub_batch)
@@ -1086,15 +1330,17 @@ class MemoryEngine(MemoryEngineInterface):
  f"Processing sub-batch {i}/{len(sub_batches)}: {len(sub_batch)} items, {sub_batch_chars:,} chars"
  )
 
- sub_results = await self._retain_batch_async_internal(
+ sub_results, sub_usage = await self._retain_batch_async_internal(
  bank_id=bank_id,
  contents=sub_batch,
  document_id=document_id,
  is_first_batch=i == 1, # Only upsert on first batch
  fact_type_override=fact_type_override,
  confidence_score=confidence_score,
+ document_tags=document_tags,
  )
  all_results.extend(sub_results)
+ total_usage = total_usage + sub_usage
 
  total_time = time.time() - start_time
  logger.info(
@@ -1103,13 +1349,14 @@ class MemoryEngine(MemoryEngineInterface):
  result = all_results
  else:
  # Small batch - use internal method directly
- result = await self._retain_batch_async_internal(
+ result, total_usage = await self._retain_batch_async_internal(
  bank_id=bank_id,
  contents=contents,
  document_id=document_id,
  is_first_batch=True,
  fact_type_override=fact_type_override,
  confidence_score=confidence_score,
+ document_tags=document_tags,
  )
 
  # Call post-operation hook if validator is configured
@@ -1132,6 +1379,19 @@ class MemoryEngine(MemoryEngineInterface):
  except Exception as e:
  logger.warning(f"Post-retain hook error (non-fatal): {e}")
 
+ # Trigger consolidation as a tracked async operation if enabled
+ from ..config import get_config
+
+ config = get_config()
+ if config.enable_observations:
+ try:
+ await self.submit_async_consolidation(bank_id=bank_id, request_context=request_context)
+ except Exception as e:
+ # Log but don't fail the retain - consolidation is non-critical
+ logger.warning(f"Failed to submit consolidation task for bank {bank_id}: {e}")
+
+ if return_usage:
+ return result, total_usage
  return result
 
  async def _retain_batch_async_internal(
@@ -1142,7 +1402,8 @@ class MemoryEngine(MemoryEngineInterface):
  is_first_batch: bool = True,
  fact_type_override: str | None = None,
  confidence_score: float | None = None,
- ) -> list[list[str]]:
+ document_tags: list[str] | None = None,
+ ) -> tuple[list[list[str]], "TokenUsage"]:
  """
  Internal method for batch processing without chunking logic.
 
@@ -1158,6 +1419,10 @@ class MemoryEngine(MemoryEngineInterface):
  is_first_batch: Whether this is the first batch (for chunked operations, only delete on first batch)
  fact_type_override: Override fact type for all facts
  confidence_score: Confidence score for opinions
+ document_tags: Tags applied to all items in this batch
+
+ Returns:
+ Tuple of (unit ID lists, token usage for fact extraction)
  """
  # Backpressure: limit concurrent retains to prevent database contention
  async with self._put_semaphore:
@@ -1168,9 +1433,8 @@ class MemoryEngine(MemoryEngineInterface):
  return await orchestrator.retain_batch(
  pool=pool,
  embeddings_model=self.embeddings,
- llm_config=self._llm_config,
+ llm_config=self._retain_llm_config,
  entity_resolver=self.entity_resolver,
- task_backend=self._task_backend,
  format_date_fn=self._format_readable_date,
  duplicate_checker_fn=self._find_duplicate_facts_batch,
  bank_id=bank_id,
@@ -1179,6 +1443,7 @@ class MemoryEngine(MemoryEngineInterface):
  is_first_batch=is_first_batch,
  fact_type_override=fact_type_override,
  confidence_score=confidence_score,
+ document_tags=document_tags,
  )
 
  def recall(
@@ -1237,6 +1502,10 @@ class MemoryEngine(MemoryEngineInterface):
  include_chunks: bool = False,
  max_chunk_tokens: int = 8192,
  request_context: "RequestContext",
+ tags: list[str] | None = None,
+ tags_match: TagsMatch = "any",
+ _connection_budget: int | None = None,
+ _quiet: bool = False,
  ) -> RecallResultModel:
  """
  Recall memories using N*4-way parallel retrieval (N fact types × 4 retrieval methods).
@@ -1262,6 +1531,8 @@ class MemoryEngine(MemoryEngineInterface):
  max_entity_tokens: Maximum tokens for entity observations (default 500)
  include_chunks: Whether to include raw chunks in the response
  max_chunk_tokens: Maximum tokens for chunks (default 8192)
+ tags: Optional list of tags for visibility filtering (OR matching - returns
+ memories that have at least one matching tag)
 
  Returns:
  RecallResultModel containing:
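A sketch of tag-filtered recall using the parameters added above. The method name and the surrounding arguments are assumptions (they are not visible in this hunk); only tags, tags_match, and the OR-matching behavior come from the docstring:

    # Hypothetical call shape: returns only memories carrying at least one of the given tags.
    result = await memory.recall_async(          # assumed async entry point
        bank_id="<bank-id>",
        query="What does the team know about billing?",
        tags=["team-billing", "public"],         # placeholder tags
        tags_match="any",                        # default: OR matching
        request_context=request_context,
    )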
@@ -1285,6 +1556,12 @@ class MemoryEngine(MemoryEngineInterface):
  f"Must be one of: {', '.join(sorted(VALID_RECALL_FACT_TYPES))}"
  )
 
+ # Filter out 'opinion' - opinions are no longer returned from recall
+ fact_type = [ft for ft in fact_type if ft != "opinion"]
+ if not fact_type:
+ # All requested types were opinions - return empty result
+ return RecallResultModel(results=[], entities={}, chunks={})
+
  # Validate operation if validator is configured
  if self._operation_validator:
  from hindsight_api.extensions import RecallContext
@@ -1310,10 +1587,17 @@ class MemoryEngine(MemoryEngineInterface):
  effective_budget = budget if budget is not None else Budget.MID
  thinking_budget = budget_mapping[effective_budget]
 
+ # Log recall start with tags if present (skip if quiet mode for internal operations)
+ if not _quiet:
+ tags_info = f", tags={tags} ({tags_match})" if tags else ""
+ logger.info(f"[RECALL {bank_id[:8]}] Starting recall for query: {query[:50]}...{tags_info}")
+
  # Backpressure: limit concurrent recalls to prevent overwhelming the database
  result = None
  error_msg = None
+ semaphore_wait_start = time.time()
  async with self._search_semaphore:
+ semaphore_wait = time.time() - semaphore_wait_start
  # Retry loop for connection errors
  max_retries = 3
  for attempt in range(max_retries + 1):
@@ -1331,6 +1615,11 @@ class MemoryEngine(MemoryEngineInterface):
  include_chunks,
  max_chunk_tokens,
  request_context,
+ semaphore_wait=semaphore_wait,
+ tags=tags,
+ tags_match=tags_match,
+ connection_budget=_connection_budget,
+ quiet=_quiet,
  )
  break # Success - exit retry loop
  except Exception as e:
@@ -1448,6 +1737,11 @@ class MemoryEngine(MemoryEngineInterface):
  include_chunks: bool = False,
  max_chunk_tokens: int = 8192,
  request_context: "RequestContext" = None,
+ semaphore_wait: float = 0.0,
+ tags: list[str] | None = None,
+ tags_match: TagsMatch = "any",
+ connection_budget: int | None = None,
+ quiet: bool = False,
  ) -> RecallResultModel:
  """
  Search implementation with modular retrieval and reranking.
@@ -1477,7 +1771,9 @@ class MemoryEngine(MemoryEngineInterface):
  # Initialize tracer if requested
  from .search.tracer import SearchTracer
 
- tracer = SearchTracer(query, thinking_budget, max_tokens) if enable_trace else None
+ tracer = (
+ SearchTracer(query, thinking_budget, max_tokens, tags=tags, tags_match=tags_match) if enable_trace else None
+ )
  if tracer:
  tracer.start()
 
@@ -1487,8 +1783,9 @@ class MemoryEngine(MemoryEngineInterface):
  # Buffer logs for clean output in concurrent scenarios
  recall_id = f"{bank_id[:8]}-{int(time.time() * 1000) % 100000}"
  log_buffer = []
+ tags_info = f", tags={tags}, tags_match={tags_match}" if tags else ""
  log_buffer.append(
- f"[RECALL {recall_id}] Query: '{query[:50]}...' (budget={thinking_budget}, max_tokens={max_tokens})"
+ f"[RECALL {recall_id}] Query: '{query[:50]}...' (budget={thinking_budget}, max_tokens={max_tokens}{tags_info})"
  )
 
  try:
@@ -1502,37 +1799,70 @@ class MemoryEngine(MemoryEngineInterface):
  tracer.record_query_embedding(query_embedding)
  tracer.add_phase_metric("generate_query_embedding", step_duration)
 
- # Step 2: N*4-Way Parallel Retrieval (N fact types × 4 retrieval methods)
+ # Step 2: Optimized parallel retrieval using batched queries
+ # - Semantic + BM25 combined in 1 CTE query for ALL fact types
+ # - Graph runs per fact type (complex traversal)
+ # - Temporal runs per fact type (if constraint detected)
  step_start = time.time()
  query_embedding_str = str(query_embedding)
 
- from .search.retrieval import retrieve_parallel
+ from .search.retrieval import (
+ get_default_graph_retriever,
+ retrieve_all_fact_types_parallel,
+ )
 
  # Track each retrieval start time
  retrieval_start = time.time()
 
- # Run retrieval for each fact type in parallel
- retrieval_tasks = [
- retrieve_parallel(
- pool, query, query_embedding_str, bank_id, ft, thinking_budget, question_date, self.query_analyzer
+ # Run optimized retrieval with connection budget
+ config = get_config()
+ effective_connection_budget = (
+ connection_budget if connection_budget is not None else config.recall_connection_budget
+ )
+ async with budgeted_operation(
+ max_connections=effective_connection_budget,
+ operation_id=f"recall-{recall_id}",
+ ) as op:
+ budgeted_pool = op.wrap_pool(pool)
+ parallel_start = time.time()
+ multi_result = await retrieve_all_fact_types_parallel(
+ budgeted_pool,
+ query,
+ query_embedding_str,
+ bank_id,
+ fact_type, # Pass all fact types at once
+ thinking_budget,
+ question_date,
+ self.query_analyzer,
+ tags=tags,
+ tags_match=tags_match,
  )
- for ft in fact_type
- ]
- all_retrievals = await asyncio.gather(*retrieval_tasks)
+ parallel_duration = time.time() - parallel_start
 
  # Combine all results from all fact types and aggregate timings
  semantic_results = []
  bm25_results = []
  graph_results = []
  temporal_results = []
- aggregated_timings = {"semantic": 0.0, "bm25": 0.0, "graph": 0.0, "temporal": 0.0}
+ aggregated_timings = {
+ "semantic": 0.0,
+ "bm25": 0.0,
+ "graph": 0.0,
+ "temporal": 0.0,
+ "temporal_extraction": 0.0,
+ }
+ all_mpfp_timings = []
 
  detected_temporal_constraint = None
- for idx, retrieval_result in enumerate(all_retrievals):
+ max_conn_wait = multi_result.max_conn_wait
+ for ft in fact_type:
+ retrieval_result = multi_result.results_by_fact_type.get(ft)
+ if not retrieval_result:
+ continue
+
  # Log fact types in this retrieval batch
- ft_name = fact_type[idx] if idx < len(fact_type) else "unknown"
  logger.debug(
- f"[RECALL {recall_id}] Fact type '{ft_name}': semantic={len(retrieval_result.semantic)}, bm25={len(retrieval_result.bm25)}, graph={len(retrieval_result.graph)}, temporal={len(retrieval_result.temporal) if retrieval_result.temporal else 0}"
+ f"[RECALL {recall_id}] Fact type '{ft}': semantic={len(retrieval_result.semantic)}, bm25={len(retrieval_result.bm25)}, graph={len(retrieval_result.graph)}, temporal={len(retrieval_result.temporal) if retrieval_result.temporal else 0}"
  )
 
  semantic_results.extend(retrieval_result.semantic)
@@ -1570,6 +1900,7 @@ class MemoryEngine(MemoryEngineInterface):
  f"semantic={len(semantic_results)}({aggregated_timings['semantic']:.3f}s)",
  f"bm25={len(bm25_results)}({aggregated_timings['bm25']:.3f}s)",
  f"graph={len(graph_results)}({aggregated_timings['graph']:.3f}s)",
+ f"temporal_extraction={aggregated_timings['temporal_extraction']:.3f}s",
  ]
  temporal_info = ""
  if detected_temporal_constraint:
@@ -1578,9 +1909,41 @@ class MemoryEngine(MemoryEngineInterface):
  timing_parts.append(f"temporal={temporal_count}({aggregated_timings['temporal']:.3f}s)")
  temporal_info = f" | temporal_range={start_dt.strftime('%Y-%m-%d')} to {end_dt.strftime('%Y-%m-%d')}"
  log_buffer.append(
- f" [2] {total_retrievals}-way retrieval ({len(fact_type)} fact_types): {', '.join(timing_parts)} in {step_duration:.3f}s{temporal_info}"
+ f" [2] Parallel retrieval ({len(fact_type)} fact_types): {', '.join(timing_parts)} in {parallel_duration:.3f}s{temporal_info}"
  )
 
+ # Log graph retriever timing breakdown if available
+ if all_mpfp_timings:
+ retriever_name = get_default_graph_retriever().name.upper()
+ mpfp_total = all_mpfp_timings[0] # Take first fact type's timing as representative
+ mpfp_parts = [
+ f"db_queries={mpfp_total.db_queries}",
+ f"edge_load={mpfp_total.edge_load_time:.3f}s",
+ f"edges={mpfp_total.edge_count}",
+ f"patterns={mpfp_total.pattern_count}",
+ ]
+ if mpfp_total.seeds_time > 0.01:
+ mpfp_parts.append(f"seeds={mpfp_total.seeds_time:.3f}s")
+ if mpfp_total.fusion > 0.001:
+ mpfp_parts.append(f"fusion={mpfp_total.fusion:.3f}s")
+ if mpfp_total.fetch > 0.001:
+ mpfp_parts.append(f"fetch={mpfp_total.fetch:.3f}s")
+ log_buffer.append(f" [{retriever_name}] {', '.join(mpfp_parts)}")
+ # Log detailed hop timing for debugging slow queries
+ if mpfp_total.hop_details:
+ for hd in mpfp_total.hop_details:
+ log_buffer.append(
+ f" hop{hd['hop']}: exec={hd.get('exec_time', 0) * 1000:.0f}ms, "
+ f"uncached={hd.get('uncached_after_filter', 0)}, "
+ f"load={hd.get('load_time', 0) * 1000:.0f}ms, "
+ f"edges={hd.get('edges_loaded', 0)}"
+ )
+
+ # Record temporal constraint in tracer if detected
+ if tracer and detected_temporal_constraint:
+ start_dt, end_dt = detected_temporal_constraint
+ tracer.record_temporal_constraint(start_dt, end_dt)
+
  # Record retrieval results for tracer - per fact type
  if tracer:
  # Convert RetrievalResult to old tuple format for tracer
@@ -1588,8 +1951,10 @@ class MemoryEngine(MemoryEngineInterface):
  return [(r.id, r.__dict__) for r in results]
 
  # Add retrieval results per fact type (to show parallel execution in UI)
- for idx, rr in enumerate(all_retrievals):
- ft_name = fact_type[idx] if idx < len(fact_type) else "unknown"
+ for ft_name in fact_type:
+ rr = multi_result.results_by_fact_type.get(ft_name)
+ if not rr:
+ continue
 
  # Add semantic retrieval results for this fact type
  tracer.add_retrieval_results(
@@ -1621,14 +1986,22 @@ class MemoryEngine(MemoryEngineInterface):
  fact_type=ft_name,
  )
 
- # Add temporal retrieval results for this fact type (even if empty, to show it ran)
- if rr.temporal is not None:
+ # Add temporal retrieval results for this fact type
+ # Show temporal even with 0 results if constraint was detected
+ if rr.temporal is not None or rr.temporal_constraint is not None:
+ temporal_metadata = {"budget": thinking_budget}
+ if rr.temporal_constraint:
+ start_dt, end_dt = rr.temporal_constraint
+ temporal_metadata["constraint"] = {
+ "start": start_dt.isoformat() if start_dt else None,
+ "end": end_dt.isoformat() if end_dt else None,
+ }
  tracer.add_retrieval_results(
  method_name="temporal",
- results=to_tuple_format(rr.temporal),
+ results=to_tuple_format(rr.temporal or []),
  duration_seconds=rr.timings.get("temporal", 0.0),
  score_field="temporal_score",
- metadata={"budget": thinking_budget},
+ metadata=temporal_metadata,
  fact_type=ft_name,
  )
@@ -1678,11 +2051,24 @@ class MemoryEngine(MemoryEngineInterface):
  # Ensure reranker is initialized (for lazy initialization mode)
  await reranker_instance.ensure_initialized()
 
+ # Pre-filter candidates to reduce reranking cost (RRF already provides good ranking)
+ # This is especially important for remote rerankers with network latency
+ reranker_max_candidates = get_config().reranker_max_candidates
+ pre_filtered_count = 0
+ if len(merged_candidates) > reranker_max_candidates:
+ # Sort by RRF score and take top candidates
+ merged_candidates.sort(key=lambda mc: mc.rrf_score, reverse=True)
+ pre_filtered_count = len(merged_candidates) - reranker_max_candidates
+ merged_candidates = merged_candidates[:reranker_max_candidates]
+
  # Rerank using cross-encoder
- scored_results = reranker_instance.rerank(query, merged_candidates)
+ scored_results = await reranker_instance.rerank(query, merged_candidates)
 
  step_duration = time.time() - step_start
- log_buffer.append(f" [4] Reranking: {len(scored_results)} candidates scored in {step_duration:.3f}s")
+ pre_filter_note = f" (pre-filtered {pre_filtered_count})" if pre_filtered_count > 0 else ""
+ log_buffer.append(
+ f" [4] Reranking: {len(scored_results)} candidates scored in {step_duration:.3f}s{pre_filter_note}"
+ )
 
  # Step 4.5: Combine cross-encoder score with retrieval signals
  # This preserves retrieval work (RRF, temporal, recency) instead of pure cross-encoder ranking
@@ -1786,7 +2172,6 @@ class MemoryEngine(MemoryEngineInterface):
  text=sr.retrieval.text,
  context=sr.retrieval.context or "",
  event_date=sr.retrieval.occurred_start,
- access_count=sr.retrieval.access_count,
  is_entry_point=(sr.id in [ep.node_id for ep in tracer.entry_points]),
  parent_node_id=None, # In parallel retrieval, there's no clear parent
  link_type=None,
@@ -1798,12 +2183,6 @@ class MemoryEngine(MemoryEngineInterface):
  final_weight=sr.weight,
  )
 
- # Step 8: Queue access count updates for visited nodes
- visited_ids = list(set([sr.id for sr in scored_results[:50]])) # Top 50
- if visited_ids:
- await self._task_backend.submit_task({"type": "access_count_update", "node_ids": visited_ids})
- log_buffer.append(f" [7] Queued access count updates for {len(visited_ids)} nodes")
-
  # Log fact_type distribution in results
  fact_type_counts = {}
  for sr in top_scored:
@@ -1878,6 +2257,7 @@ class MemoryEngine(MemoryEngineInterface):
  mentioned_at=result_dict.get("mentioned_at"),
  document_id=result_dict.get("document_id"),
  chunk_id=result_dict.get("chunk_id"),
+ tags=result_dict.get("tags"),
  )
  )
 
@@ -1902,35 +2282,15 @@ class MemoryEngine(MemoryEngineInterface):
  entities_ordered.append((entity_id, entity_name))
  seen_entity_ids.add(entity_id)
 
- # Fetch observations for each entity (respect token budget, in order)
+ # Return entities with empty observations (summaries now live in mental models)
  entities_dict = {}
- encoding = _get_tiktoken_encoding()
-
  for entity_id, entity_name in entities_ordered:
- if total_entity_tokens >= max_entity_tokens:
- break
-
- observations = await self.get_entity_observations(
- bank_id, entity_id, limit=5, request_context=request_context
+ entities_dict[entity_name] = EntityState(
+ entity_id=entity_id,
+ canonical_name=entity_name,
+ observations=[], # Mental models provide this now
  )
 
- # Calculate tokens for this entity's observations
- entity_tokens = 0
- included_observations = []
- for obs in observations:
- obs_tokens = len(encoding.encode(obs.text))
- if total_entity_tokens + entity_tokens + obs_tokens <= max_entity_tokens:
- included_observations.append(obs)
- entity_tokens += obs_tokens
- else:
- break
-
- if included_observations:
- entities_dict[entity_name] = EntityState(
- entity_id=entity_id, canonical_name=entity_name, observations=included_observations
- )
- total_entity_tokens += entity_tokens
-
  # Fetch chunks if requested
  chunks_dict = None
  if include_chunks and top_scored:
@@ -2002,16 +2362,25 @@ class MemoryEngine(MemoryEngineInterface):
2002
2362
  total_time = time.time() - recall_start
2003
2363
  num_chunks = len(chunks_dict) if chunks_dict else 0
2004
2364
  num_entities = len(entities_dict) if entities_dict else 0
2365
+ # Include wait times in log if significant
2366
+ wait_parts = []
2367
+ if semaphore_wait > 0.01:
2368
+ wait_parts.append(f"sem={semaphore_wait:.3f}s")
2369
+ if max_conn_wait > 0.01:
2370
+ wait_parts.append(f"conn={max_conn_wait:.3f}s")
2371
+ wait_info = f" | waits: {', '.join(wait_parts)}" if wait_parts else ""
2005
2372
  log_buffer.append(
2006
- f"[RECALL {recall_id}] Complete: {len(top_scored)} facts ({total_tokens} tok), {num_chunks} chunks ({total_chunk_tokens} tok), {num_entities} entities ({total_entity_tokens} tok) | {fact_type_summary} | {total_time:.3f}s"
2373
+ f"[RECALL {recall_id}] Complete: {len(top_scored)} facts ({total_tokens} tok), {num_chunks} chunks ({total_chunk_tokens} tok), {num_entities} entities ({total_entity_tokens} tok) | {fact_type_summary} | {total_time:.3f}s{wait_info}"
2007
2374
  )
2008
- logger.info("\n" + "\n".join(log_buffer))
2375
+ if not quiet:
2376
+ logger.info("\n" + "\n".join(log_buffer))
2009
2377
 
2010
2378
  return RecallResultModel(results=memory_facts, trace=trace_dict, entities=entities_dict, chunks=chunks_dict)
2011
2379
 
2012
2380
  except Exception as e:
2013
2381
  log_buffer.append(f"[RECALL {recall_id}] ERROR after {time.time() - recall_start:.3f}s: {str(e)}")
2014
- logger.error("\n" + "\n".join(log_buffer))
2382
+ if not quiet:
2383
+ logger.error("\n" + "\n".join(log_buffer))
2015
2384
  raise Exception(f"Failed to search memories: {str(e)}")
2016
2385
 
2017
2386
  def _filter_by_token_budget(
@@ -2073,11 +2442,11 @@ class MemoryEngine(MemoryEngineInterface):
2073
2442
  doc = await conn.fetchrow(
2074
2443
  f"""
2075
2444
  SELECT d.id, d.bank_id, d.original_text, d.content_hash,
2076
- d.created_at, d.updated_at, COUNT(mu.id) as unit_count
2445
+ d.created_at, d.updated_at, d.tags, COUNT(mu.id) as unit_count
2077
2446
  FROM {fq_table("documents")} d
2078
2447
  LEFT JOIN {fq_table("memory_units")} mu ON mu.document_id = d.id
2079
2448
  WHERE d.id = $1 AND d.bank_id = $2
2080
- GROUP BY d.id, d.bank_id, d.original_text, d.content_hash, d.created_at, d.updated_at
2449
+ GROUP BY d.id, d.bank_id, d.original_text, d.content_hash, d.created_at, d.updated_at, d.tags
2081
2450
  """,
2082
2451
  document_id,
2083
2452
  bank_id,
@@ -2094,6 +2463,7 @@ class MemoryEngine(MemoryEngineInterface):
2094
2463
  "memory_unit_count": doc["unit_count"],
2095
2464
  "created_at": doc["created_at"].isoformat() if doc["created_at"] else None,
2096
2465
  "updated_at": doc["updated_at"].isoformat() if doc["updated_at"] else None,
2466
+ "tags": list(doc["tags"]) if doc["tags"] else [],
2097
2467
  }
2098
2468
 
2099
2469
  async def delete_document(
@@ -2118,10 +2488,12 @@ class MemoryEngine(MemoryEngineInterface):
2118
2488
  pool = await self._get_pool()
2119
2489
  async with acquire_with_retry(pool) as conn:
2120
2490
  async with conn.transaction():
2121
- # Count units before deletion
2122
- units_count = await conn.fetchval(
2123
- f"SELECT COUNT(*) FROM {fq_table('memory_units')} WHERE document_id = $1", document_id
2491
+ # Get memory unit IDs before deletion (for mental model invalidation)
2492
+ unit_rows = await conn.fetch(
2493
+ f"SELECT id FROM {fq_table('memory_units')} WHERE document_id = $1", document_id
2124
2494
  )
2495
+ unit_ids = [str(row["id"]) for row in unit_rows]
2496
+ units_count = len(unit_ids)
2125
2497
 
2126
2498
  # Delete document (cascades to memory_units and all their links)
2127
2499
  deleted = await conn.fetchval(
@@ -2130,6 +2502,10 @@ class MemoryEngine(MemoryEngineInterface):
2130
2502
  bank_id,
2131
2503
  )
2132
2504
 
2505
+ # Invalidate deleted fact IDs from mental models
2506
+ if deleted and unit_ids:
2507
+ await self._invalidate_facts_from_mental_models(conn, bank_id, unit_ids)
2508
+
2133
2509
  return {"document_deleted": 1 if deleted else 0, "memory_units_deleted": units_count if deleted else 0}
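delete_document now collects the affected memory-unit IDs before the rows are gone, so `_invalidate_facts_from_mental_models` can run inside the same transaction; delete_memory_unit below follows the same pattern. A rough usage sketch (inside an async context); `engine`, `ctx`, and the positional argument order are assumptions, since the full signature is outside this hunk:

    # Illustrative only, not part of the diff.
    result = await engine.delete_document(bank_id, document_id, request_context=ctx)
    print(result)  # e.g. {"document_deleted": 1, "memory_units_deleted": 7}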
2134
2510
 
2135
2511
  async def delete_memory_unit(
@@ -2157,11 +2533,18 @@ class MemoryEngine(MemoryEngineInterface):
2157
2533
  pool = await self._get_pool()
2158
2534
  async with acquire_with_retry(pool) as conn:
2159
2535
  async with conn.transaction():
2536
+ # Get bank_id before deletion (for mental model invalidation)
2537
+ bank_id = await conn.fetchval(f"SELECT bank_id FROM {fq_table('memory_units')} WHERE id = $1", unit_id)
2538
+
2160
2539
  # Delete the memory unit (cascades to links and associations)
2161
2540
  deleted = await conn.fetchval(
2162
2541
  f"DELETE FROM {fq_table('memory_units')} WHERE id = $1 RETURNING id", unit_id
2163
2542
  )
2164
2543
 
2544
+ # Invalidate deleted fact ID from mental models
2545
+ if deleted and bank_id:
2546
+ await self._invalidate_facts_from_mental_models(conn, bank_id, [str(deleted)])
2547
+
2165
2548
  return {
2166
2549
  "success": deleted is not None,
2167
2550
  "unit_id": str(deleted) if deleted else None,
@@ -2253,11 +2636,85 @@ class MemoryEngine(MemoryEngineInterface):
2253
2636
  except Exception as e:
2254
2637
  raise Exception(f"Failed to delete agent data: {str(e)}")
2255
2638
 
2639
+ async def clear_observations(
2640
+ self,
2641
+ bank_id: str,
2642
+ *,
2643
+ request_context: "RequestContext",
2644
+ ) -> dict[str, int]:
2645
+ """
2646
+ Clear all observations for a bank (consolidated knowledge).
2647
+
2648
+ Args:
2649
+ bank_id: Bank ID to clear observations for
2650
+ request_context: Request context for authentication.
2651
+
2652
+ Returns:
2653
+ Dictionary with count of deleted observations
2654
+ """
2655
+ await self._authenticate_tenant(request_context)
2656
+ pool = await self._get_pool()
2657
+ async with acquire_with_retry(pool) as conn:
2658
+ async with conn.transaction():
2659
+ # Count observations before deletion
2660
+ count = await conn.fetchval(
2661
+ f"SELECT COUNT(*) FROM {fq_table('memory_units')} WHERE bank_id = $1 AND fact_type = 'observation'",
2662
+ bank_id,
2663
+ )
2664
+
2665
+ # Delete all observations
2666
+ await conn.execute(
2667
+ f"DELETE FROM {fq_table('memory_units')} WHERE bank_id = $1 AND fact_type = 'observation'",
2668
+ bank_id,
2669
+ )
2670
+
2671
+ # Reset consolidation timestamp
2672
+ await conn.execute(
2673
+ f"UPDATE {fq_table('banks')} SET last_consolidated_at = NULL WHERE bank_id = $1",
2674
+ bank_id,
2675
+ )
2676
+
2677
+ return {"deleted_count": count or 0}
2678
+
2679
+ async def run_consolidation(
2680
+ self,
2681
+ bank_id: str,
2682
+ *,
2683
+ request_context: "RequestContext",
2684
+ ) -> dict[str, int]:
2685
+ """
2686
+ Run memory consolidation to create/update mental models.
2687
+
2688
+ Args:
2689
+ bank_id: Bank ID to run consolidation for
2690
+ request_context: Request context for authentication.
2691
+
2692
+ Returns:
2693
+ Dictionary with consolidation stats
2694
+ """
2695
+ await self._authenticate_tenant(request_context)
2696
+
2697
+ from .consolidation import run_consolidation_job
2698
+
2699
+ result = await run_consolidation_job(
2700
+ memory_engine=self,
2701
+ bank_id=bank_id,
2702
+ request_context=request_context,
2703
+ )
2704
+
2705
+ return {
2706
+ "processed": result.get("processed", 0),
2707
+ "created": result.get("created", 0),
2708
+ "updated": result.get("updated", 0),
2709
+ "skipped": result.get("skipped", 0),
2710
+ }
2711
+
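A rough usage sketch of the two new maintenance entry points (inside an async context); `engine` and `ctx` are assumed to come from the host application:

    # Illustrative only, not part of the diff.
    cleared = await engine.clear_observations(bank_id, request_context=ctx)
    stats = await engine.run_consolidation(bank_id, request_context=ctx)
    print(cleared["deleted_count"], stats["processed"], stats["created"], stats["updated"])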
2256
2712
  async def get_graph_data(
2257
2713
  self,
2258
2714
  bank_id: str | None = None,
2259
2715
  fact_type: str | None = None,
2260
2716
  *,
2717
+ limit: int = 1000,
2261
2718
  request_context: "RequestContext",
2262
2719
  ):
2263
2720
  """
@@ -2266,10 +2723,11 @@ class MemoryEngine(MemoryEngineInterface):
2266
2723
  Args:
2267
2724
  bank_id: Filter by bank ID
2268
2725
  fact_type: Filter by fact type (world, experience, opinion)
2726
+ limit: Maximum number of items to return (default: 1000)
2269
2727
  request_context: Request context for authentication.
2270
2728
 
2271
2729
  Returns:
2272
- Dict with nodes, edges, and table_rows
2730
+ Dict with nodes, edges, table_rows, total_units, and limit
2273
2731
  """
2274
2732
  await self._authenticate_tenant(request_context)
2275
2733
  pool = await self._get_pool()
@@ -2291,21 +2749,46 @@ class MemoryEngine(MemoryEngineInterface):
2291
2749
 
2292
2750
  where_clause = "WHERE " + " AND ".join(query_conditions) if query_conditions else ""
2293
2751
 
2752
+ # Get total count first
2753
+ total_count_result = await conn.fetchrow(
2754
+ f"""
2755
+ SELECT COUNT(*) as total
2756
+ FROM {fq_table("memory_units")}
2757
+ {where_clause}
2758
+ """,
2759
+ *query_params,
2760
+ )
2761
+ total_count = total_count_result["total"] if total_count_result else 0
2762
+
2763
+ # Get units with limit
2764
+ param_count += 1
2294
2765
  units = await conn.fetch(
2295
2766
  f"""
2296
- SELECT id, text, event_date, context, occurred_start, occurred_end, mentioned_at, document_id, chunk_id, fact_type
2767
+ SELECT id, text, event_date, context, occurred_start, occurred_end, mentioned_at, document_id, chunk_id, fact_type, tags, created_at, proof_count, source_memory_ids
2297
2768
  FROM {fq_table("memory_units")}
2298
2769
  {where_clause}
2299
2770
  ORDER BY mentioned_at DESC NULLS LAST, event_date DESC
2300
- LIMIT 1000
2771
+ LIMIT ${param_count}
2301
2772
  """,
2302
2773
  *query_params,
2774
+ limit,
2303
2775
  )
2304
2776
 
2305
2777
  # Get links, filtering to only include links between units of the selected agent
2306
2778
  # Use DISTINCT ON with LEAST/GREATEST to deduplicate bidirectional links
2307
2779
  unit_ids = [row["id"] for row in units]
2308
- if unit_ids:
2780
+ unit_id_set = set(unit_ids)
2781
+
2782
+ # Collect source memory IDs from observations
2783
+ source_memory_ids = []
2784
+ for unit in units:
2785
+ if unit["source_memory_ids"]:
2786
+ source_memory_ids.extend(unit["source_memory_ids"])
2787
+ source_memory_ids = list(set(source_memory_ids)) # Deduplicate
2788
+
2789
+ # Fetch links involving both visible units AND source memories
2790
+ all_relevant_ids = unit_ids + source_memory_ids
2791
+ if all_relevant_ids:
2309
2792
  links = await conn.fetch(
2310
2793
  f"""
2311
2794
  SELECT DISTINCT ON (LEAST(ml.from_unit_id, ml.to_unit_id), GREATEST(ml.from_unit_id, ml.to_unit_id), ml.link_type, COALESCE(ml.entity_id, '00000000-0000-0000-0000-000000000000'::uuid))
@@ -2316,14 +2799,69 @@ class MemoryEngine(MemoryEngineInterface):
2316
2799
  e.canonical_name as entity_name
2317
2800
  FROM {fq_table("memory_links")} ml
2318
2801
  LEFT JOIN {fq_table("entities")} e ON ml.entity_id = e.id
2319
- WHERE ml.from_unit_id = ANY($1::uuid[]) AND ml.to_unit_id = ANY($1::uuid[])
2802
+ WHERE ml.from_unit_id = ANY($1::uuid[]) OR ml.to_unit_id = ANY($1::uuid[])
2320
2803
  ORDER BY LEAST(ml.from_unit_id, ml.to_unit_id), GREATEST(ml.from_unit_id, ml.to_unit_id), ml.link_type, COALESCE(ml.entity_id, '00000000-0000-0000-0000-000000000000'::uuid), ml.weight DESC
2321
2804
  """,
2322
- unit_ids,
2805
+ all_relevant_ids,
2323
2806
  )
2324
2807
  else:
2325
2808
  links = []
2326
2809
 
2810
+ # Copy links from source memories to observations
2811
+ # Observations inherit links from their source memories via source_memory_ids
2812
+ # Build a map from source_id to observation_ids
2813
+ source_to_observations = {}
2814
+ for unit in units:
2815
+ if unit["source_memory_ids"]:
2816
+ for source_id in unit["source_memory_ids"]:
2817
+ if source_id not in source_to_observations:
2818
+ source_to_observations[source_id] = []
2819
+ source_to_observations[source_id].append(unit["id"])
2820
+
2821
+ copied_links = []
2822
+ for link in links:
2823
+ from_id = link["from_unit_id"]
2824
+ to_id = link["to_unit_id"]
2825
+
2826
+ # Get observations that should inherit this link
2827
+ from_observations = source_to_observations.get(from_id, [])
2828
+ to_observations = source_to_observations.get(to_id, [])
2829
+
2830
+ # If from_id is a source memory, copy links to its observations
2831
+ if from_observations:
2832
+ for obs_id in from_observations:
2833
+ # Only include if the target is visible
2834
+ if to_id in unit_id_set or to_observations:
2835
+ target = to_observations[0] if to_observations and to_id not in unit_id_set else to_id
2836
+ if target in unit_id_set:
2837
+ copied_links.append(
2838
+ {
2839
+ "from_unit_id": obs_id,
2840
+ "to_unit_id": target,
2841
+ "link_type": link["link_type"],
2842
+ "weight": link["weight"],
2843
+ "entity_name": link["entity_name"],
2844
+ }
2845
+ )
2846
+
2847
+ # If to_id is a source memory, copy links to its observations
2848
+ if to_observations and from_id in unit_id_set:
2849
+ for obs_id in to_observations:
2850
+ copied_links.append(
2851
+ {
2852
+ "from_unit_id": from_id,
2853
+ "to_unit_id": obs_id,
2854
+ "link_type": link["link_type"],
2855
+ "weight": link["weight"],
2856
+ "entity_name": link["entity_name"],
2857
+ }
2858
+ )
2859
+
2860
+ # Keep only direct links between visible nodes
2861
+ direct_links = [
2862
+ link for link in links if link["from_unit_id"] in unit_id_set and link["to_unit_id"] in unit_id_set
2863
+ ]
2864
+
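# Worked example of the link inheritance above (IDs shortened, illustrative only):
#   visible units: obs-1 (source_memory_ids=[mem-7]) and mem-9; mem-7 is not visible.
#   fetched link: mem-7 <-> mem-9
#   source_to_observations == {mem-7: [obs-1]}
#   copied_links gains obs-1 -> mem-9 (same link_type / weight / entity_name)
#   direct_links drops the original edge because mem-7 is not in unit_id_set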
2327
2865
  # Get entity information
2328
2866
  unit_entities = await conn.fetch(f"""
2329
2867
  SELECT ue.unit_id, e.canonical_name
@@ -2341,6 +2879,18 @@ class MemoryEngine(MemoryEngineInterface):
2341
2879
  entity_map[unit_id] = []
2342
2880
  entity_map[unit_id].append(entity_name)
2343
2881
 
2882
+ # For observations, inherit entities from source memories
2883
+ for unit in units:
2884
+ if unit["source_memory_ids"] and unit["id"] not in entity_map:
2885
+ # Collect entities from all source memories
2886
+ source_entities = []
2887
+ for source_id in unit["source_memory_ids"]:
2888
+ if source_id in entity_map:
2889
+ source_entities.extend(entity_map[source_id])
2890
+ if source_entities:
2891
+ # Deduplicate while preserving order
2892
+ entity_map[unit["id"]] = list(dict.fromkeys(source_entities))
2893
+
2344
2894
  # Build nodes
2345
2895
  nodes = []
2346
2896
  for row in units:
@@ -2374,14 +2924,15 @@ class MemoryEngine(MemoryEngineInterface):
2374
2924
  }
2375
2925
  )
2376
2926
 
2377
- # Build edges
2927
+ # Build edges (combine direct links and copied links from sources)
2378
2928
  edges = []
2379
- for row in links:
2929
+ all_links = direct_links + copied_links
2930
+ for row in all_links:
2380
2931
  from_id = str(row["from_unit_id"])
2381
2932
  to_id = str(row["to_unit_id"])
2382
2933
  link_type = row["link_type"]
2383
2934
  weight = row["weight"]
2384
- entity_name = row["entity_name"]
2935
+ entity_name = row.get("entity_name")
2385
2936
 
2386
2937
  # Color by link type
2387
2938
  if link_type == "temporal":
@@ -2433,10 +2984,13 @@ class MemoryEngine(MemoryEngineInterface):
2433
2984
  "document_id": row["document_id"],
2434
2985
  "chunk_id": row["chunk_id"] if row["chunk_id"] else None,
2435
2986
  "fact_type": row["fact_type"],
2987
+ "tags": list(row["tags"]) if row["tags"] else [],
2988
+ "created_at": row["created_at"].isoformat() if row["created_at"] else None,
2989
+ "proof_count": row["proof_count"] if row["proof_count"] else None,
2436
2990
  }
2437
2991
  )
2438
2992
 
2439
- return {"nodes": nodes, "edges": edges, "table_rows": table_rows, "total_units": len(units)}
2993
+ return {"nodes": nodes, "edges": edges, "table_rows": table_rows, "total_units": total_count, "limit": limit}
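A rough usage sketch of the updated graph endpoint (inside an async context); `engine` and `ctx` are assumed. Note that `total_units` now reports the full matching count while `nodes` is capped by `limit`:

    # Illustrative only, not part of the diff.
    graph = await engine.get_graph_data(bank_id=bank_id, fact_type="world", limit=200, request_context=ctx)
    print(len(graph["nodes"]), graph["total_units"], graph["limit"])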
2440
2994
 
2441
2995
  async def list_memory_units(
2442
2996
  self,
@@ -2565,6 +3119,97 @@ class MemoryEngine(MemoryEngineInterface):
2565
3119
 
2566
3120
  return {"items": items, "total": total, "limit": limit, "offset": offset}
2567
3121
 
3122
+ async def get_memory_unit(
3123
+ self,
3124
+ bank_id: str,
3125
+ memory_id: str,
3126
+ request_context: "RequestContext",
3127
+ ):
3128
+ """
3129
+ Get a single memory unit by ID.
3130
+
3131
+ Args:
3132
+ bank_id: Bank ID
3133
+ memory_id: Memory unit ID
3134
+ request_context: Request context for authentication.
3135
+
3136
+ Returns:
3137
+ Dict with memory unit data or None if not found
3138
+ """
3139
+ await self._authenticate_tenant(request_context)
3140
+ pool = await self._get_pool()
3141
+ async with acquire_with_retry(pool) as conn:
3142
+ # Get the memory unit (include source_memory_ids for mental models)
3143
+ row = await conn.fetchrow(
3144
+ f"""
3145
+ SELECT id, text, context, event_date, occurred_start, occurred_end,
3146
+ mentioned_at, fact_type, document_id, chunk_id, tags, source_memory_ids
3147
+ FROM {fq_table("memory_units")}
3148
+ WHERE id = $1 AND bank_id = $2
3149
+ """,
3150
+ memory_id,
3151
+ bank_id,
3152
+ )
3153
+
3154
+ if not row:
3155
+ return None
3156
+
3157
+ # Get entity information
3158
+ entities_rows = await conn.fetch(
3159
+ f"""
3160
+ SELECT e.canonical_name
3161
+ FROM {fq_table("unit_entities")} ue
3162
+ JOIN {fq_table("entities")} e ON ue.entity_id = e.id
3163
+ WHERE ue.unit_id = $1
3164
+ """,
3165
+ row["id"],
3166
+ )
3167
+ entities = [r["canonical_name"] for r in entities_rows]
3168
+
3169
+ result = {
3170
+ "id": str(row["id"]),
3171
+ "text": row["text"],
3172
+ "context": row["context"] if row["context"] else "",
3173
+ "date": row["event_date"].isoformat() if row["event_date"] else "",
3174
+ "type": row["fact_type"],
3175
+ "mentioned_at": row["mentioned_at"].isoformat() if row["mentioned_at"] else None,
3176
+ "occurred_start": row["occurred_start"].isoformat() if row["occurred_start"] else None,
3177
+ "occurred_end": row["occurred_end"].isoformat() if row["occurred_end"] else None,
3178
+ "entities": entities,
3179
+ "document_id": row["document_id"] if row["document_id"] else None,
3180
+ "chunk_id": str(row["chunk_id"]) if row["chunk_id"] else None,
3181
+ "tags": row["tags"] if row["tags"] else [],
3182
+ }
3183
+
3184
+ # For observations, include source_memory_ids and fetch source_memories
3185
+ if row["fact_type"] == "observation" and row["source_memory_ids"]:
3186
+ source_ids = row["source_memory_ids"]
3187
+ result["source_memory_ids"] = [str(sid) for sid in source_ids]
3188
+
3189
+ # Fetch source memories
3190
+ source_rows = await conn.fetch(
3191
+ f"""
3192
+ SELECT id, text, fact_type, context, occurred_start, mentioned_at
3193
+ FROM {fq_table("memory_units")}
3194
+ WHERE id = ANY($1::uuid[])
3195
+ ORDER BY mentioned_at DESC NULLS LAST
3196
+ """,
3197
+ source_ids,
3198
+ )
3199
+ result["source_memories"] = [
3200
+ {
3201
+ "id": str(r["id"]),
3202
+ "text": r["text"],
3203
+ "type": r["fact_type"],
3204
+ "context": r["context"],
3205
+ "occurred_start": r["occurred_start"].isoformat() if r["occurred_start"] else None,
3206
+ "mentioned_at": r["mentioned_at"].isoformat() if r["mentioned_at"] else None,
3207
+ }
3208
+ for r in source_rows
3209
+ ]
3210
+
3211
+ return result
3212
+
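A rough usage sketch of the new single-unit lookup (inside an async context); `engine` and `ctx` are assumed. For observation-type units the result also carries the consolidated sources:

    # Illustrative only, not part of the diff.
    unit = await engine.get_memory_unit(bank_id, memory_id, request_context=ctx)
    if unit and unit["type"] == "observation":
        for src in unit.get("source_memories", []):
            print(src["id"], src["text"][:60])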
2568
3213
  async def list_documents(
2569
3214
  self,
2570
3215
  bank_id: str,
@@ -2741,264 +3386,24 @@ class MemoryEngine(MemoryEngineInterface):
2741
3386
  "created_at": chunk["created_at"].isoformat() if chunk["created_at"] else "",
2742
3387
  }
2743
3388
 
2744
- async def _evaluate_opinion_update_async(
3389
+ # ==================== bank profile Methods ====================
3390
+
3391
+ async def get_bank_profile(
2745
3392
  self,
2746
- opinion_text: str,
2747
- opinion_confidence: float,
2748
- new_event_text: str,
2749
- entity_name: str,
2750
- ) -> dict[str, Any] | None:
3393
+ bank_id: str,
3394
+ *,
3395
+ request_context: "RequestContext",
3396
+ ) -> dict[str, Any]:
2751
3397
  """
2752
- Evaluate if an opinion should be updated based on a new event.
2753
-
2754
- Args:
2755
- opinion_text: Current opinion text (includes reasons)
2756
- opinion_confidence: Current confidence score (0.0-1.0)
2757
- new_event_text: Text of the new event
2758
- entity_name: Name of the entity this opinion is about
2759
-
2760
- Returns:
2761
- Dict with 'action' ('keep'|'update'), 'new_confidence', 'new_text' (if action=='update')
2762
- or None if no changes needed
2763
- """
2764
-
2765
- class OpinionEvaluation(BaseModel):
2766
- """Evaluation of whether an opinion should be updated."""
2767
-
2768
- action: str = Field(description="Action to take: 'keep' (no change) or 'update' (modify opinion)")
2769
- reasoning: str = Field(description="Brief explanation of why this action was chosen")
2770
- new_confidence: float = Field(
2771
- description="New confidence score (0.0-1.0). Can be higher, lower, or same as before."
2772
- )
2773
- new_opinion_text: str | None = Field(
2774
- default=None,
2775
- description="If action is 'update', the revised opinion text that acknowledges the previous view. Otherwise None.",
2776
- )
2777
-
2778
- evaluation_prompt = f"""You are evaluating whether an existing opinion should be updated based on new information.
2779
-
2780
- ENTITY: {entity_name}
2781
-
2782
- EXISTING OPINION:
2783
- {opinion_text}
2784
- Current confidence: {opinion_confidence:.2f}
2785
-
2786
- NEW EVENT:
2787
- {new_event_text}
2788
-
2789
- Evaluate whether this new event:
2790
- 1. REINFORCES the opinion (increase confidence, keep text)
2791
- 2. WEAKENS the opinion (decrease confidence, keep text)
2792
- 3. CHANGES the opinion (update both text and confidence, noting "Previously I thought X, but now Y...")
2793
- 4. IRRELEVANT (keep everything as is)
2794
-
2795
- Guidelines:
2796
- - Only suggest 'update' action if the new event genuinely contradicts or significantly modifies the opinion
2797
- - If updating the text, acknowledge the previous opinion and explain the change
2798
- - Confidence should reflect accumulated evidence (0.0 = no confidence, 1.0 = very confident)
2799
- - Small changes in confidence are normal; large jumps should be rare"""
2800
-
2801
- try:
2802
- result = await self._llm_config.call(
2803
- messages=[
2804
- {"role": "system", "content": "You evaluate and update opinions based on new information."},
2805
- {"role": "user", "content": evaluation_prompt},
2806
- ],
2807
- response_format=OpinionEvaluation,
2808
- scope="memory_evaluate_opinion",
2809
- temperature=0.3, # Lower temperature for more consistent evaluation
2810
- )
2811
-
2812
- # Only return updates if something actually changed
2813
- if result.action == "keep" and abs(result.new_confidence - opinion_confidence) < 0.01:
2814
- return None
2815
-
2816
- return {
2817
- "action": result.action,
2818
- "reasoning": result.reasoning,
2819
- "new_confidence": result.new_confidence,
2820
- "new_text": result.new_opinion_text if result.action == "update" else None,
2821
- }
2822
-
2823
- except Exception as e:
2824
- logger.warning(f"Failed to evaluate opinion update: {str(e)}")
2825
- return None
2826
-
2827
- async def _handle_form_opinion(self, task_dict: dict[str, Any]):
2828
- """
2829
- Handler for form opinion tasks.
2830
-
2831
- Args:
2832
- task_dict: Dict with keys: 'bank_id', 'answer_text', 'query', 'tenant_id'
2833
- """
2834
- bank_id = task_dict["bank_id"]
2835
- answer_text = task_dict["answer_text"]
2836
- query = task_dict["query"]
2837
- tenant_id = task_dict.get("tenant_id")
2838
-
2839
- await self._extract_and_store_opinions_async(
2840
- bank_id=bank_id, answer_text=answer_text, query=query, tenant_id=tenant_id
2841
- )
2842
-
2843
- async def _handle_reinforce_opinion(self, task_dict: dict[str, Any]):
2844
- """
2845
- Handler for reinforce opinion tasks.
2846
-
2847
- Args:
2848
- task_dict: Dict with keys: 'bank_id', 'created_unit_ids', 'unit_texts', 'unit_entities'
2849
- """
2850
- bank_id = task_dict["bank_id"]
2851
- created_unit_ids = task_dict["created_unit_ids"]
2852
- unit_texts = task_dict["unit_texts"]
2853
- unit_entities = task_dict["unit_entities"]
2854
-
2855
- await self._reinforce_opinions_async(
2856
- bank_id=bank_id, created_unit_ids=created_unit_ids, unit_texts=unit_texts, unit_entities=unit_entities
2857
- )
2858
-
2859
- async def _reinforce_opinions_async(
2860
- self,
2861
- bank_id: str,
2862
- created_unit_ids: list[str],
2863
- unit_texts: list[str],
2864
- unit_entities: list[list[dict[str, str]]],
2865
- ):
2866
- """
2867
- Background task to reinforce opinions based on newly ingested events.
2868
-
2869
- This runs asynchronously and does not block the put operation.
2870
-
2871
- Args:
2872
- bank_id: bank ID
2873
- created_unit_ids: List of newly created memory unit IDs
2874
- unit_texts: Texts of the newly created units
2875
- unit_entities: Entities extracted from each unit
2876
- """
2877
- try:
2878
- # Extract all unique entity names from the new units
2879
- entity_names = set()
2880
- for entities_list in unit_entities:
2881
- for entity in entities_list:
2882
- # Handle both Entity objects and dicts
2883
- if hasattr(entity, "text"):
2884
- entity_names.add(entity.text)
2885
- elif isinstance(entity, dict):
2886
- entity_names.add(entity["text"])
2887
-
2888
- if not entity_names:
2889
- return
2890
-
2891
- pool = await self._get_pool()
2892
- async with acquire_with_retry(pool) as conn:
2893
- # Find all opinions related to these entities
2894
- opinions = await conn.fetch(
2895
- f"""
2896
- SELECT DISTINCT mu.id, mu.text, mu.confidence_score, e.canonical_name
2897
- FROM {fq_table("memory_units")} mu
2898
- JOIN {fq_table("unit_entities")} ue ON mu.id = ue.unit_id
2899
- JOIN {fq_table("entities")} e ON ue.entity_id = e.id
2900
- WHERE mu.bank_id = $1
2901
- AND mu.fact_type = 'opinion'
2902
- AND e.canonical_name = ANY($2::text[])
2903
- """,
2904
- bank_id,
2905
- list(entity_names),
2906
- )
2907
-
2908
- if not opinions:
2909
- return
2910
-
2911
- # Use cached LLM config
2912
- if self._llm_config is None:
2913
- logger.error("[REINFORCE] LLM config not available, skipping opinion reinforcement")
2914
- return
2915
-
2916
- # Evaluate each opinion against the new events
2917
- updates_to_apply = []
2918
- for opinion in opinions:
2919
- opinion_id = str(opinion["id"])
2920
- opinion_text = opinion["text"]
2921
- opinion_confidence = opinion["confidence_score"]
2922
- entity_name = opinion["canonical_name"]
2923
-
2924
- # Find all new events mentioning this entity
2925
- relevant_events = []
2926
- for unit_text, entities_list in zip(unit_texts, unit_entities):
2927
- if any(e["text"] == entity_name for e in entities_list):
2928
- relevant_events.append(unit_text)
2929
-
2930
- if not relevant_events:
2931
- continue
2932
-
2933
- # Combine all relevant events
2934
- combined_events = "\n".join(relevant_events)
2935
-
2936
- # Evaluate if opinion should be updated
2937
- evaluation = await self._evaluate_opinion_update_async(
2938
- opinion_text, opinion_confidence, combined_events, entity_name
2939
- )
2940
-
2941
- if evaluation:
2942
- updates_to_apply.append({"opinion_id": opinion_id, "evaluation": evaluation})
2943
-
2944
- # Apply all updates in a single transaction
2945
- if updates_to_apply:
2946
- async with conn.transaction():
2947
- for update in updates_to_apply:
2948
- opinion_id = update["opinion_id"]
2949
- evaluation = update["evaluation"]
2950
-
2951
- if evaluation["action"] == "update" and evaluation["new_text"]:
2952
- # Update both text and confidence
2953
- await conn.execute(
2954
- f"""
2955
- UPDATE {fq_table("memory_units")}
2956
- SET text = $1, confidence_score = $2, updated_at = NOW()
2957
- WHERE id = $3
2958
- """,
2959
- evaluation["new_text"],
2960
- evaluation["new_confidence"],
2961
- uuid.UUID(opinion_id),
2962
- )
2963
- else:
2964
- # Only update confidence
2965
- await conn.execute(
2966
- f"""
2967
- UPDATE {fq_table("memory_units")}
2968
- SET confidence_score = $1, updated_at = NOW()
2969
- WHERE id = $2
2970
- """,
2971
- evaluation["new_confidence"],
2972
- uuid.UUID(opinion_id),
2973
- )
2974
-
2975
- else:
2976
- pass # No opinions to update
2977
-
2978
- except Exception as e:
2979
- logger.error(f"[REINFORCE] Error during opinion reinforcement: {str(e)}")
2980
- import traceback
2981
-
2982
- traceback.print_exc()
2983
-
2984
- # ==================== bank profile Methods ====================
2985
-
2986
- async def get_bank_profile(
2987
- self,
2988
- bank_id: str,
2989
- *,
2990
- request_context: "RequestContext",
2991
- ) -> dict[str, Any]:
2992
- """
2993
- Get bank profile (name, disposition + background).
2994
- Auto-creates agent with default values if not exists.
3398
+ Get bank profile (name, disposition + mission).
3399
+ Auto-creates agent with default values if not exists.
2995
3400
 
2996
3401
  Args:
2997
3402
  bank_id: bank IDentifier
2998
3403
  request_context: Request context for authentication.
2999
3404
 
3000
3405
  Returns:
3001
- Dict with name, disposition traits, and background
3406
+ Dict with name, disposition traits, and mission
3002
3407
  """
3003
3408
  await self._authenticate_tenant(request_context)
3004
3409
  pool = await self._get_pool()
@@ -3008,7 +3413,7 @@ Guidelines:
3008
3413
  "bank_id": bank_id,
3009
3414
  "name": profile["name"],
3010
3415
  "disposition": disposition,
3011
- "background": profile["background"],
3416
+ "mission": profile["mission"],
3012
3417
  }
3013
3418
 
3014
3419
  async def update_bank_disposition(
@@ -3030,31 +3435,51 @@ Guidelines:
3030
3435
  pool = await self._get_pool()
3031
3436
  await bank_utils.update_bank_disposition(pool, bank_id, disposition)
3032
3437
 
3033
- async def merge_bank_background(
3438
+ async def set_bank_mission(
3439
+ self,
3440
+ bank_id: str,
3441
+ mission: str,
3442
+ *,
3443
+ request_context: "RequestContext",
3444
+ ) -> dict[str, Any]:
3445
+ """
3446
+ Set the mission for a bank.
3447
+
3448
+ Args:
3449
+ bank_id: bank IDentifier
3450
+ mission: The mission text
3451
+ request_context: Request context for authentication.
3452
+
3453
+ Returns:
3454
+ Dict with bank_id and mission.
3455
+ """
3456
+ await self._authenticate_tenant(request_context)
3457
+ pool = await self._get_pool()
3458
+ await bank_utils.set_bank_mission(pool, bank_id, mission)
3459
+ return {"bank_id": bank_id, "mission": mission}
3460
+
3461
+ async def merge_bank_mission(
3034
3462
  self,
3035
3463
  bank_id: str,
3036
3464
  new_info: str,
3037
3465
  *,
3038
- update_disposition: bool = True,
3039
3466
  request_context: "RequestContext",
3040
3467
  ) -> dict[str, Any]:
3041
3468
  """
3042
- Merge new background information with existing background using LLM.
3469
+ Merge new mission information with existing mission using LLM.
3043
3470
  Normalizes to first person ("I") and resolves conflicts.
3044
- Optionally infers disposition traits from the merged background.
3045
3471
 
3046
3472
  Args:
3047
3473
  bank_id: bank IDentifier
3048
- new_info: New background information to add/merge
3049
- update_disposition: If True, infer Big Five traits from background (default: True)
3474
+ new_info: New mission information to add/merge
3050
3475
  request_context: Request context for authentication.
3051
3476
 
3052
3477
  Returns:
3053
- Dict with 'background' (str) and optionally 'disposition' (dict) keys
3478
+ Dict with 'mission' (str) key
3054
3479
  """
3055
3480
  await self._authenticate_tenant(request_context)
3056
3481
  pool = await self._get_pool()
3057
- return await bank_utils.merge_bank_background(pool, self._llm_config, bank_id, new_info, update_disposition)
3482
+ return await bank_utils.merge_bank_mission(pool, self._reflect_llm_config, bank_id, new_info)
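A rough usage sketch of the two mission entry points (inside an async context); `engine`, `ctx`, and the example strings are assumptions:

    # Illustrative only, not part of the diff.
    await engine.set_bank_mission(bank_id, "Track competitor pricing weekly", request_context=ctx)
    merged = await engine.merge_bank_mission(bank_id, "Also cover EU markets", request_context=ctx)
    print(merged["mission"])  # first-person, conflict-resolved mission text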
3058
3483
 
3059
3484
  async def list_banks(
3060
3485
  self,
@@ -3068,7 +3493,7 @@ Guidelines:
3068
3493
  request_context: Request context for authentication.
3069
3494
 
3070
3495
  Returns:
3071
- List of dicts with bank_id, name, disposition, background, created_at, updated_at
3496
+ List of dicts with bank_id, name, disposition, mission, created_at, updated_at
3072
3497
  """
3073
3498
  await self._authenticate_tenant(request_context)
3074
3499
  pool = await self._get_pool()
@@ -3086,35 +3511,44 @@ Guidelines:
3086
3511
  max_tokens: int = 4096,
3087
3512
  response_schema: dict | None = None,
3088
3513
  request_context: "RequestContext",
3514
+ tags: list[str] | None = None,
3515
+ tags_match: TagsMatch = "any",
3516
+ exclude_mental_model_ids: list[str] | None = None,
3089
3517
  ) -> ReflectResult:
3090
3518
  """
3091
- Reflect and formulate an answer using bank identity, world facts, and opinions.
3519
+ Reflect and formulate an answer using an agentic loop with tools.
3092
3520
 
3093
- This method:
3094
- 1. Retrieves experience (conversations and events)
3095
- 2. Retrieves world facts (general knowledge)
3096
- 3. Retrieves existing opinions (bank's formed perspectives)
3097
- 4. Uses LLM to formulate an answer
3098
- 5. Extracts and stores any new opinions formed during reflection
3099
- 6. Optionally generates structured output based on response_schema
3100
- 7. Returns plain text answer and the facts used
3521
+ The reflect agent iteratively uses tools to:
3522
+ 1. lookup: Get mental models (synthesized knowledge)
3523
+ 2. recall: Search facts (semantic + temporal retrieval)
3524
+ 3. learn: Create/update mental models with new insights
3525
+ 4. expand: Get chunk/document context for memories
3526
+
3527
+ The agent starts with empty context and must call tools to gather
3528
+ information. On the last iteration, tools are removed to force a
3529
+ final text response.
3101
3530
 
3102
3531
  Args:
3103
3532
  bank_id: bank identifier
3104
3533
  query: Question to answer
3105
- budget: Budget level for memory exploration (low=100, mid=300, high=600 units)
3106
- context: Additional context string to include in LLM prompt (not used in recall)
3107
- response_schema: Optional JSON Schema for structured output
3534
+ budget: Budget level; scales the agent's iteration ceiling (low=0.5x, mid=1x, high=2x of reflect_max_iterations)
3535
+ context: Additional context string to include in agent prompt
3536
+ max_tokens: Max completion tokens, forwarded to the reflect agent
3537
+ response_schema: Optional JSON Schema for structured output, forwarded to the reflect agent
3538
+ tags: Optional tags to filter memories
3539
+ tags_match: How to match tags - "any" (OR), "all" (AND)
3540
+ exclude_mental_model_ids: Optional list of mental model IDs to exclude from search
3541
+ (used when refreshing a mental model to avoid circular reference)
3108
3542
 
3109
3543
  Returns:
3110
3544
  ReflectResult containing:
3111
- - text: Plain text answer (no markdown)
3112
- - based_on: Dict with 'world', 'experience', and 'opinion' fact lists (MemoryFact objects)
3113
- - new_opinions: List of newly formed opinions
3114
- - structured_output: Optional dict if response_schema was provided
3545
+ - text: Plain text answer
3546
+ - based_on: Facts and mental models the agent reported using, keyed by fact type
3547
+ - new_opinions: Empty list
3548
+ - structured_output: Structured output from the agent when response_schema is provided (may be None)
3115
3549
  """
3116
3550
  # Use cached LLM config
3117
- if self._llm_config is None:
3551
+ if self._reflect_llm_config is None:
3118
3552
  raise ValueError("Memory LLM API key not set. Set HINDSIGHT_API_LLM_API_KEY environment variable.")
3119
3553
 
3120
3554
  # Authenticate tenant and set schema in context (for fq_table())
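The hunks below replace the single-shot prompt flow with the agentic loop. A rough usage sketch (inside an async context); the method name `reflect_async` is a guess since the signature's opening line is outside this hunk, and `engine`, `ctx`, `Budget`, and the example tag are assumptions:

    # Illustrative only, not part of the diff.
    result = await engine.reflect_async(
        bank_id,
        "What changed in the EU rollout last week?",
        budget=Budget.HIGH,          # scales the agent's iteration ceiling
        tags=["project:rollout"],
        tags_match="any",
        request_context=ctx,
    )
    print(result.text)
    for call in result.tool_trace:
        print(call.tool, call.duration_ms)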
@@ -3135,121 +3569,312 @@ Guidelines:
3135
3569
 
3136
3570
  reflect_start = time.time()
3137
3571
  reflect_id = f"{bank_id[:8]}-{int(time.time() * 1000) % 100000}"
3138
- log_buffer = []
3139
- log_buffer.append(f"[REFLECT {reflect_id}] Query: '{query[:50]}...'")
3572
+ tags_info = f", tags={tags} ({tags_match})" if tags else ""
3573
+ logger.info(f"[REFLECT {reflect_id}] Starting agentic reflect for query: {query[:50]}...{tags_info}")
3140
3574
 
3141
- # Steps 1-3: Run multi-fact-type search (12-way retrieval: 4 methods × 3 fact types)
3142
- recall_start = time.time()
3143
- search_result = await self.recall_async(
3144
- bank_id=bank_id,
3145
- query=query,
3146
- budget=budget,
3147
- max_tokens=4096,
3148
- enable_trace=False,
3149
- fact_type=["experience", "world", "opinion"],
3150
- include_entities=True,
3151
- request_context=request_context,
3152
- )
3153
- recall_time = time.time() - recall_start
3575
+ # Get bank profile for agent identity
3576
+ profile = await self.get_bank_profile(bank_id, request_context=request_context)
3577
+
3578
+ # NOTE: Mental models are NOT pre-loaded to keep the initial prompt small.
3579
+ # The agent can call lookup() to list available models if needed.
3580
+ # This is critical for banks with many mental models to avoid huge prompts.
3581
+
3582
+ # Compute max iterations based on budget
3583
+ config = get_config()
3584
+ base_max_iterations = config.reflect_max_iterations
3585
+ # Budget multipliers: low=0.5x, mid=1x, high=2x
3586
+ budget_multipliers = {Budget.LOW: 0.5, Budget.MID: 1.0, Budget.HIGH: 2.0}
3587
+ effective_budget = budget or Budget.LOW
3588
+ max_iterations = max(1, int(base_max_iterations * budget_multipliers.get(effective_budget, 1.0)))
3589
+
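# Worked example of the budget scaling above (reflect_max_iterations = 6 is
# illustrative, not necessarily the shipped default):
#   low  -> max(1, int(6 * 0.5)) = 3 iterations
#   mid  -> max(1, int(6 * 1.0)) = 6 iterations
#   high -> max(1, int(6 * 2.0)) = 12 iterations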
3590
+ # Run agentic loop - acquire connections only when needed for DB operations
3591
+ # (not held during LLM calls which can be slow)
3592
+ pool = await self._get_pool()
3154
3593
 
3155
- all_results = search_result.results
3594
+ # Get bank stats for freshness info
3595
+ bank_stats = await self.get_bank_stats(bank_id, request_context=request_context)
3596
+ last_consolidated_at = bank_stats.last_consolidated_at if hasattr(bank_stats, "last_consolidated_at") else None
3597
+ pending_consolidation = bank_stats.pending_consolidation if hasattr(bank_stats, "pending_consolidation") else 0
3156
3598
 
3157
- # Split results by fact type for structured response
3158
- agent_results = [r for r in all_results if r.fact_type == "experience"]
3159
- world_results = [r for r in all_results if r.fact_type == "world"]
3160
- opinion_results = [r for r in all_results if r.fact_type == "opinion"]
3599
+ # Create tool callbacks that acquire connections only when needed
3600
+ from .retain import embedding_utils
3161
3601
 
3162
- log_buffer.append(
3163
- f"[REFLECT {reflect_id}] Recall: {len(all_results)} facts (experience={len(agent_results)}, world={len(world_results)}, opinion={len(opinion_results)}) in {recall_time:.3f}s"
3602
+ async def search_mental_models_fn(q: str, max_results: int = 5) -> dict[str, Any]:
3603
+ # Generate embedding for the query
3604
+ embeddings = await embedding_utils.generate_embeddings_batch(self.embeddings, [q])
3605
+ query_embedding = embeddings[0]
3606
+ async with pool.acquire() as conn:
3607
+ return await tool_search_mental_models(
3608
+ conn,
3609
+ bank_id,
3610
+ q,
3611
+ query_embedding,
3612
+ max_results=max_results,
3613
+ tags=tags,
3614
+ tags_match=tags_match,
3615
+ exclude_ids=exclude_mental_model_ids,
3616
+ )
3617
+
3618
+ async def search_observations_fn(q: str, max_tokens: int = 5000) -> dict[str, Any]:
3619
+ return await tool_search_observations(
3620
+ self,
3621
+ bank_id,
3622
+ q,
3623
+ request_context,
3624
+ max_tokens=max_tokens,
3625
+ tags=tags,
3626
+ tags_match=tags_match,
3627
+ last_consolidated_at=last_consolidated_at,
3628
+ pending_consolidation=pending_consolidation,
3629
+ )
3630
+
3631
+ async def recall_fn(q: str, max_tokens: int = 4096) -> dict[str, Any]:
3632
+ return await tool_recall(
3633
+ self, bank_id, q, request_context, max_tokens=max_tokens, tags=tags, tags_match=tags_match
3634
+ )
3635
+
3636
+ async def expand_fn(memory_ids: list[str], depth: str) -> dict[str, Any]:
3637
+ async with pool.acquire() as conn:
3638
+ return await tool_expand(conn, bank_id, memory_ids, depth)
3639
+
3640
+ # Load directives from the dedicated directives table
3641
+ # Directives are hard rules that must be followed in all responses
3642
+ directives_raw = await self.list_directives(
3643
+ bank_id=bank_id,
3644
+ tags=tags,
3645
+ tags_match=tags_match,
3646
+ active_only=True,
3647
+ request_context=request_context,
3164
3648
  )
3649
+ # Convert directive format to the expected format for reflect agent
3650
+ # The agent expects: name, description (optional), observations (list of {title, content})
3651
+ directives = [
3652
+ {
3653
+ "name": d["name"],
3654
+ "description": d["content"], # Use content as description
3655
+ "observations": [], # Directives use content directly, not observations
3656
+ }
3657
+ for d in directives_raw
3658
+ ]
3659
+ if directives:
3660
+ logger.info(f"[REFLECT {reflect_id}] Loaded {len(directives)} directives")
3165
3661
 
3166
- # Format facts for LLM
3167
- agent_facts_text = think_utils.format_facts_for_prompt(agent_results)
3168
- world_facts_text = think_utils.format_facts_for_prompt(world_results)
3169
- opinion_facts_text = think_utils.format_facts_for_prompt(opinion_results)
3662
+ # Check if the bank has any mental models
3663
+ async with pool.acquire() as conn:
3664
+ mental_model_count = await conn.fetchval(
3665
+ f"SELECT COUNT(*) FROM {fq_table('mental_models')} WHERE bank_id = $1",
3666
+ bank_id,
3667
+ )
3668
+ has_mental_models = mental_model_count > 0
3669
+ if has_mental_models:
3670
+ logger.info(f"[REFLECT {reflect_id}] Bank has {mental_model_count} mental models")
3170
3671
 
3171
- # Get bank profile (name, disposition + background)
3172
- profile = await self.get_bank_profile(bank_id, request_context=request_context)
3173
- name = profile["name"]
3174
- disposition = profile["disposition"] # Typed as DispositionTraits
3175
- background = profile["background"]
3176
-
3177
- # Build the prompt
3178
- prompt = think_utils.build_think_prompt(
3179
- agent_facts_text=agent_facts_text,
3180
- world_facts_text=world_facts_text,
3181
- opinion_facts_text=opinion_facts_text,
3672
+ # Run the agent
3673
+ agent_result = await run_reflect_agent(
3674
+ llm_config=self._reflect_llm_config,
3675
+ bank_id=bank_id,
3182
3676
  query=query,
3183
- name=name,
3184
- disposition=disposition,
3185
- background=background,
3677
+ bank_profile=profile,
3678
+ search_mental_models_fn=search_mental_models_fn,
3679
+ search_observations_fn=search_observations_fn,
3680
+ recall_fn=recall_fn,
3681
+ expand_fn=expand_fn,
3186
3682
  context=context,
3683
+ max_iterations=max_iterations,
3684
+ max_tokens=max_tokens,
3685
+ response_schema=response_schema,
3686
+ directives=directives,
3687
+ has_mental_models=has_mental_models,
3688
+ budget=effective_budget,
3187
3689
  )
3188
3690
 
3189
- log_buffer.append(f"[REFLECT {reflect_id}] Prompt: {len(prompt)} chars")
3190
-
3191
- system_message = think_utils.get_system_message(disposition)
3192
- messages = [{"role": "system", "content": system_message}, {"role": "user", "content": prompt}]
3193
-
3194
- # Prepare response_format if schema provided
3195
- response_format = None
3196
- if response_schema is not None:
3197
- # Wrapper class to provide Pydantic-like interface for raw JSON schemas
3198
- class JsonSchemaWrapper:
3199
- def __init__(self, schema: dict):
3200
- self._schema = schema
3201
-
3202
- def model_json_schema(self):
3203
- return self._schema
3204
-
3205
- response_format = JsonSchemaWrapper(response_schema)
3206
-
3207
- llm_start = time.time()
3208
- result = await self._llm_config.call(
3209
- messages=messages,
3210
- scope="memory_reflect",
3211
- max_completion_tokens=max_tokens,
3212
- response_format=response_format,
3213
- skip_validation=True if response_format else False,
3214
- # Don't enforce strict_schema - not all providers support it and may retry forever
3215
- # Soft enforcement (schema in prompt + json_object mode) is sufficient
3216
- strict_schema=False,
3691
+ total_time = time.time() - reflect_start
3692
+ logger.info(
3693
+ f"[REFLECT {reflect_id}] Complete: {len(agent_result.text)} chars, "
3694
+ f"{agent_result.iterations} iterations, {agent_result.tools_called} tool calls | {total_time:.3f}s"
3217
3695
  )
3218
- llm_time = time.time() - llm_start
3219
3696
 
3220
- # Handle response based on whether structured output was requested
3221
- if response_schema is not None:
3222
- structured_output = result
3223
- answer_text = "" # Empty for backward compatibility
3224
- log_buffer.append(f"[REFLECT {reflect_id}] Structured output generated")
3225
- else:
3226
- structured_output = None
3227
- answer_text = result.strip()
3697
+ # Convert agent tool trace to ToolCallTrace objects
3698
+ tool_trace_result = [
3699
+ ToolCallTrace(
3700
+ tool=tc.tool,
3701
+ reason=tc.reason,
3702
+ input=tc.input,
3703
+ output=tc.output,
3704
+ duration_ms=tc.duration_ms,
3705
+ iteration=tc.iteration,
3706
+ )
3707
+ for tc in agent_result.tool_trace
3708
+ ]
3228
3709
 
3229
- # Submit form_opinion task for background processing
3230
- # Pass tenant_id from request context for internal authentication in background task
3231
- await self._task_backend.submit_task(
3232
- {
3233
- "type": "form_opinion",
3234
- "bank_id": bank_id,
3235
- "answer_text": answer_text,
3236
- "query": query,
3237
- "tenant_id": getattr(request_context, "tenant_id", None) if request_context else None,
3238
- }
3239
- )
3710
+ # Convert agent LLM trace to LLMCallTrace objects
3711
+ llm_trace_result = [LLMCallTrace(scope=lc.scope, duration_ms=lc.duration_ms) for lc in agent_result.llm_trace]
3712
+
3713
+ # Extract memories from recall tool outputs - only include memories the agent actually used
3714
+ # agent_result.used_memory_ids contains validated IDs from the done action
3715
+ used_memory_ids_set = set(agent_result.used_memory_ids) if agent_result.used_memory_ids else set()
3716
+ based_on: dict[str, list[MemoryFact]] = {"world": [], "experience": [], "opinion": [], "observation": []}
3717
+ seen_memory_ids: set[str] = set()
3718
+ for tc in agent_result.tool_trace:
3719
+ if tc.tool == "recall" and "memories" in tc.output:
3720
+ for memory_data in tc.output["memories"]:
3721
+ memory_id = memory_data.get("id")
3722
+ # Only include memories that the agent declared as used (or all if none specified)
3723
+ if memory_id and memory_id not in seen_memory_ids:
3724
+ if used_memory_ids_set and memory_id not in used_memory_ids_set:
3725
+ continue # Skip memories not actually used by the agent
3726
+ seen_memory_ids.add(memory_id)
3727
+ fact_type = memory_data.get("type", "world")
3728
+ if fact_type in based_on:
3729
+ based_on[fact_type].append(
3730
+ MemoryFact(
3731
+ id=memory_id,
3732
+ text=memory_data.get("text", ""),
3733
+ fact_type=fact_type,
3734
+ context=None,
3735
+ occurred_start=memory_data.get("occurred"),
3736
+ occurred_end=memory_data.get("occurred"),
3737
+ )
3738
+ )
3240
3739
 
3241
- total_time = time.time() - reflect_start
3242
- log_buffer.append(
3243
- f"[REFLECT {reflect_id}] Complete: {len(answer_text)} chars response, LLM {llm_time:.3f}s, total {total_time:.3f}s"
3740
+ # Extract mental models from tool outputs - only include models the agent actually used
3741
+ # agent_result.used_mental_model_ids contains validated IDs from the done action
3742
+ used_model_ids_set = set(agent_result.used_mental_model_ids) if agent_result.used_mental_model_ids else set()
3743
+ based_on["mental-models"] = []
3744
+ seen_model_ids: set[str] = set()
3745
+ for tc in agent_result.tool_trace:
3746
+ if tc.tool == "get_mental_model":
3747
+ # Single model lookup (with full details)
3748
+ if tc.output.get("found") and "model" in tc.output:
3749
+ model = tc.output["model"]
3750
+ model_id = model.get("id")
3751
+ if model_id and model_id not in seen_model_ids:
3752
+ # Only include models that the agent declared as used (or all if none specified)
3753
+ if used_model_ids_set and model_id not in used_model_ids_set:
3754
+ continue # Skip models not actually used by the agent
3755
+ seen_model_ids.add(model_id)
3756
+ # Add to based_on as MemoryFact with type "mental-models"
3757
+ model_name = model.get("name", "")
3758
+ model_summary = model.get("summary") or model.get("description", "")
3759
+ based_on["mental-models"].append(
3760
+ MemoryFact(
3761
+ id=model_id,
3762
+ text=f"{model_name}: {model_summary}",
3763
+ fact_type="mental-models",
3764
+ context=f"{model.get('type', 'concept')} ({model.get('subtype', 'structural')})",
3765
+ occurred_start=None,
3766
+ occurred_end=None,
3767
+ )
3768
+ )
3769
+ elif tc.tool == "search_mental_models":
3770
+ # Search mental models - include all returned models (filtered by used_model_ids_set if specified)
3771
+ for model in tc.output.get("mental_models", []):
3772
+ model_id = model.get("id")
3773
+ if model_id and model_id not in seen_model_ids:
3774
+ # Only include models that the agent declared as used (or all if none specified)
3775
+ if used_model_ids_set and model_id not in used_model_ids_set:
3776
+ continue # Skip models not actually used by the agent
3777
+ seen_model_ids.add(model_id)
3778
+ # Add to based_on as MemoryFact with type "mental-models"
3779
+ model_name = model.get("name", "")
3780
+ model_summary = model.get("summary") or model.get("description", "")
3781
+ based_on["mental-models"].append(
3782
+ MemoryFact(
3783
+ id=model_id,
3784
+ text=f"{model_name}: {model_summary}",
3785
+ fact_type="mental-models",
3786
+ context=f"{model.get('type', 'concept')} ({model.get('subtype', 'structural')})",
3787
+ occurred_start=None,
3788
+ occurred_end=None,
3789
+ )
3790
+ )
3791
3816
+ # List all models lookup - don't add to based_on (too verbose, just a listing)
3817
+
3818
+ # Add directives to based_on["mental-models"] (they are mental models with subtype='directive')
3819
+ for directive in directives:
3820
+ # Extract summary from observations
3821
+ summary_parts: list[str] = []
3822
+ for obs in directive.get("observations", []):
3823
+ # Support both Pydantic Observation objects and dicts
3824
+ if hasattr(obs, "content"):
3825
+ content = obs.content
3826
+ title = obs.title
3827
+ else:
3828
+ content = obs.get("content", "")
3829
+ title = obs.get("title", "")
3830
+ if title and content:
3831
+ summary_parts.append(f"{title}: {content}")
3832
+ elif content:
3833
+ summary_parts.append(content)
3834
+
3835
+ # Fallback to description if no observations
3836
+ if not summary_parts and directive.get("description"):
3837
+ summary_parts.append(directive["description"])
3838
+
3839
+ directive_name = directive.get("name", "")
3840
+ directive_summary = "; ".join(summary_parts) if summary_parts else ""
3841
+ based_on["mental-models"].append(
3842
+ MemoryFact(
3843
+ id=directive.get("id", ""),
3844
+ text=f"{directive_name}: {directive_summary}",
3845
+ fact_type="mental-models",
3846
+ context="directive (directive)",
3847
+ occurred_start=None,
3848
+ occurred_end=None,
3849
+ )
3850
+ )
3851
+
3852
+ # Build directives_applied from agent result
3853
+ from hindsight_api.engine.response_models import DirectiveRef
3854
+
3855
+ directives_applied_result = [
3856
+ DirectiveRef(id=d.id, name=d.name, content=d.content) for d in agent_result.directives_applied
3857
+ ]
3858
+
3859
+ # Convert agent usage to TokenUsage format
3860
+ from hindsight_api.engine.response_models import TokenUsage
3861
+
3862
+ usage = TokenUsage(
3863
+ input_tokens=agent_result.usage.input_tokens,
3864
+ output_tokens=agent_result.usage.output_tokens,
3865
+ total_tokens=agent_result.usage.total_tokens,
3244
3866
  )
3245
- logger.info("\n" + "\n".join(log_buffer))
3246
3867
 
3247
- # Return response with facts split by type
3868
+ # Return response (compatible with existing API)
3248
3869
  result = ReflectResult(
3249
- text=answer_text,
3250
- based_on={"world": world_results, "experience": agent_results, "opinion": opinion_results},
3251
- new_opinions=[], # Opinions are being extracted asynchronously
3252
- structured_output=structured_output,
3870
+ text=agent_result.text,
3871
+ based_on=based_on,
3872
+ new_opinions=[], # Learnings stored as mental models
3873
+ structured_output=agent_result.structured_output,
3874
+ usage=usage,
3875
+ tool_trace=tool_trace_result,
3876
+ llm_trace=llm_trace_result,
3877
+ directives_applied=directives_applied_result,
3253
3878
  )
3254
3879
 
3255
3880
  # Call post-operation hook if validator is configured
@@ -3273,48 +3898,6 @@ Guidelines:
3273
3898
 
3274
3899
  return result
3275
3900
 
3276
- async def _extract_and_store_opinions_async(
3277
- self, bank_id: str, answer_text: str, query: str, tenant_id: str | None = None
3278
- ):
3279
- """
3280
- Background task to extract and store opinions from think response.
3281
-
3282
- This runs asynchronously and does not block the think response.
3283
-
3284
- Args:
3285
- bank_id: bank IDentifier
3286
- answer_text: The generated answer text
3287
- query: The original query
3288
- tenant_id: Tenant identifier for internal authentication
3289
- """
3290
- try:
3291
- # Extract opinions from the answer
3292
- new_opinions = await think_utils.extract_opinions_from_text(self._llm_config, text=answer_text, query=query)
3293
-
3294
- # Store new opinions
3295
- if new_opinions:
3296
- from datetime import datetime
3297
-
3298
- current_time = datetime.now(UTC)
3299
- # Use internal context with tenant_id for background authentication
3300
- # Extension can check internal=True to bypass normal auth
3301
- from hindsight_api.models import RequestContext
3302
-
3303
- internal_context = RequestContext(tenant_id=tenant_id, internal=True)
3304
- for opinion in new_opinions:
3305
- await self.retain_async(
3306
- bank_id=bank_id,
3307
- content=opinion.opinion,
3308
- context=f"formed during thinking about: {query}",
3309
- event_date=current_time,
3310
- fact_type_override="opinion",
3311
- confidence_score=opinion.confidence,
3312
- request_context=internal_context,
3313
- )
3314
-
3315
- except Exception as e:
3316
- logger.warning(f"[REFLECT] Failed to extract/store opinions: {str(e)}")
3317
-
3318
3901
  async def get_entity_observations(
3319
3902
  self,
3320
3903
  bank_id: str,
@@ -3324,73 +3907,69 @@ Guidelines:
3324
3907
  request_context: "RequestContext",
3325
3908
  ) -> list[Any]:
3326
3909
  """
3327
- Get observations linked to an entity.
3910
+ Get observations for an entity.
3911
+
3912
+ NOTE: Entity observations/summaries have been moved to mental models.
3913
+ This method returns an empty list. Use mental models for entity summaries.
3328
3914
 
3329
3915
  Args:
3330
3916
  bank_id: bank IDentifier
3331
3917
  entity_id: Entity UUID to get observations for
3332
- limit: Maximum number of observations to return
3918
+ limit: Ignored (kept for backwards compatibility)
3333
3919
  request_context: Request context for authentication.
3334
3920
 
3335
3921
  Returns:
3336
- List of EntityObservation objects
3922
+ Empty list (observations now in mental models)
3337
3923
  """
3338
3924
  await self._authenticate_tenant(request_context)
3339
- pool = await self._get_pool()
3340
- async with acquire_with_retry(pool) as conn:
3341
- rows = await conn.fetch(
3342
- f"""
3343
- SELECT mu.text, mu.mentioned_at
3344
- FROM {fq_table("memory_units")} mu
3345
- JOIN {fq_table("unit_entities")} ue ON mu.id = ue.unit_id
3346
- WHERE mu.bank_id = $1
3347
- AND mu.fact_type = 'observation'
3348
- AND ue.entity_id = $2
3349
- ORDER BY mu.mentioned_at DESC
3350
- LIMIT $3
3351
- """,
3352
- bank_id,
3353
- uuid.UUID(entity_id),
3354
- limit,
3355
- )
3356
-
3357
- observations = []
3358
- for row in rows:
3359
- mentioned_at = row["mentioned_at"].isoformat() if row["mentioned_at"] else None
3360
- observations.append(EntityObservation(text=row["text"], mentioned_at=mentioned_at))
3361
- return observations
3925
+ return []
3362
3926
 
3363
3927
  async def list_entities(
3364
3928
  self,
3365
3929
  bank_id: str,
3366
3930
  *,
3367
3931
  limit: int = 100,
3932
+ offset: int = 0,
3368
3933
  request_context: "RequestContext",
3369
- ) -> list[dict[str, Any]]:
3934
+ ) -> dict[str, Any]:
3370
3935
  """
3371
- List all entities for a bank.
3936
+ List all entities for a bank with pagination.
3372
3937
 
3373
3938
  Args:
3374
3939
  bank_id: bank IDentifier
3375
3940
  limit: Maximum number of entities to return
3941
+ offset: Offset for pagination
3376
3942
  request_context: Request context for authentication.
3377
3943
 
3378
3944
  Returns:
3379
- List of entity dicts with id, canonical_name, mention_count, first_seen, last_seen
3945
+ Dict with items, total, limit, offset
3380
3946
  """
3381
3947
  await self._authenticate_tenant(request_context)
3382
3948
  pool = await self._get_pool()
3383
3949
  async with acquire_with_retry(pool) as conn:
3950
+ # Get total count
3951
+ total_row = await conn.fetchrow(
3952
+ f"""
3953
+ SELECT COUNT(*) as total
3954
+ FROM {fq_table("entities")}
3955
+ WHERE bank_id = $1
3956
+ """,
3957
+ bank_id,
3958
+ )
3959
+ total = total_row["total"] if total_row else 0
3960
+
3961
+ # Get paginated entities
3384
3962
  rows = await conn.fetch(
3385
3963
  f"""
3386
3964
  SELECT id, canonical_name, mention_count, first_seen, last_seen, metadata
3387
3965
  FROM {fq_table("entities")}
3388
3966
  WHERE bank_id = $1
3389
- ORDER BY mention_count DESC, last_seen DESC
3390
- LIMIT $2
3967
+ ORDER BY mention_count DESC, last_seen DESC, id ASC
3968
+ LIMIT $2 OFFSET $3
3391
3969
  """,
3392
3970
  bank_id,
3393
3971
  limit,
3972
+ offset,
3394
3973
  )
3395
3974
 
3396
3975
  entities = []
@@ -3417,7 +3996,91 @@ Guidelines:
3417
3996
  "metadata": metadata,
3418
3997
  }
3419
3998
  )
3420
- return entities
3999
+ return {
4000
+ "items": entities,
4001
+ "total": total,
4002
+ "limit": limit,
4003
+ "offset": offset,
4004
+ }
4005
+
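The paginated response above (items/total/limit/offset) makes exhaustive listing a simple offset loop. A minimal sketch, assuming a constructed engine instance `engine`, a default-constructed `RequestContext`, and a made-up helper name:

from hindsight_api.models import RequestContext

async def iter_all_entities(engine, bank_id: str, page_size: int = 100):
    """Yield every entity in a bank by walking the limit/offset pages."""
    ctx = RequestContext()
    offset = 0
    while True:
        page = await engine.list_entities(
            bank_id, limit=page_size, offset=offset, request_context=ctx
        )
        for entity in page["items"]:
            yield entity
        offset += page_size
        if offset >= page["total"]:
            break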
4006
+ async def list_tags(
4007
+ self,
4008
+ bank_id: str,
4009
+ *,
4010
+ pattern: str | None = None,
4011
+ limit: int = 100,
4012
+ offset: int = 0,
4013
+ request_context: "RequestContext",
4014
+ ) -> dict[str, Any]:
4015
+ """
4016
+ List all unique tags for a bank with usage counts.
4017
+
4018
+ Use this to discover available tags or expand wildcard patterns.
4019
+ Supports '*' as wildcard for flexible matching (case-insensitive):
4020
+ - 'user:*' matches user:alice, user:bob
4021
+ - '*-admin' matches role-admin, super-admin
4022
+ - 'env*-prod' matches env-prod, environment-prod
4023
+
4024
+ Args:
4025
+ bank_id: Bank identifier
4026
+ pattern: Wildcard pattern to filter tags (use '*' as wildcard, case-insensitive)
4027
+ limit: Maximum number of tags to return
4028
+ offset: Offset for pagination
4029
+ request_context: Request context for authentication.
4030
+
4031
+ Returns:
4032
+ Dict with items (list of {tag, count}), total, limit, offset
4033
+ """
4034
+ await self._authenticate_tenant(request_context)
4035
+ pool = await self._get_pool()
4036
+ async with acquire_with_retry(pool) as conn:
4037
+ # Build pattern filter if provided (convert * to % for ILIKE)
4038
+ pattern_clause = ""
4039
+ params: list[Any] = [bank_id]
4040
+ if pattern:
4041
+ # Convert wildcard pattern: * -> % for SQL ILIKE
4042
+ sql_pattern = pattern.replace("*", "%")
4043
+ pattern_clause = "AND tag ILIKE $2"
4044
+ params.append(sql_pattern)
4045
+
4046
+ # Get total count of distinct tags matching pattern
4047
+ total_row = await conn.fetchrow(
4048
+ f"""
4049
+ SELECT COUNT(DISTINCT tag) as total
4050
+ FROM {fq_table("memory_units")}, unnest(tags) AS tag
4051
+ WHERE bank_id = $1 AND tags IS NOT NULL AND tags != '{{}}'
4052
+ {pattern_clause}
4053
+ """,
4054
+ *params,
4055
+ )
4056
+ total = total_row["total"] if total_row else 0
4057
+
4058
+ # Get paginated tags with counts, ordered by frequency
4059
+ limit_param = len(params) + 1
4060
+ offset_param = len(params) + 2
4061
+ params.extend([limit, offset])
4062
+
4063
+ rows = await conn.fetch(
4064
+ f"""
4065
+ SELECT tag, COUNT(*) as count
4066
+ FROM {fq_table("memory_units")}, unnest(tags) AS tag
4067
+ WHERE bank_id = $1 AND tags IS NOT NULL AND tags != '{{}}'
4068
+ {pattern_clause}
4069
+ GROUP BY tag
4070
+ ORDER BY count DESC, tag ASC
4071
+ LIMIT ${limit_param} OFFSET ${offset_param}
4072
+ """,
4073
+ *params,
4074
+ )
4075
+
4076
+ items = [{"tag": row["tag"], "count": row["count"]} for row in rows]
4077
+
4078
+ return {
4079
+ "items": items,
4080
+ "total": total,
4081
+ "limit": limit,
4082
+ "offset": offset,
4083
+ }
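The wildcard support above is a plain '*' to '%' substitution fed to ILIKE. A tiny illustrative restatement of that conversion (not a helper the engine actually defines), useful for pre-validating patterns client-side:

def wildcard_to_ilike(pattern: str) -> str:
    """Mirror the conversion used by list_tags: '*' becomes SQL '%'.

    Note: literal '%' or '_' characters are not escaped, so they keep
    their ILIKE wildcard meaning as well.
    """
    return pattern.replace("*", "%")

assert wildcard_to_ilike("user:*") == "user:%"
assert wildcard_to_ilike("env*-prod") == "env%-prod"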
3421
4084
 
3422
4085
  async def get_entity_state(
3423
4086
  self,
@@ -3429,22 +4092,23 @@ Guidelines:
3429
4092
  request_context: "RequestContext",
3430
4093
  ) -> EntityState:
3431
4094
  """
3432
- Get the current state (mental model) of an entity.
4095
+ Get the current state of an entity.
4096
+
4097
+ NOTE: Entity observations/summaries have been moved to mental models.
4098
+ This method returns an entity with empty observations.
3433
4099
 
3434
4100
  Args:
3435
4101
  bank_id: bank IDentifier
3436
4102
  entity_id: Entity UUID
3437
4103
  entity_name: Canonical name of the entity
3438
- limit: Maximum number of observations to include
4104
+ limit: Maximum number of observations to include (kept for backwards compatibility)
3439
4105
  request_context: Request context for authentication.
3440
4106
 
3441
4107
  Returns:
3442
- EntityState with observations
4108
+ EntityState with empty observations (summaries now in mental models)
3443
4109
  """
3444
- observations = await self.get_entity_observations(
3445
- bank_id, entity_id, limit=limit, request_context=request_context
3446
- )
3447
- return EntityState(entity_id=entity_id, canonical_name=entity_name, observations=observations)
4110
+ await self._authenticate_tenant(request_context)
4111
+ return EntityState(entity_id=entity_id, canonical_name=entity_name, observations=[])
3448
4112
 
3449
4113
  async def regenerate_entity_observations(
3450
4114
  self,
@@ -3455,533 +4119,1228 @@ Guidelines:
3455
4119
  version: str | None = None,
3456
4120
  conn=None,
3457
4121
  request_context: "RequestContext",
3458
- ) -> None:
4122
+ ) -> list[str]:
3459
4123
  """
3460
- Regenerate observations for an entity by:
3461
- 1. Checking version for deduplication (if provided)
3462
- 2. Searching all facts mentioning the entity
3463
- 3. Using LLM to synthesize observations (no personality)
3464
- 4. Deleting old observations for this entity
3465
- 5. Storing new observations linked to the entity
4124
+ Regenerate observations for an entity.
4125
+
4126
+ NOTE: Entity observations/summaries have been moved to mental models.
4127
+ This method is now a no-op and returns an empty list.
3466
4128
 
3467
4129
  Args:
3468
4130
  bank_id: bank IDentifier
3469
4131
  entity_id: Entity UUID
3470
4132
  entity_name: Canonical name of the entity
3471
4133
  version: Entity's last_seen timestamp when task was created (for deduplication)
3472
- conn: Optional database connection (for transactional atomicity with caller)
4134
+ conn: Optional database connection (ignored)
3473
4135
  request_context: Request context for authentication.
4136
+
4137
+ Returns:
4138
+ Empty list (observations now in mental models)
3474
4139
  """
3475
4140
  await self._authenticate_tenant(request_context)
3476
- pool = await self._get_pool()
3477
- entity_uuid = uuid.UUID(entity_id)
3478
-
3479
- # Helper to run a query with provided conn or acquire one
3480
- async def fetch_with_conn(query, *args):
3481
- if conn is not None:
3482
- return await conn.fetch(query, *args)
3483
- else:
3484
- async with acquire_with_retry(pool) as acquired_conn:
3485
- return await acquired_conn.fetch(query, *args)
4141
+ return []
3486
4142
 
3487
- async def fetchval_with_conn(query, *args):
3488
- if conn is not None:
3489
- return await conn.fetchval(query, *args)
3490
- else:
3491
- async with acquire_with_retry(pool) as acquired_conn:
3492
- return await acquired_conn.fetchval(query, *args)
4143
+ # =========================================================================
4144
+ # Statistics & Operations (for HTTP API layer)
4145
+ # =========================================================================
4146
+
4147
+ async def get_bank_stats(
4148
+ self,
4149
+ bank_id: str,
4150
+ *,
4151
+ request_context: "RequestContext",
4152
+ ) -> dict[str, Any]:
4153
+ """Get statistics about memory nodes and links for a bank."""
4154
+ await self._authenticate_tenant(request_context)
4155
+ pool = await self._get_pool()
3493
4156
 
3494
- # Step 1: Check version for deduplication
3495
- if version:
3496
- current_last_seen = await fetchval_with_conn(
4157
+ async with acquire_with_retry(pool) as conn:
4158
+ # Get node counts by fact_type
4159
+ node_stats = await conn.fetch(
4160
+ f"""
4161
+ SELECT fact_type, COUNT(*) as count
4162
+ FROM {fq_table("memory_units")}
4163
+ WHERE bank_id = $1
4164
+ GROUP BY fact_type
4165
+ """,
4166
+ bank_id,
4167
+ )
4168
+
4169
+ # Get link counts by link_type
4170
+ link_stats = await conn.fetch(
3497
4171
  f"""
3498
- SELECT last_seen
4172
+ SELECT ml.link_type, COUNT(*) as count
4173
+ FROM {fq_table("memory_links")} ml
4174
+ JOIN {fq_table("memory_units")} mu ON ml.from_unit_id = mu.id
4175
+ WHERE mu.bank_id = $1
4176
+ GROUP BY ml.link_type
4177
+ """,
4178
+ bank_id,
4179
+ )
4180
+
4181
+ # Get link counts by fact_type (from nodes)
4182
+ link_fact_type_stats = await conn.fetch(
4183
+ f"""
4184
+ SELECT mu.fact_type, COUNT(*) as count
4185
+ FROM {fq_table("memory_links")} ml
4186
+ JOIN {fq_table("memory_units")} mu ON ml.from_unit_id = mu.id
4187
+ WHERE mu.bank_id = $1
4188
+ GROUP BY mu.fact_type
4189
+ """,
4190
+ bank_id,
4191
+ )
4192
+
4193
+ # Get link counts by fact_type AND link_type
4194
+ link_breakdown_stats = await conn.fetch(
4195
+ f"""
4196
+ SELECT mu.fact_type, ml.link_type, COUNT(*) as count
4197
+ FROM {fq_table("memory_links")} ml
4198
+ JOIN {fq_table("memory_units")} mu ON ml.from_unit_id = mu.id
4199
+ WHERE mu.bank_id = $1
4200
+ GROUP BY mu.fact_type, ml.link_type
4201
+ """,
4202
+ bank_id,
4203
+ )
4204
+
4205
+ # Get pending and failed operations counts
4206
+ ops_stats = await conn.fetch(
4207
+ f"""
4208
+ SELECT status, COUNT(*) as count
4209
+ FROM {fq_table("async_operations")}
4210
+ WHERE bank_id = $1
4211
+ GROUP BY status
4212
+ """,
4213
+ bank_id,
4214
+ )
4215
+
4216
+ return {
4217
+ "bank_id": bank_id,
4218
+ "node_counts": {row["fact_type"]: row["count"] for row in node_stats},
4219
+ "link_counts": {row["link_type"]: row["count"] for row in link_stats},
4220
+ "link_counts_by_fact_type": {row["fact_type"]: row["count"] for row in link_fact_type_stats},
4221
+ "link_breakdown": [
4222
+ {"fact_type": row["fact_type"], "link_type": row["link_type"], "count": row["count"]}
4223
+ for row in link_breakdown_stats
4224
+ ],
4225
+ "operations": {row["status"]: row["count"] for row in ops_stats},
4226
+ }
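A brief sketch of consuming the stats payload, assuming `engine` and a `RequestContext` `ctx`; the helper name is made up, the keys are the ones returned above:

async def print_bank_summary(engine, bank_id: str, ctx) -> None:
    """Log a one-line summary of a bank's graph size and queue state."""
    stats = await engine.get_bank_stats(bank_id, request_context=ctx)
    total_nodes = sum(stats["node_counts"].values())
    total_links = sum(stats["link_counts"].values())
    pending = stats["operations"].get("pending", 0)
    print(f"{stats['bank_id']}: {total_nodes} nodes, {total_links} links, {pending} pending ops")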
4227
+
4228
+ async def get_entity(
4229
+ self,
4230
+ bank_id: str,
4231
+ entity_id: str,
4232
+ *,
4233
+ request_context: "RequestContext",
4234
+ ) -> dict[str, Any] | None:
4235
+ """Get entity details including metadata and observations."""
4236
+ await self._authenticate_tenant(request_context)
4237
+ pool = await self._get_pool()
4238
+
4239
+ async with acquire_with_retry(pool) as conn:
4240
+ entity_row = await conn.fetchrow(
4241
+ f"""
4242
+ SELECT id, canonical_name, mention_count, first_seen, last_seen, metadata
3499
4243
  FROM {fq_table("entities")}
3500
- WHERE id = $1 AND bank_id = $2
4244
+ WHERE bank_id = $1 AND id = $2
3501
4245
  """,
3502
- entity_uuid,
3503
4246
  bank_id,
4247
+ uuid.UUID(entity_id),
3504
4248
  )
3505
4249
 
3506
- if current_last_seen and current_last_seen.isoformat() != version:
3507
- return []
4250
+ if not entity_row:
4251
+ return None
3508
4252
 
3509
- # Step 2: Get all facts mentioning this entity (exclude observations themselves)
3510
- rows = await fetch_with_conn(
4253
+ # Get observations for the entity
4254
+ observations = await self.get_entity_observations(bank_id, entity_id, limit=20, request_context=request_context)
4255
+
4256
+ return {
4257
+ "id": str(entity_row["id"]),
4258
+ "canonical_name": entity_row["canonical_name"],
4259
+ "mention_count": entity_row["mention_count"],
4260
+ "first_seen": entity_row["first_seen"].isoformat() if entity_row["first_seen"] else None,
4261
+ "last_seen": entity_row["last_seen"].isoformat() if entity_row["last_seen"] else None,
4262
+ "metadata": entity_row["metadata"] or {},
4263
+ "observations": observations,
4264
+ }
4265
+
4266
+ def _parse_observations(self, observations_raw: list):
4267
+ """Parse raw observation dicts into typed Observation models.
4268
+
4269
+ Returns list of Observation models with computed trend/evidence_span/evidence_count.
4270
+ """
4271
+ from .reflect.observations import Observation, ObservationEvidence
4272
+
4273
+ observations: list[Observation] = []
4274
+ for obs in observations_raw:
4275
+ if not isinstance(obs, dict):
4276
+ continue
4277
+
4278
+ try:
4279
+ parsed = Observation(
4280
+ title=obs.get("title", ""),
4281
+ content=obs.get("content", ""),
4282
+ evidence=[
4283
+ ObservationEvidence(
4284
+ memory_id=ev.get("memory_id", ""),
4285
+ quote=ev.get("quote", ""),
4286
+ relevance=ev.get("relevance", ""),
4287
+ timestamp=ev.get("timestamp"),
4288
+ )
4289
+ for ev in obs.get("evidence", [])
4290
+ if isinstance(ev, dict)
4291
+ ],
4292
+ created_at=obs.get("created_at"),
4293
+ )
4294
+ observations.append(parsed)
4295
+ except Exception as e:
4296
+ logger.warning(f"Failed to parse observation: {e}")
4297
+ continue
4298
+
4299
+ return observations
4300
+
4301
+ async def _count_memories_since(
4302
+ self,
4303
+ bank_id: str,
4304
+ since_timestamp: str | None,
4305
+ pool=None,
4306
+ ) -> int:
4307
+ """
4308
+ Count memories created after a given timestamp.
4309
+
4310
+ Args:
4311
+ bank_id: Bank identifier
4312
+ since_timestamp: ISO timestamp string. If None, returns total count.
4313
+ pool: Optional database pool (uses default if not provided)
4314
+
4315
+ Returns:
4316
+ Number of memories created since the timestamp
4317
+ """
4318
+ if pool is None:
4319
+ pool = await self._get_pool()
4320
+
4321
+ async with acquire_with_retry(pool) as conn:
4322
+ if since_timestamp:
4323
+ # Parse the timestamp
4324
+ from datetime import datetime
4325
+
4326
+ try:
4327
+ ts = datetime.fromisoformat(since_timestamp.replace("Z", "+00:00"))
4328
+ except ValueError:
4329
+ # Invalid timestamp, return total count
4330
+ ts = None
4331
+
4332
+ if ts:
4333
+ count = await conn.fetchval(
4334
+ f"SELECT COUNT(*) FROM {fq_table('memory_units')} WHERE bank_id = $1 AND created_at > $2",
4335
+ bank_id,
4336
+ ts,
4337
+ )
4338
+ return count or 0
4339
+
4340
+ # No timestamp or invalid, return total count
4341
+ count = await conn.fetchval(
4342
+ f"SELECT COUNT(*) FROM {fq_table('memory_units')} WHERE bank_id = $1",
4343
+ bank_id,
4344
+ )
4345
+ return count or 0
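The replace("Z", "+00:00") shim exists because datetime.fromisoformat only accepts a trailing 'Z' from Python 3.11 onward. A standalone illustration of the same normalization and fallback:

from datetime import datetime, timezone

def parse_iso_utc(ts: str) -> datetime | None:
    """Parse an ISO-8601 timestamp, tolerating a trailing 'Z' suffix."""
    try:
        return datetime.fromisoformat(ts.replace("Z", "+00:00"))
    except ValueError:
        return None  # caller falls back to a total count, as above

assert parse_iso_utc("2024-05-01T12:00:00Z") == datetime(2024, 5, 1, 12, 0, tzinfo=timezone.utc)
assert parse_iso_utc("not-a-date") is None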
4346
+
4347
+ async def _invalidate_facts_from_mental_models(
4348
+ self,
4349
+ conn,
4350
+ bank_id: str,
4351
+ fact_ids: list[str],
4352
+ ) -> int:
4353
+ """
4354
+ Remove fact IDs from observation source_memory_ids when memories are deleted.
4355
+
4356
+ Observations are stored in memory_units with fact_type='observation'
4357
+ and have a source_memory_ids column (UUID[]) tracking their source memories.
4358
+
4359
+ Args:
4360
+ conn: Database connection
4361
+ bank_id: Bank identifier
4362
+ fact_ids: List of fact IDs to remove from observations
4363
+
4364
+ Returns:
4365
+ Number of observations updated
4366
+ """
4367
+ if not fact_ids:
4368
+ return 0
4369
+
4370
+ # Convert string IDs to UUIDs for the array comparison
4371
+ import uuid as uuid_module
4372
+
4373
+ fact_uuids = [uuid_module.UUID(fid) for fid in fact_ids]
4374
+
4375
+ # Update observations (memory_units with fact_type='observation')
4376
+ # by removing the deleted fact IDs from source_memory_ids
4377
+ # Use array subtraction: source_memory_ids - deleted_ids
4378
+ result = await conn.execute(
3511
4379
  f"""
3512
- SELECT mu.id, mu.text, mu.context, mu.occurred_start, mu.fact_type
3513
- FROM {fq_table("memory_units")} mu
3514
- JOIN {fq_table("unit_entities")} ue ON mu.id = ue.unit_id
3515
- WHERE mu.bank_id = $1
3516
- AND ue.entity_id = $2
3517
- AND mu.fact_type IN ('world', 'experience')
3518
- ORDER BY mu.occurred_start DESC
3519
- LIMIT 50
4380
+ UPDATE {fq_table("memory_units")}
4381
+ SET source_memory_ids = (
4382
+ SELECT COALESCE(array_agg(elem), ARRAY[]::uuid[])
4383
+ FROM unnest(source_memory_ids) AS elem
4384
+ WHERE elem != ALL($2::uuid[])
4385
+ ),
4386
+ updated_at = NOW()
4387
+ WHERE bank_id = $1
4388
+ AND fact_type = 'observation'
4389
+ AND source_memory_ids && $2::uuid[]
3520
4390
  """,
3521
4391
  bank_id,
3522
- entity_uuid,
4392
+ fact_uuids,
3523
4393
  )
3524
4394
 
3525
- if not rows:
3526
- return []
4395
+ # Parse the result to get number of updated rows
4396
+ updated_count = int(result.split()[-1]) if result and "UPDATE" in result else 0
4397
+ if updated_count > 0:
4398
+ logger.info(
4399
+ f"[OBSERVATIONS] Invalidated {len(fact_ids)} fact IDs from {updated_count} observations in bank {bank_id}"
4400
+ )
4401
+ return updated_count
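The UPDATE above prunes source_memory_ids inside PostgreSQL with unnest/array_agg instead of round-tripping rows through Python. For reference, the intended semantics amount to this pure-Python difference (set semantics; array_agg without ORDER BY does not guarantee element order):

import uuid

def remove_deleted_sources(source_memory_ids: list[uuid.UUID],
                           deleted_ids: list[uuid.UUID]) -> list[uuid.UUID]:
    """Keep only the source IDs that were not deleted."""
    deleted = set(deleted_ids)
    return [sid for sid in source_memory_ids if sid not in deleted]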
3527
4402
 
3528
- # Convert to MemoryFact objects for the observation extraction
3529
- facts = []
3530
- for row in rows:
3531
- occurred_start = row["occurred_start"].isoformat() if row["occurred_start"] else None
3532
- facts.append(
3533
- MemoryFact(
3534
- id=str(row["id"]),
3535
- text=row["text"],
3536
- fact_type=row["fact_type"],
3537
- context=row["context"],
3538
- occurred_start=occurred_start,
3539
- )
4403
+ # =========================================================================
4404
+ # MENTAL MODELS (CONSOLIDATED) - Read-only access to auto-consolidated mental models
4405
+ # =========================================================================
4406
+
4407
+ async def list_mental_models_consolidated(
4408
+ self,
4409
+ bank_id: str,
4410
+ *,
4411
+ tags: list[str] | None = None,
4412
+ tags_match: str = "any",
4413
+ limit: int = 100,
4414
+ offset: int = 0,
4415
+ request_context: "RequestContext",
4416
+ ) -> list[dict[str, Any]]:
4417
+ """List auto-consolidated observations for a bank.
4418
+
4419
+ Observations are stored in memory_units with fact_type='observation'.
4420
+ They are automatically created and updated by the consolidation engine.
4421
+
4422
+ Args:
4423
+ bank_id: Bank identifier
4424
+ tags: Optional tags to filter by
4425
+ tags_match: How to match tags - 'any', 'all', or 'exact'
4426
+ limit: Maximum number of results
4427
+ offset: Offset for pagination
4428
+ request_context: Request context for authentication
4429
+
4430
+ Returns:
4431
+ List of observation dicts
4432
+ """
4433
+ await self._authenticate_tenant(request_context)
4434
+ pool = await self._get_pool()
4435
+
4436
+ async with acquire_with_retry(pool) as conn:
4437
+ # Build tag filter
4438
+ tag_filter = ""
4439
+ params: list[Any] = [bank_id, limit, offset]
4440
+ if tags:
4441
+ if tags_match == "all":
4442
+ tag_filter = " AND tags @> $4::varchar[]"
4443
+ elif tags_match == "exact":
4444
+ tag_filter = " AND tags = $4::varchar[]"
4445
+ else: # any
4446
+ tag_filter = " AND tags && $4::varchar[]"
4447
+ params.append(tags)
4448
+
4449
+ rows = await conn.fetch(
4450
+ f"""
4451
+ SELECT id, bank_id, text, proof_count, history, tags, source_memory_ids, created_at, updated_at
4452
+ FROM {fq_table("memory_units")}
4453
+ WHERE bank_id = $1 AND fact_type = 'observation' {tag_filter}
4454
+ ORDER BY updated_at DESC NULLS LAST
4455
+ LIMIT $2 OFFSET $3
4456
+ """,
4457
+ *params,
3540
4458
  )
3541
4459
 
3542
- # Step 3: Extract observations using LLM (no personality)
3543
- observations = await observation_utils.extract_observations_from_facts(self._llm_config, entity_name, facts)
4460
+ return [self._row_to_observation_consolidated(row) for row in rows]
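The same tags_match dispatch ('any', 'all', 'exact') recurs in list_mental_models and list_directives below; as an aid, the mapping to PostgreSQL array operators can be stated on its own. Illustrative only, not a helper the engine defines:

def tag_filter_operator(tags_match: str) -> str:
    """Map a tags_match mode to the array operator used in the SQL above."""
    return {
        "all": "@>",   # row tags must contain every requested tag
        "exact": "=",  # row tags must equal the requested list
    }.get(tags_match, "&&")  # default 'any': overlap with requested tags

assert tag_filter_operator("any") == "&&"
assert tag_filter_operator("all") == "@>"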
3544
4461
 
3545
- if not observations:
3546
- return []
4462
+ async def get_observation_consolidated(
4463
+ self,
4464
+ bank_id: str,
4465
+ observation_id: str,
4466
+ *,
4467
+ include_source_memories: bool = True,
4468
+ request_context: "RequestContext",
4469
+ ) -> dict[str, Any] | None:
4470
+ """Get a single observation by ID.
4471
+
4472
+ Args:
4473
+ bank_id: Bank identifier
4474
+ observation_id: Observation ID
4475
+ include_source_memories: Whether to include full source memory details
4476
+ request_context: Request context for authentication
4477
+
4478
+ Returns:
4479
+ Observation dict or None if not found
4480
+ """
4481
+ await self._authenticate_tenant(request_context)
4482
+ pool = await self._get_pool()
3547
4483
 
3548
- # Step 4: Delete old observations and insert new ones
3549
- # If conn provided, we're already in a transaction - don't start another
3550
- # If conn is None, acquire one and start a transaction
3551
- async def do_db_operations(db_conn):
3552
- # Delete old observations for this entity
3553
- await db_conn.execute(
4484
+ async with acquire_with_retry(pool) as conn:
4485
+ row = await conn.fetchrow(
3554
4486
  f"""
3555
- DELETE FROM {fq_table("memory_units")}
3556
- WHERE id IN (
3557
- SELECT mu.id
3558
- FROM {fq_table("memory_units")} mu
3559
- JOIN {fq_table("unit_entities")} ue ON mu.id = ue.unit_id
3560
- WHERE mu.bank_id = $1
3561
- AND mu.fact_type = 'observation'
3562
- AND ue.entity_id = $2
3563
- )
4487
+ SELECT id, bank_id, text, proof_count, history, tags, source_memory_ids, created_at, updated_at
4488
+ FROM {fq_table("memory_units")}
4489
+ WHERE bank_id = $1 AND id = $2 AND fact_type = 'observation'
3564
4490
  """,
3565
4491
  bank_id,
3566
- entity_uuid,
4492
+ observation_id,
3567
4493
  )
3568
4494
 
3569
- # Generate embeddings for new observations
3570
- embeddings = await embedding_utils.generate_embeddings_batch(self.embeddings, observations)
4495
+ if not row:
4496
+ return None
3571
4497
 
3572
- # Insert new observations
3573
- current_time = utcnow()
3574
- created_ids = []
4498
+ result = self._row_to_observation_consolidated(row)
3575
4499
 
3576
- for obs_text, embedding in zip(observations, embeddings):
3577
- result = await db_conn.fetchrow(
4500
+ # Fetch source memories if requested and source_memory_ids exist
4501
+ if include_source_memories and result.get("source_memory_ids"):
4502
+ source_ids = [uuid.UUID(sid) if isinstance(sid, str) else sid for sid in result["source_memory_ids"]]
4503
+ source_rows = await conn.fetch(
3578
4504
  f"""
3579
- INSERT INTO {fq_table("memory_units")} (
3580
- bank_id, text, embedding, context, event_date,
3581
- occurred_start, occurred_end, mentioned_at,
3582
- fact_type, access_count
3583
- )
3584
- VALUES ($1, $2, $3, $4, $5, $6, $7, $8, 'observation', 0)
3585
- RETURNING id
4505
+ SELECT id, text, fact_type, context, occurred_start, mentioned_at
4506
+ FROM {fq_table("memory_units")}
4507
+ WHERE id = ANY($1::uuid[])
4508
+ ORDER BY mentioned_at DESC NULLS LAST
3586
4509
  """,
3587
- bank_id,
3588
- obs_text,
3589
- str(embedding),
3590
- f"observation about {entity_name}",
3591
- current_time,
3592
- current_time,
3593
- current_time,
3594
- current_time,
4510
+ source_ids,
3595
4511
  )
3596
- obs_id = str(result["id"])
3597
- created_ids.append(obs_id)
4512
+ result["source_memories"] = [
4513
+ {
4514
+ "id": str(r["id"]),
4515
+ "text": r["text"],
4516
+ "type": r["fact_type"],
4517
+ "context": r["context"],
4518
+ "occurred_start": r["occurred_start"].isoformat() if r["occurred_start"] else None,
4519
+ "mentioned_at": r["mentioned_at"].isoformat() if r["mentioned_at"] else None,
4520
+ }
4521
+ for r in source_rows
4522
+ ]
3598
4523
 
3599
- # Link observation to entity
3600
- await db_conn.execute(
3601
- f"""
3602
- INSERT INTO {fq_table("unit_entities")} (unit_id, entity_id)
3603
- VALUES ($1, $2)
3604
- """,
3605
- uuid.UUID(obs_id),
3606
- entity_uuid,
3607
- )
4524
+ return result
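A usage sketch for the single-observation read path, assuming `engine` and a `RequestContext` `ctx`; with include_source_memories=True the result carries the source_memories list built above:

async def show_observation(engine, bank_id: str, observation_id: str, ctx) -> None:
    """Fetch one consolidated observation and print its provenance."""
    obs = await engine.get_observation_consolidated(
        bank_id, observation_id, include_source_memories=True, request_context=ctx
    )
    if obs is None:
        print("observation not found")
        return
    print(obs["text"])
    for mem in obs["source_memories"]:
        print(f"  <- {mem['type']}: {mem['text'][:80]}")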
3608
4525
 
3609
- return created_ids
4526
+ def _row_to_observation_consolidated(self, row: Any) -> dict[str, Any]:
4527
+ """Convert a database row to an observation dict."""
4528
+ import json
3610
4529
 
3611
- if conn is not None:
3612
- # Use provided connection (already in a transaction)
3613
- return await do_db_operations(conn)
3614
- else:
3615
- # Acquire connection and start our own transaction
3616
- async with acquire_with_retry(pool) as acquired_conn:
3617
- async with acquired_conn.transaction():
3618
- return await do_db_operations(acquired_conn)
4530
+ history = row["history"]
4531
+ if isinstance(history, str):
4532
+ history = json.loads(history)
4533
+ elif history is None:
4534
+ history = []
3619
4535
 
3620
- async def _regenerate_observations_sync(
4536
+ # Convert source_memory_ids to strings
4537
+ source_memory_ids = row.get("source_memory_ids") or []
4538
+ source_memory_ids = [str(sid) for sid in source_memory_ids]
4539
+
4540
+ return {
4541
+ "id": str(row["id"]),
4542
+ "bank_id": row["bank_id"],
4543
+ "text": row["text"],
4544
+ "proof_count": row["proof_count"] or 1,
4545
+ "history": history,
4546
+ "tags": row["tags"] or [],
4547
+ "source_memory_ids": source_memory_ids,
4548
+ "source_memories": [], # Populated separately when fetching full details
4549
+ "created_at": row["created_at"].isoformat() if row["created_at"] else None,
4550
+ "updated_at": row["updated_at"].isoformat() if row["updated_at"] else None,
4551
+ }
4552
+
4553
+ # =========================================================================
4554
+ # MENTAL MODELS CRUD
4555
+ # =========================================================================
4556
+
4557
+ async def list_mental_models(
3621
4558
  self,
3622
4559
  bank_id: str,
3623
- entity_ids: list[str],
3624
- min_facts: int | None = None,
3625
- conn=None,
3626
- request_context: "RequestContext | None" = None,
3627
- ) -> None:
3628
- """
3629
- Regenerate observations for entities synchronously (called during retain).
3630
-
3631
- Processes entities in PARALLEL for faster execution.
4560
+ *,
4561
+ tags: list[str] | None = None,
4562
+ tags_match: str = "any",
4563
+ limit: int = 100,
4564
+ offset: int = 0,
4565
+ request_context: "RequestContext",
4566
+ ) -> list[dict[str, Any]]:
4567
+ """List pinned mental models for a bank.
3632
4568
 
3633
4569
  Args:
3634
4570
  bank_id: Bank identifier
3635
- entity_ids: List of entity IDs to process
3636
- min_facts: Minimum facts required to regenerate observations (uses config default if None)
3637
- conn: Optional database connection (for transactional atomicity)
3638
- """
3639
- if not bank_id or not entity_ids:
3640
- return
4571
+ tags: Optional tags to filter by
4572
+ tags_match: How to match tags - 'any', 'all', or 'exact'
4573
+ limit: Maximum number of results
4574
+ offset: Offset for pagination
4575
+ request_context: Request context for authentication
3641
4576
 
3642
- # Use config default if min_facts not specified
3643
- if min_facts is None:
3644
- min_facts = get_config().observation_min_facts
4577
+ Returns:
4578
+ List of pinned mental model dicts
4579
+ """
4580
+ await self._authenticate_tenant(request_context)
4581
+ pool = await self._get_pool()
3645
4582
 
3646
- # Convert to UUIDs
3647
- entity_uuids = [uuid.UUID(eid) if isinstance(eid, str) else eid for eid in entity_ids]
4583
+ async with acquire_with_retry(pool) as conn:
4584
+ # Build tag filter
4585
+ tag_filter = ""
4586
+ params: list[Any] = [bank_id, limit, offset]
4587
+ if tags:
4588
+ if tags_match == "all":
4589
+ tag_filter = " AND tags @> $4::varchar[]"
4590
+ elif tags_match == "exact":
4591
+ tag_filter = " AND tags = $4::varchar[]"
4592
+ else: # any
4593
+ tag_filter = " AND tags && $4::varchar[]"
4594
+ params.append(tags)
3648
4595
 
3649
- # Use provided connection or acquire a new one
3650
- if conn is not None:
3651
- # Use the provided connection (transactional with caller)
3652
- entity_rows = await conn.fetch(
4596
+ rows = await conn.fetch(
3653
4597
  f"""
3654
- SELECT id, canonical_name FROM {fq_table("entities")}
3655
- WHERE id = ANY($1) AND bank_id = $2
4598
+ SELECT id, bank_id, name, source_query, content, tags,
4599
+ last_refreshed_at, created_at, reflect_response,
4600
+ max_tokens, trigger
4601
+ FROM {fq_table("mental_models")}
4602
+ WHERE bank_id = $1 {tag_filter}
4603
+ ORDER BY last_refreshed_at DESC
4604
+ LIMIT $2 OFFSET $3
3656
4605
  """,
3657
- entity_uuids,
3658
- bank_id,
4606
+ *params,
3659
4607
  )
3660
- entity_names = {row["id"]: row["canonical_name"] for row in entity_rows}
3661
4608
 
3662
- fact_counts = await conn.fetch(
4609
+ return [self._row_to_mental_model(row) for row in rows]
4610
+
4611
+ async def get_mental_model(
4612
+ self,
4613
+ bank_id: str,
4614
+ mental_model_id: str,
4615
+ *,
4616
+ request_context: "RequestContext",
4617
+ ) -> dict[str, Any] | None:
4618
+ """Get a single pinned mental model by ID.
4619
+
4620
+ Args:
4621
+ bank_id: Bank identifier
4622
+ mental_model_id: Pinned mental model UUID
4623
+ request_context: Request context for authentication
4624
+
4625
+ Returns:
4626
+ Pinned mental model dict or None if not found
4627
+ """
4628
+ await self._authenticate_tenant(request_context)
4629
+ pool = await self._get_pool()
4630
+
4631
+ async with acquire_with_retry(pool) as conn:
4632
+ row = await conn.fetchrow(
3663
4633
  f"""
3664
- SELECT ue.entity_id, COUNT(*) as cnt
3665
- FROM {fq_table("unit_entities")} ue
3666
- JOIN {fq_table("memory_units")} mu ON ue.unit_id = mu.id
3667
- WHERE ue.entity_id = ANY($1) AND mu.bank_id = $2
3668
- GROUP BY ue.entity_id
4634
+ SELECT id, bank_id, name, source_query, content, tags,
4635
+ last_refreshed_at, created_at, reflect_response,
4636
+ max_tokens, trigger
4637
+ FROM {fq_table("mental_models")}
4638
+ WHERE bank_id = $1 AND id = $2
3669
4639
  """,
3670
- entity_uuids,
3671
4640
  bank_id,
4641
+ mental_model_id,
3672
4642
  )
3673
- entity_fact_counts = {row["entity_id"]: row["cnt"] for row in fact_counts}
3674
- else:
3675
- # Acquire a new connection (standalone call)
3676
- pool = await self._get_pool()
3677
- async with pool.acquire() as acquired_conn:
3678
- entity_rows = await acquired_conn.fetch(
4643
+
4644
+ return self._row_to_mental_model(row) if row else None
4645
+
4646
+ async def create_mental_model(
4647
+ self,
4648
+ bank_id: str,
4649
+ name: str,
4650
+ source_query: str,
4651
+ content: str,
4652
+ *,
4653
+ mental_model_id: str | None = None,
4654
+ tags: list[str] | None = None,
4655
+ max_tokens: int | None = None,
4656
+ trigger: dict[str, Any] | None = None,
4657
+ request_context: "RequestContext",
4658
+ ) -> dict[str, Any]:
4659
+ """Create a new pinned mental model.
4660
+
4661
+ Args:
4662
+ bank_id: Bank identifier
4663
+ name: Human-readable name for the mental model
4664
+ source_query: The query that generated this mental model
4665
+ content: The synthesized content
4666
+ mental_model_id: Optional UUID for the mental model (auto-generated if not provided)
4667
+ tags: Optional tags for scoped visibility
4668
+ max_tokens: Token limit for content generation during refresh
4669
+ trigger: Trigger settings (e.g., refresh_after_consolidation)
4670
+ request_context: Request context for authentication
4671
+
4672
+ Returns:
4673
+ The created pinned mental model dict
4674
+ """
4675
+ await self._authenticate_tenant(request_context)
4676
+ pool = await self._get_pool()
4677
+
4678
+ # Generate embedding for the content
4679
+ embedding_text = f"{name} {content}"
4680
+ embedding = await embedding_utils.generate_embeddings_batch(self.embeddings, [embedding_text])
4681
+ # Convert embedding to string for asyncpg vector type
4682
+ embedding_str = str(embedding[0]) if embedding else None
4683
+
4684
+ async with acquire_with_retry(pool) as conn:
4685
+ if mental_model_id:
4686
+ row = await conn.fetchrow(
3679
4687
  f"""
3680
- SELECT id, canonical_name FROM {fq_table("entities")}
3681
- WHERE id = ANY($1) AND bank_id = $2
4688
+ INSERT INTO {fq_table("mental_models")}
4689
+ (id, bank_id, name, source_query, content, embedding, tags, max_tokens, trigger)
4690
+ VALUES ($1, $2, $3, $4, $5, $6, $7, COALESCE($8, 2048), COALESCE($9, '{{"refresh_after_consolidation": false}}'::jsonb))
4691
+ RETURNING id, bank_id, name, source_query, content, tags,
4692
+ last_refreshed_at, created_at, reflect_response,
4693
+ max_tokens, trigger
3682
4694
  """,
3683
- entity_uuids,
4695
+ mental_model_id,
3684
4696
  bank_id,
4697
+ name,
4698
+ source_query,
4699
+ content,
4700
+ embedding_str,
4701
+ tags or [],
4702
+ max_tokens,
4703
+ json.dumps(trigger) if trigger else None,
3685
4704
  )
3686
- entity_names = {row["id"]: row["canonical_name"] for row in entity_rows}
3687
-
3688
- fact_counts = await acquired_conn.fetch(
4705
+ else:
4706
+ row = await conn.fetchrow(
3689
4707
  f"""
3690
- SELECT ue.entity_id, COUNT(*) as cnt
3691
- FROM {fq_table("unit_entities")} ue
3692
- JOIN {fq_table("memory_units")} mu ON ue.unit_id = mu.id
3693
- WHERE ue.entity_id = ANY($1) AND mu.bank_id = $2
3694
- GROUP BY ue.entity_id
4708
+ INSERT INTO {fq_table("mental_models")}
4709
+ (bank_id, name, source_query, content, embedding, tags, max_tokens, trigger)
4710
+ VALUES ($1, $2, $3, $4, $5, $6, COALESCE($7, 2048), COALESCE($8, '{{"refresh_after_consolidation": false}}'::jsonb))
4711
+ RETURNING id, bank_id, name, source_query, content, tags,
4712
+ last_refreshed_at, created_at, reflect_response,
4713
+ max_tokens, trigger
3695
4714
  """,
3696
- entity_uuids,
3697
4715
  bank_id,
4716
+ name,
4717
+ source_query,
4718
+ content,
4719
+ embedding_str,
4720
+ tags or [],
4721
+ max_tokens,
4722
+ json.dumps(trigger) if trigger else None,
3698
4723
  )
3699
- entity_fact_counts = {row["entity_id"]: row["cnt"] for row in fact_counts}
3700
4724
 
3701
- # Filter entities that meet the threshold
3702
- entities_to_process = []
3703
- for entity_id in entity_ids:
3704
- entity_uuid = uuid.UUID(entity_id) if isinstance(entity_id, str) else entity_id
3705
- if entity_uuid not in entity_names:
3706
- continue
3707
- fact_count = entity_fact_counts.get(entity_uuid, 0)
3708
- if fact_count >= min_facts:
3709
- entities_to_process.append((entity_id, entity_names[entity_uuid]))
4725
+ logger.info(f"[MENTAL_MODELS] Created pinned mental model '{name}' for bank {bank_id}")
4726
+ return self._row_to_mental_model(row)
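A minimal creation sketch, assuming `engine` and a `RequestContext` `ctx`; when max_tokens and trigger are omitted they fall back to the COALESCE defaults above (2048 tokens, refresh_after_consolidation false). The helper name and values are made up:

async def pin_roadmap_model(engine, bank_id: str, ctx) -> dict:
    """Create a pinned mental model that can later be refreshed."""
    return await engine.create_mental_model(
        bank_id,
        name="Q3 roadmap themes",
        source_query="What are the recurring themes in our Q3 planning discussions?",
        content="(initial synthesis goes here)",
        tags=["team:planning"],
        trigger={"refresh_after_consolidation": True},
        request_context=ctx,
    )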
3710
4727
 
3711
- if not entities_to_process:
3712
- return
4728
+ async def refresh_mental_model(
4729
+ self,
4730
+ bank_id: str,
4731
+ mental_model_id: str,
4732
+ *,
4733
+ request_context: "RequestContext",
4734
+ ) -> dict[str, Any] | None:
4735
+ """Refresh a pinned mental model by re-running its source query.
3713
4736
 
3714
- # Use internal context if not provided (for internal/background calls)
3715
- from hindsight_api.models import RequestContext as RC
4737
+ This method:
4738
+ 1. Gets the pinned mental model
4739
+ 2. Runs the source_query through reflect
4740
+ 3. Updates the content with the new synthesis
4741
+ 4. Updates last_refreshed_at
3716
4742
 
3717
- ctx = request_context if request_context is not None else RC()
4743
+ Args:
4744
+ bank_id: Bank identifier
4745
+ mental_model_id: Pinned mental model UUID
4746
+ request_context: Request context for authentication
3718
4747
 
3719
- # Process all entities in PARALLEL (LLM calls are the bottleneck)
3720
- async def process_entity(entity_id: str, entity_name: str):
3721
- try:
3722
- await self.regenerate_entity_observations(
3723
- bank_id, entity_id, entity_name, version=None, conn=conn, request_context=ctx
3724
- )
3725
- except Exception as e:
3726
- logger.error(f"[OBSERVATIONS] Error processing entity {entity_id}: {e}")
4748
+ Returns:
4749
+ Updated pinned mental model dict or None if not found
4750
+ """
4751
+ await self._authenticate_tenant(request_context)
4752
+
4753
+ # Get the current mental model
4754
+ mental_model = await self.get_mental_model(bank_id, mental_model_id, request_context=request_context)
4755
+ if not mental_model:
4756
+ return None
4757
+
4758
+ # Run reflect with the source query, excluding the mental model being refreshed
4759
+ reflect_result = await self.reflect_async(
4760
+ bank_id=bank_id,
4761
+ query=mental_model["source_query"],
4762
+ request_context=request_context,
4763
+ exclude_mental_model_ids=[mental_model_id],
4764
+ )
4765
+
4766
+ # Build reflect_response payload to store
4767
+ reflect_response_payload = {
4768
+ "text": reflect_result.text,
4769
+ "based_on": {
4770
+ fact_type: [
4771
+ {
4772
+ "id": str(fact.id),
4773
+ "text": fact.text,
4774
+ "type": fact_type,
4775
+ }
4776
+ for fact in facts
4777
+ ]
4778
+ for fact_type, facts in reflect_result.based_on.items()
4779
+ },
4780
+ "mental_models": [], # Mental models are included in based_on["mental-models"]
4781
+ }
3727
4782
 
3728
- await asyncio.gather(*[process_entity(eid, name) for eid, name in entities_to_process])
4783
+ # Update the mental model with new content and reflect_response
4784
+ return await self.update_mental_model(
4785
+ bank_id,
4786
+ mental_model_id,
4787
+ content=reflect_result.text,
4788
+ reflect_response=reflect_response_payload,
4789
+ request_context=request_context,
4790
+ )
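A sketch of driving the refresh path, assuming `engine` and `ctx`; the stored reflect_response mirrors the payload assembled above, so the grounding can be inspected after the fact:

async def refresh_and_report(engine, bank_id: str, mental_model_id: str, ctx) -> None:
    """Re-run a pinned model's source query and report how it was grounded."""
    updated = await engine.refresh_mental_model(bank_id, mental_model_id, request_context=ctx)
    if updated is None:
        print("mental model not found")
        return
    based_on = (updated.get("reflect_response") or {}).get("based_on", {})
    counts = {fact_type: len(facts) for fact_type, facts in based_on.items()}
    print(f"refreshed at {updated['last_refreshed_at']}; grounded in {counts}")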
4791
+
4792
+ async def update_mental_model(
4793
+ self,
4794
+ bank_id: str,
4795
+ mental_model_id: str,
4796
+ *,
4797
+ name: str | None = None,
4798
+ content: str | None = None,
4799
+ source_query: str | None = None,
4800
+ max_tokens: int | None = None,
4801
+ tags: list[str] | None = None,
4802
+ trigger: dict[str, Any] | None = None,
4803
+ reflect_response: dict[str, Any] | None = None,
4804
+ request_context: "RequestContext",
4805
+ ) -> dict[str, Any] | None:
4806
+ """Update a pinned mental model.
4807
+
4808
+ Args:
4809
+ bank_id: Bank identifier
4810
+ mental_model_id: Pinned mental model UUID
4811
+ name: New name (if changing)
4812
+ content: New content (if changing)
4813
+ source_query: New source query (if changing)
4814
+ max_tokens: New max tokens (if changing)
4815
+ tags: New tags (if changing)
4816
+ trigger: New trigger settings (if changing)
4817
+ reflect_response: Full reflect API response payload (if changing)
4818
+ request_context: Request context for authentication
3729
4819
 
3730
- async def _handle_regenerate_observations(self, task_dict: dict[str, Any]):
4820
+ Returns:
4821
+ Updated pinned mental model dict or None if not found
3731
4822
  """
3732
- Handler for regenerate_observations tasks.
4823
+ await self._authenticate_tenant(request_context)
4824
+ pool = await self._get_pool()
4825
+
4826
+ async with acquire_with_retry(pool) as conn:
4827
+ # Build dynamic update
4828
+ updates = []
4829
+ params: list[Any] = [bank_id, mental_model_id]
4830
+ param_idx = 3
4831
+
4832
+ if name is not None:
4833
+ updates.append(f"name = ${param_idx}")
4834
+ params.append(name)
4835
+ param_idx += 1
4836
+
4837
+ if content is not None:
4838
+ updates.append(f"content = ${param_idx}")
4839
+ params.append(content)
4840
+ param_idx += 1
4841
+ updates.append("last_refreshed_at = NOW()")
4842
+ # Also update embedding (convert to string for asyncpg vector type)
4843
+ embedding_text = f"{name or ''} {content}"
4844
+ embedding = await embedding_utils.generate_embeddings_batch(self.embeddings, [embedding_text])
4845
+ if embedding:
4846
+ updates.append(f"embedding = ${param_idx}")
4847
+ params.append(str(embedding[0]))
4848
+ param_idx += 1
4849
+
4850
+ if reflect_response is not None:
4851
+ updates.append(f"reflect_response = ${param_idx}")
4852
+ params.append(json.dumps(reflect_response))
4853
+ param_idx += 1
4854
+
4855
+ if source_query is not None:
4856
+ updates.append(f"source_query = ${param_idx}")
4857
+ params.append(source_query)
4858
+ param_idx += 1
4859
+
4860
+ if max_tokens is not None:
4861
+ updates.append(f"max_tokens = ${param_idx}")
4862
+ params.append(max_tokens)
4863
+ param_idx += 1
4864
+
4865
+ if tags is not None:
4866
+ updates.append(f"tags = ${param_idx}")
4867
+ params.append(tags)
4868
+ param_idx += 1
4869
+
4870
+ if trigger is not None:
4871
+ updates.append(f"trigger = ${param_idx}")
4872
+ params.append(json.dumps(trigger))
4873
+ param_idx += 1
4874
+
4875
+ if not updates:
4876
+ return None
4877
+
4878
+ query = f"""
4879
+ UPDATE {fq_table("mental_models")}
4880
+ SET {", ".join(updates)}
4881
+ WHERE bank_id = $1 AND id = $2
4882
+ RETURNING id, bank_id, name, source_query, content, tags,
4883
+ last_refreshed_at, created_at, reflect_response,
4884
+ max_tokens, trigger
4885
+ """
4886
+
4887
+ row = await conn.fetchrow(query, *params)
4888
+
4889
+ return self._row_to_mental_model(row) if row else None
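Because the SET clause is assembled only from the fields that were passed, partial updates leave everything else (including the embedding, unless content changes) untouched. A tags-only update sketch, assuming `engine` and `ctx`:

async def retag_mental_model(engine, bank_id: str, mental_model_id: str, ctx) -> dict | None:
    """Change only the tags; content, embedding and source_query are left as-is."""
    return await engine.update_mental_model(
        bank_id,
        mental_model_id,
        tags=["team:planning", "visibility:org"],
        request_context=ctx,
    )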
4890
+
4891
+ async def delete_mental_model(
4892
+ self,
4893
+ bank_id: str,
4894
+ mental_model_id: str,
4895
+ *,
4896
+ request_context: "RequestContext",
4897
+ ) -> bool:
4898
+ """Delete a pinned mental model.
3733
4899
 
3734
4900
  Args:
3735
- task_dict: Dict with 'bank_id' and either:
3736
- - 'entity_ids' (list): Process multiple entities
3737
- - 'entity_id', 'entity_name': Process single entity (legacy)
4901
+ bank_id: Bank identifier
4902
+ mental_model_id: Pinned mental model UUID
4903
+ request_context: Request context for authentication
3738
4904
 
3739
- Raises:
3740
- ValueError: If required fields are missing
3741
- Exception: Any exception from regenerate_entity_observations (propagates to execute_task for retry)
4905
+ Returns:
4906
+ True if deleted, False if not found
3742
4907
  """
3743
- bank_id = task_dict.get("bank_id")
3744
- # Use internal request context for background tasks
3745
- from hindsight_api.models import RequestContext
4908
+ await self._authenticate_tenant(request_context)
4909
+ pool = await self._get_pool()
3746
4910
 
3747
- internal_context = RequestContext()
4911
+ async with acquire_with_retry(pool) as conn:
4912
+ result = await conn.execute(
4913
+ f"DELETE FROM {fq_table('mental_models')} WHERE bank_id = $1 AND id = $2",
4914
+ bank_id,
4915
+ mental_model_id,
4916
+ )
3748
4917
 
3749
- # New format: multiple entity_ids
3750
- if "entity_ids" in task_dict:
3751
- entity_ids = task_dict.get("entity_ids", [])
3752
- min_facts = task_dict.get("min_facts", 5)
4918
+ return result == "DELETE 1"
3753
4919
 
3754
- if not bank_id or not entity_ids:
3755
- raise ValueError(f"[OBSERVATIONS] Missing required fields in task: {task_dict}")
4920
+ def _row_to_mental_model(self, row) -> dict[str, Any]:
4921
+ """Convert a database row to a mental model dict."""
4922
+ reflect_response = row.get("reflect_response")
4923
+ # Parse JSON string to dict if needed (asyncpg may return JSONB as string)
4924
+ if isinstance(reflect_response, str):
4925
+ try:
4926
+ reflect_response = json.loads(reflect_response)
4927
+ except json.JSONDecodeError:
4928
+ reflect_response = None
4929
+ trigger = row.get("trigger")
4930
+ if isinstance(trigger, str):
4931
+ try:
4932
+ trigger = json.loads(trigger)
4933
+ except json.JSONDecodeError:
4934
+ trigger = None
4935
+ return {
4936
+ "id": str(row["id"]),
4937
+ "bank_id": row["bank_id"],
4938
+ "name": row["name"],
4939
+ "source_query": row["source_query"],
4940
+ "content": row["content"],
4941
+ "tags": row["tags"] or [],
4942
+ "max_tokens": row.get("max_tokens"),
4943
+ "trigger": trigger,
4944
+ "last_refreshed_at": row["last_refreshed_at"].isoformat() if row["last_refreshed_at"] else None,
4945
+ "created_at": row["created_at"].isoformat() if row["created_at"] else None,
4946
+ "reflect_response": reflect_response,
4947
+ }
3756
4948
 
3757
- # Process each entity
3758
- pool = await self._get_pool()
3759
- async with pool.acquire() as conn:
3760
- for entity_id in entity_ids:
3761
- try:
3762
- # Fetch entity name and check fact count
3763
- import uuid as uuid_module
4949
+ # =========================================================================
4950
+ # Directives - Hard rules injected into prompts
4951
+ # =========================================================================
3764
4952
 
3765
- entity_uuid = uuid_module.UUID(entity_id) if isinstance(entity_id, str) else entity_id
4953
+ async def list_directives(
4954
+ self,
4955
+ bank_id: str,
4956
+ *,
4957
+ tags: list[str] | None = None,
4958
+ tags_match: str = "any",
4959
+ active_only: bool = True,
4960
+ limit: int = 100,
4961
+ offset: int = 0,
4962
+ request_context: "RequestContext",
4963
+ ) -> list[dict[str, Any]]:
4964
+ """List directives for a bank.
3766
4965
 
3767
- # First check if entity exists
3768
- entity_exists = await conn.fetchrow(
3769
- f"SELECT canonical_name FROM {fq_table('entities')} WHERE id = $1 AND bank_id = $2",
3770
- entity_uuid,
3771
- bank_id,
3772
- )
4966
+ Args:
4967
+ bank_id: Bank identifier
4968
+ tags: Optional tags to filter by
4969
+ tags_match: How to match tags - 'any', 'all', or 'exact'
4970
+ active_only: Only return active directives (default True)
4971
+ limit: Maximum number of results
4972
+ offset: Offset for pagination
4973
+ request_context: Request context for authentication
3773
4974
 
3774
- if not entity_exists:
3775
- logger.debug(f"[OBSERVATIONS] Entity {entity_id} not yet in bank {bank_id}, skipping")
3776
- continue
4975
+ Returns:
4976
+ List of directive dicts
4977
+ """
4978
+ await self._authenticate_tenant(request_context)
4979
+ pool = await self._get_pool()
3777
4980
 
3778
- entity_name = entity_exists["canonical_name"]
4981
+ async with acquire_with_retry(pool) as conn:
4982
+ # Build filters
4983
+ filters = ["bank_id = $1"]
4984
+ params: list[Any] = [bank_id]
4985
+ param_idx = 2
4986
+
4987
+ if active_only:
4988
+ filters.append("is_active = TRUE")
4989
+
4990
+ if tags:
4991
+ if tags_match == "all":
4992
+ filters.append(f"tags @> ${param_idx}::varchar[]")
4993
+ elif tags_match == "exact":
4994
+ filters.append(f"tags = ${param_idx}::varchar[]")
4995
+ else: # any
4996
+ filters.append(f"tags && ${param_idx}::varchar[]")
4997
+ params.append(tags)
4998
+ param_idx += 1
4999
+
5000
+ params.extend([limit, offset])
3779
5001
 
3780
- # Count facts linked to this entity
3781
- fact_count = (
3782
- await conn.fetchval(
3783
- f"SELECT COUNT(*) FROM {fq_table('unit_entities')} WHERE entity_id = $1",
3784
- entity_uuid,
3785
- )
3786
- or 0
3787
- )
5002
+ rows = await conn.fetch(
5003
+ f"""
5004
+ SELECT id, bank_id, name, content, priority, is_active, tags, created_at, updated_at
5005
+ FROM {fq_table("directives")}
5006
+ WHERE {" AND ".join(filters)}
5007
+ ORDER BY priority DESC, created_at DESC
5008
+ LIMIT ${param_idx} OFFSET ${param_idx + 1}
5009
+ """,
5010
+ *params,
5011
+ )
3788
5012
 
3789
- # Only regenerate if entity has enough facts
3790
- if fact_count >= min_facts:
3791
- await self.regenerate_entity_observations(
3792
- bank_id, entity_id, entity_name, version=None, request_context=internal_context
3793
- )
3794
- else:
3795
- logger.debug(
3796
- f"[OBSERVATIONS] Skipping {entity_name} ({fact_count} facts < {min_facts} threshold)"
3797
- )
5013
+ return [self._row_to_directive(row) for row in rows]
3798
5014
 
3799
- except Exception as e:
3800
- # Log but continue processing other entities - individual entity failures
3801
- # shouldn't fail the whole batch
3802
- logger.error(f"[OBSERVATIONS] Error processing entity {entity_id}: {e}")
3803
- continue
5015
+ async def get_directive(
5016
+ self,
5017
+ bank_id: str,
5018
+ directive_id: str,
5019
+ *,
5020
+ request_context: "RequestContext",
5021
+ ) -> dict[str, Any] | None:
5022
+ """Get a single directive by ID.
3804
5023
 
3805
- # Legacy format: single entity
3806
- else:
3807
- entity_id = task_dict.get("entity_id")
3808
- entity_name = task_dict.get("entity_name")
3809
- version = task_dict.get("version")
5024
+ Args:
5025
+ bank_id: Bank identifier
5026
+ directive_id: Directive UUID
5027
+ request_context: Request context for authentication
3810
5028
 
3811
- if not all([bank_id, entity_id, entity_name]):
3812
- raise ValueError(f"[OBSERVATIONS] Missing required fields in task: {task_dict}")
5029
+ Returns:
5030
+ Directive dict or None if not found
5031
+ """
5032
+ await self._authenticate_tenant(request_context)
5033
+ pool = await self._get_pool()
3813
5034
 
3814
- # Type assertions after validation
3815
- assert isinstance(bank_id, str) and isinstance(entity_id, str) and isinstance(entity_name, str)
3816
- await self.regenerate_entity_observations(
3817
- bank_id, entity_id, entity_name, version=version, request_context=internal_context
5035
+ async with acquire_with_retry(pool) as conn:
5036
+ row = await conn.fetchrow(
5037
+ f"""
5038
+ SELECT id, bank_id, name, content, priority, is_active, tags, created_at, updated_at
5039
+ FROM {fq_table("directives")}
5040
+ WHERE bank_id = $1 AND id = $2
5041
+ """,
5042
+ bank_id,
5043
+ directive_id,
3818
5044
  )
3819
5045
 
3820
- # =========================================================================
3821
- # Statistics & Operations (for HTTP API layer)
3822
- # =========================================================================
5046
+ return self._row_to_directive(row) if row else None
3823
5047
 
3824
- async def get_bank_stats(
5048
+ async def create_directive(
3825
5049
  self,
3826
5050
  bank_id: str,
5051
+ name: str,
5052
+ content: str,
3827
5053
  *,
5054
+ priority: int = 0,
5055
+ is_active: bool = True,
5056
+ tags: list[str] | None = None,
3828
5057
  request_context: "RequestContext",
3829
5058
  ) -> dict[str, Any]:
3830
- """Get statistics about memory nodes and links for a bank."""
5059
+ """Create a new directive.
5060
+
5061
+ Args:
5062
+ bank_id: Bank identifier
5063
+ name: Human-readable name for the directive
5064
+ content: The directive text to inject into prompts
5065
+ priority: Higher priority directives are injected first (default 0)
5066
+ is_active: Whether this directive is active (default True)
5067
+ tags: Optional tags for filtering
5068
+ request_context: Request context for authentication
5069
+
5070
+ Returns:
5071
+ The created directive dict
5072
+ """
3831
5073
  await self._authenticate_tenant(request_context)
3832
5074
  pool = await self._get_pool()
3833
5075
 
3834
5076
  async with acquire_with_retry(pool) as conn:
3835
- # Get node counts by fact_type
3836
- node_stats = await conn.fetch(
5077
+ row = await conn.fetchrow(
3837
5078
  f"""
3838
- SELECT fact_type, COUNT(*) as count
3839
- FROM {fq_table("memory_units")}
3840
- WHERE bank_id = $1
3841
- GROUP BY fact_type
5079
+ INSERT INTO {fq_table("directives")}
5080
+ (bank_id, name, content, priority, is_active, tags)
5081
+ VALUES ($1, $2, $3, $4, $5, $6)
5082
+ RETURNING id, bank_id, name, content, priority, is_active, tags, created_at, updated_at
3842
5083
  """,
3843
5084
  bank_id,
5085
+ name,
5086
+ content,
5087
+ priority,
5088
+ is_active,
5089
+ tags or [],
3844
5090
  )
3845
5091
 
3846
- # Get link counts by link_type
3847
- link_stats = await conn.fetch(
3848
- f"""
3849
- SELECT ml.link_type, COUNT(*) as count
3850
- FROM {fq_table("memory_links")} ml
3851
- JOIN {fq_table("memory_units")} mu ON ml.from_unit_id = mu.id
3852
- WHERE mu.bank_id = $1
3853
- GROUP BY ml.link_type
3854
- """,
3855
- bank_id,
3856
- )
5092
+ logger.info(f"[DIRECTIVES] Created directive '{name}' for bank {bank_id}")
5093
+ return self._row_to_directive(row)
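A directive-creation sketch, assuming `engine` and `ctx`; higher priority values are injected first, as documented above, and the name/content here are invented:

async def add_tone_directive(engine, bank_id: str, ctx) -> dict:
    """Create an active, high-priority directive scoped by tag."""
    return await engine.create_directive(
        bank_id,
        name="concise-answers",
        content="Answer in at most three sentences unless asked for detail.",
        priority=10,
        tags=["channel:support"],
        request_context=ctx,
    )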
3857
5094
 
3858
- # Get link counts by fact_type (from nodes)
3859
- link_fact_type_stats = await conn.fetch(
3860
- f"""
3861
- SELECT mu.fact_type, COUNT(*) as count
3862
- FROM {fq_table("memory_links")} ml
3863
- JOIN {fq_table("memory_units")} mu ON ml.from_unit_id = mu.id
3864
- WHERE mu.bank_id = $1
3865
- GROUP BY mu.fact_type
3866
- """,
3867
- bank_id,
3868
- )
5095
+ async def update_directive(
5096
+ self,
5097
+ bank_id: str,
5098
+ directive_id: str,
5099
+ *,
5100
+ name: str | None = None,
5101
+ content: str | None = None,
5102
+ priority: int | None = None,
5103
+ is_active: bool | None = None,
5104
+ tags: list[str] | None = None,
5105
+ request_context: "RequestContext",
5106
+ ) -> dict[str, Any] | None:
5107
+ """Update a directive.
3869
5108
 
3870
- # Get link counts by fact_type AND link_type
3871
- link_breakdown_stats = await conn.fetch(
3872
- f"""
3873
- SELECT mu.fact_type, ml.link_type, COUNT(*) as count
3874
- FROM {fq_table("memory_links")} ml
3875
- JOIN {fq_table("memory_units")} mu ON ml.from_unit_id = mu.id
3876
- WHERE mu.bank_id = $1
3877
- GROUP BY mu.fact_type, ml.link_type
3878
- """,
3879
- bank_id,
3880
- )
5109
+ Args:
5110
+ bank_id: Bank identifier
5111
+ directive_id: Directive UUID
5112
+ name: New name (optional)
5113
+ content: New content (optional)
5114
+ priority: New priority (optional)
5115
+ is_active: New active status (optional)
5116
+ tags: New tags (optional)
5117
+ request_context: Request context for authentication
3881
5118
 
3882
- # Get pending and failed operations counts
3883
- ops_stats = await conn.fetch(
5119
+ Returns:
5120
+ Updated directive dict or None if not found
5121
+ """
5122
+ await self._authenticate_tenant(request_context)
5123
+ pool = await self._get_pool()
5124
+
5125
+ # Build update query dynamically
5126
+ updates = ["updated_at = now()"]
5127
+ params: list[Any] = []
5128
+ param_idx = 1
5129
+
5130
+ if name is not None:
5131
+ updates.append(f"name = ${param_idx}")
5132
+ params.append(name)
5133
+ param_idx += 1
5134
+
5135
+ if content is not None:
5136
+ updates.append(f"content = ${param_idx}")
5137
+ params.append(content)
5138
+ param_idx += 1
5139
+
5140
+ if priority is not None:
5141
+ updates.append(f"priority = ${param_idx}")
5142
+ params.append(priority)
5143
+ param_idx += 1
5144
+
5145
+ if is_active is not None:
5146
+ updates.append(f"is_active = ${param_idx}")
5147
+ params.append(is_active)
5148
+ param_idx += 1
5149
+
5150
+ if tags is not None:
5151
+ updates.append(f"tags = ${param_idx}")
5152
+ params.append(tags)
5153
+ param_idx += 1
5154
+
5155
+ params.extend([bank_id, directive_id])
5156
+
5157
+ async with acquire_with_retry(pool) as conn:
5158
+ row = await conn.fetchrow(
3884
5159
  f"""
3885
- SELECT status, COUNT(*) as count
3886
- FROM {fq_table("async_operations")}
3887
- WHERE bank_id = $1
3888
- GROUP BY status
5160
+ UPDATE {fq_table("directives")}
5161
+ SET {", ".join(updates)}
5162
+ WHERE bank_id = ${param_idx} AND id = ${param_idx + 1}
5163
+ RETURNING id, bank_id, name, content, priority, is_active, tags, created_at, updated_at
3889
5164
  """,
3890
- bank_id,
5165
+ *params,
3891
5166
  )
3892
5167
 
3893
- return {
3894
- "bank_id": bank_id,
3895
- "node_counts": {row["fact_type"]: row["count"] for row in node_stats},
3896
- "link_counts": {row["link_type"]: row["count"] for row in link_stats},
3897
- "link_counts_by_fact_type": {row["fact_type"]: row["count"] for row in link_fact_type_stats},
3898
- "link_breakdown": [
3899
- {"fact_type": row["fact_type"], "link_type": row["link_type"], "count": row["count"]}
3900
- for row in link_breakdown_stats
3901
- ],
3902
- "operations": {row["status"]: row["count"] for row in ops_stats},
3903
- }
5168
+ return self._row_to_directive(row) if row else None
3904
5169
 
3905
- async def get_entity(
5170
+ async def delete_directive(
3906
5171
  self,
3907
5172
  bank_id: str,
3908
- entity_id: str,
5173
+ directive_id: str,
3909
5174
  *,
3910
5175
  request_context: "RequestContext",
3911
- ) -> dict[str, Any] | None:
3912
- """Get entity details including metadata and observations."""
5176
+ ) -> bool:
5177
+ """Delete a directive.
5178
+
5179
+ Args:
5180
+ bank_id: Bank identifier
5181
+ directive_id: Directive UUID
5182
+ request_context: Request context for authentication
5183
+
5184
+ Returns:
5185
+ True if deleted, False if not found
5186
+ """
3913
5187
  await self._authenticate_tenant(request_context)
3914
5188
  pool = await self._get_pool()
3915
5189
 
3916
5190
  async with acquire_with_retry(pool) as conn:
3917
- entity_row = await conn.fetchrow(
3918
- f"""
3919
- SELECT id, canonical_name, mention_count, first_seen, last_seen, metadata
3920
- FROM {fq_table("entities")}
3921
- WHERE bank_id = $1 AND id = $2
3922
- """,
5191
+ result = await conn.execute(
5192
+ f"DELETE FROM {fq_table('directives')} WHERE bank_id = $1 AND id = $2",
3923
5193
  bank_id,
3924
- uuid.UUID(entity_id),
5194
+ directive_id,
3925
5195
  )
3926
5196
 
3927
- if not entity_row:
3928
- return None
3929
-
3930
- # Get observations for the entity
3931
- observations = await self.get_entity_observations(bank_id, entity_id, limit=20, request_context=request_context)
5197
+ return result == "DELETE 1"
3932
5198
 
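Editor's note: asyncpg's `Connection.execute` returns the PostgreSQL command status tag as a string (for example `"DELETE 1"`), which is why the deletion check compares against that exact literal. A hedged sketch of a helper (name is illustrative, not part of the package) that parses the affected-row count instead, which is handier if a statement can touch more than one row:

```python
def rows_affected(command_tag: str) -> int:
    """Extract the row count from a PostgreSQL command status tag.

    Tags look like "DELETE 1", "UPDATE 3", or "INSERT 0 1"; the count is
    always the last whitespace-separated token.
    """
    try:
        return int(command_tag.rsplit(" ", 1)[-1])
    except (ValueError, IndexError):
        return 0


assert rows_affected("DELETE 1") == 1
assert rows_affected("DELETE 0") == 0
assert rows_affected("INSERT 0 1") == 1
```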
5199
+ def _row_to_directive(self, row) -> dict[str, Any]:
5200
+ """Convert a database row to a directive dict."""
3933
5201
  return {
3934
- "id": str(entity_row["id"]),
3935
- "canonical_name": entity_row["canonical_name"],
3936
- "mention_count": entity_row["mention_count"],
3937
- "first_seen": entity_row["first_seen"].isoformat() if entity_row["first_seen"] else None,
3938
- "last_seen": entity_row["last_seen"].isoformat() if entity_row["last_seen"] else None,
3939
- "metadata": entity_row["metadata"] or {},
3940
- "observations": observations,
5202
+ "id": str(row["id"]),
5203
+ "bank_id": row["bank_id"],
5204
+ "name": row["name"],
5205
+ "content": row["content"],
5206
+ "priority": row["priority"],
5207
+ "is_active": row["is_active"],
5208
+ "tags": row["tags"] or [],
5209
+ "created_at": row["created_at"].isoformat() if row["created_at"] else None,
5210
+ "updated_at": row["updated_at"].isoformat() if row["updated_at"] else None,
3941
5211
  }
3942
5212
 
3943
5213
  async def list_operations(
3944
5214
  self,
3945
5215
  bank_id: str,
3946
5216
  *,
5217
+ status: str | None = None,
5218
+ limit: int = 20,
5219
+ offset: int = 0,
3947
5220
  request_context: "RequestContext",
3948
- ) -> list[dict[str, Any]]:
3949
- """List async operations for a bank."""
5221
+ ) -> dict[str, Any]:
5222
+ """List async operations for a bank with optional filtering and pagination.
5223
+
5224
+ Args:
5225
+ bank_id: Bank identifier
5226
+ status: Optional status filter (pending, completed, failed); "pending" also matches operations still in processing
5227
+ limit: Maximum number of operations to return (default 20)
5228
+ offset: Number of operations to skip (default 0)
5229
+ request_context: Request context for authentication
5230
+
5231
+ Returns:
5232
+ Dict with total count and list of operations, sorted by most recent first
5233
+ """
3950
5234
  await self._authenticate_tenant(request_context)
3951
5235
  pool = await self._get_pool()
3952
5236
 
3953
5237
  async with acquire_with_retry(pool) as conn:
5238
+ # Build WHERE clause
5239
+ where_conditions = ["bank_id = $1"]
5240
+ params: list[Any] = [bank_id]
5241
+
5242
+ if status:
5243
+ # Map API status to DB statuses (pending includes processing)
5244
+ if status == "pending":
5245
+ where_conditions.append("status IN ('pending', 'processing')")
5246
+ else:
5247
+ where_conditions.append(f"status = ${len(params) + 1}")
5248
+ params.append(status)
5249
+
5250
+ where_clause = " AND ".join(where_conditions)
5251
+
5252
+ # Get total count (with filter)
5253
+ total_row = await conn.fetchrow(
5254
+ f"SELECT COUNT(*) as total FROM {fq_table('async_operations')} WHERE {where_clause}",
5255
+ *params,
5256
+ )
5257
+ total = total_row["total"] if total_row else 0
5258
+
5259
+ # Get operations with pagination
3954
5260
  operations = await conn.fetch(
3955
5261
  f"""
3956
- SELECT operation_id, bank_id, operation_type, created_at, status, error_message, result_metadata
5262
+ SELECT operation_id, operation_type, created_at, status, error_message
3957
5263
  FROM {fq_table("async_operations")}
3958
- WHERE bank_id = $1
5264
+ WHERE {where_clause}
3959
5265
  ORDER BY created_at DESC
5266
+ LIMIT ${len(params) + 1} OFFSET ${len(params) + 2}
3960
5267
  """,
3961
- bank_id,
5268
+ *params,
5269
+ limit,
5270
+ offset,
3962
5271
  )
3963
5272
 
3964
- def parse_metadata(metadata):
3965
- if metadata is None:
3966
- return {}
3967
- if isinstance(metadata, str):
3968
- import json
5273
+ return {
5274
+ "total": total,
5275
+ "operations": [
5276
+ {
5277
+ "id": str(row["operation_id"]),
5278
+ "task_type": row["operation_type"],
5279
+ "items_count": 0,
5280
+ "document_id": None,
5281
+ "created_at": row["created_at"].isoformat(),
5282
+ # Map DB status to API status (processing -> pending for simplicity)
5283
+ "status": "pending" if row["status"] in ("pending", "processing") else row["status"],
5284
+ "error_message": row["error_message"],
5285
+ }
5286
+ for row in operations
5287
+ ],
5288
+ }
3969
5289
 
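Editor's note: because the new `list_operations` returns both `total` and one page of `operations`, a caller can walk the full history with `limit`/`offset`. A usage sketch, not part of the package; the `engine` and `ctx` objects are assumed to be a memory engine instance and a valid RequestContext:

```python
async def collect_pending_operations(engine, bank_id: str, ctx) -> list[dict]:
    """Page through list_operations until every pending operation has been fetched."""
    collected: list[dict] = []
    offset = 0
    page_size = 50
    while True:
        page = await engine.list_operations(
            bank_id,
            status="pending",   # also matches rows still in 'processing'
            limit=page_size,
            offset=offset,
            request_context=ctx,
        )
        collected.extend(page["operations"])
        offset += page_size
        if offset >= page["total"] or not page["operations"]:
            break
    return collected
```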
3970
- return json.loads(metadata)
3971
- return metadata
5290
+ async def get_operation_status(
5291
+ self,
5292
+ bank_id: str,
5293
+ operation_id: str,
5294
+ *,
5295
+ request_context: "RequestContext",
5296
+ ) -> dict[str, Any]:
5297
+ """Get the status of a specific async operation.
3972
5298
 
3973
- return [
3974
- {
3975
- "id": str(row["operation_id"]),
3976
- "task_type": row["operation_type"],
3977
- "items_count": parse_metadata(row["result_metadata"]).get("items_count", 0),
3978
- "document_id": parse_metadata(row["result_metadata"]).get("document_id"),
3979
- "created_at": row["created_at"].isoformat(),
3980
- "status": row["status"],
5299
+ Returns:
5300
+ - status: "pending", "completed", "failed", or "not_found" when the operation does not exist
5301
+ - updated_at: last update timestamp
5302
+ - completed_at: completion timestamp (if completed)
5303
+ """
5304
+ await self._authenticate_tenant(request_context)
5305
+ pool = await self._get_pool()
5306
+
5307
+ op_uuid = uuid.UUID(operation_id)
5308
+
5309
+ async with acquire_with_retry(pool) as conn:
5310
+ row = await conn.fetchrow(
5311
+ f"""
5312
+ SELECT operation_id, operation_type, created_at, updated_at, completed_at, status, error_message
5313
+ FROM {fq_table("async_operations")}
5314
+ WHERE operation_id = $1 AND bank_id = $2
5315
+ """,
5316
+ op_uuid,
5317
+ bank_id,
5318
+ )
5319
+
5320
+ if row:
5321
+ # Map DB status to API status (processing -> pending for simplicity)
5322
+ db_status = row["status"]
5323
+ api_status = "pending" if db_status in ("pending", "processing") else db_status
5324
+ return {
5325
+ "operation_id": operation_id,
5326
+ "status": api_status,
5327
+ "operation_type": row["operation_type"],
5328
+ "created_at": row["created_at"].isoformat() if row["created_at"] else None,
5329
+ "updated_at": row["updated_at"].isoformat() if row["updated_at"] else None,
5330
+ "completed_at": row["completed_at"].isoformat() if row["completed_at"] else None,
3981
5331
  "error_message": row["error_message"],
3982
5332
  }
3983
- for row in operations
3984
- ]
5333
+ else:
5334
+ # Operation not found
5335
+ return {
5336
+ "operation_id": operation_id,
5337
+ "status": "not_found",
5338
+ "operation_type": None,
5339
+ "created_at": None,
5340
+ "updated_at": None,
5341
+ "completed_at": None,
5342
+ "error_message": None,
5343
+ }
3985
5344
 
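Editor's note: since `get_operation_status` collapses `processing` into `pending` and reports `not_found` for unknown ids, a polling caller only has to wait until the status leaves `pending`. A hedged usage sketch (again assuming `engine` and `ctx`; interval and timeout are arbitrary):

```python
import asyncio


async def wait_for_operation(engine, bank_id: str, operation_id: str, ctx,
                             poll_interval: float = 2.0, timeout: float = 300.0) -> dict:
    """Poll get_operation_status until the operation finishes or the timeout expires."""
    loop = asyncio.get_running_loop()
    deadline = loop.time() + timeout
    while True:
        info = await engine.get_operation_status(bank_id, operation_id, request_context=ctx)
        if info["status"] in ("completed", "failed", "not_found"):
            return info
        if loop.time() > deadline:
            raise TimeoutError(f"operation {operation_id} still pending after {timeout}s")
        await asyncio.sleep(poll_interval)
```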
3986
5345
  async def cancel_operation(
3987
5346
  self,
@@ -4022,10 +5381,10 @@ Guidelines:
4022
5381
  bank_id: str,
4023
5382
  *,
4024
5383
  name: str | None = None,
4025
- background: str | None = None,
5384
+ mission: str | None = None,
4026
5385
  request_context: "RequestContext",
4027
5386
  ) -> dict[str, Any]:
4028
- """Update bank name and/or background."""
5387
+ """Update bank name and/or mission."""
4029
5388
  await self._authenticate_tenant(request_context)
4030
5389
  pool = await self._get_pool()
4031
5390
 
@@ -4041,33 +5400,72 @@ Guidelines:
4041
5400
  name,
4042
5401
  )
4043
5402
 
4044
- if background is not None:
5403
+ if mission is not None:
4045
5404
  await conn.execute(
4046
5405
  f"""
4047
5406
  UPDATE {fq_table("banks")}
4048
- SET background = $2, updated_at = NOW()
5407
+ SET mission = $2, updated_at = NOW()
4049
5408
  WHERE bank_id = $1
4050
5409
  """,
4051
5410
  bank_id,
4052
- background,
5411
+ mission,
4053
5412
  )
4054
5413
 
4055
5414
  # Return updated profile
4056
5415
  return await self.get_bank_profile(bank_id, request_context=request_context)
4057
5416
 
4058
- async def submit_async_retain(
5417
+ async def _submit_async_operation(
4059
5418
  self,
4060
5419
  bank_id: str,
4061
- contents: list[dict[str, Any]],
5420
+ operation_type: str,
5421
+ task_type: str,
5422
+ task_payload: dict[str, Any],
4062
5423
  *,
4063
- request_context: "RequestContext",
5424
+ result_metadata: dict[str, Any] | None = None,
5425
+ dedupe_by_bank: bool = False,
4064
5426
  ) -> dict[str, Any]:
4065
- """Submit a batch retain operation to run asynchronously."""
4066
- await self._authenticate_tenant(request_context)
4067
- pool = await self._get_pool()
5427
+ """Generic helper to submit an async operation.
5428
+
5429
+ Args:
5430
+ bank_id: Bank identifier
5431
+ operation_type: Operation type for the async_operations record (e.g., 'consolidation', 'retain')
5432
+ task_type: Task type for the task payload (e.g., 'consolidation', 'batch_retain')
5433
+ task_payload: Additional task payload fields (operation_id and bank_id are added automatically)
5434
+ result_metadata: Optional metadata to store with the operation record
5435
+ dedupe_by_bank: If True, skip creating a new task if one is already pending for this bank+operation_type
4068
5436
 
5437
+ Returns:
5438
+ Dict with operation_id and optionally deduplicated=True if an existing task was found
5439
+ """
4069
5440
  import json
4070
5441
 
5442
+ pool = await self._get_pool()
5443
+
5444
+ # Check for existing pending task if deduplication is enabled
5445
+ # Note: We only check 'pending', not 'processing', because a processing task
5446
+ # uses a watermark from when it started - new memories added after that point
5447
+ # would need another consolidation run to be processed.
5448
+ if dedupe_by_bank:
5449
+ async with acquire_with_retry(pool) as conn:
5450
+ existing = await conn.fetchrow(
5451
+ f"""
5452
+ SELECT operation_id FROM {fq_table("async_operations")}
5453
+ WHERE bank_id = $1 AND operation_type = $2 AND status = 'pending'
5454
+ LIMIT 1
5455
+ """,
5456
+ bank_id,
5457
+ operation_type,
5458
+ )
5459
+ if existing:
5460
+ logger.debug(
5461
+ f"{operation_type} task already pending for bank_id={bank_id}, "
5462
+ f"skipping duplicate (existing operation_id={existing['operation_id']})"
5463
+ )
5464
+ return {
5465
+ "operation_id": str(existing["operation_id"]),
5466
+ "deduplicated": True,
5467
+ }
5468
+
4071
5469
  operation_id = uuid.uuid4()
4072
5470
 
4073
5471
  # Insert operation record into database
@@ -4079,23 +5477,113 @@ Guidelines:
4079
5477
  """,
4080
5478
  operation_id,
4081
5479
  bank_id,
4082
- "retain",
4083
- json.dumps({"items_count": len(contents)}),
5480
+ operation_type,
5481
+ json.dumps(result_metadata or {}),
4084
5482
  )
4085
5483
 
4086
- # Submit task to background queue
4087
- await self._task_backend.submit_task(
4088
- {
4089
- "type": "batch_retain",
4090
- "operation_id": str(operation_id),
4091
- "bank_id": bank_id,
4092
- "contents": contents,
4093
- }
4094
- )
5484
+ # Build and submit task payload
5485
+ full_payload = {
5486
+ "type": task_type,
5487
+ "operation_id": str(operation_id),
5488
+ "bank_id": bank_id,
5489
+ **task_payload,
5490
+ }
5491
+
5492
+ await self._task_backend.submit_task(full_payload)
4095
5493
 
4096
- logger.info(f"Retain task queued for bank_id={bank_id}, {len(contents)} items, operation_id={operation_id}")
5494
+ logger.info(f"{operation_type} task queued for bank_id={bank_id}, operation_id={operation_id}")
4097
5495
 
4098
5496
  return {
4099
5497
  "operation_id": str(operation_id),
4100
- "items_count": len(contents),
4101
5498
  }
5499
+
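Editor's note: `_submit_async_operation` injects `type`, `operation_id`, and `bank_id`, then spreads the caller's `task_payload` on top, so the worker sees one flat dict. A small sketch of the merged payload for the refresh case defined further down (the model id value is illustrative):

```python
import json
import uuid

# Assembled the same way as full_payload above: fixed fields first,
# then the caller-supplied task_payload merged in.
operation_id = uuid.uuid4()
task_payload = {"mental_model_id": "mm-42"}  # illustrative id

full_payload = {
    "type": "refresh_mental_model",
    "operation_id": str(operation_id),
    "bank_id": "bank-1",
    **task_payload,
}
print(json.dumps(full_payload, indent=2))
```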
5500
+ async def submit_async_retain(
5501
+ self,
5502
+ bank_id: str,
5503
+ contents: list[dict[str, Any]],
5504
+ *,
5505
+ request_context: "RequestContext",
5506
+ document_tags: list[str] | None = None,
5507
+ ) -> dict[str, Any]:
5508
+ """Submit a batch retain operation to run asynchronously."""
5509
+ await self._authenticate_tenant(request_context)
5510
+
5511
+ task_payload: dict[str, Any] = {"contents": contents}
5512
+ if document_tags:
5513
+ task_payload["document_tags"] = document_tags
5514
+
5515
+ result = await self._submit_async_operation(
5516
+ bank_id=bank_id,
5517
+ operation_type="retain",
5518
+ task_type="batch_retain",
5519
+ task_payload=task_payload,
5520
+ result_metadata={"items_count": len(contents)},
5521
+ dedupe_by_bank=False,
5522
+ )
5523
+
5524
+ result["items_count"] = len(contents)
5525
+ return result
5526
+
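Editor's note: `submit_async_retain` now accepts optional `document_tags` and echoes `items_count` back in its result. A usage sketch (engine/ctx assumed; the per-item dict shape is illustrative only, consult the retain API for the actual fields):

```python
async def retain_meeting_notes(engine, bank_id: str, notes: list[str], ctx) -> str:
    """Queue a batch retain for a list of note strings and return the operation id."""
    contents = [{"content": note} for note in notes]  # illustrative item shape
    result = await engine.submit_async_retain(
        bank_id,
        contents,
        request_context=ctx,
        document_tags=["meeting-notes"],
    )
    assert result["items_count"] == len(contents)
    return result["operation_id"]
```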
5527
+ async def submit_async_consolidation(
5528
+ self,
5529
+ bank_id: str,
5530
+ *,
5531
+ request_context: "RequestContext",
5532
+ ) -> dict[str, Any]:
5533
+ """Submit a consolidation operation to run asynchronously.
5534
+
5535
+ Deduplicates by bank_id - if there's already a pending consolidation for this bank,
5536
+ returns the existing operation_id instead of creating a new one.
5537
+
5538
+ Args:
5539
+ bank_id: Bank identifier
5540
+ request_context: Request context for authentication
5541
+
5542
+ Returns:
5543
+ Dict with operation_id
5544
+ """
5545
+ await self._authenticate_tenant(request_context)
5546
+ return await self._submit_async_operation(
5547
+ bank_id=bank_id,
5548
+ operation_type="consolidation",
5549
+ task_type="consolidation",
5550
+ task_payload={},
5551
+ dedupe_by_bank=True,
5552
+ )
5553
+
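Editor's note: because consolidation submits with `dedupe_by_bank=True`, the returned dict may carry `deduplicated: True` alongside the operation_id of the already-pending task. A caller-side sketch (engine/ctx assumed):

```python
async def trigger_consolidation(engine, bank_id: str, ctx) -> str:
    """Kick off consolidation, noting whether an existing pending task was reused."""
    result = await engine.submit_async_consolidation(bank_id, request_context=ctx)
    if result.get("deduplicated"):
        print(f"consolidation already pending, reusing operation {result['operation_id']}")
    else:
        print(f"queued new consolidation operation {result['operation_id']}")
    return result["operation_id"]
```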
5554
+ async def submit_async_refresh_mental_model(
5555
+ self,
5556
+ bank_id: str,
5557
+ mental_model_id: str,
5558
+ *,
5559
+ request_context: "RequestContext",
5560
+ ) -> dict[str, Any]:
5561
+ """Submit an async mental model refresh operation.
5562
+
5563
+ This schedules a background task to re-run the source query and update the content.
5564
+
5565
+ Args:
5566
+ bank_id: Bank identifier
5567
+ mental_model_id: Mental model UUID to refresh
5568
+ request_context: Request context for authentication
5569
+
5570
+ Returns:
5571
+ Dict with operation_id
5572
+ """
5573
+ await self._authenticate_tenant(request_context)
5574
+
5575
+ # Verify mental model exists
5576
+ mental_model = await self.get_mental_model(bank_id, mental_model_id, request_context=request_context)
5577
+ if not mental_model:
5578
+ raise ValueError(f"Mental model {mental_model_id} not found in bank {bank_id}")
5579
+
5580
+ return await self._submit_async_operation(
5581
+ bank_id=bank_id,
5582
+ operation_type="refresh_mental_model",
5583
+ task_type="refresh_mental_model",
5584
+ task_payload={
5585
+ "mental_model_id": mental_model_id,
5586
+ },
5587
+ result_metadata={"mental_model_id": mental_model_id, "name": mental_model["name"]},
5588
+ dedupe_by_bank=False,
5589
+ )
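Editor's note: `submit_async_refresh_mental_model` validates the target before queueing and raises `ValueError` when the model is missing, so callers handling user-supplied ids should expect that case. A hedged sketch (engine/ctx assumed):

```python
async def refresh_or_report(engine, bank_id: str, mental_model_id: str, ctx) -> str | None:
    """Queue a mental-model refresh, returning the operation id or None if the model is missing."""
    try:
        result = await engine.submit_async_refresh_mental_model(
            bank_id, mental_model_id, request_context=ctx
        )
    except ValueError:
        # Raised when the mental model is not found in this bank.
        return None
    return result["operation_id"]
```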