alchemist-nrel 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- alchemist_core/__init__.py +2 -2
- alchemist_core/acquisition/botorch_acquisition.py +84 -126
- alchemist_core/data/experiment_manager.py +196 -20
- alchemist_core/models/botorch_model.py +292 -63
- alchemist_core/models/sklearn_model.py +175 -15
- alchemist_core/session.py +3532 -76
- alchemist_core/utils/__init__.py +3 -1
- alchemist_core/utils/acquisition_utils.py +60 -0
- alchemist_core/visualization/__init__.py +45 -0
- alchemist_core/visualization/helpers.py +130 -0
- alchemist_core/visualization/plots.py +1449 -0
- alchemist_nrel-0.3.2.dist-info/METADATA +185 -0
- {alchemist_nrel-0.3.0.dist-info → alchemist_nrel-0.3.2.dist-info}/RECORD +34 -29
- {alchemist_nrel-0.3.0.dist-info → alchemist_nrel-0.3.2.dist-info}/WHEEL +1 -1
- {alchemist_nrel-0.3.0.dist-info → alchemist_nrel-0.3.2.dist-info}/entry_points.txt +1 -1
- {alchemist_nrel-0.3.0.dist-info → alchemist_nrel-0.3.2.dist-info}/top_level.txt +0 -1
- api/example_client.py +7 -2
- api/main.py +3 -2
- api/models/requests.py +76 -1
- api/models/responses.py +102 -2
- api/routers/acquisition.py +25 -0
- api/routers/experiments.py +352 -11
- api/routers/sessions.py +195 -11
- api/routers/visualizations.py +6 -4
- api/routers/websocket.py +132 -0
- run_api.py → api/run_api.py +8 -7
- api/services/session_store.py +370 -71
- api/static/assets/index-B6Cf6s_b.css +1 -0
- api/static/assets/{index-C0_glioA.js → index-B7njvc9r.js} +223 -208
- api/static/index.html +2 -2
- ui/gpr_panel.py +11 -5
- ui/target_column_dialog.py +299 -0
- ui/ui.py +52 -5
- alchemist_core/models/ax_model.py +0 -159
- alchemist_nrel-0.3.0.dist-info/METADATA +0 -223
- api/static/assets/index-CB4V1LI5.css +0 -1
- {alchemist_nrel-0.3.0.dist-info → alchemist_nrel-0.3.2.dist-info}/licenses/LICENSE +0 -0
alchemist_core/session.py
CHANGED
@@ -4,7 +4,7 @@ Optimization Session API - High-level interface for Bayesian optimization workfl
 This module provides the main entry point for using ALchemist as a headless library.
 """
 
-from typing import Optional, Dict, Any, List, Tuple, Callable
+from typing import Optional, Dict, Any, List, Tuple, Callable, Union, Literal
 import pandas as pd
 import numpy as np
 import json
@@ -16,6 +16,30 @@ from alchemist_core.events import EventEmitter
 from alchemist_core.config import get_logger
 from alchemist_core.audit_log import AuditLog, SessionMetadata, AuditEntry
 
+# Optional matplotlib import for visualization methods
+try:
+    import matplotlib.pyplot as plt
+    from matplotlib.figure import Figure
+    _HAS_MATPLOTLIB = True
+except ImportError:
+    _HAS_MATPLOTLIB = False
+    Figure = None  # Type hint placeholder
+
+# Import visualization functions (delegates to visualization module)
+try:
+    from alchemist_core.visualization import (
+        create_parity_plot,
+        create_contour_plot,
+        create_slice_plot,
+        create_metrics_plot,
+        create_qq_plot,
+        create_calibration_plot,
+        check_matplotlib
+    )
+    _HAS_VISUALIZATION = True
+except ImportError:
+    _HAS_VISUALIZATION = False
+
 logger = get_logger(__name__)
 
 
@@ -31,23 +55,23 @@ class OptimizationSession:
     5. Iterate
 
     Example:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        >>> from alchemist_core import OptimizationSession
+        >>>
+        >>> # Create session with search space
+        >>> session = OptimizationSession()
+        >>> session.add_variable('temperature', 'real', bounds=(300, 500))
+        >>> session.add_variable('pressure', 'real', bounds=(1, 10))
+        >>> session.add_variable('catalyst', 'categorical', categories=['A', 'B', 'C'])
+        >>>
+        >>> # Load experimental data
+        >>> session.load_data('experiments.csv', target_column='yield')
+        >>>
+        >>> # Train model
+        >>> session.train_model(backend='botorch', kernel='Matern')
+        >>>
+        >>> # Suggest next experiment
+        >>> next_point = session.suggest_next(strategy='EI', goal='maximize')
+        >>> print(next_point)
     """
 
     def __init__(self, search_space: Optional[SearchSpace] = None,
@@ -79,10 +103,16 @@ class OptimizationSession:
         self.model_backend = None
         self.acquisition = None
 
+        # Staged experiments (for workflow management)
+        self.staged_experiments = []  # List of experiment dicts awaiting evaluation
+        self.last_suggestions = []  # Most recent acquisition suggestions (for UI)
+
         # Configuration
         self.config = {
             'random_state': 42,
-            'verbose': True
+            'verbose': True,
+            'auto_train': False,  # Auto-train model after adding experiments
+            'auto_train_threshold': 5  # Minimum experiments before auto-train
         }
 
         logger.info(f"OptimizationSession initialized: {self.metadata.session_id}")
@@ -103,8 +133,8 @@
             - For 'categorical': categories=[list of values] or values=[list]
 
         Example:
-
-
+            >>> session.add_variable('temp', 'real', bounds=(300, 500))
+            >>> session.add_variable('catalyst', 'categorical', categories=['A', 'B'])
         """
         # Convert user-friendly API to internal format
         params = kwargs.copy()
@@ -185,29 +215,62 @@ class OptimizationSession:
     # Data Management
     # ============================================================
 
-    def load_data(self, filepath: str,
+    def load_data(self, filepath: str, target_columns: Union[str, List[str]] = 'Output',
                   noise_column: Optional[str] = None) -> None:
         """
         Load experimental data from CSV file.
 
         Args:
             filepath: Path to CSV file
-
+            target_columns: Target column name(s). Can be:
+                - String for single-objective: 'yield'
+                - List for multi-objective: ['yield', 'selectivity']
+                Default: 'Output'
             noise_column: Optional column with measurement noise/uncertainty
 
-
-
+        Examples:
+            Single-objective:
+                >>> session.load_data('experiments.csv', target_columns='yield')
+                >>> session.load_data('experiments.csv', target_columns=['yield'])  # also works
+
+            Multi-objective (future):
+                >>> session.load_data('experiments.csv', target_columns=['yield', 'selectivity'])
+
+        Note:
+            If the CSV doesn't have columns matching target_columns, an error will be raised.
+            Target columns will be preserved with their original names internally.
         """
         # Load the CSV
         import pandas as pd
         df = pd.read_csv(filepath)
 
-        #
-        if
-
+        # Normalize target_columns to list
+        if isinstance(target_columns, str):
+            target_columns_list = [target_columns]
+        else:
+            target_columns_list = list(target_columns)
+
+        # Validate that all target columns exist
+        missing_cols = [col for col in target_columns_list if col not in df.columns]
+        if missing_cols:
+            raise ValueError(
+                f"Target column(s) {missing_cols} not found in CSV file. "
+                f"Available columns: {list(df.columns)}. "
+                f"Please specify the correct target column name(s) using the target_columns parameter."
+            )
+
+        # Warn if 'Output' column exists but user specified different target(s)
+        if 'Output' in df.columns and 'Output' not in target_columns_list:
+            logger.warning(
+                f"CSV contains 'Output' column but you specified {target_columns_list}. "
+                f"Using {target_columns_list} as specified."
+            )
 
-        #
-
+        # Store the target column names for ExperimentManager
+        target_col_internal = target_columns_list
+
+        # Rename noise column to 'Noise' if specified and different
+        if noise_column and noise_column in df.columns and noise_column != 'Noise':
             df = df.rename(columns={noise_column: 'Noise'})
 
         # Save to temporary file and load via ExperimentManager
@@ -217,10 +280,12 @@ class OptimizationSession:
             temp_path = tmp.name
 
         try:
-
-
-            self.search_space
+            # Create ExperimentManager with the specified target column(s)
+            self.experiment_manager = ExperimentManager(
+                search_space=self.search_space,
+                target_columns=target_col_internal
             )
+            self.experiment_manager.load_from_csv(temp_path)
         finally:
             # Clean up temp file
             import os
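
A minimal usage sketch for the new target_columns parameter (not part of the diff; the file name and column names are hypothetical):

    from alchemist_core import OptimizationSession

    session = OptimizationSession()
    # Single-objective: pass the target column by name
    session.load_data('experiments.csv', target_columns='yield')

    # A wrong column name now fails fast and lists what is available
    try:
        session.load_data('experiments.csv', target_columns='yeild')
    except ValueError as err:
        print(err)  # "Target column(s) ['yeild'] not found in CSV file. ..."
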
@@ -245,7 +310,7 @@
             reason: Reason for this experiment (e.g., 'Manual', 'Expected Improvement')
 
         Example:
-
+            >>> session.add_experiment(
             ...     inputs={'temperature': 350, 'catalyst': 'A'},
             ...     output=0.85,
             ...     reason='Manual'
@@ -288,6 +353,132 @@ class OptimizationSession:
             'feature_names': list(X.columns)
         }
 
+    # ============================================================
+    # Staged Experiments (Workflow Management)
+    # ============================================================
+
+    def add_staged_experiment(self, inputs: Dict[str, Any]) -> None:
+        """
+        Add an experiment to the staging area (awaiting evaluation).
+
+        Staged experiments are typically suggested by acquisition functions
+        but not yet evaluated. They can be retrieved, evaluated externally,
+        and then added to the dataset with add_experiment().
+
+        Args:
+            inputs: Dictionary mapping variable names to values
+
+        Example:
+            >>> # Generate suggestions and stage them
+            >>> suggestions = session.suggest_next(n_suggestions=3)
+            >>> for point in suggestions.to_dict('records'):
+            ...     session.add_staged_experiment(point)
+            >>>
+            >>> # Later, evaluate and add
+            >>> staged = session.get_staged_experiments()
+            >>> for point in staged:
+            ...     output = run_experiment(**point)
+            ...     session.add_experiment(point, output=output)
+            >>> session.clear_staged_experiments()
+        """
+        self.staged_experiments.append(inputs)
+        logger.debug(f"Staged experiment: {inputs}")
+        self.events.emit('experiment_staged', {'inputs': inputs})
+
+    def get_staged_experiments(self) -> List[Dict[str, Any]]:
+        """
+        Get all staged experiments awaiting evaluation.
+
+        Returns:
+            List of experiment input dictionaries
+        """
+        return self.staged_experiments.copy()
+
+    def clear_staged_experiments(self) -> int:
+        """
+        Clear all staged experiments.
+
+        Returns:
+            Number of experiments cleared
+        """
+        count = len(self.staged_experiments)
+        self.staged_experiments.clear()
+        if count > 0:
+            logger.info(f"Cleared {count} staged experiments")
+            self.events.emit('staged_experiments_cleared', {'count': count})
+        return count
+
+    def move_staged_to_experiments(self, outputs: List[float],
+                                   noises: Optional[List[float]] = None,
+                                   iteration: Optional[int] = None,
+                                   reason: Optional[str] = None) -> int:
+        """
+        Evaluate staged experiments and add them to the dataset in batch.
+
+        Convenience method that pairs staged inputs with outputs and adds
+        them all to the experiment manager, then clears the staging area.
+
+        Args:
+            outputs: List of output values (must match length of staged experiments)
+            noises: Optional list of measurement uncertainties
+            iteration: Iteration number for all experiments (auto-assigned if None)
+            reason: Reason for these experiments (e.g., 'Expected Improvement')
+
+        Returns:
+            Number of experiments added
+
+        Example:
+            >>> # Stage some experiments
+            >>> session.add_staged_experiment({'x': 1.0, 'y': 2.0})
+            >>> session.add_staged_experiment({'x': 3.0, 'y': 4.0})
+            >>>
+            >>> # Evaluate them
+            >>> outputs = [run_experiment(**point) for point in session.get_staged_experiments()]
+            >>>
+            >>> # Add to dataset and clear staging
+            >>> session.move_staged_to_experiments(outputs, reason='LogEI')
+        """
+        if len(outputs) != len(self.staged_experiments):
+            raise ValueError(
+                f"Number of outputs ({len(outputs)}) must match "
+                f"number of staged experiments ({len(self.staged_experiments)})"
+            )
+
+        if noises is not None and len(noises) != len(self.staged_experiments):
+            raise ValueError(
+                f"Number of noise values ({len(noises)}) must match "
+                f"number of staged experiments ({len(self.staged_experiments)})"
+            )
+
+        # Add each experiment
+        for i, inputs in enumerate(self.staged_experiments):
+            noise = noises[i] if noises is not None else None
+
+            # Strip any metadata fields (prefixed with _) from inputs
+            # These are used for UI/workflow tracking but shouldn't be stored as variables
+            clean_inputs = {k: v for k, v in inputs.items() if not k.startswith('_')}
+
+            # Use per-experiment reason if stored in _reason, otherwise use batch reason
+            exp_reason = inputs.get('_reason', reason)
+
+            self.add_experiment(
+                inputs=clean_inputs,
+                output=outputs[i],
+                noise=noise,
+                iteration=iteration,
+                reason=exp_reason
+            )
+
+        count = len(self.staged_experiments)
+        self.clear_staged_experiments()
+
+        logger.info(f"Moved {count} staged experiments to dataset")
+        return count
+
+    # ============================================================
+    # Initial Design Generation
+    # ============================================================
+
     def generate_initial_design(
         self,
         method: str = "lhs",
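
The staging API above supports a suggest → evaluate → commit loop; a minimal sketch (run_experiment is a user-supplied evaluation function, not part of the package):

    # Stage acquisition suggestions without committing them to the dataset
    suggestions = session.suggest_next(strategy='EI', goal='maximize', n_suggestions=3)
    for point in suggestions.to_dict('records'):
        session.add_staged_experiment(point)

    # Evaluate externally, then commit in batch; '_'-prefixed metadata keys
    # are stripped and the staging area is cleared automatically
    outputs = [run_experiment(**p) for p in session.get_staged_experiments()]
    n_added = session.move_staged_to_experiments(outputs, reason='EI')
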
@@ -320,16 +511,16 @@
             List of dictionaries with variable names and values (no outputs)
 
         Example:
-
-
-
-
-
-
-
-
-
-
+            >>> # Generate initial design
+            >>> points = session.generate_initial_design('lhs', n_points=10)
+            >>>
+            >>> # Run experiments and add results
+            >>> for point in points:
+            ...     output = run_experiment(**point)  # Your experiment function
+            ...     session.add_experiment(point, output=output)
+            >>>
+            >>> # Now ready to train model
+            >>> session.train_model()
         """
         if len(self.search_space.variables) == 0:
             raise ValueError(
@@ -389,8 +580,8 @@
             Dictionary with training results and hyperparameters
 
         Example:
-
-
+            >>> results = session.train_model(backend='botorch', kernel='Matern')
+            >>> print(results['metrics'])
         """
         df = self.experiment_manager.get_data()
         if df is None or df.empty:
@@ -410,6 +601,27 @@
         # Extract calibration_enabled before passing kwargs to model constructor
        calibration_enabled = kwargs.pop('calibration_enabled', False)
 
+        # Validate and map transform types based on backend
+        # BoTorch uses: 'normalize', 'standardize'
+        # Sklearn uses: 'minmax', 'standard', 'robust', 'none'
+        if self.model_backend == 'sklearn':
+            # Map BoTorch transform types to sklearn equivalents
+            transform_map = {
+                'normalize': 'minmax',      # BoTorch normalize → sklearn minmax
+                'standardize': 'standard',  # BoTorch standardize → sklearn standard
+                'none': 'none'
+            }
+            if 'input_transform_type' in kwargs:
+                original = kwargs['input_transform_type']
+                kwargs['input_transform_type'] = transform_map.get(original, original)
+                if original != kwargs['input_transform_type']:
+                    logger.debug(f"Mapped input transform '{original}' → '{kwargs['input_transform_type']}' for sklearn")
+            if 'output_transform_type' in kwargs:
+                original = kwargs['output_transform_type']
+                kwargs['output_transform_type'] = transform_map.get(original, original)
+                if original != kwargs['output_transform_type']:
+                    logger.debug(f"Mapped output transform '{original}' → '{kwargs['output_transform_type']}' for sklearn")
+
         # Import appropriate model class
         if self.model_backend == 'sklearn':
             from alchemist_core.models.sklearn_model import SklearnModel
@@ -428,6 +640,15 @@
         elif self.model_backend == 'botorch':
             from alchemist_core.models.botorch_model import BoTorchModel
 
+            # Apply sensible defaults for BoTorch if not explicitly overridden
+            # Input normalization and output standardization are critical for performance
+            if 'input_transform_type' not in kwargs:
+                kwargs['input_transform_type'] = 'normalize'
+                logger.debug("Auto-applying input normalization for BoTorch model")
+            if 'output_transform_type' not in kwargs:
+                kwargs['output_transform_type'] = 'standardize'
+                logger.debug("Auto-applying output standardization for BoTorch model")
+
             # Build kernel options - BoTorch uses 'cont_kernel_type' not 'kernel_type'
             kernel_options = {'cont_kernel_type': kernel}
             if kernel_params:
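
The net effect of the two blocks above, as a hedged sketch: transform names can be written in the BoTorch vocabulary regardless of backend, and BoTorch models get sensible transforms without any extra arguments:

    # BoTorch: input normalization and output standardization applied implicitly
    session.train_model(backend='botorch', kernel='Matern')

    # sklearn: BoTorch-style names are remapped ('normalize' -> 'minmax',
    # 'standardize' -> 'standard'), so the same call works on both backends
    session.train_model(backend='sklearn', kernel='matern',
                        input_transform_type='normalize',
                        output_transform_type='standardize')
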
@@ -439,6 +660,18 @@
                 if k != 'nu':  # Already handled above
                     kernel_options[k] = v
 
+            # Identify categorical variable indices for BoTorch
+            # Only compute if not already provided in kwargs (e.g., from UI)
+            if 'cat_dims' not in kwargs:
+                cat_dims = []
+                categorical_var_names = self.search_space.get_categorical_variables()
+                if categorical_var_names:
+                    # Get the column order from search space
+                    all_var_names = self.search_space.get_variable_names()
+                    cat_dims = [i for i, name in enumerate(all_var_names) if name in categorical_var_names]
+                    logger.debug(f"Categorical dimensions for BoTorch: {cat_dims} (variables: {categorical_var_names})")
+                kwargs['cat_dims'] = cat_dims if cat_dims else None
+
             self.model = BoTorchModel(
                 kernel_options=kernel_options,
                 random_state=self.config['random_state'],
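
cat_dims carries the positional indices of categorical columns in search-space order; a small illustration with hypothetical variable names:

    all_var_names = ['temperature', 'pressure', 'catalyst']   # search-space order
    categorical_var_names = ['catalyst']
    cat_dims = [i for i, name in enumerate(all_var_names)
                if name in categorical_var_names]
    # cat_dims == [2]; passed to BoTorchModel so column 2 is treated as categorical
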
@@ -589,21 +822,57 @@
         Suggest next experiment(s) using acquisition function.
 
         Args:
-            strategy: Acquisition strategy
+            strategy: Acquisition strategy
+                - 'EI': Expected Improvement
+                - 'PI': Probability of Improvement
+                - 'UCB': Upper Confidence Bound
+                - 'LogEI': Log Expected Improvement (BoTorch only)
+                - 'LogPI': Log Probability of Improvement (BoTorch only)
+                - 'qEI', 'qUCB', 'qIPV': Batch acquisition (BoTorch only)
             goal: 'maximize' or 'minimize'
             n_suggestions: Number of suggestions (batch acquisition)
-            **kwargs: Strategy-specific parameters
+            **kwargs: Strategy-specific parameters:
+
+                **Sklearn backend:**
+                - xi (float): Exploration parameter for EI/PI (default: 0.01)
+                  Higher values favor exploration over exploitation
+                - kappa (float): Exploration parameter for UCB (default: 1.96)
+                  Higher values favor exploration (typically 1.96 for 95% CI)
+
+                **BoTorch backend:**
+                - beta (float): Exploration parameter for UCB (default: 0.5)
+                  Trades off mean vs. variance (higher = more exploration)
+                - mc_samples (int): Monte Carlo samples for batch acquisition (default: 128)
 
         Returns:
             DataFrame with suggested experiment(s)
 
-
-        >>>
-        >>>
+        Examples:
+            >>> # Expected Improvement with custom exploration
+            >>> next_point = session.suggest_next(strategy='EI', goal='maximize', xi=0.05)
+
+            >>> # Upper Confidence Bound with high exploration
+            >>> next_point = session.suggest_next(strategy='UCB', goal='maximize', kappa=2.5)
+
+            >>> # BoTorch UCB with beta parameter
+            >>> next_point = session.suggest_next(strategy='UCB', goal='maximize', beta=1.0)
         """
         if self.model is None:
             raise ValueError("No trained model available. Use train_model() first.")
 
+        # Validate and log kwargs
+        supported_kwargs = self._get_supported_kwargs(strategy, self.model_backend)
+        if kwargs:
+            unsupported = set(kwargs.keys()) - supported_kwargs
+            if unsupported:
+                logger.warning(
+                    f"Unsupported parameters for {strategy} with {self.model_backend} backend: "
+                    f"{unsupported}. Supported parameters: {supported_kwargs or 'none'}"
+                )
+            used_kwargs = {k: v for k, v in kwargs.items() if k in supported_kwargs}
+            if used_kwargs:
+                logger.info(f"Using acquisition parameters: {used_kwargs}")
+
         # Import appropriate acquisition class
         if self.model_backend == 'sklearn':
             from alchemist_core.acquisition.skopt_acquisition import SkoptAcquisition
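
With the validation above, parameter mismatches are surfaced in the log instead of being silently ignored; a sketch of the observable behavior:

    # 'xi' belongs to the sklearn/skopt backend; with BoTorch UCB it triggers
    # a warning that lists the supported set ({'beta'} here)
    session.suggest_next(strategy='UCB', goal='maximize', xi=0.05)

    # Supported parameters are logged and forwarded to the acquisition function
    session.suggest_next(strategy='UCB', goal='maximize', beta=1.0)
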
@@ -629,10 +898,14 @@
                 search_space=self.search_space,
                 acq_func=strategy,
                 maximize=(goal.lower() == 'maximize'),
-                batch_size=n_suggestions
+                batch_size=n_suggestions,
+                acq_func_kwargs=kwargs  # FIX: Pass kwargs to BoTorch acquisition!
             )
 
-
+        # Check if this is a pure exploration acquisition (doesn't use best_f)
+        is_exploratory = strategy.lower() in ['qnipv', 'qipv']
+        goal_desc = 'pure exploration' if is_exploratory else goal
+        logger.info(f"Running acquisition: {strategy} ({goal_desc})")
         self.events.emit('acquisition_started', {'strategy': strategy, 'goal': goal})
 
         # Get suggestion
@@ -663,14 +936,119 @@
         logger.info(f"Suggested point: {suggestion_dict}")
         self.events.emit('acquisition_completed', {'suggestion': suggestion_dict})
 
-        #
+        # Store suggestions for UI/API access
+        self.last_suggestions = result_df.to_dict('records')
+
+        # Cache suggestion info for audit log and visualization
         self._last_acquisition_info = {
             'strategy': strategy,
             'goal': goal,
             'parameters': kwargs
         }
+        self._last_acq_func = strategy.lower()
+        self._last_goal = goal.lower()
+
+        return result_df
+
+    def _get_supported_kwargs(self, strategy: str, backend: str) -> set:
+        """
+        Return supported kwargs for given acquisition strategy and backend.
+
+        Args:
+            strategy: Acquisition strategy name
+            backend: Model backend ('sklearn' or 'botorch')
+
+        Returns:
+            Set of supported kwarg names
+        """
+        strategy_lower = strategy.lower()
+
+        if backend == 'sklearn':
+            if strategy_lower in ['ei', 'pi', 'expectedimprovement', 'probabilityofimprovement']:
+                return {'xi'}
+            elif strategy_lower in ['ucb', 'lcb', 'upperconfidencebound', 'lowerconfidencebound']:
+                return {'kappa'}
+            elif strategy_lower == 'gp_hedge':
+                return {'xi', 'kappa'}
+        elif backend == 'botorch':
+            if strategy_lower in ['ei', 'logei', 'pi', 'logpi', 'expectedimprovement', 'probabilityofimprovement']:
+                return set()  # No additional parameters for these
+            elif strategy_lower in ['ucb', 'upperconfidencebound']:
+                return {'beta'}
+            elif strategy_lower in ['qei', 'qucb']:
+                return {'mc_samples', 'beta'}
+            elif strategy_lower in ['qipv', 'qnipv']:
+                return {'mc_samples', 'n_mc_points'}
+
+        return set()
+
+    def find_optimum(self, goal: str = 'maximize', n_grid_points: int = 10000) -> Dict[str, Any]:
+        """
+        Find the point where the model predicts the optimal value.
+
+        Uses a grid search approach to find the point with the best predicted
+        value (maximum or minimum) across the search space. This is useful for
+        identifying the model's predicted optimum independent of acquisition
+        function suggestions.
+
+        Args:
+            goal: 'maximize' or 'minimize' - which direction to optimize
+            n_grid_points: Target number of grid points for search (default: 10000)
+
+        Returns:
+            Dictionary with:
+            - 'x_opt': DataFrame with optimal point (single row)
+            - 'value': Predicted value at optimum
+            - 'std': Uncertainty (standard deviation) at optimum
+
+        Example:
+            >>> # Find predicted maximum
+            >>> result = session.find_optimum(goal='maximize')
+            >>> print(f"Optimum at: {result['x_opt']}")
+            >>> print(f"Predicted value: {result['value']:.2f} ± {result['std']:.2f}")
+
+            >>> # Find predicted minimum
+            >>> result = session.find_optimum(goal='minimize')
+
+            >>> # Use finer grid for more accuracy
+            >>> result = session.find_optimum(goal='maximize', n_grid_points=50000)
+
+        Note:
+            - Requires a trained model
+            - Uses the same grid-based approach as regret plot for consistency
+            - Handles categorical variables correctly through proper encoding
+            - Grid size is target value; actual number depends on dimensionality
+        """
+        if self.model is None:
+            raise ValueError("No trained model available. Use train_model() first.")
+
+        # Generate prediction grid in ORIGINAL variable space (not encoded)
+        grid = self._generate_prediction_grid(n_grid_points)
+
+        # Use model's predict method which handles encoding internally
+        means, stds = self.predict(grid)
+
+        # Find argmax or argmin
+        if goal.lower() == 'maximize':
+            best_idx = np.argmax(means)
+        else:
+            best_idx = np.argmin(means)
+
+        # Extract the optimal point (already in original variable space)
+        opt_point_df = grid.iloc[[best_idx]].reset_index(drop=True)
+
+        result = {
+            'x_opt': opt_point_df,
+            'value': float(means[best_idx]),
+            'std': float(stds[best_idx])
+        }
 
-
+        logger.info(f"Found optimum: {result['x_opt'].to_dict('records')[0]}")
+        logger.info(f"Predicted value: {result['value']:.4f} ± {result['std']:.4f}")
+
+        return result
+
+    # ============================================================
     # Predictions
     # ============================================================
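
Reading the find_optimum result, as a short sketch:

    result = session.find_optimum(goal='maximize', n_grid_points=10000)
    best_inputs = result['x_opt'].to_dict('records')[0]     # single-row DataFrame
    print(best_inputs)
    print(f"{result['value']:.3f} ± {result['std']:.3f}")   # predicted mean and std
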
@@ -685,11 +1063,11 @@
             Tuple of (predictions, uncertainties)
 
         Example:
-
+            >>> test_points = pd.DataFrame({
             ...     'temperature': [350, 400],
             ...     'catalyst': ['A', 'B']
             ... })
-
+            >>> predictions, uncertainties = session.predict(test_points)
         """
         if self.model is None:
             raise ValueError("No trained model available. Use train_model() first.")
@@ -722,9 +1100,9 @@
             callback: Callback function
 
         Example:
-
+            >>> def on_training_done(data):
             ...     print(f"Training completed with R² = {data['metrics']['r2']}")
-
+            >>> session.on('training_completed', on_training_done)
         """
         self.events.on(event, callback)
 
@@ -740,7 +1118,7 @@
             **kwargs: Configuration parameters to update
 
         Example:
-
+            >>> session.set_config(random_state=123, verbose=False)
         """
         self.config.update(kwargs)
         logger.info(f"Updated config: {kwargs}")
@@ -764,8 +1142,8 @@
             Created AuditEntry
 
         Example:
-
-
+            >>> session.add_experiment({'temp': 100, 'pressure': 5}, output=85.2)
+            >>> session.lock_data(notes="Initial screening dataset")
         """
         # Set search space in audit log (once)
         if self.audit_log.search_space_definition is None:
@@ -805,8 +1183,8 @@
             ValueError: If no model has been trained
 
         Example:
-
-
+            >>> session.train_model(backend='sklearn', kernel='matern')
+            >>> session.lock_model(notes="Best cross-validation performance")
         """
         if self.model is None:
             raise ValueError("No trained model available. Use train_model() first.")
@@ -898,8 +1276,8 @@
             Created AuditEntry
 
         Example:
-
-
+            >>> suggestions = session.suggest_next(strategy='EI', n_suggestions=3)
+            >>> session.lock_acquisition(
             ...     strategy='EI',
             ...     parameters={'xi': 0.01, 'goal': 'maximize'},
             ...     suggestions=suggestions,
@@ -967,7 +1345,7 @@
             filepath: Path to save session file (.json extension recommended)
 
         Example:
-
+            >>> session.save_session("~/ALchemist_Sessions/catalyst_study_nov2025.json")
         """
         filepath = Path(filepath)
 
@@ -1054,19 +1432,68 @@
 
         return content
 
-
-    def load_session(filepath: str, retrain_on_load: bool = True) -> 'OptimizationSession':
+    def load_session(self, filepath: str = None, retrain_on_load: bool = True) -> 'OptimizationSession':
         """
         Load session from JSON file.
 
+        This method works both as a static method (creating a new session) and as an
+        instance method (loading into existing session):
+
+        Static usage (returns new session):
+            >>> session = OptimizationSession.load_session("my_session.json")
+
+        Instance usage (loads into existing session):
+            >>> session = OptimizationSession()
+            >>> session.load_session("my_session.json")
+            >>> # session.experiment_manager.df is now populated
+
         Args:
-            filepath: Path to session file
+            filepath: Path to session file (required when called as static method,
+                can be self when called as instance method)
+            retrain_on_load: Whether to retrain model if config exists (default: True)
 
         Returns:
-            OptimizationSession
+            OptimizationSession (new or modified instance)
+        """
+        # Detect if called as instance method or static method
+        # When called as static method: self is actually the filepath string
+        # When called as instance method: self is an OptimizationSession instance
+        if isinstance(self, OptimizationSession):
+            # Instance method: load into this session
+            if filepath is None:
+                raise ValueError("filepath is required when calling as instance method")
 
-
-
+            # Load from static implementation
+            loaded_session = OptimizationSession._load_session_impl(filepath, retrain_on_load)
+
+            # Copy all attributes from loaded session to this instance
+            self.search_space = loaded_session.search_space
+            self.experiment_manager = loaded_session.experiment_manager
+            self.metadata = loaded_session.metadata
+            self.audit_log = loaded_session.audit_log
+            self.config = loaded_session.config
+            self.model = loaded_session.model
+            self.model_backend = loaded_session.model_backend
+            self.acquisition = loaded_session.acquisition
+            self.staged_experiments = loaded_session.staged_experiments
+            self.last_suggestions = loaded_session.last_suggestions
+
+            # Don't copy events emitter - keep the original
+            logger.info(f"Loaded session data into current instance from {filepath}")
+            self.events.emit('session_loaded', {'filepath': str(filepath)})
+
+            return self
+        else:
+            # Static method: self is actually the filepath, retrain_on_load is in filepath param
+            actual_filepath = self
+            actual_retrain = filepath if filepath is not None else True
+            return OptimizationSession._load_session_impl(actual_filepath, actual_retrain)
+
+    @staticmethod
+    def _load_session_impl(filepath: str, retrain_on_load: bool = True) -> 'OptimizationSession':
+        """
+        Internal implementation for loading session from file.
+        This always creates and returns a new session.
         """
         filepath = Path(filepath)
 
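
Both calling conventions of the dual-mode load_session, as a sketch ("my_session.json" is a hypothetical path):

    # Static call: the path binds to the 'self' slot; a new session is returned
    session = OptimizationSession.load_session("my_session.json")

    # Instance call: state is copied into the existing object, preserving the
    # original event emitter and any registered callbacks
    session2 = OptimizationSession()
    session2.on('session_loaded', lambda data: print("loaded:", data['filepath']))
    session2.load_session("my_session.json")

The isinstance dispatch lets one name serve both call styles, at the cost of an unusual signature: in the static form the second positional argument lands in filepath, so it doubles as the retrain flag.
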
@@ -1156,7 +1583,7 @@
             tags: New tags (optional)
 
         Example:
-
+            >>> session.update_metadata(
             ...     name="Catalyst Screening - Final",
             ...     description="Optimized Pt/Pd ratios",
             ...     tags=["catalyst", "platinum", "palladium", "final"]
@@ -1188,8 +1615,3037 @@ class OptimizationSession:
             **kwargs: Configuration parameters to update
 
         Example:
-
+            >>> session.set_config(random_state=123, verbose=False)
         """
         self.config.update(kwargs)
         logger.info(f"Updated config: {kwargs}")
-
+
+    # ============================================================
+    # Visualization Methods (Notebook Support)
+    # ============================================================
+
+    def _check_matplotlib(self) -> None:
+        """Check if matplotlib is available for plotting."""
+        if _HAS_VISUALIZATION:
+            check_matplotlib()  # Use visualization module's check
+        elif not _HAS_MATPLOTLIB:
+            raise ImportError(
+                "matplotlib is required for visualization methods. "
+                "Install with: pip install matplotlib"
+            )
+
+    def _check_model_trained(self) -> None:
+        """Check if model is trained before plotting."""
+        if self.model is None:
+            raise ValueError(
+                "Model not trained. Call train_model() before creating visualizations."
+            )
+
+    def _check_cv_results(self, use_calibrated: bool = False) -> Dict[str, np.ndarray]:
+        """
+        Get CV results from model, handling both calibrated and uncalibrated.
+
+        Args:
+            use_calibrated: Whether to use calibrated results if available
+
+        Returns:
+            Dictionary with y_true, y_pred, y_std arrays
+        """
+        self._check_model_trained()
+
+        # Check for calibrated results first if requested
+        if use_calibrated and hasattr(self.model, 'cv_cached_results_calibrated'):
+            if self.model.cv_cached_results_calibrated is not None:
+                return self.model.cv_cached_results_calibrated
+
+        # Fall back to uncalibrated results
+        if hasattr(self.model, 'cv_cached_results'):
+            if self.model.cv_cached_results is not None:
+                return self.model.cv_cached_results
+
+        raise ValueError(
+            "No CV results available. Model must be trained with cross-validation."
+        )
+
+    def plot_parity(
+        self,
+        use_calibrated: bool = False,
+        sigma_multiplier: float = 1.96,
+        figsize: Tuple[float, float] = (8, 6),
+        dpi: int = 100,
+        title: Optional[str] = None,
+        show_metrics: bool = True,
+        show_error_bars: bool = True
+    ) -> Figure:  # pyright: ignore[reportInvalidTypeForm]
+        """
+        Create parity plot of actual vs predicted values from cross-validation.
+
+        This plot shows how well the model's predictions match the actual experimental
+        values, with optional error bars indicating prediction uncertainty.
+
+        Args:
+            use_calibrated: Use calibrated uncertainty estimates if available
+            sigma_multiplier: Error bar size (1.96 = 95% CI, 1.0 = 68% CI, 2.58 = 99% CI)
+            figsize: Figure size as (width, height) in inches
+            dpi: Dots per inch for figure resolution
+            title: Custom title (default: auto-generated with metrics)
+            show_metrics: Include RMSE, MAE, R² in title
+            show_error_bars: Display uncertainty error bars
+
+        Returns:
+            matplotlib Figure object (displays inline in Jupyter)
+
+        Example:
+            >>> fig = session.plot_parity()
+            >>> fig.show()  # In notebooks, displays automatically
+
+            >>> # With custom styling
+            >>> fig = session.plot_parity(
+            ...     sigma_multiplier=2.58,  # 99% confidence interval
+            ...     figsize=(10, 8),
+            ...     dpi=150
+            ... )
+            >>> fig.savefig('parity.png', bbox_inches='tight')
+
+        Note:
+            Requires model to be trained with cross-validation (default behavior).
+            Error bars are only shown if model provides uncertainty estimates.
+        """
+        self._check_matplotlib()
+        self._check_model_trained()
+
+        # Get CV results
+        cv_results = self._check_cv_results(use_calibrated)
+        y_true = cv_results['y_true']
+        y_pred = cv_results['y_pred']
+        y_std = cv_results.get('y_std', None)
+
+        # Delegate to visualization module
+        fig, ax = create_parity_plot(
+            y_true=y_true,
+            y_pred=y_pred,
+            y_std=y_std,
+            sigma_multiplier=sigma_multiplier,
+            figsize=figsize,
+            dpi=dpi,
+            title=title,
+            show_metrics=show_metrics,
+            show_error_bars=show_error_bars
+        )
+
+        logger.info("Generated parity plot")
+        return fig
+
+    def plot_slice(
+        self,
+        x_var: str,
+        fixed_values: Optional[Dict[str, Any]] = None,
+        n_points: int = 100,
+        show_uncertainty: Union[bool, List[float]] = True,
+        show_experiments: bool = True,
+        figsize: Tuple[float, float] = (8, 6),
+        dpi: int = 100,
+        title: Optional[str] = None
+    ) -> Figure:  # pyright: ignore[reportInvalidTypeForm]
+        """
+        Create 1D slice plot showing model predictions along one variable.
+
+        Visualizes how the model's prediction changes as one variable is varied
+        while all other variables are held constant. Shows prediction mean and
+        optional uncertainty bands.
+
+        Args:
+            x_var: Variable name to vary along X axis (must be 'real' or 'integer')
+            fixed_values: Dict of {var_name: value} for other variables.
+                If not provided, uses midpoint for real/integer,
+                first category for categorical.
+            n_points: Number of points to evaluate along the slice
+            show_uncertainty: Show uncertainty bands. Can be:
+                - True: Show ±1σ and ±2σ bands (default)
+                - False: No uncertainty bands
+                - List[float]: Custom sigma values, e.g., [1.0, 2.0, 3.0] for ±1σ, ±2σ, ±3σ
+            show_experiments: Plot experimental data points as scatter
+            figsize: Figure size as (width, height) in inches
+            dpi: Dots per inch for figure resolution
+            title: Custom title (default: auto-generated)
+
+        Returns:
+            matplotlib Figure object
+
+        Example:
+            >>> # With custom uncertainty bands (±1σ, ±2σ, ±3σ)
+            >>> fig = session.plot_slice(
+            ...     'temperature',
+            ...     fixed_values={'pressure': 5.0, 'catalyst': 'Pt'},
+            ...     show_uncertainty=[1.0, 2.0, 3.0]
+            ... )
+            >>> fig.savefig('slice.png', dpi=300)
+
+        Note:
+            - Model must be trained before plotting
+            - Uncertainty bands require model to support std predictions
+        """
+        self._check_matplotlib()
+        self._check_model_trained()
+
+        if fixed_values is None:
+            fixed_values = {}
+
+        # Get variable info
+        var_names = self.search_space.get_variable_names()
+        if x_var not in var_names:
+            raise ValueError(f"Variable '{x_var}' not in search space")
+
+        # Get x variable definition
+        x_var_def = next(v for v in self.search_space.variables if v['name'] == x_var)
+
+        if x_var_def['type'] not in ['real', 'integer']:
+            raise ValueError(f"Variable '{x_var}' must be 'real' or 'integer' type for slice plot")
+
+        # Create range for x variable
+        x_min, x_max = x_var_def['min'], x_var_def['max']
+        x_values = np.linspace(x_min, x_max, n_points)
+
+        # Build prediction data with fixed values
+        slice_data = {x_var: x_values}
+
+        for var in self.search_space.variables:
+            var_name = var['name']
+            if var_name == x_var:
+                continue
+
+            if var_name in fixed_values:
+                slice_data[var_name] = fixed_values[var_name]
+            else:
+                # Use default value
+                if var['type'] in ['real', 'integer']:
+                    slice_data[var_name] = (var['min'] + var['max']) / 2
+                elif var['type'] == 'categorical':
+                    slice_data[var_name] = var['values'][0]
+
+        # Create DataFrame with correct column order
+        if hasattr(self.model, 'original_feature_names') and self.model.original_feature_names:
+            column_order = self.model.original_feature_names
+        else:
+            column_order = self.search_space.get_variable_names()
+
+        slice_df = pd.DataFrame(slice_data, columns=column_order)
+
+        # Get predictions with uncertainty
+        predictions, std = self.predict(slice_df)
+
+        # Prepare experimental data for plotting
+        exp_x = None
+        exp_y = None
+        if show_experiments and len(self.experiment_manager.df) > 0:
+            df = self.experiment_manager.df
+
+            # Filter points that match the fixed values
+            mask = pd.Series([True] * len(df))
+            for var_name, fixed_val in fixed_values.items():
+                if var_name in df.columns:
+                    # For numerical values, allow small tolerance
+                    if isinstance(fixed_val, (int, float)):
+                        mask &= np.abs(df[var_name] - fixed_val) < 1e-6
+                    else:
+                        mask &= df[var_name] == fixed_val
+
+            if mask.any():
+                filtered_df = df[mask]
+                exp_x = filtered_df[x_var].values
+                exp_y = filtered_df['Output'].values
+
+        # Generate title if not provided
+        if title is None:
+            if fixed_values:
+                fixed_str = ', '.join([f'{k}={v}' for k, v in fixed_values.items()])
+                title = f"1D Slice: {x_var}\n({fixed_str})"
+            else:
+                title = f"1D Slice: {x_var}"
+
+        # Delegate to visualization module
+        # Handle show_uncertainty parameter conversion
+        sigma_bands = None
+        if show_uncertainty is not False:
+            if isinstance(show_uncertainty, bool):
+                # Default: [1.0, 2.0]
+                sigma_bands = [1.0, 2.0] if show_uncertainty else None
+            else:
+                # Custom list of sigma values
+                sigma_bands = show_uncertainty
+
+        fig, ax = create_slice_plot(
+            x_values=x_values,
+            predictions=predictions,
+            x_var=x_var,
+            std=std,
+            sigma_bands=sigma_bands,
+            exp_x=exp_x,
+            exp_y=exp_y,
+            figsize=figsize,
+            dpi=dpi,
+            title=title
+        )
+
+        logger.info(f"Generated 1D slice plot for {x_var}")
+        return fig
+
+    def plot_contour(
+        self,
+        x_var: str,
+        y_var: str,
+        fixed_values: Optional[Dict[str, Any]] = None,
+        grid_resolution: int = 50,
+        show_experiments: bool = True,
+        show_suggestions: bool = False,
+        cmap: str = 'viridis',
+        figsize: Tuple[float, float] = (8, 6),
+        dpi: int = 100,
+        title: Optional[str] = None
+    ) -> Figure:  # pyright: ignore[reportInvalidTypeForm]
+        """
+        Create 2D contour plot of model predictions over a variable space.
+
+        Visualizes the model's predicted response surface by varying two variables
+        while holding others constant. Useful for understanding variable interactions
+        and identifying optimal regions.
+
+        Args:
+            x_var: Variable name for X axis (must be 'real' type)
+            y_var: Variable name for Y axis (must be 'real' type)
+            fixed_values: Dict of {var_name: value} for other variables.
+                If not provided, uses midpoint for real/integer,
+                first category for categorical.
+            grid_resolution: Grid density (NxN points)
+            show_experiments: Plot experimental data points as scatter
+            show_suggestions: Plot last suggested points (if available)
+            cmap: Matplotlib colormap name (e.g., 'viridis', 'coolwarm', 'plasma')
+            figsize: Figure size as (width, height) in inches
+            dpi: Dots per inch for figure resolution
+            title: Custom title (default: "Contour Plot of Model Predictions")
+
+        Returns:
+            matplotlib Figure object (displays inline in Jupyter)
+
+        Example:
+            >>> # Basic contour plot
+            >>> fig = session.plot_contour('temperature', 'pressure')
+
+            >>> # With fixed values for other variables
+            >>> fig = session.plot_contour(
+            ...     'temperature', 'pressure',
+            ...     fixed_values={'catalyst': 'Pt', 'flow_rate': 50},
+            ...     cmap='coolwarm',
+            ...     grid_resolution=100
+            ... )
+            >>> fig.savefig('contour.png', dpi=300, bbox_inches='tight')
+
+        Note:
+            - Requires at least 2 'real' type variables
+            - Model must be trained before plotting
+            - Categorical variables are automatically encoded using model's encoding
+        """
+        self._check_matplotlib()
+        self._check_model_trained()
+
+        if fixed_values is None:
+            fixed_values = {}
+
+        # Get variable names
+        var_names = self.search_space.get_variable_names()
+
+        # Validate variables exist
+        if x_var not in var_names:
+            raise ValueError(f"Variable '{x_var}' not in search space")
+        if y_var not in var_names:
+            raise ValueError(f"Variable '{y_var}' not in search space")
+
+        # Get variable info (search_space.variables is a list)
+        x_var_info = next(v for v in self.search_space.variables if v['name'] == x_var)
+        y_var_info = next(v for v in self.search_space.variables if v['name'] == y_var)
+
+        if x_var_info['type'] != 'real':
+            raise ValueError(f"X variable '{x_var}' must be 'real' type, got '{x_var_info['type']}'")
+        if y_var_info['type'] != 'real':
+            raise ValueError(f"Y variable '{y_var}' must be 'real' type, got '{y_var_info['type']}'")
+
+        # Get bounds
+        x_bounds = (x_var_info['min'], x_var_info['max'])
+        y_bounds = (y_var_info['min'], y_var_info['max'])
+
+        # Create meshgrid
+        x = np.linspace(x_bounds[0], x_bounds[1], grid_resolution)
+        y = np.linspace(y_bounds[0], y_bounds[1], grid_resolution)
+        X_grid, Y_grid = np.meshgrid(x, y)
+
+        # Build prediction dataframe with ALL variables in proper order
+        # Start with grid variables
+        grid_data = {
+            x_var: X_grid.ravel(),
+            y_var: Y_grid.ravel()
+        }
+
+        # Add fixed values for other variables
+        for var in self.search_space.variables:
+            var_name = var['name']
+            if var_name in [x_var, y_var]:
+                continue
+
+            if var_name in fixed_values:
+                grid_data[var_name] = fixed_values[var_name]
+            else:
+                # Use default value
+                if var['type'] in ['real', 'integer']:
+                    grid_data[var_name] = (var['min'] + var['max']) / 2
+                elif var['type'] == 'categorical':
+                    grid_data[var_name] = var['values'][0]
+
+        # Create DataFrame with columns in the same order as original training data
+        # This is critical for model preprocessing to work correctly
+        if hasattr(self.model, 'original_feature_names') and self.model.original_feature_names:
+            # Use the model's stored column order
+            column_order = self.model.original_feature_names
+        else:
+            # Fall back to search space order
+            column_order = self.search_space.get_variable_names()
+
+        grid_df = pd.DataFrame(grid_data, columns=column_order)
+
+        # Get predictions - use Session's predict method for consistency
+        predictions, _ = self.predict(grid_df)
+
+        # Reshape to grid
+        predictions_grid = predictions.reshape(X_grid.shape)
+
+        # Prepare experimental data for overlay
+        exp_x = None
+        exp_y = None
+        if show_experiments and not self.experiment_manager.df.empty:
+            exp_df = self.experiment_manager.df
+            if x_var in exp_df.columns and y_var in exp_df.columns:
+                exp_x = exp_df[x_var].values
+                exp_y = exp_df[y_var].values
+
+        # Prepare suggestion data for overlay
+        sugg_x = None
+        sugg_y = None
+        if show_suggestions and len(self.last_suggestions) > 0:
+            # last_suggestions is a DataFrame
+            if isinstance(self.last_suggestions, pd.DataFrame):
+                sugg_df = self.last_suggestions
+            else:
+                sugg_df = pd.DataFrame(self.last_suggestions)
+
+            if x_var in sugg_df.columns and y_var in sugg_df.columns:
+                sugg_x = sugg_df[x_var].values
+                sugg_y = sugg_df[y_var].values
+
+        # Delegate to visualization module
+        fig, ax, cbar = create_contour_plot(
+            x_grid=X_grid,
+            y_grid=Y_grid,
+            predictions_grid=predictions_grid,
+            x_var=x_var,
+            y_var=y_var,
+            exp_x=exp_x,
+            exp_y=exp_y,
+            suggest_x=sugg_x,
+            suggest_y=sugg_y,
+            cmap=cmap,
+            figsize=figsize,
+            dpi=dpi,
+            title=title or "Contour Plot of Model Predictions"
+        )
+
+        logger.info(f"Generated contour plot for {x_var} vs {y_var}")
+        # Return figure only for backwards compatibility (colorbar accessible via fig/ax)
+        return fig
+
+    def plot_voxel(
+        self,
+        x_var: str,
+        y_var: str,
+        z_var: str,
+        fixed_values: Optional[Dict[str, Any]] = None,
+        grid_resolution: int = 15,
+        show_experiments: bool = True,
+        show_suggestions: bool = False,
+        cmap: str = 'viridis',
+        alpha: float = 0.5,
+        use_log_scale: bool = False,
+        figsize: Tuple[float, float] = (10, 8),
+        dpi: int = 100,
+        title: Optional[str] = None
+    ) -> Figure:  # pyright: ignore[reportInvalidTypeForm]
+        """
+        Create 3D voxel plot of model predictions over a variable space.
+
+        Visualizes the model's predicted response surface by varying three variables
+        while holding others constant. Uses volumetric rendering to show the 3D
+        prediction landscape with adjustable transparency.
+
+        Args:
+            x_var: Variable name for X axis (must be 'real' or 'integer' type)
+            y_var: Variable name for Y axis (must be 'real' or 'integer' type)
+            z_var: Variable name for Z axis (must be 'real' or 'integer' type)
+            fixed_values: Dict of {var_name: value} for other variables.
+                If not provided, uses midpoint for real/integer,
+                first category for categorical.
+            grid_resolution: Grid density (NxNxN points, default: 15)
+                Note: 15³ = 3375 points, scales as N³
+            show_experiments: Plot experimental data points as scatter
+            show_suggestions: Plot last suggested points (if available)
+            cmap: Matplotlib colormap name (e.g., 'viridis', 'coolwarm', 'plasma')
+            alpha: Transparency level (0.0=transparent, 1.0=opaque, default: 0.5)
+                Lower values reveal interior structure better
+            use_log_scale: Use logarithmic color scale for values spanning orders of magnitude
+            figsize: Figure size as (width, height) in inches
+            dpi: Dots per inch for figure resolution
+            title: Custom title (default: "3D Voxel Plot of Model Predictions")
+
+        Returns:
+            matplotlib Figure object with 3D axes
+
+        Example:
+            >>> # Basic 3D voxel plot
+            >>> fig = session.plot_voxel('temperature', 'pressure', 'flow_rate')
+
+            >>> # With transparency to see interior
+            >>> fig = session.plot_voxel(
+            ...     'temperature', 'pressure', 'flow_rate',
+            ...     alpha=0.3,
+            ...     grid_resolution=20
+            ... )
+            >>> fig.savefig('voxel_plot.png', dpi=150, bbox_inches='tight')
+
+            >>> # With fixed values for other variables
+            >>> fig = session.plot_voxel(
+            ...     'temperature', 'pressure', 'flow_rate',
+            ...     fixed_values={'catalyst': 'Pt', 'pH': 7.0},
+            ...     cmap='coolwarm'
+            ... )
+
+        Raises:
+            ValueError: If search space doesn't have at least 3 continuous variables
+
+        Note:
+            - Requires at least 3 'real' or 'integer' type variables
+            - Model must be trained before plotting
+            - Computationally expensive: O(N³) evaluations
+            - Lower grid_resolution for faster rendering
+            - Use alpha < 0.5 to see interior structure
+            - Interactive rotation available in some backends (notebook)
+        """
+        self._check_matplotlib()
+        self._check_model_trained()
+
+        if fixed_values is None:
+            fixed_values = {}
+
+        # Get all variable names and check for continuous variables
+        var_names = self.search_space.get_variable_names()
+
+        # Count continuous variables (real or integer)
+        continuous_vars = []
+        for var in self.search_space.variables:
+            if var['type'] in ['real', 'integer']:
if var['type'] in ['real', 'integer']:
|
|
2152
|
+
continuous_vars.append(var['name'])
|
|
2153
|
+
|
|
2154
|
+
# Check if we have at least 3 continuous variables
|
|
2155
|
+
if len(continuous_vars) < 3:
|
|
2156
|
+
raise ValueError(
|
|
2157
|
+
f"3D voxel plot requires at least 3 continuous (real or integer) variables. "
|
|
2158
|
+
f"Found only {len(continuous_vars)}: {continuous_vars}. "
|
|
2159
|
+
f"Use plot_slice() for 1D or plot_contour() for 2D visualization instead."
|
|
2160
|
+
)
|
|
2161
|
+
|
|
2162
|
+
# Validate that the requested variables exist and are continuous
|
|
2163
|
+
for var_name, var_label in [(x_var, 'X'), (y_var, 'Y'), (z_var, 'Z')]:
|
|
2164
|
+
if var_name not in var_names:
|
|
2165
|
+
raise ValueError(f"{var_label} variable '{var_name}' not in search space")
|
|
2166
|
+
|
|
2167
|
+
var_def = next(v for v in self.search_space.variables if v['name'] == var_name)
|
|
2168
|
+
if var_def['type'] not in ['real', 'integer']:
|
|
2169
|
+
raise ValueError(
|
|
2170
|
+
f"{var_label} variable '{var_name}' must be 'real' or 'integer' type for voxel plot, "
|
|
2171
|
+
f"got '{var_def['type']}'"
|
|
2172
|
+
)
|
|
2173
|
+
|
|
2174
|
+
# Get variable definitions
|
|
2175
|
+
x_var_def = next(v for v in self.search_space.variables if v['name'] == x_var)
|
|
2176
|
+
y_var_def = next(v for v in self.search_space.variables if v['name'] == y_var)
|
|
2177
|
+
z_var_def = next(v for v in self.search_space.variables if v['name'] == z_var)
|
|
2178
|
+
|
|
2179
|
+
# Get bounds
|
|
2180
|
+
x_bounds = (x_var_def['min'], x_var_def['max'])
|
|
2181
|
+
y_bounds = (y_var_def['min'], y_var_def['max'])
|
|
2182
|
+
z_bounds = (z_var_def['min'], z_var_def['max'])
|
|
2183
|
+
|
|
2184
|
+
# Create 3D meshgrid
|
|
2185
|
+
x = np.linspace(x_bounds[0], x_bounds[1], grid_resolution)
|
|
2186
|
+
y = np.linspace(y_bounds[0], y_bounds[1], grid_resolution)
|
|
2187
|
+
z = np.linspace(z_bounds[0], z_bounds[1], grid_resolution)
|
|
2188
|
+
X_grid, Y_grid, Z_grid = np.meshgrid(x, y, z, indexing='ij')
|
|
2189
|
+
|
|
2190
|
+
# Build prediction dataframe with ALL variables in proper order
|
|
2191
|
+
grid_data = {
|
|
2192
|
+
x_var: X_grid.ravel(),
|
|
2193
|
+
y_var: Y_grid.ravel(),
|
|
2194
|
+
z_var: Z_grid.ravel()
|
|
2195
|
+
}
|
|
2196
|
+
|
|
2197
|
+
# Add fixed values for other variables
|
|
2198
|
+
for var in self.search_space.variables:
|
|
2199
|
+
var_name = var['name']
|
|
2200
|
+
if var_name in [x_var, y_var, z_var]:
|
|
2201
|
+
continue
|
|
2202
|
+
|
|
2203
|
+
if var_name in fixed_values:
|
|
2204
|
+
grid_data[var_name] = fixed_values[var_name]
|
|
2205
|
+
else:
|
|
2206
|
+
# Use default value
|
|
2207
|
+
if var['type'] in ['real', 'integer']:
|
|
2208
|
+
grid_data[var_name] = (var['min'] + var['max']) / 2
|
|
2209
|
+
elif var['type'] == 'categorical':
|
|
2210
|
+
grid_data[var_name] = var['values'][0]
|
|
2211
|
+
|
|
2212
|
+
# Create DataFrame with columns in correct order
|
|
2213
|
+
if hasattr(self.model, 'original_feature_names') and self.model.original_feature_names:
|
|
2214
|
+
column_order = self.model.original_feature_names
|
|
2215
|
+
else:
|
|
2216
|
+
column_order = self.search_space.get_variable_names()
|
|
2217
|
+
|
|
2218
|
+
grid_df = pd.DataFrame(grid_data, columns=column_order)
|
|
2219
|
+
|
|
2220
|
+
# Get predictions
|
|
2221
|
+
predictions, _ = self.predict(grid_df)
|
|
2222
|
+
|
|
2223
|
+
# Reshape to 3D grid
|
|
2224
|
+
predictions_grid = predictions.reshape(X_grid.shape)
|
|
2225
|
+
|
|
2226
|
+
# Prepare experimental data for overlay
|
|
2227
|
+
exp_x = None
|
|
2228
|
+
exp_y = None
|
|
2229
|
+
exp_z = None
|
|
2230
|
+
if show_experiments and not self.experiment_manager.df.empty:
|
|
2231
|
+
exp_df = self.experiment_manager.df
|
|
2232
|
+
if x_var in exp_df.columns and y_var in exp_df.columns and z_var in exp_df.columns:
|
|
2233
|
+
exp_x = exp_df[x_var].values
|
|
2234
|
+
exp_y = exp_df[y_var].values
|
|
2235
|
+
exp_z = exp_df[z_var].values
|
|
2236
|
+
|
|
2237
|
+
# Prepare suggestion data for overlay
|
|
2238
|
+
sugg_x = None
|
|
2239
|
+
sugg_y = None
|
|
2240
|
+
sugg_z = None
|
|
2241
|
+
if show_suggestions and len(self.last_suggestions) > 0:
|
|
2242
|
+
if isinstance(self.last_suggestions, pd.DataFrame):
|
|
2243
|
+
sugg_df = self.last_suggestions
|
|
2244
|
+
else:
|
|
2245
|
+
sugg_df = pd.DataFrame(self.last_suggestions)
|
|
2246
|
+
|
|
2247
|
+
if x_var in sugg_df.columns and y_var in sugg_df.columns and z_var in sugg_df.columns:
|
|
2248
|
+
sugg_x = sugg_df[x_var].values
|
|
2249
|
+
sugg_y = sugg_df[y_var].values
|
|
2250
|
+
sugg_z = sugg_df[z_var].values
|
|
2251
|
+
|
|
2252
|
+
# Delegate to visualization module
|
|
2253
|
+
from alchemist_core.visualization.plots import create_voxel_plot
|
|
2254
|
+
|
|
2255
|
+
fig, ax = create_voxel_plot(
|
|
2256
|
+
x_grid=X_grid,
|
|
2257
|
+
y_grid=Y_grid,
|
|
2258
|
+
z_grid=Z_grid,
|
|
2259
|
+
predictions_grid=predictions_grid,
|
|
2260
|
+
x_var=x_var,
|
|
2261
|
+
y_var=y_var,
|
|
2262
|
+
z_var=z_var,
|
|
2263
|
+
exp_x=exp_x,
|
|
2264
|
+
exp_y=exp_y,
|
|
2265
|
+
exp_z=exp_z,
|
|
2266
|
+
suggest_x=sugg_x,
|
|
2267
|
+
suggest_y=sugg_y,
|
|
2268
|
+
suggest_z=sugg_z,
|
|
2269
|
+
cmap=cmap,
|
|
2270
|
+
alpha=alpha,
|
|
2271
|
+
use_log_scale=use_log_scale,
|
|
2272
|
+
figsize=figsize,
|
|
2273
|
+
dpi=dpi,
|
|
2274
|
+
title=title or "3D Voxel Plot of Model Predictions"
|
|
2275
|
+
)
|
|
2276
|
+
|
|
2277
|
+
logger.info(f"Generated 3D voxel plot for {x_var} vs {y_var} vs {z_var}")
|
|
2278
|
+
return fig
|
|
2279
|
+
|
|
2280
|
+
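
Because the voxel grid is evaluated at every (x, y, z) combination, the number of model evaluations grows cubically with `grid_resolution`. A quick sanity check of that scaling:

    # Rough cost check for plot_voxel's N x N x N grid.
    for n in (10, 15, 20, 30):
        print(f"grid_resolution={n:>2} -> {n ** 3:>6} model evaluations")
    # 10 -> 1000, 15 -> 3375 (the default), 20 -> 8000, 30 -> 27000
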
+    def plot_metrics(
+        self,
+        metric: Literal['rmse', 'mae', 'r2', 'mape'] = 'rmse',
+        cv_splits: int = 5,
+        figsize: Tuple[float, float] = (8, 6),
+        dpi: int = 100,
+        use_cached: bool = True
+    ) -> Figure:  # pyright: ignore[reportInvalidTypeForm]
+        """
+        Plot cross-validation metrics as a function of training set size.
+
+        Shows how model performance improves as more experimental data is added.
+        This evaluates the model at each training set size from 5 observations up to
+        the current total, providing insight into data efficiency and whether more
+        experiments are needed.
+
+        Args:
+            metric: Which metric to plot ('rmse', 'mae', 'r2', or 'mape')
+            cv_splits: Number of cross-validation folds (default: 5)
+            figsize: Figure size as (width, height) in inches
+            dpi: Dots per inch for figure resolution
+            use_cached: Use cached metrics if available (default: True)
+
+        Returns:
+            matplotlib Figure object
+
+        Example:
+            >>> # Plot RMSE vs number of experiments
+            >>> fig = session.plot_metrics('rmse')
+
+            >>> # Plot R² to see improvement
+            >>> fig = session.plot_metrics('r2')
+
+            >>> # Force recomputation of metrics
+            >>> fig = session.plot_metrics('rmse', use_cached=False)
+
+        Note:
+            Calls model.evaluate() if metrics not cached, which can be computationally
+            expensive for large datasets. Set use_cached=False to force recomputation.
+        """
+        self._check_matplotlib()
+        self._check_model_trained()
+
+        # Need at least 5 observations for CV
+        n_total = len(self.experiment_manager.df)
+        if n_total < 5:
+            raise ValueError(f"Need at least 5 observations for metrics plot (have {n_total})")
+
+        # Check for cached metrics first
+        cache_key = f'_cached_cv_metrics_{cv_splits}'
+        if use_cached and hasattr(self.model, cache_key):
+            cv_metrics = getattr(self.model, cache_key)
+            logger.info(f"Using cached CV metrics for {metric.upper()}")
+        else:
+            # Call model's evaluate method to get metrics over training sizes
+            logger.info(f"Computing {metric.upper()} over training set sizes (this may take a moment)...")
+            cv_metrics = self.model.evaluate(
+                self.experiment_manager,
+                cv_splits=cv_splits,
+                debug=False
+            )
+            # Cache the results
+            setattr(self.model, cache_key, cv_metrics)
+
+        # Extract the requested metric
+        metric_key_map = {
+            'rmse': 'RMSE',
+            'mae': 'MAE',
+            'r2': 'R²',
+            'mape': 'MAPE'
+        }
+
+        if metric not in metric_key_map:
+            raise ValueError(f"Unknown metric '{metric}'. Choose from: {list(metric_key_map.keys())}")
+
+        metric_key = metric_key_map[metric]
+        metric_values = cv_metrics.get(metric_key, [])
+
+        if not metric_values:
+            raise RuntimeError(f"Model did not return {metric_key} values from evaluate()")
+
+        # X-axis: training set sizes (starts at 5)
+        x_range = np.arange(5, len(metric_values) + 5)
+        metric_array = np.array(metric_values)
+
+        # Delegate to visualization module
+        fig, ax = create_metrics_plot(
+            training_sizes=x_range,
+            metric_values=metric_array,
+            metric_name=metric,
+            figsize=figsize,
+            dpi=dpi
+        )
+
+        logger.info(f"Generated {metric} metrics plot with {len(metric_values)} points")
+        return fig
+
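
The caching above attaches CV results to the model object itself, keyed by the fold count, so repeated calls with the same `cv_splits` skip re-evaluation. A minimal sketch of the same attribute-based memoization, mirroring the calls used in the method (names follow the code above):

    def cached_evaluate(model, manager, cv_splits=5, use_cached=True):
        """Memoize expensive CV metrics on the model instance itself."""
        key = f'_cached_cv_metrics_{cv_splits}'       # one cache entry per fold count
        if use_cached and hasattr(model, key):
            return getattr(model, key)
        metrics = model.evaluate(manager, cv_splits=cv_splits, debug=False)
        setattr(model, key, metrics)                  # attach the result to the model
        return metrics
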
+    def plot_qq(
+        self,
+        use_calibrated: bool = False,
+        figsize: Tuple[float, float] = (8, 6),
+        dpi: int = 100,
+        title: Optional[str] = None
+    ) -> Figure:  # pyright: ignore[reportInvalidTypeForm]
+        """
+        Create Q-Q (quantile-quantile) plot for model residuals normality check.
+
+        Visualizes whether the model's prediction errors (residuals) follow a normal
+        distribution. Points should lie close to the diagonal line if residuals are
+        normally distributed, which is an assumption of Gaussian Process models.
+
+        Args:
+            use_calibrated: Use calibrated uncertainty estimates if available
+            figsize: Figure size as (width, height) in inches
+            dpi: Dots per inch for figure resolution
+            title: Custom title (default: "Q-Q Plot: Residuals Normality Check")
+
+        Returns:
+            matplotlib Figure object
+
+        Example:
+            >>> # Check if residuals are normally distributed
+            >>> fig = session.plot_qq()
+            >>> fig.savefig('qq_plot.png')
+
+            >>> # Use calibrated predictions if available
+            >>> fig = session.plot_qq(use_calibrated=True)
+
+        Note:
+            - Requires model to be trained with cross-validation
+            - Significant deviations from the diagonal suggest non-normal residuals
+            - Useful for diagnosing model assumptions and identifying outliers
+        """
+        self._check_matplotlib()
+        self._check_model_trained()
+
+        # Get CV results
+        cv_results = self._check_cv_results(use_calibrated)
+        y_true = cv_results['y_true']
+        y_pred = cv_results['y_pred']
+        y_std = cv_results.get('y_std', None)
+
+        # Compute standardized residuals (z-scores)
+        residuals = y_true - y_pred
+        if y_std is not None and len(y_std) > 0:
+            z_scores = residuals / y_std
+        else:
+            # Fallback: standardize by residual standard deviation
+            z_scores = residuals / np.std(residuals)
+
+        # Delegate to visualization module
+        fig, ax = create_qq_plot(
+            z_scores=z_scores,
+            figsize=figsize,
+            dpi=dpi,
+            title=title
+        )
+
+        logger.info("Generated Q-Q plot for residuals")
+        return fig
+
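
A Q-Q plot compares the sorted standardized residuals against standard-normal quantiles. A self-contained sketch of the underlying computation (how `create_qq_plot` does it internally is not shown in this diff; this is the textbook construction):

    import numpy as np
    from scipy import stats

    def qq_points(y_true, y_pred, y_std):
        """Return (theoretical, observed) quantile pairs for standardized residuals."""
        z = np.sort((y_true - y_pred) / y_std)        # observed z-scores, ascending
        n = len(z)
        probs = (np.arange(1, n + 1) - 0.5) / n       # plotting positions in (0, 1)
        theoretical = stats.norm.ppf(probs)           # standard-normal quantiles
        return theoretical, z                         # well-calibrated -> points near y = x
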
+    def plot_calibration(
+        self,
+        use_calibrated: bool = False,
+        n_bins: int = 10,
+        figsize: Tuple[float, float] = (8, 6),
+        dpi: int = 100,
+        title: Optional[str] = None
+    ) -> Figure:  # pyright: ignore[reportInvalidTypeForm]
+        """
+        Create calibration plot showing reliability of uncertainty estimates.
+
+        Compares predicted confidence intervals to actual coverage. For well-calibrated
+        models, a 68% confidence interval should contain ~68% of true values, 95% should
+        contain ~95%, etc. This plot helps diagnose if the model's uncertainty estimates
+        are too narrow (overconfident) or too wide (underconfident).
+
+        Args:
+            use_calibrated: Use calibrated uncertainty estimates if available
+            n_bins: Number of bins for grouping predictions (default: 10)
+            figsize: Figure size as (width, height) in inches
+            dpi: Dots per inch for figure resolution
+            title: Custom title (default: "Calibration Plot: Uncertainty Reliability")
+
+        Returns:
+            matplotlib Figure object
+
+        Example:
+            >>> # Check if uncertainty estimates are reliable
+            >>> fig = session.plot_calibration()
+            >>> fig.savefig('calibration_plot.png')
+
+            >>> # With more bins for finer resolution
+            >>> fig = session.plot_calibration(n_bins=20)
+
+        Note:
+            - Requires model to be trained with cross-validation and provide uncertainties
+            - Points above diagonal = model is underconfident (intervals too wide)
+            - Points below diagonal = model is overconfident (intervals too narrow)
+            - Well-calibrated models have points close to the diagonal
+        """
+        self._check_matplotlib()
+        self._check_model_trained()
+
+        # Get CV results
+        cv_results = self._check_cv_results(use_calibrated)
+        y_true = cv_results['y_true']
+        y_pred = cv_results['y_pred']
+        y_std = cv_results.get('y_std', None)
+
+        if y_std is None:
+            raise ValueError(
+                "Model does not provide uncertainty estimates (y_std). "
+                "Calibration plot requires uncertainty predictions."
+            )
+
+        # Compute calibration curve data
+        from scipy import stats
+
+        # Compute empirical coverage for a range of nominal probabilities
+        nominal_probs = np.arange(0.10, 1.00, 0.05)
+        empirical_coverage = []
+
+        for prob in nominal_probs:
+            # Convert probability to sigma multiplier
+            sigma = stats.norm.ppf((1 + prob) / 2)
+
+            # Compute empirical coverage at this sigma level
+            lower_bound = y_pred - sigma * y_std
+            upper_bound = y_pred + sigma * y_std
+            within_interval = (y_true >= lower_bound) & (y_true <= upper_bound)
+            empirical_coverage.append(np.mean(within_interval))
+
+        empirical_coverage = np.array(empirical_coverage)
+
+        # Delegate to visualization module
+        fig, ax = create_calibration_plot(
+            nominal_probs=nominal_probs,
+            empirical_coverage=empirical_coverage,
+            figsize=figsize,
+            dpi=dpi,
+            title=title or "Calibration Plot: Uncertainty Reliability"
+        )
+
+        logger.info("Generated calibration plot for uncertainty estimates")
+        return fig
+
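
The nominal-to-sigma conversion above uses the two-sided normal interval: a central probability p corresponds to ±Φ⁻¹((1+p)/2)·σ around the mean. A quick numeric check of the values this produces:

    from scipy import stats
    for p in (0.68, 0.90, 0.95):
        sigma = stats.norm.ppf((1 + p) / 2)
        print(f"{p:.0%} interval -> ±{sigma:.2f}σ")
    # 68% -> ±0.99σ, 90% -> ±1.64σ, 95% -> ±1.96σ
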
+    def plot_regret(
+        self,
+        goal: Literal['maximize', 'minimize'] = 'maximize',
+        include_predictions: bool = True,
+        show_cumulative: bool = False,
+        backend: Optional[str] = None,
+        kernel: Optional[str] = None,
+        n_grid_points: int = 1000,
+        sigma_bands: Optional[List[float]] = None,
+        start_iteration: int = 5,
+        reuse_hyperparameters: bool = True,
+        use_calibrated_uncertainty: bool = False,
+        figsize: Tuple[float, float] = (8, 6),
+        dpi: int = 100,
+        title: Optional[str] = None
+    ) -> Figure:  # pyright: ignore[reportInvalidTypeForm]
+        """
+        Plot optimization progress (regret curve).
+
+        Shows the best value found as a function of iteration number. The curve
+        displays cumulative best results and all observed values, providing insight
+        into optimization convergence.
+
+        A flattening curve indicates the optimization is converging (no further
+        improvements being found). This is useful for determining when to stop
+        an optimization campaign.
+
+        Optionally overlays the model's predicted best value (max posterior mean)
+        with uncertainty bands, showing where the model believes the optimum lies.
+
+        Args:
+            goal: 'maximize' or 'minimize' - which direction to optimize
+            include_predictions: Whether to overlay max(posterior mean) with uncertainty bands
+            show_cumulative: Whether to also draw the cumulative-best (incumbent) curve
+            backend: Model backend ('sklearn' or 'botorch'). Uses session default if None.
+            kernel: Kernel type ('RBF', 'Matern', etc.). Uses session default if None.
+            n_grid_points: Number of points to evaluate for finding max posterior mean
+            sigma_bands: List of sigma values for uncertainty bands (e.g., [1.0, 2.0])
+            start_iteration: First iteration to compute predictions (needs enough data)
+            reuse_hyperparameters: Reuse final model's hyperparameters (faster, default True)
+            use_calibrated_uncertainty: If True, apply calibration to uncertainties. If False,
+                use raw GP uncertainties. Default False is recommended for convergence
+                assessment, since raw uncertainties better reflect the model's internal
+                convergence. Set True for realistic prediction intervals that account for
+                model miscalibration.
+            figsize: Figure size as (width, height) in inches
+            dpi: Dots per inch for figure resolution
+            title: Custom plot title (auto-generated if None)
+
+        Returns:
+            matplotlib Figure object
+
+        Example:
+            >>> # For a maximization problem
+            >>> fig = session.plot_regret(goal='maximize')
+            >>> fig.savefig('optimization_progress.png')
+
+            >>> # With custom uncertainty bands (±1σ, ±2σ)
+            >>> fig = session.plot_regret(goal='maximize', sigma_bands=[1.0, 2.0])
+
+            >>> # For a minimization problem
+            >>> fig = session.plot_regret(goal='minimize')
+
+        Note:
+            - Requires at least 2 experiments
+            - Also known as "simple regret" or "incumbent trajectory"
+            - Best used to visualize overall optimization progress
+        """
+        self._check_matplotlib()
+
+        # Check we have experiments
+        n_exp = len(self.experiment_manager.df)
+        if n_exp < 2:
+            raise ValueError(f"Need at least 2 experiments for regret plot (have {n_exp})")
+
+        # Get observed values and create iteration array (1-based for user clarity)
+        # Use first target column (single-objective optimization)
+        target_col = self.experiment_manager.target_columns[0]
+        observed_values = self.experiment_manager.df[target_col].values
+        iterations = np.arange(1, n_exp + 1)  # 1-based: [1, 2, 3, ..., n]
+
+        # Compute posterior predictions if requested
+        predicted_means = None
+        predicted_stds = None
+
+        if include_predictions and n_exp >= start_iteration:
+            try:
+                predicted_means, predicted_stds = self._compute_posterior_predictions(
+                    goal=goal,
+                    backend=backend,
+                    kernel=kernel,
+                    n_grid_points=n_grid_points,
+                    start_iteration=start_iteration,
+                    reuse_hyperparameters=reuse_hyperparameters,
+                    use_calibrated_uncertainty=use_calibrated_uncertainty
+                )
+            except Exception as e:
+                logger.warning(f"Could not compute posterior predictions: {e}. Plotting observations only.")
+
+        # Import visualization function
+        from alchemist_core.visualization.plots import create_regret_plot
+
+        # Delegate to visualization module
+        fig, ax = create_regret_plot(
+            iterations=iterations,
+            observed_values=observed_values,
+            show_cumulative=show_cumulative,
+            goal=goal,
+            predicted_means=predicted_means,
+            predicted_stds=predicted_stds,
+            sigma_bands=sigma_bands,
+            figsize=figsize,
+            dpi=dpi,
+            title=title
+        )
+
+        logger.info(f"Generated regret plot with {n_exp} experiments")
+        return fig
+
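
The incumbent ("cumulative best") curve that `create_regret_plot` can draw reduces to a running extremum over the observations. A minimal sketch of that reduction (how the visualization module computes it internally is not shown in this diff):

    import numpy as np

    def incumbent(observed, goal='maximize'):
        """Best-so-far value after each experiment (the 'incumbent trajectory')."""
        observed = np.asarray(observed, dtype=float)
        if goal == 'maximize':
            return np.maximum.accumulate(observed)
        return np.minimum.accumulate(observed)

    # incumbent([3, 1, 4, 1, 5], goal='maximize') -> [3, 3, 4, 4, 5]
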
+    def _generate_prediction_grid(self, n_grid_points: int) -> pd.DataFrame:
+        """
+        Generate grid of test points across search space for predictions.
+
+        Args:
+            n_grid_points: Target number of grid points (actual number depends on dimensionality)
+
+        Returns:
+            DataFrame with columns for each variable
+        """
+        grid_1d = []
+        var_names = []
+
+        for var in self.search_space.variables:
+            var_names.append(var['name'])
+
+            if var['type'] == 'real':
+                # Continuous: linspace
+                n_per_dim = int(n_grid_points ** (1 / len(self.search_space.variables)))
+                grid_1d.append(np.linspace(var['min'], var['max'], n_per_dim))
+            elif var['type'] == 'integer':
+                # Integer: range of integers
+                n_per_dim = int(n_grid_points ** (1 / len(self.search_space.variables)))
+                grid_1d.append(np.linspace(var['min'], var['max'], n_per_dim).astype(int))
+            else:
+                # Categorical: use actual category values
+                grid_1d.append(var['values'])
+
+        # Generate test points using Cartesian product
+        from itertools import product
+        X_test_tuples = list(product(*grid_1d))
+
+        # Convert to DataFrame with proper variable names and types
+        grid = pd.DataFrame(X_test_tuples, columns=var_names)
+
+        # Ensure correct dtypes for categorical variables
+        for var in self.search_space.variables:
+            if var['type'] == 'categorical':
+                grid[var['name']] = grid[var['name']].astype(str)
+
+        return grid
+
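
Note that `n_grid_points` is a target, not an exact row count: each continuous dimension gets `int(n_grid_points ** (1/d))` points, and the Cartesian product multiplies those with every categorical level. The floating-point root also truncates, so the result usually lands below the target:

    n_grid_points, d = 1000, 3
    n_per_dim = int(n_grid_points ** (1 / d))   # cube root in floats: int(9.999...) == 9
    print(n_per_dim, n_per_dim ** d)            # 9 729 -- well under the 1000 target
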
+    def _compute_posterior_predictions(
+        self,
+        goal: str,
+        backend: Optional[str],
+        kernel: Optional[str],
+        n_grid_points: int,
+        start_iteration: int,
+        reuse_hyperparameters: bool,
+        use_calibrated_uncertainty: bool
+    ) -> Tuple[np.ndarray, np.ndarray]:
+        """
+        Compute max(posterior mean) and corresponding std at each iteration.
+
+        Helper method for regret plot to overlay model predictions with uncertainty.
+
+        IMPORTANT: When reuse_hyperparameters=True, this uses the final model's
+        hyperparameters for ALL iterations by creating fresh GP models with those
+        hyperparameters and subsets of data. This avoids numerical instability from
+        repeated MLE optimization.
+
+        Returns:
+            Tuple of (predicted_means, predicted_stds) arrays, same length as n_experiments
+        """
+        n_exp = len(self.experiment_manager.df)
+
+        # Initialize arrays (NaN for iterations before start_iteration)
+        predicted_means = np.full(n_exp, np.nan)
+        predicted_stds = np.full(n_exp, np.nan)
+
+        # Determine backend and kernel
+        if backend is None:
+            if self.model is None or not self.model.is_trained:
+                raise ValueError("No trained model in session. Train a model first or specify backend/kernel.")
+            backend = self.model_backend
+
+        if kernel is None:
+            if self.model is None or not self.model.is_trained:
+                raise ValueError("No trained model in session. Train a model first or specify backend/kernel.")
+            if backend == 'sklearn':
+                kernel = self.model.kernel_options.get('kernel_type', 'RBF')
+            elif backend == 'botorch':
+                # BoTorchModel stores kernel type in cont_kernel_type
+                kernel = getattr(self.model, 'cont_kernel_type', 'Matern')
+
+        # Extract optimized state_dict for botorch or kernel params for sklearn
+        optimized_state_dict = None
+        optimized_kernel_params = None
+        if reuse_hyperparameters and self.model is not None and self.model.is_trained:
+            if backend == 'sklearn':
+                optimized_kernel_params = self.model.optimized_kernel.get_params()
+            elif backend == 'botorch':
+                # Store the fitted state dict from the final model
+                optimized_state_dict = self.model.fitted_state_dict
+
+        # Generate grid for predictions
+        grid = self._generate_prediction_grid(n_grid_points)
+
+        # Get full dataset
+        full_df = self.experiment_manager.df
+        target_col = self.experiment_manager.target_columns[0]
+
+        # Suppress INFO logging for temp sessions to avoid spam
+        import logging
+        original_session_level = logger.level
+        original_model_level = logging.getLogger('alchemist_core.models.botorch_model').level
+        logger.setLevel(logging.WARNING)
+        logging.getLogger('alchemist_core.models.botorch_model').setLevel(logging.WARNING)
+
+        # Loop through iterations
+        for i in range(start_iteration, n_exp + 1):
+            try:
+                # Create temporary session with subset of data
+                temp_session = OptimizationSession()
+
+                # Directly assign search space to avoid logging spam
+                temp_session.search_space = self.search_space
+                temp_session.experiment_manager.set_search_space(self.search_space)
+
+                # Add subset of experiments
+                for idx in range(i):
+                    row = full_df.iloc[idx]
+                    inputs = {var['name']: row[var['name']] for var in self.experiment_manager.search_space.variables}
+                    temp_session.add_experiment(inputs, output=row[target_col])
+
+                # Train model on subset using SAME approach for all iterations
+                if backend == 'sklearn':
+                    # Create model instance
+                    from alchemist_core.models.sklearn_model import SklearnModel
+                    temp_model = SklearnModel(kernel_options={'kernel_type': kernel})
+
+                    if reuse_hyperparameters and optimized_kernel_params is not None:
+                        # Override n_restarts to disable optimization
+                        temp_model.n_restarts_optimizer = 0
+                        temp_model._custom_optimizer = None
+                        # Store the optimized kernel to use
+                        from sklearn.base import clone
+                        temp_model._reuse_kernel = clone(self.model.optimized_kernel)
+
+                    # Attach model and train
+                    temp_session.model = temp_model
+                    temp_session.model_backend = 'sklearn'
+
+                    # Train WITHOUT recomputing calibration (if reusing hyperparameters)
+                    if reuse_hyperparameters:
+                        temp_model.train(temp_session.experiment_manager, calibrate_uncertainty=False)
+                        # Transfer calibration factor from final model
+                        if hasattr(self.model, 'calibration_factor'):
+                            temp_model.calibration_factor = self.model.calibration_factor
+                            # Enable calibration only if user requested calibrated uncertainties
+                            temp_model.calibration_enabled = use_calibrated_uncertainty
+                    else:
+                        temp_model.train(temp_session.experiment_manager)
+
+                    # Verify model was trained
+                    if not temp_model.is_trained:
+                        raise ValueError(f"Model training failed at iteration {i}")
+                    if temp_session.model is None:
+                        raise ValueError(f"temp_session.model is None after training at iteration {i}")
+
+                elif backend == 'botorch':
+                    # For BoTorch: create a fresh model and load the fitted hyperparameters
+                    from alchemist_core.models.botorch_model import BoTorchModel
+                    import torch
+
+                    # Create model instance with same configuration as original model
+                    kernel_opts = {'cont_kernel_type': kernel}
+                    if hasattr(self.model, 'matern_nu'):
+                        kernel_opts['matern_nu'] = self.model.matern_nu
+
+                    temp_model = BoTorchModel(
+                        kernel_options=kernel_opts,
+                        input_transform_type=self.model.input_transform_type if hasattr(self.model, 'input_transform_type') else 'normalize',
+                        output_transform_type=self.model.output_transform_type if hasattr(self.model, 'output_transform_type') else 'standardize'
+                    )
+
+                    # Train model on subset (this creates the GP with subset of data)
+                    # Disable calibration computation if reusing hyperparameters
+                    if reuse_hyperparameters:
+                        temp_model.train(temp_session.experiment_manager, calibrate_uncertainty=False)
+                    else:
+                        temp_model.train(temp_session.experiment_manager)
+
+                    # Apply optimized hyperparameters from final model to trained subset model
+                    # Only works for simple kernel structures (no categorical variables)
+                    if reuse_hyperparameters and optimized_state_dict is not None:
+                        try:
+                            with torch.no_grad():
+                                # Extract hyperparameters from final model
+                                # This only works for ScaleKernel(base_kernel), not AdditiveKernel
+                                final_lengthscale = self.model.model.covar_module.base_kernel.lengthscale.detach().clone()
+                                final_outputscale = self.model.model.covar_module.outputscale.detach().clone()
+                                final_noise = self.model.model.likelihood.noise.detach().clone()
+
+                                # Set hyperparameters in temp model (trained on subset)
+                                temp_model.model.covar_module.base_kernel.lengthscale = final_lengthscale
+                                temp_model.model.covar_module.outputscale = final_outputscale
+                                temp_model.model.likelihood.noise = final_noise
+                        except AttributeError:
+                            # If kernel structure is complex (e.g., has categorical variables),
+                            # skip hyperparameter reuse - fall back to each iteration's own optimization
+                            pass
+
+                    # Transfer calibration factor from final model (even if hyperparameters couldn't be transferred)
+                    # This ensures last iteration matches final model exactly
+                    if reuse_hyperparameters and hasattr(self.model, 'calibration_factor'):
+                        temp_model.calibration_factor = self.model.calibration_factor
+                        # Enable calibration only if user requested calibrated uncertainties
+                        temp_model.calibration_enabled = use_calibrated_uncertainty
+
+                    # Attach to session
+                    temp_session.model = temp_model
+                    temp_session.model_backend = 'botorch'
+
+                # Predict on grid using temp_session.predict (consistent for all iterations)
+                result = temp_session.predict(grid)
+                if result is None:
+                    raise ValueError(f"predict() returned None at iteration {i}")
+                means, stds = result
+
+                # Find max mean (or min for minimization)
+                if goal.lower() == 'maximize':
+                    best_idx = np.argmax(means)
+                else:
+                    best_idx = np.argmin(means)
+
+                predicted_means[i - 1] = means[best_idx]
+                predicted_stds[i - 1] = stds[best_idx]
+
+            except Exception as e:
+                import traceback
+                logger.warning(f"Failed to compute predictions for iteration {i}: {e}")
+                logger.debug(traceback.format_exc())
+                # Leave as NaN
+
+        # Restore original logging levels
+        logger.setLevel(original_session_level)
+        logging.getLogger('alchemist_core.models.botorch_model').setLevel(original_model_level)
+
+        return predicted_means, predicted_stds
+
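
The hyperparameter-reuse idea (condition the GP on each data prefix without re-running MLE) can be reproduced outside ALchemist. A sketch using scikit-learn, where `optimizer=None` keeps the supplied kernel's hyperparameters fixed; this is illustrative and not the package's internal code path:

    import numpy as np
    from sklearn.base import clone
    from sklearn.gaussian_process import GaussianProcessRegressor

    def retrospective_best_means(X, y, final_kernel, grid, start=5):
        """Max posterior mean on each data prefix, reusing the final kernel as-is."""
        best = np.full(len(X), np.nan)
        for i in range(start, len(X) + 1):
            # optimizer=None freezes the kernel hyperparameters, so each prefix
            # model only re-conditions on its data (no MLE re-fit, no instability).
            gp = GaussianProcessRegressor(kernel=clone(final_kernel), optimizer=None)
            gp.fit(X[:i], y[:i])
            best[i - 1] = gp.predict(grid).max()
        return best
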
+    def plot_acquisition_slice(
+        self,
+        x_var: str,
+        acq_func: str = 'ei',
+        fixed_values: Optional[Dict[str, Any]] = None,
+        n_points: int = 100,
+        acq_func_kwargs: Optional[Dict[str, Any]] = None,
+        goal: str = 'maximize',
+        show_experiments: bool = True,
+        show_suggestions: bool = True,
+        figsize: Tuple[float, float] = (8, 6),
+        dpi: int = 100,
+        title: Optional[str] = None
+    ) -> Figure:  # pyright: ignore[reportInvalidTypeForm]
+        """
+        Create 1D slice plot showing acquisition function along one variable.
+
+        Visualizes how the acquisition function value changes as one variable is varied
+        while all other variables are held constant. This shows which regions along that
+        variable axis are most promising for the next experiment.
+
+        Args:
+            x_var: Variable name to vary along X axis (must be 'real' or 'integer')
+            acq_func: Acquisition function name ('ei', 'pi', 'ucb', 'logei', 'logpi')
+            fixed_values: Dict of {var_name: value} for other variables.
+                If not provided, uses midpoint for real/integer,
+                first category for categorical.
+            n_points: Number of points to evaluate along the slice
+            acq_func_kwargs: Additional acquisition parameters (xi, kappa, beta)
+            goal: 'maximize' or 'minimize' - optimization direction
+            show_experiments: Plot experimental data points as scatter
+            show_suggestions: Plot last suggested points (if available)
+            figsize: Figure size as (width, height) in inches
+            dpi: Dots per inch for figure resolution
+            title: Custom title (default: auto-generated)
+
+        Returns:
+            matplotlib Figure object
+
+        Example:
+            >>> # Visualize Expected Improvement along temperature
+            >>> fig = session.plot_acquisition_slice(
+            ...     'temperature',
+            ...     acq_func='ei',
+            ...     fixed_values={'pressure': 5.0, 'catalyst': 'Pt'}
+            ... )
+            >>> fig.savefig('acq_slice.png', dpi=300)
+
+            >>> # See where UCB is highest
+            >>> fig = session.plot_acquisition_slice(
+            ...     'pressure',
+            ...     acq_func='ucb',
+            ...     acq_func_kwargs={'beta': 0.5}
+            ... )
+
+        Note:
+            - Model must be trained before plotting
+            - Higher acquisition values indicate more promising regions
+            - Use this to understand where the algorithm wants to explore next
+        """
+        self._check_matplotlib()
+        self._check_model_trained()
+
+        from alchemist_core.utils.acquisition_utils import evaluate_acquisition
+        from alchemist_core.visualization.plots import create_slice_plot
+
+        if fixed_values is None:
+            fixed_values = {}
+
+        # Get variable info
+        var_names = self.search_space.get_variable_names()
+        if x_var not in var_names:
+            raise ValueError(f"Variable '{x_var}' not in search space")
+
+        # Get x variable definition
+        x_var_def = next(v for v in self.search_space.variables if v['name'] == x_var)
+
+        if x_var_def['type'] not in ['real', 'integer']:
+            raise ValueError(f"Variable '{x_var}' must be 'real' or 'integer' type for slice plot")
+
+        # Create range for x variable
+        x_min, x_max = x_var_def['min'], x_var_def['max']
+        x_values = np.linspace(x_min, x_max, n_points)
+
+        # Build acquisition evaluation grid
+        slice_data = {x_var: x_values}
+
+        for var in self.search_space.variables:
+            var_name = var['name']
+            if var_name == x_var:
+                continue
+
+            if var_name in fixed_values:
+                slice_data[var_name] = fixed_values[var_name]
+            else:
+                # Use default value
+                if var['type'] in ['real', 'integer']:
+                    slice_data[var_name] = (var['min'] + var['max']) / 2
+                elif var['type'] == 'categorical':
+                    slice_data[var_name] = var['values'][0]
+
+        # Create DataFrame with correct column order
+        if hasattr(self.model, 'original_feature_names') and self.model.original_feature_names:
+            column_order = self.model.original_feature_names
+        else:
+            column_order = self.search_space.get_variable_names()
+
+        slice_df = pd.DataFrame(slice_data, columns=column_order)
+
+        # Evaluate acquisition function
+        acq_values, _ = evaluate_acquisition(
+            self.model,
+            slice_df,
+            acq_func=acq_func,
+            acq_func_kwargs=acq_func_kwargs,
+            goal=goal
+        )
+
+        # Prepare experimental data for plotting
+        exp_x = None
+        exp_y = None
+        if show_experiments and len(self.experiment_manager.df) > 0:
+            df = self.experiment_manager.df
+
+            # Filter points that match the fixed values
+            # (build the mask on df's own index so boolean updates stay aligned)
+            mask = pd.Series(True, index=df.index)
+            for var_name, fixed_val in fixed_values.items():
+                if var_name in df.columns:
+                    if isinstance(fixed_val, str):
+                        mask &= (df[var_name] == fixed_val)
+                    else:
+                        mask &= np.isclose(df[var_name], fixed_val, atol=1e-6)
+
+            if mask.any():
+                filtered_df = df[mask]
+                exp_x = filtered_df[x_var].values
+                # For acquisition, we just mark where experiments exist (no y-value)
+                exp_y = np.zeros_like(exp_x)
+
+        # Prepare suggestion data
+        sugg_x = None
+        if show_suggestions and len(self.last_suggestions) > 0:
+            if isinstance(self.last_suggestions, pd.DataFrame):
+                sugg_df = self.last_suggestions
+            else:
+                sugg_df = pd.DataFrame(self.last_suggestions)
+
+            if x_var in sugg_df.columns:
+                sugg_x = sugg_df[x_var].values
+
+        # Generate title if not provided
+        if title is None:
+            acq_name = acq_func.upper()
+            if fixed_values:
+                fixed_str = ', '.join([f'{k}={v}' for k, v in fixed_values.items()])
+                title = f"Acquisition Function ({acq_name}): {x_var}\n({fixed_str})"
+            else:
+                title = f"Acquisition Function ({acq_name}): {x_var}"
+
+        # Use create_slice_plot but with acquisition values
+        # Note: We pass None for std since acquisition functions are deterministic
+        fig, ax = create_slice_plot(
+            x_values=x_values,
+            predictions=acq_values,
+            x_var=x_var,
+            std=None,
+            sigma_bands=None,  # No uncertainty for acquisition
+            exp_x=exp_x,
+            exp_y=None,  # Don't show experiment y-values for acquisition
+            figsize=figsize,
+            dpi=dpi,
+            title=title,
+            prediction_label=acq_func.upper(),
+            line_color='darkgreen',
+            line_width=1.5
+        )
+
+        # Add green fill under acquisition curve
+        ax.fill_between(x_values, 0, acq_values, alpha=0.3, color='green', zorder=0)
+
+        # Update y-label for acquisition
+        ax.set_ylabel(f'{acq_func.upper()} Value')
+
+        # Mark suggestions with star markers if present
+        if sugg_x is not None and len(sugg_x) > 0:
+            # Evaluate acquisition at suggested points
+            for i, sx in enumerate(sugg_x):
+                # Find acquisition value at this x
+                idx = np.argmin(np.abs(x_values - sx))
+                sy = acq_values[idx]
+                label = 'Suggestion' if i == 0 else None  # Only label first marker
+                ax.scatter([sx], [sy], color='black', s=102, marker='*', zorder=10, label=label)
+
+        logger.info(f"Generated acquisition slice plot for {x_var} using {acq_func}")
+        return fig
+
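
For reference, the 'ei' curve plotted here is, for a maximization goal, the closed-form Expected Improvement over the best observed value, computable directly from the posterior mean and standard deviation. A minimal sketch of that standard formula (how `evaluate_acquisition` computes it internally is not shown in this diff):

    import numpy as np
    from scipy.stats import norm

    def expected_improvement(mean, std, best_observed, xi=0.01):
        """Closed-form EI for maximization; xi adds a small exploration margin."""
        std = np.maximum(std, 1e-12)            # guard against zero variance
        improvement = mean - best_observed - xi
        z = improvement / std
        return improvement * norm.cdf(z) + std * norm.pdf(z)
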
+    def plot_acquisition_contour(
+        self,
+        x_var: str,
+        y_var: str,
+        acq_func: str = 'ei',
+        fixed_values: Optional[Dict[str, Any]] = None,
+        grid_resolution: int = 50,
+        acq_func_kwargs: Optional[Dict[str, Any]] = None,
+        goal: str = 'maximize',
+        show_experiments: bool = True,
+        show_suggestions: bool = True,
+        cmap: str = 'Greens',
+        use_log_scale: Optional[bool] = None,
+        figsize: Tuple[float, float] = (8, 6),
+        dpi: int = 100,
+        title: Optional[str] = None
+    ) -> Figure:  # pyright: ignore[reportInvalidTypeForm]
+        """
+        Create 2D contour plot of acquisition function over variable space.
+
+        Visualizes the acquisition function surface by varying two variables
+        while holding others constant. Shows "hot spots" where the algorithm
+        believes the next experiment should be conducted. Higher values indicate
+        more promising regions to explore.
+
+        Args:
+            x_var: Variable name for X axis (must be 'real' type)
+            y_var: Variable name for Y axis (must be 'real' type)
+            acq_func: Acquisition function name ('ei', 'pi', 'ucb', 'logei', 'logpi')
+            fixed_values: Dict of {var_name: value} for other variables.
+                If not provided, uses midpoint for real/integer,
+                first category for categorical.
+            grid_resolution: Grid density (NxN points)
+            acq_func_kwargs: Additional acquisition parameters (xi, kappa, beta)
+            goal: 'maximize' or 'minimize' - optimization direction
+            show_experiments: Plot experimental data points as scatter
+            show_suggestions: Plot last suggested points (if available)
+            cmap: Matplotlib colormap name (default: 'Greens'; e.g., 'viridis', 'hot', 'plasma')
+            use_log_scale: Use logarithmic color scale (default: auto-enable for logei/logpi)
+            figsize: Figure size as (width, height) in inches
+            dpi: Dots per inch for figure resolution
+            title: Custom title (default: auto-generated)
+
+        Returns:
+            matplotlib Figure object
+
+        Example:
+            >>> # Visualize Expected Improvement surface
+            >>> fig = session.plot_acquisition_contour(
+            ...     'temperature', 'pressure',
+            ...     acq_func='ei'
+            ... )
+            >>> fig.savefig('acq_contour.png', dpi=300)
+
+            >>> # See UCB landscape with custom exploration
+            >>> fig = session.plot_acquisition_contour(
+            ...     'temperature', 'pressure',
+            ...     acq_func='ucb',
+            ...     acq_func_kwargs={'beta': 1.0},
+            ...     cmap='hot'
+            ... )
+
+        Note:
+            - Requires at least 2 'real' type variables
+            - Model must be trained before plotting
+            - Higher acquisition values = more promising regions
+            - Suggestions are overlaid to show why they were chosen
+        """
+        self._check_matplotlib()
+        self._check_model_trained()
+
+        from alchemist_core.utils.acquisition_utils import evaluate_acquisition
+        from alchemist_core.visualization.plots import create_contour_plot
+
+        if fixed_values is None:
+            fixed_values = {}
+
+        # Get variable names
+        var_names = self.search_space.get_variable_names()
+
+        # Validate variables exist
+        if x_var not in var_names:
+            raise ValueError(f"Variable '{x_var}' not in search space")
+        if y_var not in var_names:
+            raise ValueError(f"Variable '{y_var}' not in search space")
+
+        # Get variable info
+        x_var_info = next(v for v in self.search_space.variables if v['name'] == x_var)
+        y_var_info = next(v for v in self.search_space.variables if v['name'] == y_var)
+
+        if x_var_info['type'] != 'real':
+            raise ValueError(f"X variable '{x_var}' must be 'real' type, got '{x_var_info['type']}'")
+        if y_var_info['type'] != 'real':
+            raise ValueError(f"Y variable '{y_var}' must be 'real' type, got '{y_var_info['type']}'")
+
+        # Get bounds
+        x_bounds = (x_var_info['min'], x_var_info['max'])
+        y_bounds = (y_var_info['min'], y_var_info['max'])
+
+        # Create meshgrid
+        x = np.linspace(x_bounds[0], x_bounds[1], grid_resolution)
+        y = np.linspace(y_bounds[0], y_bounds[1], grid_resolution)
+        X_grid, Y_grid = np.meshgrid(x, y)
+
+        # Build acquisition evaluation grid
+        grid_data = {
+            x_var: X_grid.ravel(),
+            y_var: Y_grid.ravel()
+        }
+
+        # Add fixed values for other variables
+        for var in self.search_space.variables:
+            var_name = var['name']
+            if var_name in [x_var, y_var]:
+                continue
+
+            if var_name in fixed_values:
+                grid_data[var_name] = fixed_values[var_name]
+            else:
+                # Use default value
+                if var['type'] in ['real', 'integer']:
+                    grid_data[var_name] = (var['min'] + var['max']) / 2
+                elif var['type'] == 'categorical':
+                    grid_data[var_name] = var['values'][0]
+
+        # Create DataFrame with correct column order
+        if hasattr(self.model, 'original_feature_names') and self.model.original_feature_names:
+            column_order = self.model.original_feature_names
+        else:
+            column_order = self.search_space.get_variable_names()
+
+        grid_df = pd.DataFrame(grid_data, columns=column_order)
+
+        # Evaluate acquisition function
+        acq_values, _ = evaluate_acquisition(
+            self.model,
+            grid_df,
+            acq_func=acq_func,
+            acq_func_kwargs=acq_func_kwargs,
+            goal=goal
+        )
+
+        # Reshape to grid
+        acq_grid = acq_values.reshape(X_grid.shape)
+
+        # Prepare experimental data for overlay
+        exp_x = None
+        exp_y = None
+        if show_experiments and not self.experiment_manager.df.empty:
+            exp_df = self.experiment_manager.df
+            if x_var in exp_df.columns and y_var in exp_df.columns:
+                exp_x = exp_df[x_var].values
+                exp_y = exp_df[y_var].values
+
+        # Prepare suggestion data for overlay
+        sugg_x = None
+        sugg_y = None
+        if show_suggestions and len(self.last_suggestions) > 0:
+            if isinstance(self.last_suggestions, pd.DataFrame):
+                sugg_df = self.last_suggestions
+            else:
+                sugg_df = pd.DataFrame(self.last_suggestions)
+
+            if x_var in sugg_df.columns and y_var in sugg_df.columns:
+                sugg_x = sugg_df[x_var].values
+                sugg_y = sugg_df[y_var].values
+
+        # Auto-enable log scale for logei/logpi if not explicitly set
+        if use_log_scale is None:
+            use_log_scale = acq_func.lower() in ['logei', 'logpi']
+
+        # Generate title if not provided
+        if title is None:
+            acq_name = acq_func.upper()
+            title = f"Acquisition Function ({acq_name}): {x_var} vs {y_var}"
+
+        # Delegate to visualization module
+        fig, ax, cbar = create_contour_plot(
+            x_grid=X_grid,
+            y_grid=Y_grid,
+            predictions_grid=acq_grid,
+            x_var=x_var,
+            y_var=y_var,
+            exp_x=exp_x,
+            exp_y=exp_y,
+            suggest_x=sugg_x,
+            suggest_y=sugg_y,
+            cmap=cmap,  # honor the caller's colormap (was hardcoded to 'Greens', ignoring the parameter)
+            use_log_scale=use_log_scale,
+            figsize=figsize,
+            dpi=dpi,
+            title=title
+        )
+
+        # Update colorbar label for acquisition
+        cbar.set_label(f'{acq_func.upper()} Value', rotation=270, labelpad=20)
+
+        logger.info(f"Generated acquisition contour plot for {x_var} vs {y_var} using {acq_func}")
+        return fig
+
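
The `use_log_scale` option matters because acquisition surfaces are often sharply peaked, with values spanning many orders of magnitude. How `create_contour_plot` applies it internally is not shown in this diff; for strictly positive data, matplotlib's `LogNorm` is the standard way to get a logarithmic color scale, as in this standalone sketch:

    import numpy as np
    import matplotlib.pyplot as plt
    from matplotlib.colors import LogNorm

    # A strictly positive, sharply peaked surface stands in for an acquisition grid.
    x = y = np.linspace(-2, 2, 200)
    X, Y = np.meshgrid(x, y)
    Z = np.exp(-(X**2 + Y**2) * 4) + 1e-6

    fig, ax = plt.subplots()
    im = ax.contourf(X, Y, Z, levels=50, norm=LogNorm(vmin=Z.min(), vmax=Z.max()))
    fig.colorbar(im, ax=ax, label='acquisition value (log color scale)')
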
+    def plot_uncertainty_contour(
+        self,
+        x_var: str,
+        y_var: str,
+        fixed_values: Optional[Dict[str, Any]] = None,
+        grid_resolution: int = 50,
+        show_experiments: bool = True,
+        show_suggestions: bool = False,
+        cmap: str = 'Reds',
+        figsize: Tuple[float, float] = (8, 6),
+        dpi: int = 100,
+        title: Optional[str] = None
+    ) -> Figure:  # pyright: ignore[reportInvalidTypeForm]
+        """
+        Create 2D contour plot of posterior uncertainty over a variable space.
+
+        Visualizes where the model is most uncertain about predictions, showing
+        regions that may benefit from additional sampling. Higher values indicate
+        greater uncertainty (standard deviation).
+
+        Args:
+            x_var: Variable name for X axis (must be 'real' type)
+            y_var: Variable name for Y axis (must be 'real' type)
+            fixed_values: Dict of {var_name: value} for other variables.
+                If not provided, uses midpoint for real/integer,
+                first category for categorical.
+            grid_resolution: Grid density (NxN points)
+            show_experiments: Plot experimental data points as scatter
+            show_suggestions: Plot last suggested points (if available)
+            cmap: Matplotlib colormap name (default: 'Reds' - darker = more uncertain)
+            figsize: Figure size as (width, height) in inches
+            dpi: Dots per inch for figure resolution
+            title: Custom title (default: auto-generated)
+
+        Returns:
+            matplotlib Figure object
+
+        Example:
+            >>> # Visualize uncertainty landscape
+            >>> fig = session.plot_uncertainty_contour('temperature', 'pressure')
+
+            >>> # Custom colormap
+            >>> fig = session.plot_uncertainty_contour(
+            ...     'temperature', 'pressure',
+            ...     cmap='YlOrRd',
+            ...     grid_resolution=100
+            ... )
+            >>> fig.savefig('uncertainty_contour.png', dpi=300)
+
+        Note:
+            - Requires at least 2 'real' type variables
+            - Model must be trained and support std predictions
+            - High uncertainty near data gaps is expected
+            - Useful for planning exploration strategies
+        """
+        self._check_matplotlib()
+        self._check_model_trained()
+
+        from alchemist_core.visualization.plots import create_uncertainty_contour_plot
+
+        if fixed_values is None:
+            fixed_values = {}
+
+        # Get variable names
+        var_names = self.search_space.get_variable_names()
+
+        # Validate variables exist
+        if x_var not in var_names:
+            raise ValueError(f"Variable '{x_var}' not in search space")
+        if y_var not in var_names:
+            raise ValueError(f"Variable '{y_var}' not in search space")
+
+        # Get variable info
+        x_var_info = next(v for v in self.search_space.variables if v['name'] == x_var)
+        y_var_info = next(v for v in self.search_space.variables if v['name'] == y_var)
+
+        if x_var_info['type'] != 'real':
+            raise ValueError(f"X variable '{x_var}' must be 'real' type, got '{x_var_info['type']}'")
+        if y_var_info['type'] != 'real':
+            raise ValueError(f"Y variable '{y_var}' must be 'real' type, got '{y_var_info['type']}'")
+
+        # Get bounds
+        x_bounds = (x_var_info['min'], x_var_info['max'])
+        y_bounds = (y_var_info['min'], y_var_info['max'])
+
+        # Create meshgrid
+        x = np.linspace(x_bounds[0], x_bounds[1], grid_resolution)
+        y = np.linspace(y_bounds[0], y_bounds[1], grid_resolution)
+        X_grid, Y_grid = np.meshgrid(x, y)
+
+        # Build prediction grid
+        grid_data = {
+            x_var: X_grid.ravel(),
+            y_var: Y_grid.ravel()
+        }
+
+        # Add fixed values for other variables
+        for var in self.search_space.variables:
+            var_name = var['name']
+            if var_name in [x_var, y_var]:
+                continue
+
+            if var_name in fixed_values:
+                grid_data[var_name] = fixed_values[var_name]
+            else:
+                # Use default value
+                if var['type'] in ['real', 'integer']:
+                    grid_data[var_name] = (var['min'] + var['max']) / 2
+                elif var['type'] == 'categorical':
+                    grid_data[var_name] = var['values'][0]
+
+        # Create DataFrame with correct column order
+        if hasattr(self.model, 'original_feature_names') and self.model.original_feature_names:
+            column_order = self.model.original_feature_names
+        else:
+            column_order = self.search_space.get_variable_names()
+
+        grid_df = pd.DataFrame(grid_data, columns=column_order)
+
+        # Get predictions with uncertainty
+        _, std = self.predict(grid_df)
+
+        # Reshape to grid
+        uncertainty_grid = std.reshape(X_grid.shape)
+
+        # Prepare experimental data for overlay
+        exp_x = None
+        exp_y = None
+        if show_experiments and not self.experiment_manager.df.empty:
+            exp_df = self.experiment_manager.df
+            if x_var in exp_df.columns and y_var in exp_df.columns:
+                exp_x = exp_df[x_var].values
+                exp_y = exp_df[y_var].values
+
+        # Prepare suggestion data for overlay
+        sugg_x = None
+        sugg_y = None
+        if show_suggestions and len(self.last_suggestions) > 0:
+            if isinstance(self.last_suggestions, pd.DataFrame):
+                sugg_df = self.last_suggestions
+            else:
+                sugg_df = pd.DataFrame(self.last_suggestions)
+
+            if x_var in sugg_df.columns and y_var in sugg_df.columns:
+                sugg_x = sugg_df[x_var].values
+                sugg_y = sugg_df[y_var].values
+
+        # Generate title if not provided
+        if title is None:
+            title = f"Posterior Uncertainty: {x_var} vs {y_var}"
+
+        # Delegate to visualization module
+        fig, ax, cbar = create_uncertainty_contour_plot(
+            x_grid=X_grid,
+            y_grid=Y_grid,
+            uncertainty_grid=uncertainty_grid,
+            x_var=x_var,
+            y_var=y_var,
+            exp_x=exp_x,
+            exp_y=exp_y,
+            suggest_x=sugg_x,
+            suggest_y=sugg_y,
+            cmap=cmap,
+            figsize=figsize,
+            dpi=dpi,
+            title=title
+        )
+
+        logger.info(f"Generated uncertainty contour plot for {x_var} vs {y_var}")
+        return fig
+
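Since the fixed_values defaulting above (midpoint for real/integer variables, first category for categoricals) decides which 2D slice of a higher-dimensional space gets plotted, it is worth pinning the remaining dimensions explicitly whenever the defaults are not meaningful. A minimal sketch, with placeholder variable names:

    >>> fig = session.plot_uncertainty_contour(
    ...     'temperature', 'pressure',
    ...     fixed_values={'catalyst': 'Pd', 'residence_time': 30.0}
    ... )
    >>> fig.savefig('uncertainty_slice.png', dpi=300)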
+    def plot_uncertainty_voxel(
+        self,
+        x_var: str,
+        y_var: str,
+        z_var: str,
+        fixed_values: Optional[Dict[str, Any]] = None,
+        grid_resolution: int = 15,
+        show_experiments: bool = True,
+        show_suggestions: bool = False,
+        cmap: str = 'Reds',
+        alpha: float = 0.5,
+        figsize: Tuple[float, float] = (10, 8),
+        dpi: int = 100,
+        title: Optional[str] = None
+    ) -> Figure:  # pyright: ignore[reportInvalidTypeForm]
+        """
+        Create 3D voxel plot of posterior uncertainty over variable space.
+
+        Visualizes where the model is most uncertain in 3D, helping identify
+        under-explored regions that may benefit from additional sampling.
+        Higher values indicate greater uncertainty (standard deviation).
+
+        Args:
+            x_var: Variable name for X axis (must be 'real' or 'integer' type)
+            y_var: Variable name for Y axis (must be 'real' or 'integer' type)
+            z_var: Variable name for Z axis (must be 'real' or 'integer' type)
+            fixed_values: Dict of {var_name: value} for other variables.
+                If not provided, uses midpoint for real/integer,
+                first category for categorical.
+            grid_resolution: Grid density (NxNxN points, default: 15)
+            show_experiments: Plot experimental data points as scatter
+            show_suggestions: Plot last suggested points (if available)
+            cmap: Matplotlib colormap name (default: 'Reds')
+            alpha: Transparency level (0=transparent, 1=opaque)
+            figsize: Figure size as (width, height) in inches
+            dpi: Dots per inch for figure resolution
+            title: Custom title (default: auto-generated)
+
+        Returns:
+            matplotlib Figure object with 3D axes
+
+        Example:
+            >>> # Visualize uncertainty in 3D
+            >>> fig = session.plot_uncertainty_voxel('temperature', 'pressure', 'flow_rate')
+
+            >>> # With transparency to see interior
+            >>> fig = session.plot_uncertainty_voxel(
+            ...     'temperature', 'pressure', 'flow_rate',
+            ...     alpha=0.3,
+            ...     grid_resolution=20
+            ... )
+            >>> fig.savefig('uncertainty_voxel.png', dpi=150)
+
+        Raises:
+            ValueError: If search space doesn't have at least 3 continuous variables
+
+        Note:
+            - Requires at least 3 'real' or 'integer' type variables
+            - Model must be trained and support std predictions
+            - Computationally expensive: O(N³) evaluations
+            - Useful for planning exploration in 3D space
+        """
+        self._check_matplotlib()
+        self._check_model_trained()
+
+        from alchemist_core.visualization.plots import create_uncertainty_voxel_plot
+
+        if fixed_values is None:
+            fixed_values = {}
+
+        # Get all variable names
+        var_names = self.search_space.get_variable_names()
+
+        # Validate that the requested variables exist and are continuous
+        for var_name, var_label in [(x_var, 'X'), (y_var, 'Y'), (z_var, 'Z')]:
+            if var_name not in var_names:
+                raise ValueError(f"{var_label} variable '{var_name}' not in search space")
+
+            var_def = next(v for v in self.search_space.variables if v['name'] == var_name)
+            if var_def['type'] not in ['real', 'integer']:
+                raise ValueError(
+                    f"{var_label} variable '{var_name}' must be 'real' or 'integer' type for voxel plot, "
+                    f"got '{var_def['type']}'"
+                )
+
+        # Get variable definitions
+        x_var_def = next(v for v in self.search_space.variables if v['name'] == x_var)
+        y_var_def = next(v for v in self.search_space.variables if v['name'] == y_var)
+        z_var_def = next(v for v in self.search_space.variables if v['name'] == z_var)
+
+        # Get bounds
+        x_bounds = (x_var_def['min'], x_var_def['max'])
+        y_bounds = (y_var_def['min'], y_var_def['max'])
+        z_bounds = (z_var_def['min'], z_var_def['max'])
+
+        # Create 3D meshgrid
+        x = np.linspace(x_bounds[0], x_bounds[1], grid_resolution)
+        y = np.linspace(y_bounds[0], y_bounds[1], grid_resolution)
+        z = np.linspace(z_bounds[0], z_bounds[1], grid_resolution)
+        X_grid, Y_grid, Z_grid = np.meshgrid(x, y, z, indexing='ij')
+
+        # Build prediction grid
+        grid_data = {
+            x_var: X_grid.ravel(),
+            y_var: Y_grid.ravel(),
+            z_var: Z_grid.ravel()
+        }
+
+        # Add fixed values for other variables
+        for var in self.search_space.variables:
+            var_name = var['name']
+            if var_name in [x_var, y_var, z_var]:
+                continue
+
+            if var_name in fixed_values:
+                grid_data[var_name] = fixed_values[var_name]
+            else:
+                # Use default value
+                if var['type'] in ['real', 'integer']:
+                    grid_data[var_name] = (var['min'] + var['max']) / 2
+                elif var['type'] == 'categorical':
+                    grid_data[var_name] = var['values'][0]
+
+        # Create DataFrame with correct column order
+        if hasattr(self.model, 'original_feature_names') and self.model.original_feature_names:
+            column_order = self.model.original_feature_names
+        else:
+            column_order = self.search_space.get_variable_names()
+
+        grid_df = pd.DataFrame(grid_data, columns=column_order)
+
+        # Get predictions with uncertainty
+        _, std = self.predict(grid_df)
+
+        # Reshape to 3D grid
+        uncertainty_grid = std.reshape(X_grid.shape)
+
+        # Prepare experimental data for overlay
+        exp_x = None
+        exp_y = None
+        exp_z = None
+        if show_experiments and not self.experiment_manager.df.empty:
+            exp_df = self.experiment_manager.df
+            if x_var in exp_df.columns and y_var in exp_df.columns and z_var in exp_df.columns:
+                exp_x = exp_df[x_var].values
+                exp_y = exp_df[y_var].values
+                exp_z = exp_df[z_var].values
+
+        # Prepare suggestion data for overlay
+        sugg_x = None
+        sugg_y = None
+        sugg_z = None
+        if show_suggestions and len(self.last_suggestions) > 0:
+            if isinstance(self.last_suggestions, pd.DataFrame):
+                sugg_df = self.last_suggestions
+            else:
+                sugg_df = pd.DataFrame(self.last_suggestions)
+
+            if x_var in sugg_df.columns and y_var in sugg_df.columns and z_var in sugg_df.columns:
+                sugg_x = sugg_df[x_var].values
+                sugg_y = sugg_df[y_var].values
+                sugg_z = sugg_df[z_var].values
+
+        # Generate title if not provided
+        if title is None:
+            title = f"3D Posterior Uncertainty: {x_var} vs {y_var} vs {z_var}"
+
+        # Delegate to visualization module
+        fig, ax = create_uncertainty_voxel_plot(
+            x_grid=X_grid,
+            y_grid=Y_grid,
+            z_grid=Z_grid,
+            uncertainty_grid=uncertainty_grid,
+            x_var=x_var,
+            y_var=y_var,
+            z_var=z_var,
+            exp_x=exp_x,
+            exp_y=exp_y,
+            exp_z=exp_z,
+            suggest_x=sugg_x,
+            suggest_y=sugg_y,
+            suggest_z=sugg_z,
+            cmap=cmap,
+            alpha=alpha,
+            figsize=figsize,
+            dpi=dpi,
+            title=title
+        )
+
+        logger.info(f"Generated 3D uncertainty voxel plot for {x_var} vs {y_var} vs {z_var}")
+        return fig
+
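A side note on the indexing='ij' argument used for the 3D meshgrid above: NumPy's default 'xy' indexing swaps the first two axes, so std.reshape(X_grid.shape) would silently misalign uncertainty values with coordinates in 3D. A quick check of the shape difference:

    >>> import numpy as np
    >>> np.meshgrid(np.arange(2), np.arange(3), indexing='ij')[0].shape
    (2, 3)
    >>> np.meshgrid(np.arange(2), np.arange(3))[0].shape  # default 'xy'
    (3, 2)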
+    def plot_acquisition_voxel(
+        self,
+        x_var: str,
+        y_var: str,
+        z_var: str,
+        acq_func: str = 'ei',
+        fixed_values: Optional[Dict[str, Any]] = None,
+        grid_resolution: int = 15,
+        acq_func_kwargs: Optional[Dict[str, Any]] = None,
+        goal: str = 'maximize',
+        show_experiments: bool = True,
+        show_suggestions: bool = True,
+        cmap: str = 'hot',
+        alpha: float = 0.5,
+        use_log_scale: Optional[bool] = None,
+        figsize: Tuple[float, float] = (10, 8),
+        dpi: int = 100,
+        title: Optional[str] = None
+    ) -> Figure:  # pyright: ignore[reportInvalidTypeForm]
+        """
+        Create 3D voxel plot of acquisition function over variable space.
+
+        Visualizes the acquisition function in 3D, showing "hot spots" where
+        the optimization algorithm believes the next experiment should be conducted.
+        Higher values indicate more promising regions.
+
+        Args:
+            x_var: Variable name for X axis (must be 'real' or 'integer' type)
+            y_var: Variable name for Y axis (must be 'real' or 'integer' type)
+            z_var: Variable name for Z axis (must be 'real' or 'integer' type)
+            acq_func: Acquisition function name ('ei', 'pi', 'ucb', 'logei', 'logpi')
+            fixed_values: Dict of {var_name: value} for other variables.
+                If not provided, uses midpoint for real/integer,
+                first category for categorical.
+            grid_resolution: Grid density (NxNxN points, default: 15)
+            acq_func_kwargs: Additional acquisition parameters (xi, kappa, beta)
+            goal: 'maximize' or 'minimize' - optimization direction
+            show_experiments: Plot experimental data points as scatter
+            show_suggestions: Plot last suggested points (if available)
+            cmap: Matplotlib colormap name (default: 'hot')
+            alpha: Transparency level (0=transparent, 1=opaque)
+            use_log_scale: Use logarithmic color scale (default: auto for logei/logpi)
+            figsize: Figure size as (width, height) in inches
+            dpi: Dots per inch for figure resolution
+            title: Custom title (default: auto-generated)
+
+        Returns:
+            matplotlib Figure object with 3D axes
+
+        Example:
+            >>> # Visualize Expected Improvement in 3D
+            >>> fig = session.plot_acquisition_voxel(
+            ...     'temperature', 'pressure', 'flow_rate',
+            ...     acq_func='ei'
+            ... )
+
+            >>> # UCB with custom exploration
+            >>> fig = session.plot_acquisition_voxel(
+            ...     'temperature', 'pressure', 'flow_rate',
+            ...     acq_func='ucb',
+            ...     acq_func_kwargs={'beta': 1.0},
+            ...     alpha=0.3
+            ... )
+            >>> fig.savefig('acq_voxel.png', dpi=150)
+
+        Raises:
+            ValueError: If search space doesn't have at least 3 continuous variables
+
+        Note:
+            - Requires at least 3 'real' or 'integer' type variables
+            - Model must be trained before plotting
+            - Computationally expensive: O(N³) evaluations
+            - Higher values = more promising for next experiment
+            - Suggestions should align with high-value regions
+        """
+        self._check_matplotlib()
+        self._check_model_trained()
+
+        from alchemist_core.utils.acquisition_utils import evaluate_acquisition
+        from alchemist_core.visualization.plots import create_acquisition_voxel_plot
+
+        if fixed_values is None:
+            fixed_values = {}
+
+        # Get all variable names
+        var_names = self.search_space.get_variable_names()
+
+        # Validate that the requested variables exist and are continuous
+        for var_name, var_label in [(x_var, 'X'), (y_var, 'Y'), (z_var, 'Z')]:
+            if var_name not in var_names:
+                raise ValueError(f"{var_label} variable '{var_name}' not in search space")
+
+            var_def = next(v for v in self.search_space.variables if v['name'] == var_name)
+            if var_def['type'] not in ['real', 'integer']:
+                raise ValueError(
+                    f"{var_label} variable '{var_name}' must be 'real' or 'integer' type for voxel plot, "
+                    f"got '{var_def['type']}'"
+                )
+
+        # Get variable definitions
+        x_var_def = next(v for v in self.search_space.variables if v['name'] == x_var)
+        y_var_def = next(v for v in self.search_space.variables if v['name'] == y_var)
+        z_var_def = next(v for v in self.search_space.variables if v['name'] == z_var)
+
+        # Get bounds
+        x_bounds = (x_var_def['min'], x_var_def['max'])
+        y_bounds = (y_var_def['min'], y_var_def['max'])
+        z_bounds = (z_var_def['min'], z_var_def['max'])
+
+        # Create 3D meshgrid
+        x = np.linspace(x_bounds[0], x_bounds[1], grid_resolution)
+        y = np.linspace(y_bounds[0], y_bounds[1], grid_resolution)
+        z = np.linspace(z_bounds[0], z_bounds[1], grid_resolution)
+        X_grid, Y_grid, Z_grid = np.meshgrid(x, y, z, indexing='ij')
+
+        # Build acquisition evaluation grid
+        grid_data = {
+            x_var: X_grid.ravel(),
+            y_var: Y_grid.ravel(),
+            z_var: Z_grid.ravel()
+        }
+
+        # Add fixed values for other variables
+        for var in self.search_space.variables:
+            var_name = var['name']
+            if var_name in [x_var, y_var, z_var]:
+                continue
+
+            if var_name in fixed_values:
+                grid_data[var_name] = fixed_values[var_name]
+            else:
+                # Use default value
+                if var['type'] in ['real', 'integer']:
+                    grid_data[var_name] = (var['min'] + var['max']) / 2
+                elif var['type'] == 'categorical':
+                    grid_data[var_name] = var['values'][0]
+
+        # Create DataFrame with correct column order
+        if hasattr(self.model, 'original_feature_names') and self.model.original_feature_names:
+            column_order = self.model.original_feature_names
+        else:
+            column_order = self.search_space.get_variable_names()
+
+        grid_df = pd.DataFrame(grid_data, columns=column_order)
+
+        # Evaluate acquisition function
+        acq_values, _ = evaluate_acquisition(
+            self.model,
+            grid_df,
+            acq_func=acq_func,
+            acq_func_kwargs=acq_func_kwargs,
+            goal=goal
+        )
+
+        # Reshape to 3D grid
+        acquisition_grid = acq_values.reshape(X_grid.shape)
+
+        # Prepare experimental data for overlay
+        exp_x = None
+        exp_y = None
+        exp_z = None
+        if show_experiments and not self.experiment_manager.df.empty:
+            exp_df = self.experiment_manager.df
+            if x_var in exp_df.columns and y_var in exp_df.columns and z_var in exp_df.columns:
+                exp_x = exp_df[x_var].values
+                exp_y = exp_df[y_var].values
+                exp_z = exp_df[z_var].values
+
+        # Prepare suggestion data for overlay
+        sugg_x = None
+        sugg_y = None
+        sugg_z = None
+        if show_suggestions and len(self.last_suggestions) > 0:
+            if isinstance(self.last_suggestions, pd.DataFrame):
+                sugg_df = self.last_suggestions
+            else:
+                sugg_df = pd.DataFrame(self.last_suggestions)
+
+            if x_var in sugg_df.columns and y_var in sugg_df.columns and z_var in sugg_df.columns:
+                sugg_x = sugg_df[x_var].values
+                sugg_y = sugg_df[y_var].values
+                sugg_z = sugg_df[z_var].values
+
+        # Auto-enable log scale for logei/logpi if not explicitly set
+        if use_log_scale is None:
+            use_log_scale = acq_func.lower() in ['logei', 'logpi']
+
+        # Generate title if not provided
+        if title is None:
+            acq_name = acq_func.upper()
+            title = f"3D Acquisition Function ({acq_name}): {x_var} vs {y_var} vs {z_var}"
+
+        # Delegate to visualization module
+        fig, ax = create_acquisition_voxel_plot(
+            x_grid=X_grid,
+            y_grid=Y_grid,
+            z_grid=Z_grid,
+            acquisition_grid=acquisition_grid,
+            x_var=x_var,
+            y_var=y_var,
+            z_var=z_var,
+            exp_x=exp_x,
+            exp_y=exp_y,
+            exp_z=exp_z,
+            suggest_x=sugg_x,
+            suggest_y=sugg_y,
+            suggest_z=sugg_z,
+            cmap=cmap,
+            alpha=alpha,
+            use_log_scale=use_log_scale,
+            figsize=figsize,
+            dpi=dpi,
+            title=title
+        )
+
+        logger.info(f"Generated 3D acquisition voxel plot for {x_var} vs {y_var} vs {z_var} using {acq_func}")
+        return fig
+
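The O(N³) cost warning in the docstring above is easy to quantify: the default grid_resolution=15 already means 15³ = 3,375 acquisition evaluations per figure, and raising it to 30 means 27,000. A sketch of comparing acquisition functions on a reduced budget (placeholder variable names):

    >>> for f in ['ei', 'ucb', 'logei']:
    ...     fig = session.plot_acquisition_voxel(
    ...         'temperature', 'pressure', 'flow_rate',
    ...         acq_func=f, grid_resolution=12)  # 12**3 = 1,728 grid points per call
    ...     fig.savefig(f'acq_voxel_{f}.png', dpi=150)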
+    def plot_suggested_next(
+        self,
+        x_var: str,
+        y_var: Optional[str] = None,
+        z_var: Optional[str] = None,
+        acq_func: Optional[str] = None,
+        fixed_values: Optional[Dict[str, Any]] = None,
+        suggestion_index: int = 0,
+        n_points: int = 100,
+        grid_resolution: int = 50,
+        show_uncertainty: Optional[Union[bool, List[float]]] = [1.0, 2.0],
+        show_experiments: bool = True,
+        acq_func_kwargs: Optional[Dict[str, Any]] = None,
+        goal: Optional[str] = None,
+        figsize: Tuple[float, float] = (10, 12),
+        dpi: int = 100,
+        title_prefix: Optional[str] = None
+    ) -> Figure:  # pyright: ignore[reportInvalidTypeForm]
+        """
+        Create visualization of suggested next experiment with posterior and acquisition.
+
+        This creates a stacked subplot showing:
+        - Top: Posterior mean prediction (slice/contour/voxel)
+        - Bottom: Acquisition function with suggested point marked
+
+        The fixed values for non-varying dimensions are automatically extracted from
+        the suggested point coordinates, making it easy to visualize why that point
+        was chosen.
+
+        Args:
+            x_var: Variable name for X axis (required)
+            y_var: Variable name for Y axis (optional, creates 2D plot if provided)
+            z_var: Variable name for Z axis (optional, creates 3D plot if provided with y_var)
+            acq_func: Acquisition function used (if None, extracts from last run or defaults to 'ei')
+            fixed_values: Override automatic fixed values from suggestion (optional)
+            suggestion_index: Which suggestion to visualize if multiple (default: 0 = most recent)
+            n_points: Points to evaluate for 1D slice (default: 100)
+            grid_resolution: Grid density for 2D/3D plots (default: 50)
+            show_uncertainty: For posterior plot - True, False, or list of sigma values (e.g., [1.0, 2.0])
+            show_experiments: Overlay experimental data points
+            acq_func_kwargs: Additional acquisition parameters (xi, kappa, beta)
+            goal: 'maximize' or 'minimize' (if None, uses session's last goal)
+            figsize: Figure size as (width, height) in inches
+            dpi: Dots per inch
+            title_prefix: Custom prefix for titles (default: auto-generated)
+
+        Returns:
+            matplotlib Figure object with 2 subplots
+
+        Example:
+            >>> # After running suggest_next()
+            >>> session.suggest_next(strategy='ei')
+            >>>
+            >>> # Visualize the suggestion in 1D
+            >>> fig = session.plot_suggested_next('temperature')
+            >>>
+            >>> # Visualize in 2D
+            >>> fig = session.plot_suggested_next('temperature', 'pressure')
+            >>>
+            >>> # Visualize in 3D
+            >>> fig = session.plot_suggested_next('temperature', 'pressure', 'time')
+            >>> fig.savefig('suggestion_3d.png', dpi=300)
+
+        Note:
+            - Must call suggest_next() before using this function
+            - Automatically extracts fixed values from the suggested point
+            - Creates intuitive visualization showing why the point was chosen
+        """
+        self._check_matplotlib()
+        self._check_model_trained()
+
+        # Check if we have suggestions (avoid truthiness check: it raises on DataFrames)
+        if self.last_suggestions is None or len(self.last_suggestions) == 0:
+            raise ValueError("No suggestions available. Call suggest_next() first.")
+
+        # Get the suggestion to visualize
+        if isinstance(self.last_suggestions, pd.DataFrame):
+            sugg_df = self.last_suggestions
+        else:
+            sugg_df = pd.DataFrame(self.last_suggestions)
+
+        if suggestion_index >= len(sugg_df):
+            raise ValueError(f"Suggestion index {suggestion_index} out of range (have {len(sugg_df)} suggestions)")
+
+        suggestion = sugg_df.iloc[suggestion_index].to_dict()
+
+        # Determine plot dimensionality
+        if z_var is not None and y_var is None:
+            raise ValueError("Must provide y_var if z_var is specified")
+
+        is_1d = (y_var is None)
+        is_2d = (y_var is not None and z_var is None)
+        is_3d = (z_var is not None)
+
+        # Cap 3D resolution to prevent kernel crashes
+        if is_3d and grid_resolution > 30:
+            logger.warning(f"3D voxel resolution capped at 30 (requested {grid_resolution})")
+            grid_resolution = 30
+
+        # Get variable names for the plot
+        plot_vars = [x_var]
+        if y_var is not None:
+            plot_vars.append(y_var)
+        if z_var is not None:
+            plot_vars.append(z_var)
+
+        # Extract fixed values from suggestion (for non-varying dimensions)
+        if fixed_values is None:
+            fixed_values = {}
+            for var_name in self.search_space.get_variable_names():
+                if var_name not in plot_vars and var_name in suggestion:
+                    fixed_values[var_name] = suggestion[var_name]
+
+        # Get acquisition function and goal from last run if not specified
+        if acq_func is None:
+            # Try to get from last acquisition run
+            if hasattr(self, '_last_acq_func'):
+                acq_func = self._last_acq_func
+            else:
+                acq_func = 'ei'  # Default fallback
+
+        if goal is None:
+            if hasattr(self, '_last_goal'):
+                goal = self._last_goal
+            else:
+                goal = 'maximize'  # Default fallback
+
+        # Create figure with 2 subplots (stacked vertically)
+        fig, (ax1, ax2) = plt.subplots(2, 1, figsize=figsize, dpi=dpi)
+
+        # Generate titles
+        if title_prefix is None:
+            title_prefix = "Suggested Next Experiment"
+
+        # Format fixed values with smart rounding (2 decimals for floats, no .00 for integers)
+        def format_value(v):
+            if isinstance(v, float):
+                # Round to 2 decimals, but strip trailing zeros
+                rounded = round(v, 2)
+                # Check if it's effectively an integer
+                if rounded == int(rounded):
+                    return str(int(rounded))
+                return f"{rounded:.2f}".rstrip('0').rstrip('.')
+            return str(v)
+
+        fixed_str = ', '.join([f'{k}={format_value(v)}' for k, v in fixed_values.items()])
+
+        # Plot 1: Posterior Mean
+        if is_1d:
+            # 1D slice plot
+            x_var_def = next(v for v in self.search_space.variables if v['name'] == x_var)
+            x_values = np.linspace(x_var_def['min'], x_var_def['max'], n_points)
+
+            # Build grid
+            grid_data = {x_var: x_values}
+
+            for var in self.search_space.variables:
+                var_name = var['name']
+                if var_name == x_var:
+                    continue
+
+                if var_name in fixed_values:
+                    grid_data[var_name] = fixed_values[var_name]
+                else:
+                    if var['type'] in ['real', 'integer']:
+                        grid_data[var_name] = (var['min'] + var['max']) / 2
+                    elif var['type'] == 'categorical':
+                        grid_data[var_name] = var['values'][0]
+
+            # Create DataFrame with correct column order
+            if hasattr(self.model, 'original_feature_names') and self.model.original_feature_names:
+                column_order = self.model.original_feature_names
+            else:
+                column_order = self.search_space.get_variable_names()
+
+            grid_df = pd.DataFrame(grid_data, columns=column_order)
+
+            # Get predictions
+            predictions, std = self.predict(grid_df)
+
+            # Prepare experiment overlay
+            exp_x, exp_y = None, None
+            if show_experiments and not self.experiment_manager.df.empty:
+                df = self.experiment_manager.df
+                mask = pd.Series([True] * len(df))
+                for var_name, fixed_val in fixed_values.items():
+                    if var_name in df.columns:
+                        if isinstance(fixed_val, str):
+                            mask &= (df[var_name] == fixed_val)
+                        else:
+                            mask &= np.isclose(df[var_name], fixed_val, atol=1e-6)
+                if mask.any():
+                    filtered_df = df[mask]
+                    exp_x = filtered_df[x_var].values
+                    exp_y = filtered_df[self.experiment_manager.target_columns[0]].values
+
+            # Determine sigma bands
+            sigma_bands = None
+            if show_uncertainty is not None:
+                if isinstance(show_uncertainty, bool):
+                    sigma_bands = [1.0, 2.0] if show_uncertainty else None
+                else:
+                    sigma_bands = show_uncertainty
+
+            from alchemist_core.visualization.plots import create_slice_plot
+            create_slice_plot(
+                x_values=x_values,
+                predictions=predictions,
+                x_var=x_var,
+                std=std,
+                sigma_bands=sigma_bands,
+                exp_x=exp_x,
+                exp_y=exp_y,
+                title=f"{title_prefix} - Posterior Mean\n({fixed_str})" if fixed_str else f"{title_prefix} - Posterior Mean",
+                ax=ax1
+            )
+
+            # Mark the suggested point on posterior plot
+            sugg_x = suggestion[x_var]
+            sugg_y_pred, _ = self.predict(pd.DataFrame([suggestion]))
+            ax1.scatter([sugg_x], sugg_y_pred, color='black', s=102, marker='*', zorder=10,
+                        linewidths=1.5, label='Suggested')
+            ax1.legend()
+
+        elif is_2d:
+            # 2D contour plot
+            x_var_def = next(v for v in self.search_space.variables if v['name'] == x_var)
+            y_var_def = next(v for v in self.search_space.variables if v['name'] == y_var)
+
+            x_values = np.linspace(x_var_def['min'], x_var_def['max'], grid_resolution)
+            y_values = np.linspace(y_var_def['min'], y_var_def['max'], grid_resolution)
+            X_grid, Y_grid = np.meshgrid(x_values, y_values)
+
+            grid_data = {
+                x_var: X_grid.ravel(),
+                y_var: Y_grid.ravel()
+            }
+
+            for var in self.search_space.variables:
+                var_name = var['name']
+                if var_name in [x_var, y_var]:
+                    continue
+
+                if var_name in fixed_values:
+                    grid_data[var_name] = fixed_values[var_name]
+                else:
+                    if var['type'] in ['real', 'integer']:
+                        grid_data[var_name] = (var['min'] + var['max']) / 2
+                    elif var['type'] == 'categorical':
+                        grid_data[var_name] = var['values'][0]
+
+            if hasattr(self.model, 'original_feature_names') and self.model.original_feature_names:
+                column_order = self.model.original_feature_names
+            else:
+                column_order = self.search_space.get_variable_names()
+
+            grid_df = pd.DataFrame(grid_data, columns=column_order)
+
+            predictions, _ = self.predict(grid_df)
+            prediction_grid = predictions.reshape(X_grid.shape)
+
+            # Prepare overlays
+            exp_x, exp_y = None, None
+            if show_experiments and not self.experiment_manager.df.empty:
+                exp_df = self.experiment_manager.df
+                if x_var in exp_df.columns and y_var in exp_df.columns:
+                    exp_x = exp_df[x_var].values
+                    exp_y = exp_df[y_var].values
+
+            from alchemist_core.visualization.plots import create_contour_plot
+            _, _, _ = create_contour_plot(
+                x_grid=X_grid,
+                y_grid=Y_grid,
+                predictions_grid=prediction_grid,
+                x_var=x_var,
+                y_var=y_var,
+                exp_x=exp_x,
+                exp_y=exp_y,
+                suggest_x=None,
+                suggest_y=None,
+                title=f"{title_prefix} - Posterior Mean\n({fixed_str})" if fixed_str else f"{title_prefix} - Posterior Mean",
+                ax=ax1
+            )
+
+            # Mark the suggested point
+            sugg_x = suggestion[x_var]
+            sugg_y = suggestion[y_var]
+            ax1.scatter([sugg_x], [sugg_y], color='black', s=102, marker='*', zorder=10,
+                        linewidths=1.5, label='Suggested')
+            ax1.legend()
+
+        else:  # 3D
+            # 3D voxel plot
+            x_var_def = next(v for v in self.search_space.variables if v['name'] == x_var)
+            y_var_def = next(v for v in self.search_space.variables if v['name'] == y_var)
+            z_var_def = next(v for v in self.search_space.variables if v['name'] == z_var)
+
+            x_values = np.linspace(x_var_def['min'], x_var_def['max'], grid_resolution)
+            y_values = np.linspace(y_var_def['min'], y_var_def['max'], grid_resolution)
+            z_values = np.linspace(z_var_def['min'], z_var_def['max'], grid_resolution)
+            X_grid, Y_grid, Z_grid = np.meshgrid(x_values, y_values, z_values, indexing='ij')
+
+            grid_data = {
+                x_var: X_grid.ravel(),
+                y_var: Y_grid.ravel(),
+                z_var: Z_grid.ravel()
+            }
+
+            for var in self.search_space.variables:
+                var_name = var['name']
+                if var_name in [x_var, y_var, z_var]:
+                    continue
+
+                if var_name in fixed_values:
+                    grid_data[var_name] = fixed_values[var_name]
+                else:
+                    if var['type'] in ['real', 'integer']:
+                        grid_data[var_name] = (var['min'] + var['max']) / 2
+                    elif var['type'] == 'categorical':
+                        grid_data[var_name] = var['values'][0]
+
+            if hasattr(self.model, 'original_feature_names') and self.model.original_feature_names:
+                column_order = self.model.original_feature_names
+            else:
+                column_order = self.search_space.get_variable_names()
+
+            grid_df = pd.DataFrame(grid_data, columns=column_order)
+
+            predictions, _ = self.predict(grid_df)
+            prediction_grid = predictions.reshape(X_grid.shape)
+
+            # Prepare overlays
+            exp_x, exp_y, exp_z = None, None, None
+            if show_experiments and not self.experiment_manager.df.empty:
+                exp_df = self.experiment_manager.df
+                if all(v in exp_df.columns for v in [x_var, y_var, z_var]):
+                    exp_x = exp_df[x_var].values
+                    exp_y = exp_df[y_var].values
+                    exp_z = exp_df[z_var].values
+
+            from alchemist_core.visualization.plots import create_voxel_plot
+            # Note: voxel plots don't support ax parameter yet, need to create separately
+            # For now, we'll note this limitation
+            logger.warning("3D voxel plots for suggestions not yet fully supported with subplots")
+            ax1.text(0.5, 0.5, "3D voxel posterior visualization\n(use plot_voxel separately)",
+                     ha='center', va='center', transform=ax1.transAxes)
+            ax1.axis('off')
+
+        # Plot 2: Acquisition Function
+        if is_1d:
+            # 1D acquisition slice
+            from alchemist_core.utils.acquisition_utils import evaluate_acquisition
+            from alchemist_core.visualization.plots import create_slice_plot
+
+            x_var_def = next(v for v in self.search_space.variables if v['name'] == x_var)
+            x_values = np.linspace(x_var_def['min'], x_var_def['max'], n_points)
+
+            grid_data = {x_var: x_values}
+
+            for var in self.search_space.variables:
+                var_name = var['name']
+                if var_name == x_var:
+                    continue
+
+                if var_name in fixed_values:
+                    grid_data[var_name] = fixed_values[var_name]
+                else:
+                    if var['type'] in ['real', 'integer']:
+                        grid_data[var_name] = (var['min'] + var['max']) / 2
+                    elif var['type'] == 'categorical':
+                        grid_data[var_name] = var['values'][0]
+
+            if hasattr(self.model, 'original_feature_names') and self.model.original_feature_names:
+                column_order = self.model.original_feature_names
+            else:
+                column_order = self.search_space.get_variable_names()
+
+            grid_df = pd.DataFrame(grid_data, columns=column_order)
+
+            acq_values, _ = evaluate_acquisition(
+                self.model,
+                grid_df,
+                acq_func=acq_func,
+                acq_func_kwargs=acq_func_kwargs,
+                goal=goal
+            )
+
+            create_slice_plot(
+                x_values=x_values,
+                predictions=acq_values,
+                x_var=x_var,
+                std=None,
+                sigma_bands=None,
+                exp_x=None,
+                exp_y=None,
+                title=None,  # No title for acquisition subplot
+                ax=ax2,
+                prediction_label=acq_func.upper(),
+                line_color='darkgreen',
+                line_width=1.5
+            )
+
+            # Add green fill under acquisition curve
+            ax2.fill_between(x_values, 0, acq_values, alpha=0.3, color='green', zorder=0)
+
+            ax2.set_ylabel(f'{acq_func.upper()} Value')
+
+            # Mark the suggested point
+            sugg_x = suggestion[x_var]
+            # Evaluate acquisition at the suggested point
+            sugg_acq, _ = evaluate_acquisition(
+                self.model,
+                pd.DataFrame([suggestion]),
+                acq_func=acq_func,
+                acq_func_kwargs=acq_func_kwargs,
+                goal=goal
+            )
+            ax2.scatter([sugg_x], sugg_acq, color='black', s=102, marker='*', zorder=10,
+                        linewidths=1.5, label=f'{acq_func.upper()} (suggested)')
+            ax2.legend()
+
+        elif is_2d:
+            # 2D acquisition contour
+            from alchemist_core.utils.acquisition_utils import evaluate_acquisition
+            from alchemist_core.visualization.plots import create_contour_plot
+
+            x_var_def = next(v for v in self.search_space.variables if v['name'] == x_var)
+            y_var_def = next(v for v in self.search_space.variables if v['name'] == y_var)
+
+            x_values = np.linspace(x_var_def['min'], x_var_def['max'], grid_resolution)
+            y_values = np.linspace(y_var_def['min'], y_var_def['max'], grid_resolution)
+            X_grid, Y_grid = np.meshgrid(x_values, y_values)
+
+            grid_data = {
+                x_var: X_grid.ravel(),
+                y_var: Y_grid.ravel()
+            }
+
+            for var in self.search_space.variables:
+                var_name = var['name']
+                if var_name in [x_var, y_var]:
+                    continue
+
+                if var_name in fixed_values:
+                    grid_data[var_name] = fixed_values[var_name]
+                else:
+                    if var['type'] in ['real', 'integer']:
+                        grid_data[var_name] = (var['min'] + var['max']) / 2
+                    elif var['type'] == 'categorical':
+                        grid_data[var_name] = var['values'][0]
+
+            if hasattr(self.model, 'original_feature_names') and self.model.original_feature_names:
+                column_order = self.model.original_feature_names
+            else:
+                column_order = self.search_space.get_variable_names()
+
+            grid_df = pd.DataFrame(grid_data, columns=column_order)
+
+            acq_values, _ = evaluate_acquisition(
+                self.model,
+                grid_df,
+                acq_func=acq_func,
+                acq_func_kwargs=acq_func_kwargs,
+                goal=goal
+            )
+            acquisition_grid = acq_values.reshape(X_grid.shape)
+
+            _, _, _ = create_contour_plot(
+                x_grid=X_grid,
+                y_grid=Y_grid,
+                predictions_grid=acquisition_grid,
+                x_var=x_var,
+                y_var=y_var,
+                exp_x=None,
+                exp_y=None,
+                suggest_x=None,
+                suggest_y=None,
+                cmap='Greens',  # Green colormap for acquisition
+                title=None,  # No title for acquisition subplot
+                ax=ax2
+            )
+
+            # Mark the suggested point
+            sugg_x = suggestion[x_var]
+            sugg_y = suggestion[y_var]
+            ax2.scatter([sugg_x], [sugg_y], color='black', s=102, marker='*', zorder=10,
+                        linewidths=1.5, label=f'{acq_func.upper()} (suggested)')
+            ax2.legend()
+
+        else:  # 3D
+            # 3D acquisition voxel
+            logger.warning("3D voxel plots for acquisition not yet fully supported with subplots")
+            ax2.text(0.5, 0.5, "3D voxel acquisition visualization\n(use plot_acquisition_voxel separately)",
+                     ha='center', va='center', transform=ax2.transAxes)
+            ax2.axis('off')
+
+        plt.tight_layout()
+
+        logger.info(f"Generated suggested next experiment visualization ({len(plot_vars)}D)")
+        return fig
+
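One consequence of the automatic fixed-value extraction implemented above: the non-plotted dimensions are pinned at the suggestion's own coordinates, so the starred point genuinely lies on the plotted slice. A minimal sketch (placeholder variable names):

    >>> session.suggest_next(strategy='ei')
    >>> fig = session.plot_suggested_next('temperature')  # remaining dims pinned at the suggestion
    >>> fig = session.plot_suggested_next('temperature', 'pressure',
    ...                                   fixed_values={'flow_rate': 1.5})  # manual override
    >>> fig.savefig('suggestion.png', dpi=300)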
+    def plot_probability_of_improvement(
+        self,
+        goal: Literal['maximize', 'minimize'] = 'maximize',
+        backend: Optional[str] = None,
+        kernel: Optional[str] = None,
+        n_grid_points: int = 1000,
+        start_iteration: int = 5,
+        reuse_hyperparameters: bool = True,
+        xi: float = 0.01,
+        figsize: Tuple[float, float] = (8, 6),
+        dpi: int = 100,
+        title: Optional[str] = None
+    ) -> Figure:  # pyright: ignore[reportInvalidTypeForm]
+        """
+        Plot maximum probability of improvement over optimization iterations.
+
+        Retroactively computes how the probability of finding a better solution
+        evolved during optimization. At each iteration:
+        1. Trains GP on observations up to that point (reusing hyperparameters)
+        2. Computes PI across the search space using native acquisition functions
+        3. Records the maximum PI value
+
+        Uses native PI implementations:
+        - sklearn backend: skopt.acquisition.gaussian_pi
+        - botorch backend: botorch.acquisition.ProbabilityOfImprovement
+
+        Decreasing max(PI) indicates the optimization is converging and has
+        less potential for improvement remaining.
+
+        Args:
+            goal: 'maximize' or 'minimize' - optimization direction
+            backend: Model backend to use (defaults to session's model_backend)
+            kernel: Kernel type for GP (defaults to session's kernel type)
+            n_grid_points: Number of points to sample search space
+            start_iteration: Minimum observations before computing PI (default: 5)
+            reuse_hyperparameters: If True, use final model's optimized hyperparameters
+                for all iterations (much faster, recommended)
+            xi: PI parameter controlling improvement threshold (default: 0.01)
+            figsize: Figure size as (width, height) in inches
+            dpi: Dots per inch for figure resolution
+            title: Custom plot title (auto-generated if None)
+
+        Returns:
+            matplotlib Figure object
+
+        Example:
+            >>> # After running optimization
+            >>> fig = session.plot_probability_of_improvement(goal='maximize')
+            >>> fig.savefig('pi_convergence.png')
+
+        Note:
+            - Requires at least `start_iteration` experiments
+            - Use fewer n_grid_points for faster computation
+            - PI values near 0 suggest little room for improvement
+            - Reusing hyperparameters (default) is much faster and usually sufficient
+            - Uses rigorous acquisition function implementations (not approximations)
+        """
+        self._check_matplotlib()
+
+        # Check we have enough experiments
+        n_exp = len(self.experiment_manager.df)
+        if n_exp < start_iteration:
+            raise ValueError(
+                f"Need at least {start_iteration} experiments for PI plot "
+                f"(have {n_exp}). Lower start_iteration if needed."
+            )
+
+        # Default to session's model configuration if not specified
+        if backend is None:
+            if self.model_backend is None:
+                raise ValueError(
+                    "No backend specified and session has no trained model. "
+                    "Either train a model first or specify backend parameter."
+                )
+            backend = self.model_backend
+
+        if kernel is None:
+            if self.model is None:
+                raise ValueError(
+                    "No kernel specified and session has no trained model. "
+                    "Either train a model first or specify kernel parameter."
+                )
+            # Extract kernel type from trained model
+            if self.model_backend == 'sklearn' and hasattr(self.model, 'optimized_kernel'):
+                # sklearn model
+                kernel_obj = self.model.optimized_kernel
+                if 'RBF' in str(type(kernel_obj)):
+                    kernel = 'RBF'
+                elif 'Matern' in str(type(kernel_obj)):
+                    kernel = 'Matern'
+                elif 'RationalQuadratic' in str(type(kernel_obj)):
+                    kernel = 'RationalQuadratic'
+                else:
+                    kernel = 'RBF'  # fallback
+            elif self.model_backend == 'botorch' and hasattr(self.model, 'cont_kernel_type'):
+                # botorch model - use the stored kernel type
+                kernel = self.model.cont_kernel_type
+            else:
+                # Final fallback if we can't determine kernel
+                kernel = 'Matern'
+
+        # Get optimized hyperparameters if reusing them
+        optimized_kernel_params = None
+        if reuse_hyperparameters and self.model is not None:
+            if backend.lower() == 'sklearn' and hasattr(self.model, 'optimized_kernel'):
+                # Extract the optimized kernel parameters
+                optimized_kernel_params = self.model.optimized_kernel
+                logger.info("Reusing optimized kernel hyperparameters from trained model")
+            # Note: botorch hyperparameter reuse would go here if needed
+
+        # Get data
+        target_col = self.experiment_manager.target_columns[0]
+        X_all, y_all = self.experiment_manager.get_features_and_target()
+
+        # Generate grid of test points across search space
+        X_test = self._generate_prediction_grid(n_grid_points)
+
+        logger.info(f"Computing PI convergence from iteration {start_iteration} to {n_exp}...")
+        logger.info(f"Using {len(X_test)} test points across search space")
+        logger.info(f"Using native PI acquisition functions (xi={xi})")
+        if reuse_hyperparameters and optimized_kernel_params is not None:
+            logger.info("Using optimized hyperparameters from final model (faster)")
+        else:
+            logger.info("Optimizing hyperparameters at each iteration (slower but more accurate)")
+
+        # Compute max PI at each iteration
+        iterations = []
+        max_pi_values = []
+
+        for i in range(start_iteration, n_exp + 1):
+            # Get data up to iteration i
+            X_train = X_all.iloc[:i]
+            y_train = y_all[:i]
+
+            # Create temporary session for this iteration
+            temp_session = OptimizationSession(
+                search_space=self.search_space,
+                experiment_manager=ExperimentManager(search_space=self.search_space)
+            )
+            temp_session.experiment_manager.df = self.experiment_manager.df.iloc[:i].copy()
+
+            # Train model with optimized hyperparameters if available
+            try:
+                if reuse_hyperparameters and optimized_kernel_params is not None and backend.lower() == 'sklearn':
+                    # For sklearn: directly access model and set optimized kernel
+                    from alchemist_core.models.sklearn_model import SklearnModel
+
+                    # Create model instance with kernel options
+                    model_kwargs = {
+                        'kernel_options': {'kernel_type': kernel},
+                        'n_restarts_optimizer': 0  # Don't optimize since we're using fixed hyperparameters
+                    }
+                    temp_model = SklearnModel(**model_kwargs)
+
+                    # Preprocess data
+                    X_processed, y_processed = temp_model._preprocess_data(temp_session.experiment_manager)
+
+                    # Import sklearn's GP
+                    from sklearn.gaussian_process import GaussianProcessRegressor
+
+                    # Create GP with the optimized kernel and optimizer=None to keep it fixed
+                    gp_params = {
+                        'kernel': optimized_kernel_params,
+                        'optimizer': None,  # Keep hyperparameters fixed
+                        'random_state': temp_model.random_state
+                    }
+
+                    # Only add alpha if we have noise values
+                    if temp_model.alpha is not None:
+                        gp_params['alpha'] = temp_model.alpha
+
+                    temp_model.model = GaussianProcessRegressor(**gp_params)
+
+                    # Fit model (only computes GP weights, not hyperparameters)
+                    temp_model.model.fit(X_processed, y_processed)
+                    temp_model._is_trained = True
+
+                    # Set the model in the session
+                    temp_session.model = temp_model
+                    temp_session.model_backend = 'sklearn'
+                else:
+                    # Standard training with hyperparameter optimization
+                    temp_session.train_model(backend=backend, kernel=kernel)
+            except Exception as e:
+                logger.warning(f"Failed to train model at iteration {i}: {e}")
+                continue
+
+            # Compute PI using native acquisition functions
+            try:
+                if backend.lower() == 'sklearn':
+                    # Use skopt's gaussian_pi function
+                    from skopt.acquisition import gaussian_pi
+
+                    # For maximization, negate y values so skopt treats it as minimization
+                    if goal.lower() == 'maximize':
+                        y_opt = -y_train.max()
+                    else:
+                        y_opt = y_train.min()
+
+                    # Preprocess X_test using the model's preprocessing pipeline
+                    # This handles categorical encoding and scaling
+                    X_test_processed = temp_session.model._preprocess_X(X_test)
+
+                    # Compute PI for all test points using skopt's implementation
+                    # Note: gaussian_pi expects model with predict(X, return_std=True)
+                    pi_values = gaussian_pi(
+                        X=X_test_processed,
+                        model=temp_session.model.model,  # sklearn GP model
+                        y_opt=y_opt,
+                        xi=xi
+                    )
+
+                    max_pi = float(np.max(pi_values))
+
+                elif backend.lower() == 'botorch':
+                    # Use BoTorch's ProbabilityOfImprovement
+                    import torch
+                    from botorch.acquisition import ProbabilityOfImprovement
+
+                    # Determine best value seen so far
+                    if goal.lower() == 'maximize':
+                        best_f = float(y_train.max())
+                    else:
+                        best_f = float(y_train.min())
+
+                    # Encode categorical variables if present
+                    X_test_encoded = temp_session.model._encode_categorical_data(X_test)
+
+                    # Convert to torch tensor
+                    X_tensor = torch.from_numpy(X_test_encoded.values).to(
+                        dtype=temp_session.model.model.train_inputs[0].dtype,
+                        device=temp_session.model.model.train_inputs[0].device
+                    )
+
+                    # Create PI acquisition function
+                    if goal.lower() == 'maximize':
+                        pi_acq = ProbabilityOfImprovement(
+                            model=temp_session.model.model,
+                            best_f=best_f,
+                            maximize=True
+                        )
+                    else:
+                        pi_acq = ProbabilityOfImprovement(
+                            model=temp_session.model.model,
+                            best_f=best_f,
+                            maximize=False
+                        )
+
+                    # Evaluate PI on all test points
+                    temp_session.model.model.eval()
+                    with torch.no_grad():
+                        pi_values = pi_acq(X_tensor.unsqueeze(-2))  # Add batch dimension
+
+                    max_pi = float(pi_values.max().item())
+
+                else:
+                    raise ValueError(f"Unknown backend: {backend}")
+
+            except Exception as e:
+                logger.warning(f"Failed to compute PI at iteration {i}: {e}")
+                import traceback
+                logger.debug(traceback.format_exc())
+                continue
+
+            # Record max PI
+            iterations.append(i)
+            max_pi_values.append(max_pi)
+
+            if i % 5 == 0 or i == n_exp:
+                logger.info(f" Iteration {i}/{n_exp}: max(PI) = {max_pi:.4f}")
+
+        if not iterations:
+            raise RuntimeError("Failed to compute PI for any iterations")
+
+        # Import visualization function
+        from alchemist_core.visualization.plots import create_probability_of_improvement_plot
+
+        # Create plot
+        fig, ax = create_probability_of_improvement_plot(
+            iterations=np.array(iterations),
+            max_pi_values=np.array(max_pi_values),
+            figsize=figsize,
+            dpi=dpi,
+            title=title
+        )
+
+        logger.info(f"Generated PI convergence plot with {len(iterations)} points")
+        return fig