alchemist-nrel 0.2.1__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. alchemist_core/__init__.py +14 -7
  2. alchemist_core/acquisition/botorch_acquisition.py +15 -6
  3. alchemist_core/audit_log.py +594 -0
  4. alchemist_core/data/experiment_manager.py +76 -5
  5. alchemist_core/models/botorch_model.py +6 -4
  6. alchemist_core/models/sklearn_model.py +74 -8
  7. alchemist_core/session.py +788 -39
  8. alchemist_core/utils/doe.py +200 -0
  9. alchemist_nrel-0.3.1.dist-info/METADATA +185 -0
  10. alchemist_nrel-0.3.1.dist-info/RECORD +66 -0
  11. {alchemist_nrel-0.2.1.dist-info → alchemist_nrel-0.3.1.dist-info}/entry_points.txt +1 -0
  12. api/example_client.py +7 -2
  13. api/main.py +21 -4
  14. api/models/requests.py +95 -1
  15. api/models/responses.py +167 -0
  16. api/routers/acquisition.py +25 -0
  17. api/routers/experiments.py +134 -6
  18. api/routers/sessions.py +438 -10
  19. api/routers/visualizations.py +10 -5
  20. api/routers/websocket.py +132 -0
  21. api/run_api.py +56 -0
  22. api/services/session_store.py +285 -54
  23. api/static/NEW_ICON.ico +0 -0
  24. api/static/NEW_ICON.png +0 -0
  25. api/static/NEW_LOGO_DARK.png +0 -0
  26. api/static/NEW_LOGO_LIGHT.png +0 -0
  27. api/static/assets/api-vcoXEqyq.js +1 -0
  28. api/static/assets/index-DWfIKU9j.js +4094 -0
  29. api/static/assets/index-sMIa_1hV.css +1 -0
  30. api/static/index.html +14 -0
  31. api/static/vite.svg +1 -0
  32. ui/gpr_panel.py +7 -2
  33. ui/notifications.py +197 -10
  34. ui/ui.py +1117 -68
  35. ui/variables_setup.py +47 -2
  36. ui/visualizations.py +60 -3
  37. alchemist_core/models/ax_model.py +0 -159
  38. alchemist_nrel-0.2.1.dist-info/METADATA +0 -206
  39. alchemist_nrel-0.2.1.dist-info/RECORD +0 -54
  40. {alchemist_nrel-0.2.1.dist-info → alchemist_nrel-0.3.1.dist-info}/WHEEL +0 -0
  41. {alchemist_nrel-0.2.1.dist-info → alchemist_nrel-0.3.1.dist-info}/licenses/LICENSE +0 -0
  42. {alchemist_nrel-0.2.1.dist-info → alchemist_nrel-0.3.1.dist-info}/top_level.txt +0 -0
alchemist_core/session.py CHANGED
@@ -7,10 +7,14 @@ This module provides the main entry point for using ALchemist as a headless libr
7
7
  from typing import Optional, Dict, Any, List, Tuple, Callable
8
8
  import pandas as pd
9
9
  import numpy as np
10
+ import json
11
+ import hashlib
12
+ from pathlib import Path
10
13
  from alchemist_core.data.search_space import SearchSpace
11
14
  from alchemist_core.data.experiment_manager import ExperimentManager
12
15
  from alchemist_core.events import EventEmitter
13
16
  from alchemist_core.config import get_logger
17
+ from alchemist_core.audit_log import AuditLog, SessionMetadata, AuditEntry
14
18
 
15
19
  logger = get_logger(__name__)
16
20
 
@@ -27,28 +31,29 @@ class OptimizationSession:
27
31
  5. Iterate
28
32
 
29
33
  Example:
30
- >>> from alchemist_core import OptimizationSession
31
- >>>
32
- >>> # Create session with search space
33
- >>> session = OptimizationSession()
34
- >>> session.add_variable('temperature', 'real', bounds=(300, 500))
35
- >>> session.add_variable('pressure', 'real', bounds=(1, 10))
36
- >>> session.add_variable('catalyst', 'categorical', categories=['A', 'B', 'C'])
37
- >>>
38
- >>> # Load experimental data
39
- >>> session.load_data('experiments.csv', target_column='yield')
40
- >>>
41
- >>> # Train model
42
- >>> session.train_model(backend='botorch', kernel='Matern')
43
- >>>
44
- >>> # Suggest next experiment
45
- >>> next_point = session.suggest_next(strategy='EI', goal='maximize')
46
- >>> print(next_point)
34
+ > from alchemist_core import OptimizationSession
35
+ >
36
+ > # Create session with search space
37
+ > session = OptimizationSession()
38
+ > session.add_variable('temperature', 'real', bounds=(300, 500))
39
+ > session.add_variable('pressure', 'real', bounds=(1, 10))
40
+ > session.add_variable('catalyst', 'categorical', categories=['A', 'B', 'C'])
41
+ >
42
+ > # Load experimental data
43
+ > session.load_data('experiments.csv', target_column='yield')
44
+ >
45
+ > # Train model
46
+ > session.train_model(backend='botorch', kernel='Matern')
47
+ >
48
+ > # Suggest next experiment
49
+ > next_point = session.suggest_next(strategy='EI', goal='maximize')
50
+ > print(next_point)
47
51
  """
48
52
 
49
53
  def __init__(self, search_space: Optional[SearchSpace] = None,
50
54
  experiment_manager: Optional[ExperimentManager] = None,
51
- event_emitter: Optional[EventEmitter] = None):
55
+ event_emitter: Optional[EventEmitter] = None,
56
+ session_metadata: Optional[SessionMetadata] = None):
52
57
  """
53
58
  Initialize optimization session.
54
59
 
@@ -56,11 +61,16 @@ class OptimizationSession:
56
61
  search_space: Pre-configured SearchSpace object (optional)
57
62
  experiment_manager: Pre-configured ExperimentManager (optional)
58
63
  event_emitter: EventEmitter for progress notifications (optional)
64
+ session_metadata: Pre-configured session metadata (optional)
59
65
  """
60
66
  self.search_space = search_space if search_space is not None else SearchSpace()
61
67
  self.experiment_manager = experiment_manager if experiment_manager is not None else ExperimentManager()
62
68
  self.events = event_emitter if event_emitter is not None else EventEmitter()
63
69
 
70
+ # Session metadata and audit log
71
+ self.metadata = session_metadata if session_metadata is not None else SessionMetadata.create()
72
+ self.audit_log = AuditLog()
73
+
64
74
  # Link search_space to experiment_manager
65
75
  self.experiment_manager.set_search_space(self.search_space)
66
76
 
@@ -69,13 +79,19 @@ class OptimizationSession:
69
79
  self.model_backend = None
70
80
  self.acquisition = None
71
81
 
82
+ # Staged experiments (for workflow management)
83
+ self.staged_experiments = [] # List of experiment dicts awaiting evaluation
84
+ self.last_suggestions = [] # Most recent acquisition suggestions (for UI)
85
+
72
86
  # Configuration
73
87
  self.config = {
74
88
  'random_state': 42,
75
- 'verbose': True
89
+ 'verbose': True,
90
+ 'auto_train': False, # Auto-train model after adding experiments
91
+ 'auto_train_threshold': 5 # Minimum experiments before auto-train
76
92
  }
77
93
 
78
- logger.info("OptimizationSession initialized")
94
+ logger.info(f"OptimizationSession initialized: {self.metadata.session_id}")
79
95
 
80
96
  # ============================================================
81
97
  # Search Space Management
@@ -93,8 +109,8 @@ class OptimizationSession:
93
109
  - For 'categorical': categories=[list of values] or values=[list]
94
110
 
95
111
  Example:
96
- >>> session.add_variable('temp', 'real', bounds=(300, 500))
97
- >>> session.add_variable('catalyst', 'categorical', categories=['A', 'B'])
112
+ > session.add_variable('temp', 'real', bounds=(300, 500))
113
+ > session.add_variable('catalyst', 'categorical', categories=['A', 'B'])
98
114
  """
99
115
  # Convert user-friendly API to internal format
100
116
  params = kwargs.copy()
@@ -186,7 +202,7 @@ class OptimizationSession:
186
202
  noise_column: Optional column with measurement noise/uncertainty
187
203
 
188
204
  Example:
189
- >>> session.load_data('experiments.csv', target_column='yield')
205
+ > session.load_data('experiments.csv', target_column='yield')
190
206
  """
191
207
  # Load the CSV
192
208
  import pandas as pd
@@ -222,7 +238,8 @@ class OptimizationSession:
222
238
  self.events.emit('data_loaded', {'n_experiments': n_experiments, 'filepath': filepath})
223
239
 
224
240
  def add_experiment(self, inputs: Dict[str, Any], output: float,
225
- noise: Optional[float] = None) -> None:
241
+ noise: Optional[float] = None, iteration: Optional[int] = None,
242
+ reason: Optional[str] = None) -> None:
226
243
  """
227
244
  Add a single experiment to the dataset.
228
245
 
@@ -230,18 +247,23 @@ class OptimizationSession:
230
247
  inputs: Dictionary mapping variable names to values
231
248
  output: Target/output value
232
249
  noise: Optional measurement uncertainty
250
+ iteration: Iteration number (auto-assigned if None)
251
+ reason: Reason for this experiment (e.g., 'Manual', 'Expected Improvement')
233
252
 
234
253
  Example:
235
- >>> session.add_experiment(
254
+ > session.add_experiment(
236
255
  ... inputs={'temperature': 350, 'catalyst': 'A'},
237
- ... output=0.85
256
+ ... output=0.85,
257
+ ... reason='Manual'
238
258
  ... )
239
259
  """
240
260
  # Use ExperimentManager's add_experiment method
241
261
  self.experiment_manager.add_experiment(
242
262
  point_dict=inputs,
243
263
  output_value=output,
244
- noise_value=noise
264
+ noise_value=noise,
265
+ iteration=iteration,
266
+ reason=reason
245
267
  )
246
268
 
247
269
  logger.info(f"Added experiment: {inputs} → {output}")
@@ -272,6 +294,206 @@ class OptimizationSession:
272
294
  'feature_names': list(X.columns)
273
295
  }
274
296
 
297
+ # ============================================================
298
+ # Staged Experiments (Workflow Management)
299
+ # ============================================================
300
+
301
+ def add_staged_experiment(self, inputs: Dict[str, Any]) -> None:
302
+ """
303
+ Add an experiment to the staging area (awaiting evaluation).
304
+
305
+ Staged experiments are typically suggested by acquisition functions
306
+ but not yet evaluated. They can be retrieved, evaluated externally,
307
+ and then added to the dataset with add_experiment().
308
+
309
+ Args:
310
+ inputs: Dictionary mapping variable names to values
311
+
312
+ Example:
313
+ > # Generate suggestions and stage them
314
+ > suggestions = session.suggest_next(n_suggestions=3)
315
+ > for point in suggestions.to_dict('records'):
316
+ > session.add_staged_experiment(point)
317
+ >
318
+ > # Later, evaluate and add
319
+ > staged = session.get_staged_experiments()
320
+ > for point in staged:
321
+ > output = run_experiment(**point)
322
+ > session.add_experiment(point, output=output)
323
+ > session.clear_staged_experiments()
324
+ """
325
+ self.staged_experiments.append(inputs)
326
+ logger.debug(f"Staged experiment: {inputs}")
327
+ self.events.emit('experiment_staged', {'inputs': inputs})
328
+
329
+ def get_staged_experiments(self) -> List[Dict[str, Any]]:
330
+ """
331
+ Get all staged experiments awaiting evaluation.
332
+
333
+ Returns:
334
+ List of experiment input dictionaries
335
+ """
336
+ return self.staged_experiments.copy()
337
+
338
+ def clear_staged_experiments(self) -> int:
339
+ """
340
+ Clear all staged experiments.
341
+
342
+ Returns:
343
+ Number of experiments cleared
344
+ """
345
+ count = len(self.staged_experiments)
346
+ self.staged_experiments.clear()
347
+ if count > 0:
348
+ logger.info(f"Cleared {count} staged experiments")
349
+ self.events.emit('staged_experiments_cleared', {'count': count})
350
+ return count
351
+
352
+ def move_staged_to_experiments(self, outputs: List[float],
353
+ noises: Optional[List[float]] = None,
354
+ iteration: Optional[int] = None,
355
+ reason: Optional[str] = None) -> int:
356
+ """
357
+ Evaluate staged experiments and add them to the dataset in batch.
358
+
359
+ Convenience method that pairs staged inputs with outputs and adds
360
+ them all to the experiment manager, then clears the staging area.
361
+
362
+ Args:
363
+ outputs: List of output values (must match length of staged experiments)
364
+ noises: Optional list of measurement uncertainties
365
+ iteration: Iteration number for all experiments (auto-assigned if None)
366
+ reason: Reason for these experiments (e.g., 'Expected Improvement')
367
+
368
+ Returns:
369
+ Number of experiments added
370
+
371
+ Example:
372
+ > # Stage some experiments
373
+ > session.add_staged_experiment({'x': 1.0, 'y': 2.0})
374
+ > session.add_staged_experiment({'x': 3.0, 'y': 4.0})
375
+ >
376
+ > # Evaluate them
377
+ > outputs = [run_experiment(**point) for point in session.get_staged_experiments()]
378
+ >
379
+ > # Add to dataset and clear staging
380
+ > session.move_staged_to_experiments(outputs, reason='LogEI')
381
+ """
382
+ if len(outputs) != len(self.staged_experiments):
383
+ raise ValueError(
384
+ f"Number of outputs ({len(outputs)}) must match "
385
+ f"number of staged experiments ({len(self.staged_experiments)})"
386
+ )
387
+
388
+ if noises is not None and len(noises) != len(self.staged_experiments):
389
+ raise ValueError(
390
+ f"Number of noise values ({len(noises)}) must match "
391
+ f"number of staged experiments ({len(self.staged_experiments)})"
392
+ )
393
+
394
+ # Add each experiment
395
+ for i, inputs in enumerate(self.staged_experiments):
396
+ noise = noises[i] if noises is not None else None
397
+ self.add_experiment(
398
+ inputs=inputs,
399
+ output=outputs[i],
400
+ noise=noise,
401
+ iteration=iteration,
402
+ reason=reason
403
+ )
404
+
405
+ count = len(self.staged_experiments)
406
+ self.clear_staged_experiments()
407
+
408
+ logger.info(f"Moved {count} staged experiments to dataset")
409
+ return count
410
+
411
+ # ============================================================
412
+ # Initial Design Generation
413
+ # ============================================================
414
+
415
+ def generate_initial_design(
416
+ self,
417
+ method: str = "lhs",
418
+ n_points: int = 10,
419
+ random_seed: Optional[int] = None,
420
+ **kwargs
421
+ ) -> List[Dict[str, Any]]:
422
+ """
423
+ Generate initial experimental design (Design of Experiments).
424
+
425
+ Creates a set of experimental conditions to evaluate before starting
426
+ Bayesian optimization. This does NOT add the experiments to the session -
427
+ you must evaluate them and add the results using add_experiment().
428
+
429
+ Supported methods:
430
+ - 'random': Uniform random sampling
431
+ - 'lhs': Latin Hypercube Sampling (recommended, good space-filling properties)
432
+ - 'sobol': Sobol quasi-random sequences (low discrepancy)
433
+ - 'halton': Halton sequences
434
+ - 'hammersly': Hammersly sequences (low discrepancy)
435
+
436
+ Args:
437
+ method: Sampling strategy to use
438
+ n_points: Number of points to generate
439
+ random_seed: Random seed for reproducibility
440
+ **kwargs: Additional method-specific parameters:
441
+ - lhs_criterion: For LHS method ("maximin", "correlation", "ratio")
442
+
443
+ Returns:
444
+ List of dictionaries with variable names and values (no outputs)
445
+
446
+ Example:
447
+ > # Generate initial design
448
+ > points = session.generate_initial_design('lhs', n_points=10)
449
+ >
450
+ > # Run experiments and add results
451
+ > for point in points:
452
+ > output = run_experiment(**point) # Your experiment function
453
+ > session.add_experiment(point, output=output)
454
+ >
455
+ > # Now ready to train model
456
+ > session.train_model()
457
+ """
458
+ if len(self.search_space.variables) == 0:
459
+ raise ValueError(
460
+ "No variables defined in search space. "
461
+ "Use add_variable() to define variables before generating initial design."
462
+ )
463
+
464
+ from alchemist_core.utils.doe import generate_initial_design
465
+
466
+ points = generate_initial_design(
467
+ search_space=self.search_space,
468
+ method=method,
469
+ n_points=n_points,
470
+ random_seed=random_seed,
471
+ **kwargs
472
+ )
473
+
474
+ # Store sampler info in config for audit trail
475
+ self.config['initial_design_method'] = method
476
+ self.config['initial_design_n_points'] = len(points)
477
+
478
+ logger.info(f"Generated {len(points)} initial design points using {method} method")
479
+ self.events.emit('initial_design_generated', {
480
+ 'method': method,
481
+ 'n_points': len(points)
482
+ })
483
+
484
+ # Add a lightweight audit data_locked entry for the initial design metadata
485
+ try:
486
+ extra = {'initial_design_method': method, 'initial_design_n_points': len(points)}
487
+ # Create an empty dataframe snapshot of the planned points
488
+ import pandas as pd
489
+ planned_df = pd.DataFrame(points)
490
+ self.audit_log.lock_data(planned_df, notes=f"Initial design ({method})", extra_parameters=extra)
491
+ except Exception:
492
+ # Audit logging should not block design generation
493
+ logger.debug("Failed to add initial design to audit log")
494
+
495
+ return points
496
+
275
497
  # ============================================================
276
498
  # Model Training
277
499
  # ============================================================
@@ -291,8 +513,8 @@ class OptimizationSession:
291
513
  Dictionary with training results and hyperparameters
292
514
 
293
515
  Example:
294
- >>> results = session.train_model(backend='botorch', kernel='Matern')
295
- >>> print(results['metrics'])
516
+ > results = session.train_model(backend='botorch', kernel='Matern')
517
+ > print(results['metrics'])
296
518
  """
297
519
  df = self.experiment_manager.get_data()
298
520
  if df is None or df.empty:
@@ -312,6 +534,27 @@ class OptimizationSession:
312
534
  # Extract calibration_enabled before passing kwargs to model constructor
313
535
  calibration_enabled = kwargs.pop('calibration_enabled', False)
314
536
 
537
+ # Validate and map transform types based on backend
538
+ # BoTorch uses: 'normalize', 'standardize'
539
+ # Sklearn uses: 'minmax', 'standard', 'robust', 'none'
540
+ if self.model_backend == 'sklearn':
541
+ # Map BoTorch transform types to sklearn equivalents
542
+ transform_map = {
543
+ 'normalize': 'minmax', # BoTorch normalize → sklearn minmax
544
+ 'standardize': 'standard', # BoTorch standardize → sklearn standard
545
+ 'none': 'none'
546
+ }
547
+ if 'input_transform_type' in kwargs:
548
+ original = kwargs['input_transform_type']
549
+ kwargs['input_transform_type'] = transform_map.get(original, original)
550
+ if original != kwargs['input_transform_type']:
551
+ logger.debug(f"Mapped input transform '{original}' → '{kwargs['input_transform_type']}' for sklearn")
552
+ if 'output_transform_type' in kwargs:
553
+ original = kwargs['output_transform_type']
554
+ kwargs['output_transform_type'] = transform_map.get(original, original)
555
+ if original != kwargs['output_transform_type']:
556
+ logger.debug(f"Mapped output transform '{original}' → '{kwargs['output_transform_type']}' for sklearn")
557
+
315
558
  # Import appropriate model class
316
559
  if self.model_backend == 'sklearn':
317
560
  from alchemist_core.models.sklearn_model import SklearnModel
@@ -330,6 +573,15 @@ class OptimizationSession:
330
573
  elif self.model_backend == 'botorch':
331
574
  from alchemist_core.models.botorch_model import BoTorchModel
332
575
 
576
+ # Apply sensible defaults for BoTorch if not explicitly overridden
577
+ # Input normalization and output standardization are critical for performance
578
+ if 'input_transform_type' not in kwargs:
579
+ kwargs['input_transform_type'] = 'normalize'
580
+ logger.debug("Auto-applying input normalization for BoTorch model")
581
+ if 'output_transform_type' not in kwargs:
582
+ kwargs['output_transform_type'] = 'standardize'
583
+ logger.debug("Auto-applying output standardization for BoTorch model")
584
+
333
585
  # Build kernel options - BoTorch uses 'cont_kernel_type' not 'kernel_type'
334
586
  kernel_options = {'cont_kernel_type': kernel}
335
587
  if kernel_params:
@@ -437,8 +689,45 @@ class OptimizationSession:
437
689
  # Convert complex objects to their string representation
438
690
  json_hyperparams[key] = str(value)
439
691
 
692
+ # Extract kernel name and parameters
693
+ kernel_name = 'unknown'
694
+ if self.model_backend == 'sklearn':
695
+ # First try kernel_options
696
+ if hasattr(self.model, 'kernel_options') and 'kernel_type' in self.model.kernel_options:
697
+ kernel_name = self.model.kernel_options['kernel_type']
698
+ # Add nu parameter for Matern kernels
699
+ if kernel_name == 'Matern' and 'matern_nu' in self.model.kernel_options:
700
+ json_hyperparams['matern_nu'] = self.model.kernel_options['matern_nu']
701
+ # Then try trained kernel
702
+ elif hasattr(self.model, 'model') and hasattr(self.model.model, 'kernel_'):
703
+ kernel_obj = self.model.model.kernel_
704
+ # Navigate through Product/Sum kernels to find base kernel
705
+ if hasattr(kernel_obj, 'k2'): # Product kernel (Constant * BaseKernel)
706
+ base_kernel = kernel_obj.k2
707
+ else:
708
+ base_kernel = kernel_obj
709
+
710
+ kernel_class = type(base_kernel).__name__
711
+ if 'Matern' in kernel_class:
712
+ kernel_name = 'Matern'
713
+ # Extract nu parameter if available
714
+ if hasattr(base_kernel, 'nu'):
715
+ json_hyperparams['matern_nu'] = float(base_kernel.nu)
716
+ elif 'RBF' in kernel_class:
717
+ kernel_name = 'RBF'
718
+ elif 'RationalQuadratic' in kernel_class:
719
+ kernel_name = 'RationalQuadratic'
720
+ else:
721
+ kernel_name = kernel_class
722
+ elif self.model_backend == 'botorch':
723
+ if hasattr(self.model, 'cont_kernel_type'):
724
+ kernel_name = self.model.cont_kernel_type
725
+ elif 'kernel_type' in json_hyperparams:
726
+ kernel_name = json_hyperparams['kernel_type']
727
+
440
728
  return {
441
729
  'backend': self.model_backend,
730
+ 'kernel': kernel_name,
442
731
  'hyperparameters': json_hyperparams,
443
732
  'metrics': metrics,
444
733
  'is_trained': True
@@ -463,8 +752,8 @@ class OptimizationSession:
463
752
  DataFrame with suggested experiment(s)
464
753
 
465
754
  Example:
466
- >>> next_point = session.suggest_next(strategy='EI', goal='maximize')
467
- >>> print(next_point)
755
+ > next_point = session.suggest_next(strategy='EI', goal='maximize')
756
+ > print(next_point)
468
757
  """
469
758
  if self.model is None:
470
759
  raise ValueError("No trained model available. Use train_model() first.")
@@ -478,7 +767,8 @@ class OptimizationSession:
478
767
  model=self.model, # Pass the full SklearnModel wrapper, not just .model
479
768
  acq_func=strategy.lower(),
480
769
  maximize=(goal.lower() == 'maximize'),
481
- random_state=self.config['random_state']
770
+ random_state=self.config['random_state'],
771
+ acq_func_kwargs=kwargs # Pass xi, kappa, etc. to acquisition function
482
772
  )
483
773
 
484
774
  # Update acquisition with existing experimental data (un-encoded)
@@ -527,6 +817,16 @@ class OptimizationSession:
527
817
  logger.info(f"Suggested point: {suggestion_dict}")
528
818
  self.events.emit('acquisition_completed', {'suggestion': suggestion_dict})
529
819
 
820
+ # Store suggestions for UI/API access
821
+ self.last_suggestions = result_df.to_dict('records')
822
+
823
+ # Cache suggestion info for audit log
824
+ self._last_acquisition_info = {
825
+ 'strategy': strategy,
826
+ 'goal': goal,
827
+ 'parameters': kwargs
828
+ }
829
+
530
830
  return result_df # ============================================================
531
831
  # Predictions
532
832
  # ============================================================
@@ -542,11 +842,11 @@ class OptimizationSession:
542
842
  Tuple of (predictions, uncertainties)
543
843
 
544
844
  Example:
545
- >>> test_points = pd.DataFrame({
845
+ > test_points = pd.DataFrame({
546
846
  ... 'temperature': [350, 400],
547
847
  ... 'catalyst': ['A', 'B']
548
848
  ... })
549
- >>> predictions, uncertainties = session.predict(test_points)
849
+ > predictions, uncertainties = session.predict(test_points)
550
850
  """
551
851
  if self.model is None:
552
852
  raise ValueError("No trained model available. Use train_model() first.")
@@ -579,9 +879,9 @@ class OptimizationSession:
579
879
  callback: Callback function
580
880
 
581
881
  Example:
582
- >>> def on_training_done(data):
882
+ > def on_training_done(data):
583
883
  ... print(f"Training completed with R² = {data['metrics']['r2']}")
584
- >>> session.on('training_completed', on_training_done)
884
+ > session.on('training_completed', on_training_done)
585
885
  """
586
886
  self.events.on(event, callback)
587
887
 
@@ -594,10 +894,459 @@ class OptimizationSession:
594
894
  Update session configuration.
595
895
 
596
896
  Args:
597
- **kwargs: Configuration parameters (random_state, verbose, etc.)
897
+ **kwargs: Configuration parameters to update
898
+
899
+ Example:
900
+ > session.set_config(random_state=123, verbose=False)
901
+ """
902
+ self.config.update(kwargs)
903
+ logger.info(f"Updated config: {kwargs}")
904
+
905
+ # ============================================================
906
+ # Audit Log & Session Management
907
+ # ============================================================
908
+
909
+ def lock_data(self, notes: str = "", extra_parameters: Optional[Dict[str, Any]] = None) -> AuditEntry:
910
+ """
911
+ Lock in current experimental data configuration.
912
+
913
+ Creates an immutable audit log entry capturing the current data state.
914
+ This should be called when you're satisfied with your experimental dataset
915
+ and ready to proceed with modeling.
916
+
917
+ Args:
918
+ notes: Optional user notes about this data configuration
919
+
920
+ Returns:
921
+ Created AuditEntry
922
+
923
+ Example:
924
+ > session.add_experiment({'temp': 100, 'pressure': 5}, output=85.2)
925
+ > session.lock_data(notes="Initial screening dataset")
926
+ """
927
+ # Set search space in audit log (once)
928
+ if self.audit_log.search_space_definition is None:
929
+ self.audit_log.set_search_space(self.search_space.variables)
930
+
931
+ # Get current experimental data
932
+ df = self.experiment_manager.get_data()
933
+
934
+ # Lock data in audit log
935
+ entry = self.audit_log.lock_data(
936
+ experiment_data=df,
937
+ notes=notes,
938
+ extra_parameters=extra_parameters
939
+ )
940
+
941
+ self.metadata.update_modified()
942
+ logger.info(f"Locked data: {len(df)} experiments")
943
+ self.events.emit('data_locked', {'entry': entry.to_dict()})
944
+
945
+ return entry
946
+
947
+ def lock_model(self, notes: str = "") -> AuditEntry:
948
+ """
949
+ Lock in current trained model configuration.
950
+
951
+ Creates an immutable audit log entry capturing the trained model state.
952
+ This should be called when you're satisfied with your model performance
953
+ and ready to use it for acquisition.
954
+
955
+ Args:
956
+ notes: Optional user notes about this model
957
+
958
+ Returns:
959
+ Created AuditEntry
960
+
961
+ Raises:
962
+ ValueError: If no model has been trained
963
+
964
+ Example:
965
+ > session.train_model(backend='sklearn', kernel='matern')
966
+ > session.lock_model(notes="Best cross-validation performance")
967
+ """
968
+ if self.model is None:
969
+ raise ValueError("No trained model available. Use train_model() first.")
970
+
971
+ # Set search space in audit log (once)
972
+ if self.audit_log.search_space_definition is None:
973
+ self.audit_log.set_search_space(self.search_space.variables)
974
+
975
+ # Get model info
976
+ model_info = self.get_model_summary()
977
+
978
+ # Extract hyperparameters
979
+ hyperparameters = model_info.get('hyperparameters', {})
980
+
981
+ # Get kernel name from model_info (which extracts it properly)
982
+ kernel_name = model_info.get('kernel', 'unknown')
983
+
984
+ # Get CV metrics if available - use model_info metrics which are already populated
985
+ cv_metrics = model_info.get('metrics', None)
986
+ if cv_metrics and all(k in cv_metrics for k in ['rmse', 'r2']):
987
+ # Metrics already in correct format from get_model_summary
988
+ pass
989
+ elif hasattr(self.model, 'cv_cached_results') and self.model.cv_cached_results:
990
+ # Fallback to direct access
991
+ cv_metrics = {
992
+ 'rmse': float(self.model.cv_cached_results.get('rmse', 0)),
993
+ 'r2': float(self.model.cv_cached_results.get('r2', 0)),
994
+ 'mae': float(self.model.cv_cached_results.get('mae', 0))
995
+ }
996
+ else:
997
+ cv_metrics = None
998
+
999
+ # Get current iteration number
1000
+ # Use the next iteration number for the model lock so model+acquisition share the same iteration
1001
+ iteration = self.experiment_manager._current_iteration + 1
1002
+
1003
+ # Include scaler information if available in hyperparameters
1004
+ try:
1005
+ if hasattr(self.model, 'input_transform_type'):
1006
+ hyperparameters['input_transform_type'] = self.model.input_transform_type
1007
+ if hasattr(self.model, 'output_transform_type'):
1008
+ hyperparameters['output_transform_type'] = self.model.output_transform_type
1009
+ except Exception:
1010
+ pass
1011
+
1012
+ # Try to extract Matern nu for sklearn models if not already present
1013
+ try:
1014
+ if self.model_backend == 'sklearn' and 'matern_nu' not in hyperparameters:
1015
+ # Try to navigate fitted kernel object for sklearn GaussianProcessRegressor
1016
+ if hasattr(self.model, 'model') and hasattr(self.model.model, 'kernel_'):
1017
+ kernel_obj = self.model.model.kernel_
1018
+ base_kernel = getattr(kernel_obj, 'k2', kernel_obj)
1019
+ if hasattr(base_kernel, 'nu'):
1020
+ hyperparameters['matern_nu'] = float(base_kernel.nu)
1021
+ except Exception:
1022
+ pass
1023
+
1024
+ entry = self.audit_log.lock_model(
1025
+ backend=self.model_backend,
1026
+ kernel=kernel_name,
1027
+ hyperparameters=hyperparameters,
1028
+ cv_metrics=cv_metrics,
1029
+ iteration=iteration,
1030
+ notes=notes
1031
+ )
1032
+
1033
+ self.metadata.update_modified()
1034
+ logger.info(f"Locked model: {self.model_backend}/{model_info.get('kernel')}, iteration {iteration}")
1035
+ self.events.emit('model_locked', {'entry': entry.to_dict()})
1036
+
1037
+ return entry
1038
+
1039
+ def lock_acquisition(self, strategy: str, parameters: Dict[str, Any],
1040
+ suggestions: List[Dict[str, Any]], notes: str = "") -> AuditEntry:
1041
+ """
1042
+ Lock in acquisition function decision and suggested experiments.
1043
+
1044
+ Creates an immutable audit log entry capturing the acquisition decision.
1045
+ This should be called when you've reviewed the suggestions and are ready
1046
+ to run the recommended experiments.
1047
+
1048
+ Args:
1049
+ strategy: Acquisition strategy name ('EI', 'PI', 'UCB', etc.)
1050
+ parameters: Acquisition function parameters (xi, kappa, etc.)
1051
+ suggestions: List of suggested experiment dictionaries
1052
+ notes: Optional user notes about this decision
1053
+
1054
+ Returns:
1055
+ Created AuditEntry
1056
+
1057
+ Example:
1058
+ > suggestions = session.suggest_next(strategy='EI', n_suggestions=3)
1059
+ > session.lock_acquisition(
1060
+ ... strategy='EI',
1061
+ ... parameters={'xi': 0.01, 'goal': 'maximize'},
1062
+ ... suggestions=suggestions,
1063
+ ... notes="Top 3 candidates for next batch"
1064
+ ... )
1065
+ """
1066
+ # Set search space in audit log (once)
1067
+ if self.audit_log.search_space_definition is None:
1068
+ self.audit_log.set_search_space(self.search_space.variables)
1069
+
1070
+ # Increment iteration counter first so this acquisition is logged as the next iteration
1071
+ self.experiment_manager._current_iteration += 1
1072
+ iteration = self.experiment_manager._current_iteration
1073
+
1074
+ entry = self.audit_log.lock_acquisition(
1075
+ strategy=strategy,
1076
+ parameters=parameters,
1077
+ suggestions=suggestions,
1078
+ iteration=iteration,
1079
+ notes=notes
1080
+ )
1081
+
1082
+ self.metadata.update_modified()
1083
+ logger.info(f"Locked acquisition: {strategy}, {len(suggestions)} suggestions")
1084
+ self.events.emit('acquisition_locked', {'entry': entry.to_dict()})
1085
+
1086
+ return entry
1087
+
1088
+ def get_audit_log(self) -> List[Dict[str, Any]]:
1089
+ """
1090
+ Get complete audit log as list of dictionaries.
1091
+
1092
+ Returns:
1093
+ List of audit entry dictionaries
1094
+ """
1095
+ return self.audit_log.to_dict()
1096
+
1097
def export_audit_markdown(self) -> str:
    """Render the audit log as publication-ready markdown.

    Session metadata (name, tags, author, ...) is forwarded to the
    exporter so user-entered details appear in the report.  If the
    metadata cannot be serialized, the report is produced without it.

    Returns:
        Markdown-formatted audit trail.
    """
    # Best effort: a metadata serialization failure must not block export.
    try:
        meta = self.metadata.to_dict()
    except Exception:
        meta = None

    return self.audit_log.to_markdown(session_metadata=meta)
1111
+
1112
def save_session(self, filepath: str):
    """
    Save complete session state to JSON file.

    Saves all session data including:
    - Session metadata (name, description, tags)
    - Search space definition
    - Experimental data
    - Trained model state (if available)
    - Complete audit log

    Args:
        filepath: Path to save session file (.json extension recommended)

    Example:
        > session.save_session("~/ALchemist_Sessions/catalyst_study_nov2025.json")
    """
    filepath = Path(filepath)

    # Fetch the experimental data once; it is reused for both the audit-log
    # snapshot and the serialized experiment records below.
    current_data = self.experiment_manager.get_data()

    # Update audit log's experimental data snapshot to reflect current state.
    # This ensures the data table in the audit log markdown is always up-to-date.
    if current_data is not None and len(current_data) > 0:
        self.audit_log.experiment_data = current_data.copy()

    # Guard against get_data() returning None (no experiments yet): the
    # previous unconditional .to_dict() call would have raised here.
    if current_data is not None:
        experiment_records = current_data.to_dict(orient='records')
    else:
        experiment_records = []

    # Prepare session data
    session_data = {
        'version': '1.0.0',
        'metadata': self.metadata.to_dict(),
        'audit_log': self.audit_log.to_dict(),
        'search_space': {
            'variables': self.search_space.variables
        },
        'experiments': {
            'data': experiment_records,
            'n_total': len(self.experiment_manager.df)
        },
        'config': self.config
    }

    # Add model state if available
    if self.model is not None:
        model_info = self.get_model_summary()

        # Get kernel name from model_info which properly extracts it
        kernel_name = model_info.get('kernel', 'unknown')

        # Extract backend-specific kernel parameters if available
        kernel_params = {}
        if self.model_backend == 'sklearn' and hasattr(self.model, 'model'):
            kernel_obj = self.model.model.kernel
            # Extract kernel-specific parameters
            if hasattr(kernel_obj, 'get_params'):
                kernel_params = kernel_obj.get_params()
        elif self.model_backend == 'botorch':
            # For BoTorch, parameters are in hyperparameters
            hyperparams = model_info.get('hyperparameters', {})
            if 'matern_nu' in hyperparams:
                kernel_params['nu'] = hyperparams['matern_nu']

        session_data['model_config'] = {
            'backend': self.model_backend,
            'kernel': kernel_name,
            'kernel_params': kernel_params,
            'hyperparameters': model_info.get('hyperparameters', {}),
            'metrics': model_info.get('metrics', {})
        }

    # Create parent directory if needed
    filepath.parent.mkdir(parents=True, exist_ok=True)

    # default=str stringifies non-JSON-serializable values (timestamps, numpy scalars)
    with open(filepath, 'w') as f:
        json.dump(session_data, f, indent=2, default=str)

    self.metadata.update_modified()
    logger.info(f"Saved session to {filepath}")
    self.events.emit('session_saved', {'filepath': str(filepath)})
1190
+
1191
def export_session_json(self) -> str:
    """Serialize the session to a JSON string.

    Internally writes to a temporary file via save_session() and reads it
    back, so the caller observes no lasting filesystem side-effects.

    Returns:
        JSON string of the complete session state.
    """
    import tempfile
    from pathlib import Path

    with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as handle:
        temp_location = handle.name

    # Reuse save_session() so both code paths emit identical JSON.
    self.save_session(temp_location)

    try:
        with open(temp_location, 'r') as reader:
            serialized = reader.read()
    finally:
        # Always remove the temporary file, even if reading failed.
        Path(temp_location).unlink(missing_ok=True)

    return serialized
1213
+
1214
@staticmethod
def load_session(filepath: str, retrain_on_load: bool = True) -> 'OptimizationSession':
    """
    Load session from JSON file.

    Restores metadata, audit log, search space, experimental data and
    config in that order, then optionally retrains the saved model.

    Args:
        filepath: Path to session file
        retrain_on_load: If True and a model configuration was saved,
            retrain the model on the restored data. A failed retrain is
            logged and emitted as 'model_retrain_failed', not raised.

    Returns:
        OptimizationSession with restored state

    Example:
        > session = OptimizationSession.load_session("my_session.json")
    """
    filepath = Path(filepath)

    with open(filepath, 'r') as f:
        session_data = json.load(f)

    # Check version compatibility (only major version 1.x is known-good)
    version = session_data.get('version', '1.0.0')
    if not version.startswith('1.'):
        logger.warning(f"Session file version {version} may not be fully compatible")

    # Create a fresh session and restore each saved section into it
    session = OptimizationSession()

    # Restore metadata
    if 'metadata' in session_data:
        session.metadata = SessionMetadata.from_dict(session_data['metadata'])

    # Restore audit log
    if 'audit_log' in session_data:
        session.audit_log.from_dict(session_data['audit_log'])

    # Restore search space
    if 'search_space' in session_data:
        for var in session_data['search_space']['variables']:
            session.search_space.add_variable(
                var['name'],
                var['type'],
                # Forward every remaining key (bounds, categories, ...) as kwargs
                **{k: v for k, v in var.items() if k not in ['name', 'type']}
            )

    # Restore experimental data
    if 'experiments' in session_data and session_data['experiments']['data']:
        df = pd.DataFrame(session_data['experiments']['data'])

        # Metadata columns to exclude from inputs
        metadata_cols = {'Output', 'Noise', 'Iteration', 'Reason'}

        # Re-add experiments one by one so normal bookkeeping applies
        for _, row in df.iterrows():
            # Only include actual input variables, not metadata
            inputs = {col: row[col] for col in df.columns if col not in metadata_cols}
            output = row.get('Output')
            # NaN (missing) metadata values are normalized to None
            noise = row.get('Noise') if pd.notna(row.get('Noise')) else None
            iteration = row.get('Iteration') if pd.notna(row.get('Iteration')) else None
            reason = row.get('Reason') if pd.notna(row.get('Reason')) else None

            session.add_experiment(inputs, output, noise=noise, iteration=iteration, reason=reason)

    # Restore config
    if 'config' in session_data:
        session.config.update(session_data['config'])

    # Auto-retrain model if configuration exists (optional)
    if 'model_config' in session_data and retrain_on_load:
        model_config = session_data['model_config']
        logger.info(f"Auto-retraining model: {model_config['backend']} with {model_config.get('kernel', 'default')} kernel")

        try:
            # Trigger model training with saved configuration
            session.train_model(
                backend=model_config['backend'],
                kernel=model_config.get('kernel', 'Matern'),
                kernel_params=model_config.get('kernel_params', {})
            )
            logger.info("Model retrained successfully")
            session.events.emit('model_retrained', {'backend': model_config['backend']})
        except Exception as e:
            # Best effort: a retrain failure should not prevent loading
            logger.warning(f"Failed to retrain model: {e}")
            session.events.emit('model_retrain_failed', {'error': str(e)})

    logger.info(f"Loaded session from {filepath}")
    session.events.emit('session_loaded', {'filepath': str(filepath)})

    return session
1302
+
1303
def update_metadata(self, name: Optional[str] = None,
                   description: Optional[str] = None,
                   tags: Optional[List[str]] = None,
                   author: Optional[str] = None):
    """
    Update session metadata.

    Only fields passed as non-None are changed; the modified timestamp
    is always refreshed and a 'metadata_updated' event is emitted.

    Args:
        name: New session name (optional)
        description: New description (optional)
        tags: New tags (optional)
        author: New author name (optional; stored via setattr so older
            metadata objects without the field still work)

    Example:
        > session.update_metadata(
        ...     name="Catalyst Screening - Final",
        ...     description="Optimized Pt/Pd ratios",
        ...     tags=["catalyst", "platinum", "palladium", "final"]
        ... )
    """
    if name is not None:
        self.metadata.name = name
    if description is not None:
        self.metadata.description = description
    if author is not None:
        # Backwards compatible: store author if provided
        setattr(self.metadata, 'author', author)
    if tags is not None:
        self.metadata.tags = tags

    self.metadata.update_modified()
    logger.info("Updated session metadata")
    self.events.emit('metadata_updated', self.metadata.to_dict())
1335
+
1336
+ # ============================================================
1337
+ # Legacy Configuration
1338
+ # ============================================================
1339
+
1340
def set_config(self, **kwargs) -> None:
    """Merge the given keyword arguments into the session configuration.

    Args:
        **kwargs: Configuration parameters to update

    Example:
        > session.set_config(random_state=123, verbose=False)
    """
    self.config.update(kwargs)
    logger.info(f"Updated config: {kwargs}")
1352
+