alchemist-nrel 0.3.1__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. alchemist_core/__init__.py +2 -2
  2. alchemist_core/acquisition/botorch_acquisition.py +83 -126
  3. alchemist_core/data/experiment_manager.py +181 -12
  4. alchemist_core/models/botorch_model.py +292 -63
  5. alchemist_core/models/sklearn_model.py +145 -13
  6. alchemist_core/session.py +3330 -31
  7. alchemist_core/utils/__init__.py +3 -1
  8. alchemist_core/utils/acquisition_utils.py +60 -0
  9. alchemist_core/visualization/__init__.py +45 -0
  10. alchemist_core/visualization/helpers.py +130 -0
  11. alchemist_core/visualization/plots.py +1449 -0
  12. {alchemist_nrel-0.3.1.dist-info → alchemist_nrel-0.3.2.dist-info}/METADATA +13 -13
  13. {alchemist_nrel-0.3.1.dist-info → alchemist_nrel-0.3.2.dist-info}/RECORD +31 -26
  14. {alchemist_nrel-0.3.1.dist-info → alchemist_nrel-0.3.2.dist-info}/WHEEL +1 -1
  15. api/main.py +1 -1
  16. api/models/requests.py +52 -0
  17. api/models/responses.py +79 -2
  18. api/routers/experiments.py +333 -8
  19. api/routers/sessions.py +84 -9
  20. api/routers/visualizations.py +6 -4
  21. api/routers/websocket.py +2 -2
  22. api/services/session_store.py +295 -71
  23. api/static/assets/index-B6Cf6s_b.css +1 -0
  24. api/static/assets/{index-DWfIKU9j.js → index-B7njvc9r.js} +201 -196
  25. api/static/index.html +2 -2
  26. ui/gpr_panel.py +11 -5
  27. ui/target_column_dialog.py +299 -0
  28. ui/ui.py +52 -5
  29. api/static/assets/index-sMIa_1hV.css +0 -1
  30. {alchemist_nrel-0.3.1.dist-info → alchemist_nrel-0.3.2.dist-info}/entry_points.txt +0 -0
  31. {alchemist_nrel-0.3.1.dist-info → alchemist_nrel-0.3.2.dist-info}/licenses/LICENSE +0 -0
  32. {alchemist_nrel-0.3.1.dist-info → alchemist_nrel-0.3.2.dist-info}/top_level.txt +0 -0
alchemist_core/__init__.py
@@ -27,10 +27,10 @@ Example:
  >>> # Get next experiment suggestion
  >>> next_point = session.suggest_next(acq_func="ei")
 
- Version: 0.3.0-beta.1
+ Version: 0.3.2
  """
 
- __version__ = "0.3.0b1"
+ __version__ = "0.3.2"
  __author__ = "Caleb Coatney"
  __email__ = "caleb.coatney@nrel.gov"
 
alchemist_core/acquisition/botorch_acquisition.py
@@ -312,6 +312,10 @@ class BoTorchAcquisition(BaseAcquisition):
              options=options,
          )
 
+         # Log the acquisition value found
+         acq_val = batch_acq_values.item() if batch_acq_values.numel() == 1 else batch_acq_values.max().item()
+         logger.info(f"Optimization found acquisition value: {acq_val:.4f}")
+
          # Get the best candidate(s)
          best_candidates = batch_candidates.detach().cpu()
 
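Note on the added logging: the acquisition values returned alongside the candidates may hold one value or several depending on how many candidates come back, so the new line branches on numel() before calling .item(). A standalone sketch of that pattern (plain torch, hypothetical helper name):

    import torch

    def summarize_acq_values(batch_acq_values: torch.Tensor) -> float:
        """Collapse acquisition values of either shape to a single scalar."""
        if batch_acq_values.numel() == 1:
            # Single candidate: the tensor holds exactly one value
            return batch_acq_values.item()
        # Several candidates: report the best (maximum) value
        return batch_acq_values.max().item()

    print(summarize_acq_values(torch.tensor(1.23)))        # 1.23
    print(summarize_acq_values(torch.tensor([0.5, 2.0])))  # 2.0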
@@ -533,7 +537,14 @@ class BoTorchAcquisition(BaseAcquisition):
          return self
 
      def find_optimum(self, model=None, maximize=None, random_state=None):
-         """Find the point where the model predicts the optimal value."""
+         """
+         Find the point where the model predicts the optimal value.
+
+         This uses the same approach as regret plot predictions: generate a grid
+         in the original variable space, predict using the model's standard pipeline,
+         and find the argmax/argmin. This ensures categorical variables are handled
+         correctly through proper encoding/decoding.
+         """
          if model is not None:
              self.model = model
 
@@ -543,135 +554,81 @@ class BoTorchAcquisition(BaseAcquisition):
          if random_state is not None:
              self.random_state = random_state
 
-         # Get bounds from the search space
-         bounds_tensor = self._get_bounds_from_search_space()
+         # Generate prediction grid in ORIGINAL variable space (not encoded)
+         # This handles categorical variables correctly
+         n_grid_points = 10000  # Target number of grid points
+         grid = self._generate_prediction_grid(n_grid_points)
+
+         # Use model's predict method which handles encoding internally
+         # This is the same pipeline used by regret plot (correct approach)
+         means, stds = self.model.predict(grid, return_std=True)
+
+         # Find argmax or argmin
+         if self.maximize:
+             best_idx = np.argmax(means)
+         else:
+             best_idx = np.argmin(means)
 
-         # Identify categorical and integer variables
-         categorical_variables = []
-         integer_variables = []
-         if hasattr(self.search_space_obj, 'get_categorical_variables'):
-             categorical_variables = self.search_space_obj.get_categorical_variables()
-         if hasattr(self.search_space_obj, 'get_integer_variables'):
-             integer_variables = self.search_space_obj.get_integer_variables()
+         # Extract the optimal point (already in original variable space)
+         opt_point_df = grid.iloc[[best_idx]].reset_index(drop=True)
+
+         return {
+             'x_opt': opt_point_df,
+             'value': float(means[best_idx]),
+             'std': float(stds[best_idx])
+         }
 
-         # Prepare for optimization
-         torch.manual_seed(self.random_state)
+     def _generate_prediction_grid(self, n_grid_points: int) -> pd.DataFrame:
+         """
+         Generate grid of test points across search space for predictions.
 
-         try:
-             # Use a simpler randomized search approach instead of optimize_acqf
-             # This avoids the dimension issues in the more complex optimization
-             n_samples = 20000  # Large number of random samples
-             best_value = float('-inf') if self.maximize else float('inf')
-             best_x = None
-
-             # Generate random samples within bounds
-             lower_bounds, upper_bounds = bounds_tensor[0], bounds_tensor[1]
-             X_samples = torch.rand(n_samples, len(lower_bounds), dtype=torch.double)
-             X_samples = X_samples * (upper_bounds - lower_bounds) + lower_bounds
-
-             # Round integer variables to nearest integer
-             if integer_variables:
-                 for i, feature_name in enumerate(self.model.feature_names):
-                     if feature_name in integer_variables:
-                         X_samples[:, i] = torch.round(X_samples[:, i])
-
-             # Evaluate model at all samples
-             self.model.model.eval()
-             with torch.no_grad():
-                 posterior = self.model.model.posterior(X_samples)
-                 values = posterior.mean.squeeze()
-
-             # If minimizing, negate values for finding maximum
-             if not self.maximize:
-                 values = -values
-
-             # Find the best value
-             best_idx = torch.argmax(values)
-             best_x = X_samples[best_idx]
-             best_value = values[best_idx].item()
-
-             # Convert to numpy
-             best_candidate = best_x.cpu().numpy().reshape(1, -1)
-         except Exception as e:
-             logger.error(f"Error in random search optimization: {e}")
-             # Fallback to grid search
-             logger.info("Falling back to grid search...")
-
-             # Create a simple grid search
-             n_points = 10  # Points per dimension
-             grid_points = []
-
-             # Create grid for each dimension
-             for i, feature_name in enumerate(self.model.feature_names):
-                 if feature_name in integer_variables:
-                     # For integer variables, create integer grid
-                     min_val = int(lower_bounds[i])
-                     max_val = int(upper_bounds[i])
-                     if max_val - min_val + 1 <= n_points:
-                         # If range is small, use all integer values
-                         grid_points.append(torch.arange(min_val, max_val + 1, dtype=torch.double))
-                     else:
-                         # If range is large, sample n_points integers
-                         step = max(1, (max_val - min_val) // (n_points - 1))
-                         values = torch.arange(min_val, max_val + 1, step, dtype=torch.double)
-                         grid_points.append(values[:n_points])
+         This creates a grid in the ORIGINAL variable space (with actual category
+         names, not encoded values), which is then properly encoded by the model's
+         predict() method.
+
+         Args:
+             n_grid_points: Target number of grid points (actual number depends on dimensionality)
+
+         Returns:
+             DataFrame with columns for each variable in original space
+         """
+         from itertools import product
+
+         grid_1d = []
+         var_names = []
+
+         variables = self.search_space_obj.variables
+         n_vars = len(variables)
+         n_per_dim = max(2, int(n_grid_points ** (1 / n_vars)))
+
+         for var in variables:
+             var_names.append(var['name'])
+
+             if var['type'] == 'real':
+                 # Continuous: linspace
+                 grid_1d.append(np.linspace(var['min'], var['max'], n_per_dim))
+             elif var['type'] == 'integer':
+                 # Integer: range of integers
+                 n_integers = var['max'] - var['min'] + 1
+                 if n_integers <= n_per_dim:
+                     # Use all integers if range is small
+                     grid_1d.append(np.arange(var['min'], var['max'] + 1))
                  else:
-                     # For continuous variables, use linspace
-                     grid_points.append(torch.linspace(
-                         lower_bounds[i], upper_bounds[i], n_points, dtype=torch.double
-                     ))
-
-             # Create meshgrid
-             meshgrid = torch.meshgrid(*grid_points, indexing='ij')
-             X_grid = torch.stack([x.reshape(-1) for x in meshgrid], dim=1)
-
-             # Evaluate model on grid
-             self.model.model.eval()
-             with torch.no_grad():
-                 posterior = self.model.model.posterior(X_grid)
-                 values = posterior.mean.squeeze()
-
-             # If minimizing, negate values
-             if not self.maximize:
-                 values = -values
-
-             # Find the best value
-             best_idx = torch.argmax(values)
-             best_x = X_grid[best_idx]
-             best_value = values[best_idx].item()
-
-             # Convert to numpy
-             best_candidate = best_x.cpu().numpy().reshape(1, -1)
-
-         # Convert to dictionary and then to DataFrame
-         feature_names = self.model.original_feature_names
-         result = {}
-         for i, name in enumerate(feature_names):
-             value = best_candidate[0, i]
-
-             # If this is a categorical variable, convert back to original value
-             if name in categorical_variables:
-                 # Find the original categorical value from the encoding
-                 encoding = self.model.categorical_encodings.get(name, {})
-                 inv_encoding = {v: k for k, v in encoding.items()}
-                 if value in inv_encoding:
-                     value = inv_encoding[value]
-                 elif int(value) in inv_encoding:
-                     value = inv_encoding[int(value)]
-             # If this is an integer variable, ensure it's an integer
-             elif name in integer_variables:
-                 value = int(round(value))
+                     # Sample n_per_dim integers
+                     grid_1d.append(np.linspace(var['min'], var['max'], n_per_dim).astype(int))
+             elif var['type'] == 'categorical':
+                 # Categorical: use ACTUAL category values (not encoded)
+                 grid_1d.append(var['values'])
 
-             result[name] = value
-
-         # Convert to DataFrame
-         opt_point_df = pd.DataFrame([result])
+         # Generate test points using Cartesian product
+         X_test_tuples = list(product(*grid_1d))
 
-         # Get predicted value and std at optimum
-         pred_mean, pred_std = self.model.predict_with_std(opt_point_df)
+         # Convert to DataFrame with proper variable names and types
+         grid = pd.DataFrame(X_test_tuples, columns=var_names)
 
-         return {
-             'x_opt': opt_point_df,
-             'value': float(pred_mean[0]),
-             'std': float(pred_std[0])
-         }
+         # Ensure correct dtypes for categorical variables
+         for var in variables:
+             if var['type'] == 'categorical':
+                 grid[var['name']] = grid[var['name']].astype(str)
+
+         return grid
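Note on the rewrite above: find_optimum no longer optimizes in the encoded tensor space; it enumerates a grid in the original variable space and lets the model's predict() handle encoding. A self-contained sketch of the same idea, with a hypothetical three-variable search space and a random stand-in for the model's predictions:

    from itertools import product

    import numpy as np
    import pandas as pd

    # Hypothetical search space, in the dict format the diff assumes
    variables = [
        {'name': 'temperature', 'type': 'real', 'min': 20.0, 'max': 80.0},
        {'name': 'cycles', 'type': 'integer', 'min': 1, 'max': 5},
        {'name': 'solvent', 'type': 'categorical', 'values': ['water', 'ethanol']},
    ]

    n_grid_points = 1000
    n_per_dim = max(2, int(n_grid_points ** (1 / len(variables))))

    grid_1d = []
    for var in variables:
        if var['type'] == 'real':
            grid_1d.append(np.linspace(var['min'], var['max'], n_per_dim))
        elif var['type'] == 'integer':
            grid_1d.append(np.arange(var['min'], var['max'] + 1))
        else:  # categorical: original labels, no encoding
            grid_1d.append(var['values'])

    grid = pd.DataFrame(list(product(*grid_1d)), columns=[v['name'] for v in variables])

    # Stand-in for model.predict(grid, return_std=True)
    means = np.random.default_rng(0).normal(size=len(grid))
    best_idx = int(np.argmax(means))  # np.argmin when minimizing
    print(grid.iloc[[best_idx]])      # optimum, already in original variable space

The trade-off is dimensionality: the full grid has n_per_dim ** n_vars points, so per-dimension resolution drops quickly as variables are added.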
alchemist_core/data/experiment_manager.py
@@ -8,12 +8,20 @@ class ExperimentManager:
      """
      Class for storing and managing experimental data in a consistent way across backends.
      Provides methods for data access, saving/loading, and conversion to formats needed by different backends.
+
+     Supports both single-objective and multi-objective optimization:
+     - Single-objective: Uses a single target column (default: 'Output', but configurable)
+     - Multi-objective: Uses multiple target columns specified in the target_columns attribute
+
+     The target_columns parameter allows flexible column naming to support various CSV formats.
      """
-     def __init__(self, search_space=None):
+     def __init__(self, search_space=None, target_columns: Optional[List[str]] = None):
          self.df = pd.DataFrame()  # Raw experimental data
          self.search_space = search_space  # Reference to the search space
          self.filepath = None  # Path to saved experiment file
          self._current_iteration = 0  # Track current iteration for audit log
+         # Support flexible target column naming for both single and multi-objective
+         self.target_columns = target_columns or ['Output']  # Default to 'Output' for backward compatibility
 
      def set_search_space(self, search_space):
          """Set or update the search space reference."""
@@ -35,9 +43,9 @@ class ExperimentManager:
          # Create a copy of the point_dict to avoid modifying the original
          new_point = point_dict.copy()
 
-         # Add output value if provided
+         # Add output value if provided (use first target column for single-objective)
          if output_value is not None:
-             new_point['Output'] = output_value
+             new_point[self.target_columns[0]] = output_value
 
          # Add noise value if provided
          if noise_value is not None:
@@ -107,12 +115,20 @@ class ExperimentManager:
          Returns:
              X: Features DataFrame
              y: Target Series
+
+         Raises:
+             ValueError: If configured target column is not found in data
          """
-         if 'Output' not in self.df.columns:
-             raise ValueError("DataFrame doesn't contain 'Output' column")
+         target_col = self.target_columns[0]  # Use first target column for single-objective
 
-         # Drop metadata columns (Output, Noise, Iteration, Reason)
-         metadata_cols = ['Output']
+         if target_col not in self.df.columns:
+             raise ValueError(
+                 f"DataFrame doesn't contain target column '{target_col}'. "
+                 f"Available columns: {list(self.df.columns)}"
+             )
+
+         # Drop metadata columns (target, Noise, Iteration, Reason)
+         metadata_cols = self.target_columns.copy()
          if 'Noise' in self.df.columns:
              metadata_cols.append('Noise')
          if 'Iteration' in self.df.columns:
@@ -121,7 +137,7 @@ class ExperimentManager:
              metadata_cols.append('Reason')
 
          X = self.df.drop(columns=metadata_cols)
-         y = self.df['Output']
+         y = self.df[target_col]
          return X, y
 
      def get_features_target_and_noise(self) -> Tuple[pd.DataFrame, pd.Series, Optional[pd.Series]]:
@@ -132,12 +148,20 @@ class ExperimentManager:
              X: Features DataFrame
              y: Target Series
              noise: Noise Series if available, otherwise None
+
+         Raises:
+             ValueError: If configured target column is not found in data
          """
-         if 'Output' not in self.df.columns:
-             raise ValueError("DataFrame doesn't contain 'Output' column")
+         target_col = self.target_columns[0]  # Use first target column for single-objective
+
+         if target_col not in self.df.columns:
+             raise ValueError(
+                 f"DataFrame doesn't contain target column '{target_col}'. "
+                 f"Available columns: {list(self.df.columns)}"
+             )
 
          # Drop metadata columns
-         metadata_cols = ['Output']
+         metadata_cols = self.target_columns.copy()
          if 'Noise' in self.df.columns:
              metadata_cols.append('Noise')
          if 'Iteration' in self.df.columns:
@@ -146,7 +170,7 @@ class ExperimentManager:
              metadata_cols.append('Reason')
 
          X = self.df.drop(columns=metadata_cols)
-         y = self.df['Output']
+         y = self.df[target_col]
          noise = self.df['Noise'] if 'Noise' in self.df.columns else None
          return X, y, noise
 
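Both accessors above now share the same contract: the first entry of target_columns becomes y, and every configured target plus any Noise/Iteration/Reason metadata column is dropped from X. A small illustration of that split with plain pandas (column names invented for the example):

    import pandas as pd

    df = pd.DataFrame({
        'temperature': [25.0, 40.0],
        'yield_pct':   [61.2, 74.9],  # configured target column
        'Noise':       [0.5, 0.4],    # optional metadata
        'Iteration':   [1, 2],
    })

    target_columns = ['yield_pct']
    target_col = target_columns[0]

    metadata_cols = target_columns.copy()
    for col in ('Noise', 'Iteration', 'Reason'):
        if col in df.columns:
            metadata_cols.append(col)

    X = df.drop(columns=metadata_cols)  # only true features remain
    y = df[target_col]
    print(list(X.columns))              # ['temperature']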
@@ -224,3 +248,148 @@
 
      def __len__(self):
          return len(self.df)
+
+     def get_pareto_frontier(self, directions: Optional[List[str]] = None) -> pd.DataFrame:
+         """
+         Compute Pareto-optimal solutions from experiments with multiple objectives.
+
+         Uses BoTorch's fast non-dominated sorting algorithm to identify Pareto-optimal
+         points. Works with both single-objective (returns all data) and multi-objective
+         experiments.
+
+         Args:
+             directions: List of 'maximize' or 'minimize' for each target column.
+                 If None, assumes all objectives are maximized.
+                 Length must match number of target columns.
+
+         Returns:
+             DataFrame containing only Pareto-optimal experiments with all columns.
+
+         Raises:
+             ValueError: If directions length doesn't match target columns.
+             ValueError: If target columns contain missing data.
+
+         Example:
+             >>> # For 2 objectives: maximize yield, minimize cost
+             >>> pareto_df = exp_mgr.get_pareto_frontier(['maximize', 'minimize'])
+         """
+         import torch
+         from botorch.utils.multi_objective.pareto import is_non_dominated
+
+         if len(self.df) == 0:
+             return pd.DataFrame()
+
+         # Validate target columns exist
+         missing_cols = [col for col in self.target_columns if col not in self.df.columns]
+         if missing_cols:
+             raise ValueError(f"Target columns {missing_cols} not found in experiment data")
+
+         # Extract objective values
+         Y = self.df[self.target_columns].values
+
+         # Check for missing values
+         if pd.isna(Y).any():
+             raise ValueError("Target columns contain missing values (NaN). Cannot compute Pareto frontier.")
+
+         # Single objective case: return all data
+         if len(self.target_columns) == 1:
+             return self.df.copy()
+
+         # Set default directions if not provided
+         if directions is None:
+             directions = ['maximize'] * len(self.target_columns)
+
+         # Validate directions
+         if len(directions) != len(self.target_columns):
+             raise ValueError(
+                 f"Number of directions ({len(directions)}) must match number of "
+                 f"target columns ({len(self.target_columns)})"
+             )
+
+         # Convert objectives to maximization form (BoTorch assumes maximization)
+         Y_torch = torch.tensor(Y, dtype=torch.double)
+         for i, direction in enumerate(directions):
+             if direction.lower() == 'minimize':
+                 Y_torch[:, i] = -Y_torch[:, i]
+
+         # Compute non-dominated mask
+         nd_mask = is_non_dominated(Y_torch, maximize=True, deduplicate=True)
+
+         # Return Pareto-optimal experiments
+         return self.df[nd_mask.numpy()].copy()
+
+     def compute_hypervolume(self, ref_point: Union[List[float], np.ndarray],
+                             directions: Optional[List[str]] = None) -> float:
+         """
+         Compute hypervolume indicator for multi-objective experiments.
+
+         The hypervolume measures the volume of objective space dominated by the
+         Pareto frontier relative to a reference point. Larger values indicate
+         better overall performance.
+
+         Args:
+             ref_point: Reference point (worst acceptable values) for each objective.
+                 Must have same length as target_columns.
+                 For maximization: should be below minimum observed values.
+                 For minimization: should be above maximum observed values.
+             directions: List of 'maximize' or 'minimize' for each target column.
+                 If None, assumes all objectives are maximized.
+
+         Returns:
+             Hypervolume value (float). Zero if no Pareto-optimal points exist.
+
+         Raises:
+             ValueError: If ref_point length doesn't match target columns.
+             ValueError: If target columns contain missing data.
+
+         Example:
+             >>> # For 2 objectives (maximize yield, minimize cost)
+             >>> # ref_point = [min_acceptable_yield, max_acceptable_cost]
+             >>> hv = exp_mgr.compute_hypervolume([50.0, 100.0], ['maximize', 'minimize'])
+         """
+         import torch
+         from botorch.utils.multi_objective.hypervolume import Hypervolume
+
+         if len(self.df) == 0:
+             return 0.0
+
+         # Single objective case: not meaningful
+         if len(self.target_columns) == 1:
+             raise ValueError(
+                 "Hypervolume is only defined for multi-objective problems. "
+                 "For single-objective, use best observed value instead."
+             )
+
+         # Validate ref_point
+         ref_point = np.array(ref_point)
+         if len(ref_point) != len(self.target_columns):
+             raise ValueError(
+                 f"Reference point length ({len(ref_point)}) must match number of "
+                 f"target columns ({len(self.target_columns)})"
+             )
+
+         # Get Pareto frontier
+         pareto_df = self.get_pareto_frontier(directions)
+         if len(pareto_df) == 0:
+             return 0.0
+
+         # Set default directions if not provided
+         if directions is None:
+             directions = ['maximize'] * len(self.target_columns)
+
+         # Extract Pareto objectives and convert to torch tensors
+         Y_pareto = pareto_df[self.target_columns].values
+         Y_torch = torch.tensor(Y_pareto, dtype=torch.double)
+         ref_torch = torch.tensor(ref_point, dtype=torch.double)
+
+         # Convert to maximization form (BoTorch assumes maximization)
+         for i, direction in enumerate(directions):
+             if direction.lower() == 'minimize':
+                 Y_torch[:, i] = -Y_torch[:, i]
+                 ref_torch[i] = -ref_torch[i]
+
+         # Compute hypervolume
+         hv_calculator = Hypervolume(ref_point=ref_torch)
+         hv = hv_calculator.compute(Y_torch)
+
+         return float(hv)
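The two new methods wrap standard BoTorch multi-objective utilities. A self-contained sketch of the underlying calls on synthetic data (both objectives maximized, so no sign flips are needed):

    import torch
    from botorch.utils.multi_objective.hypervolume import Hypervolume
    from botorch.utils.multi_objective.pareto import is_non_dominated

    # Four experiments, two objectives, both to maximize
    Y = torch.tensor([
        [1.0, 4.0],
        [2.0, 3.0],
        [3.0, 1.0],
        [1.5, 2.0],  # dominated by [2.0, 3.0]
    ], dtype=torch.double)

    mask = is_non_dominated(Y)  # BoTorch assumes maximization
    pareto = Y[mask]            # the first three rows survive

    # Reference point must be dominated by every Pareto point
    hv = Hypervolume(ref_point=torch.tensor([0.0, 0.0], dtype=torch.double))
    print(hv.compute(pareto))   # 8.0: area dominated above (0, 0)

For minimization objectives, the methods negate the relevant columns (and the reference point) before these calls, which is exactly the sign flip visible in the diff.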