alchemist-nrel 0.3.1__py3-none-any.whl → 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- alchemist_core/__init__.py +2 -2
- alchemist_core/acquisition/botorch_acquisition.py +83 -126
- alchemist_core/data/experiment_manager.py +181 -12
- alchemist_core/models/botorch_model.py +292 -63
- alchemist_core/models/sklearn_model.py +145 -13
- alchemist_core/session.py +3330 -31
- alchemist_core/utils/__init__.py +3 -1
- alchemist_core/utils/acquisition_utils.py +60 -0
- alchemist_core/visualization/__init__.py +45 -0
- alchemist_core/visualization/helpers.py +130 -0
- alchemist_core/visualization/plots.py +1449 -0
- {alchemist_nrel-0.3.1.dist-info → alchemist_nrel-0.3.2.dist-info}/METADATA +13 -13
- {alchemist_nrel-0.3.1.dist-info → alchemist_nrel-0.3.2.dist-info}/RECORD +31 -26
- {alchemist_nrel-0.3.1.dist-info → alchemist_nrel-0.3.2.dist-info}/WHEEL +1 -1
- api/main.py +1 -1
- api/models/requests.py +52 -0
- api/models/responses.py +79 -2
- api/routers/experiments.py +333 -8
- api/routers/sessions.py +84 -9
- api/routers/visualizations.py +6 -4
- api/routers/websocket.py +2 -2
- api/services/session_store.py +295 -71
- api/static/assets/index-B6Cf6s_b.css +1 -0
- api/static/assets/{index-DWfIKU9j.js → index-B7njvc9r.js} +201 -196
- api/static/index.html +2 -2
- ui/gpr_panel.py +11 -5
- ui/target_column_dialog.py +299 -0
- ui/ui.py +52 -5
- api/static/assets/index-sMIa_1hV.css +0 -1
- {alchemist_nrel-0.3.1.dist-info → alchemist_nrel-0.3.2.dist-info}/entry_points.txt +0 -0
- {alchemist_nrel-0.3.1.dist-info → alchemist_nrel-0.3.2.dist-info}/licenses/LICENSE +0 -0
- {alchemist_nrel-0.3.1.dist-info → alchemist_nrel-0.3.2.dist-info}/top_level.txt +0 -0
alchemist_core/__init__.py
CHANGED
|
@@ -27,10 +27,10 @@ Example:
|
|
|
27
27
|
>>> # Get next experiment suggestion
|
|
28
28
|
>>> next_point = session.suggest_next(acq_func="ei")
|
|
29
29
|
|
|
30
|
-
Version: 0.3.1
|
|
30
|
+
Version: 0.3.2
|
|
31
31
|
"""
|
|
32
32
|
|
|
33
|
-
__version__ = "0.3.
|
|
33
|
+
__version__ = "0.3.2"
|
|
34
34
|
__author__ = "Caleb Coatney"
|
|
35
35
|
__email__ = "caleb.coatney@nrel.gov"
|
|
36
36
|
|
|
@@ -312,6 +312,10 @@ class BoTorchAcquisition(BaseAcquisition):
|
|
|
312
312
|
options=options,
|
|
313
313
|
)
|
|
314
314
|
|
|
315
|
+
# Log the acquisition value found
|
|
316
|
+
acq_val = batch_acq_values.item() if batch_acq_values.numel() == 1 else batch_acq_values.max().item()
|
|
317
|
+
logger.info(f"Optimization found acquisition value: {acq_val:.4f}")
|
|
318
|
+
|
|
315
319
|
# Get the best candidate(s)
|
|
316
320
|
best_candidates = batch_candidates.detach().cpu()
|
|
317
321
|
|
|
@@ -533,7 +537,14 @@ class BoTorchAcquisition(BaseAcquisition):
|
|
|
533
537
|
return self
|
|
534
538
|
|
|
535
539
|
def find_optimum(self, model=None, maximize=None, random_state=None):
|
|
536
|
-
"""
|
|
540
|
+
"""
|
|
541
|
+
Find the point where the model predicts the optimal value.
|
|
542
|
+
|
|
543
|
+
This uses the same approach as regret plot predictions: generate a grid
|
|
544
|
+
in the original variable space, predict using the model's standard pipeline,
|
|
545
|
+
and find the argmax/argmin. This ensures categorical variables are handled
|
|
546
|
+
correctly through proper encoding/decoding.
|
|
547
|
+
"""
|
|
537
548
|
if model is not None:
|
|
538
549
|
self.model = model
|
|
539
550
|
|
|
@@ -543,135 +554,81 @@ class BoTorchAcquisition(BaseAcquisition):
|
|
|
543
554
|
if random_state is not None:
|
|
544
555
|
self.random_state = random_state
|
|
545
556
|
|
|
546
|
-
#
|
|
547
|
-
|
|
557
|
+
# Generate prediction grid in ORIGINAL variable space (not encoded)
|
|
558
|
+
# This handles categorical variables correctly
|
|
559
|
+
n_grid_points = 10000 # Target number of grid points
|
|
560
|
+
grid = self._generate_prediction_grid(n_grid_points)
|
|
561
|
+
|
|
562
|
+
# Use model's predict method which handles encoding internally
|
|
563
|
+
# This is the same pipeline used by regret plot (correct approach)
|
|
564
|
+
means, stds = self.model.predict(grid, return_std=True)
|
|
565
|
+
|
|
566
|
+
# Find argmax or argmin
|
|
567
|
+
if self.maximize:
|
|
568
|
+
best_idx = np.argmax(means)
|
|
569
|
+
else:
|
|
570
|
+
best_idx = np.argmin(means)
|
|
548
571
|
|
|
549
|
-
#
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
572
|
+
# Extract the optimal point (already in original variable space)
|
|
573
|
+
opt_point_df = grid.iloc[[best_idx]].reset_index(drop=True)
|
|
574
|
+
|
|
575
|
+
return {
|
|
576
|
+
'x_opt': opt_point_df,
|
|
577
|
+
'value': float(means[best_idx]),
|
|
578
|
+
'std': float(stds[best_idx])
|
|
579
|
+
}
|
|
556
580
|
|
|
557
|
-
|
|
558
|
-
|
|
581
|
+
def _generate_prediction_grid(self, n_grid_points: int) -> pd.DataFrame:
    """
    Build a full-factorial grid of candidate points over the search space.

    The grid is expressed in the ORIGINAL variable space (real values,
    integers and actual category labels, not encoded values), so it can be
    handed directly to the model's predict() method.

    Args:
        n_grid_points: Target number of grid points. The per-dimension
            resolution is the largest integer k with k ** n_vars <= target
            (but never fewer than 2), so the actual number of rows depends
            on the dimensionality and on the variable types.

    Returns:
        DataFrame with one column per search-space variable, in the order
        the variables appear in ``self.search_space_obj.variables``.

    Raises:
        ValueError: If the search space has no variables, or a variable has
            a type other than 'real', 'integer' or 'categorical'.
    """
    from itertools import product

    variables = self.search_space_obj.variables
    n_vars = len(variables)
    if n_vars == 0:
        raise ValueError("Search space has no variables; cannot build a prediction grid.")

    # Integer n-th root. A plain int(x ** (1 / n)) under-counts when the
    # float root lands just below an integer (1000 ** (1/3) == 9.99...),
    # so correct the estimate with exact integer arithmetic.
    target = max(int(n_grid_points), 1)
    n_per_dim = int(target ** (1.0 / n_vars))
    while (n_per_dim + 1) ** n_vars <= target:
        n_per_dim += 1
    while n_per_dim > 1 and n_per_dim ** n_vars > target:
        n_per_dim -= 1
    n_per_dim = max(2, n_per_dim)

    var_names = []
    axes = []
    for var in variables:
        kind = var['type']

        if kind == 'real':
            # Continuous: evenly spaced samples including both bounds
            axis = np.linspace(var['min'], var['max'], n_per_dim)
        elif kind == 'integer':
            low, high = var['min'], var['max']
            if high - low + 1 <= n_per_dim:
                # Small range: enumerate every integer
                axis = np.arange(low, high + 1)
            else:
                # Large range: round (not truncate) evenly spaced samples so
                # values are not biased toward zero, and drop any duplicates
                axis = np.unique(np.rint(np.linspace(low, high, n_per_dim)).astype(int))
        elif kind == 'categorical':
            # Categorical: use ACTUAL category values (not encoded)
            axis = list(var['values'])
        else:
            # Fail loudly instead of silently producing a name/axis mismatch
            raise ValueError(
                f"Unsupported variable type '{kind}' for variable '{var['name']}'"
            )

        var_names.append(var['name'])
        axes.append(axis)

    # Cartesian product of the per-variable axes -> one row per grid point
    grid = pd.DataFrame(list(product(*axes)), columns=var_names)

    # Ensure categorical columns are strings, as expected by the encoders
    for var in variables:
        if var['type'] == 'categorical':
            grid[var['name']] = grid[var['name']].astype(str)

    return grid
|
|
@@ -8,12 +8,20 @@ class ExperimentManager:
|
|
|
8
8
|
"""
|
|
9
9
|
Class for storing and managing experimental data in a consistent way across backends.
|
|
10
10
|
Provides methods for data access, saving/loading, and conversion to formats needed by different backends.
|
|
11
|
+
|
|
12
|
+
Supports both single-objective and multi-objective optimization:
|
|
13
|
+
- Single-objective: Uses single target column (default: 'Output', but configurable)
|
|
14
|
+
- Multi-objective: Uses multiple target columns specified in target_columns attribute
|
|
15
|
+
|
|
16
|
+
The target_column parameter allows flexible column naming to support various CSV formats.
|
|
11
17
|
"""
|
|
12
|
-
def __init__(self, search_space=None):
|
|
18
|
+
def __init__(self, search_space=None, target_columns: Optional[List[str]] = None):
    """
    Create an empty experiment store.

    Args:
        search_space: Optional reference to the search space the
            experiments belong to.
        target_columns: Names of the target (objective) column(s). A single
            name may be given as a plain string. Defaults to ['Output'] for
            backward compatibility when omitted or empty.
    """
    self.df = pd.DataFrame()  # Raw experimental data
    self.search_space = search_space  # Reference to the search space
    self.filepath = None  # Path to saved experiment file
    self._current_iteration = 0  # Track current iteration for audit log

    # Support flexible target column naming for both single and multi-objective.
    # Always store an independent list: other methods call
    # self.target_columns.copy() and index [0], which would misbehave for a
    # tuple or a bare string, and a shared list could be mutated by the caller.
    if isinstance(target_columns, str):
        target_columns = [target_columns]
    self.target_columns = list(target_columns) if target_columns else ['Output']
|
|
17
25
|
|
|
18
26
|
def set_search_space(self, search_space):
|
|
19
27
|
"""Set or update the search space reference."""
|
|
@@ -35,9 +43,9 @@ class ExperimentManager:
|
|
|
35
43
|
# Create a copy of the point_dict to avoid modifying the original
|
|
36
44
|
new_point = point_dict.copy()
|
|
37
45
|
|
|
38
|
-
# Add output value if provided
|
|
46
|
+
# Add output value if provided (use first target column for single-objective)
|
|
39
47
|
if output_value is not None:
|
|
40
|
-
new_point[
|
|
48
|
+
new_point[self.target_columns[0]] = output_value
|
|
41
49
|
|
|
42
50
|
# Add noise value if provided
|
|
43
51
|
if noise_value is not None:
|
|
@@ -107,12 +115,20 @@ class ExperimentManager:
|
|
|
107
115
|
Returns:
|
|
108
116
|
X: Features DataFrame
|
|
109
117
|
y: Target Series
|
|
118
|
+
|
|
119
|
+
Raises:
|
|
120
|
+
ValueError: If configured target column is not found in data
|
|
110
121
|
"""
|
|
111
|
-
|
|
112
|
-
raise ValueError("DataFrame doesn't contain 'Output' column")
|
|
122
|
+
target_col = self.target_columns[0] # Use first target column for single-objective
|
|
113
123
|
|
|
114
|
-
|
|
115
|
-
|
|
124
|
+
if target_col not in self.df.columns:
|
|
125
|
+
raise ValueError(
|
|
126
|
+
f"DataFrame doesn't contain target column '{target_col}'. "
|
|
127
|
+
f"Available columns: {list(self.df.columns)}"
|
|
128
|
+
)
|
|
129
|
+
|
|
130
|
+
# Drop metadata columns (target, Noise, Iteration, Reason)
|
|
131
|
+
metadata_cols = self.target_columns.copy()
|
|
116
132
|
if 'Noise' in self.df.columns:
|
|
117
133
|
metadata_cols.append('Noise')
|
|
118
134
|
if 'Iteration' in self.df.columns:
|
|
@@ -121,7 +137,7 @@ class ExperimentManager:
|
|
|
121
137
|
metadata_cols.append('Reason')
|
|
122
138
|
|
|
123
139
|
X = self.df.drop(columns=metadata_cols)
|
|
124
|
-
y = self.df[
|
|
140
|
+
y = self.df[target_col]
|
|
125
141
|
return X, y
|
|
126
142
|
|
|
127
143
|
def get_features_target_and_noise(self) -> Tuple[pd.DataFrame, pd.Series, Optional[pd.Series]]:
|
|
@@ -132,12 +148,20 @@ class ExperimentManager:
|
|
|
132
148
|
X: Features DataFrame
|
|
133
149
|
y: Target Series
|
|
134
150
|
noise: Noise Series if available, otherwise None
|
|
151
|
+
|
|
152
|
+
Raises:
|
|
153
|
+
ValueError: If configured target column is not found in data
|
|
135
154
|
"""
|
|
136
|
-
|
|
137
|
-
|
|
155
|
+
target_col = self.target_columns[0] # Use first target column for single-objective
|
|
156
|
+
|
|
157
|
+
if target_col not in self.df.columns:
|
|
158
|
+
raise ValueError(
|
|
159
|
+
f"DataFrame doesn't contain target column '{target_col}'. "
|
|
160
|
+
f"Available columns: {list(self.df.columns)}"
|
|
161
|
+
)
|
|
138
162
|
|
|
139
163
|
# Drop metadata columns
|
|
140
|
-
metadata_cols =
|
|
164
|
+
metadata_cols = self.target_columns.copy()
|
|
141
165
|
if 'Noise' in self.df.columns:
|
|
142
166
|
metadata_cols.append('Noise')
|
|
143
167
|
if 'Iteration' in self.df.columns:
|
|
@@ -146,7 +170,7 @@ class ExperimentManager:
|
|
|
146
170
|
metadata_cols.append('Reason')
|
|
147
171
|
|
|
148
172
|
X = self.df.drop(columns=metadata_cols)
|
|
149
|
-
y = self.df[
|
|
173
|
+
y = self.df[target_col]
|
|
150
174
|
noise = self.df['Noise'] if 'Noise' in self.df.columns else None
|
|
151
175
|
return X, y, noise
|
|
152
176
|
|
|
@@ -224,3 +248,148 @@ class ExperimentManager:
|
|
|
224
248
|
|
|
225
249
|
def __len__(self):
    """Return the number of stored experiments (rows of ``self.df``)."""
    n_experiments = len(self.df)
    return n_experiments
|
|
251
|
+
|
|
252
|
+
def get_pareto_frontier(self, directions: Optional[List[str]] = None) -> pd.DataFrame:
    """
    Compute Pareto-optimal solutions from experiments with multiple objectives.

    Uses BoTorch's non-dominated filtering (``is_non_dominated``) to identify
    Pareto-optimal points. For single-objective experiments all data is
    returned unchanged (``directions`` is ignored in that case).

    Args:
        directions: One entry per target column, each 'maximize' or
            'minimize' (case-insensitive; 'max'/'min' are accepted as
            aliases). If None, all objectives are maximized.
            Length must match the number of target columns.

    Returns:
        DataFrame containing only Pareto-optimal experiments with all columns.
        An empty DataFrame if no experiments have been recorded.

    Raises:
        ValueError: If a target column is missing from the data.
        ValueError: If target columns contain missing data.
        ValueError: If directions length doesn't match target columns.
        ValueError: If a direction is not a recognised keyword.

    Example:
        >>> # For 2 objectives: maximize yield, minimize cost
        >>> pareto_df = exp_mgr.get_pareto_frontier(['maximize', 'minimize'])
    """
    if len(self.df) == 0:
        return pd.DataFrame()

    # Validate target columns exist
    missing_cols = [col for col in self.target_columns if col not in self.df.columns]
    if missing_cols:
        raise ValueError(f"Target columns {missing_cols} not found in experiment data")

    # Extract objective values
    Y = self.df[self.target_columns].values

    # Check for missing values
    if pd.isna(Y).any():
        raise ValueError("Target columns contain missing values (NaN). Cannot compute Pareto frontier.")

    # Single objective case: return all data
    n_objectives = len(self.target_columns)
    if n_objectives == 1:
        return self.df.copy()

    # Set default directions if not provided
    if directions is None:
        directions = ['maximize'] * n_objectives

    # Validate directions
    if len(directions) != n_objectives:
        raise ValueError(
            f"Number of directions ({len(directions)}) must match number of "
            f"target columns ({n_objectives})"
        )

    # Map each direction to a sign. Unknown keywords are rejected rather than
    # silently treated as 'maximize', which would yield a wrong frontier.
    sign_of = {'maximize': 1.0, 'max': 1.0, 'minimize': -1.0, 'min': -1.0}
    keys = [str(direction).strip().lower() for direction in directions]
    invalid = [direction for direction, key in zip(directions, keys) if key not in sign_of]
    if invalid:
        raise ValueError(
            f"Invalid direction(s) {invalid}; expected 'maximize' or 'minimize'"
        )
    signs = np.array([sign_of[key] for key in keys])

    # Heavy dependencies are imported only once they are actually needed, so
    # the trivial and validation paths above do not require torch/BoTorch.
    import torch
    from botorch.utils.multi_objective.pareto import is_non_dominated

    # Convert objectives to maximization form (BoTorch assumes maximization)
    Y_torch = torch.as_tensor(np.asarray(Y, dtype=float) * signs, dtype=torch.double)

    # Compute non-dominated mask
    nd_mask = is_non_dominated(Y_torch, maximize=True, deduplicate=True)

    # Return Pareto-optimal experiments
    return self.df[nd_mask.numpy()].copy()
|
|
320
|
+
|
|
321
|
+
def compute_hypervolume(self, ref_point: Union[List[float], np.ndarray],
                        directions: Optional[List[str]] = None) -> float:
    """
    Compute hypervolume indicator for multi-objective experiments.

    The hypervolume measures the volume of objective space dominated by the
    Pareto frontier relative to a reference point. Larger values indicate
    better overall performance.

    Args:
        ref_point: Reference point (worst acceptable values) for each objective,
            given in the original objective units as a 1-D sequence.
            Must have same length as target_columns.
            For maximization: should be below minimum observed values.
            For minimization: should be above maximum observed values.
        directions: One entry per target column, each 'maximize' or
            'minimize' (case-insensitive; 'max'/'min' are accepted as
            aliases). If None, all objectives are maximized.

    Returns:
        Hypervolume value (float). Zero if there are no experiments or no
        Pareto-optimal points.

    Raises:
        ValueError: If called for a single-objective problem.
        ValueError: If ref_point is not 1-D or its length doesn't match
            target columns.
        ValueError: If directions has the wrong length or contains an
            unrecognised keyword.
        ValueError: If target columns contain missing data.

    Example:
        >>> # For 2 objectives (maximize yield, minimize cost)
        >>> # ref_point = [min_acceptable_yield, max_acceptable_cost]
        >>> hv = exp_mgr.compute_hypervolume([50.0, 100.0], ['maximize', 'minimize'])
    """
    if len(self.df) == 0:
        return 0.0

    # Single objective case: not meaningful
    n_objectives = len(self.target_columns)
    if n_objectives == 1:
        raise ValueError(
            "Hypervolume is only defined for multi-objective problems. "
            "For single-objective, use best observed value instead."
        )

    # Validate ref_point (a scalar would make len() raise TypeError, so
    # check the dimensionality explicitly and report a ValueError instead)
    ref_point = np.asarray(ref_point, dtype=float)
    if ref_point.ndim != 1:
        raise ValueError("Reference point must be a 1-D sequence of numbers")
    if len(ref_point) != n_objectives:
        raise ValueError(
            f"Reference point length ({len(ref_point)}) must match number of "
            f"target columns ({n_objectives})"
        )

    # Set default directions if not provided, then validate them up front so
    # a typo is reported instead of being silently treated as 'maximize'
    if directions is None:
        directions = ['maximize'] * n_objectives
    if len(directions) != n_objectives:
        raise ValueError(
            f"Number of directions ({len(directions)}) must match number of "
            f"target columns ({n_objectives})"
        )
    sign_of = {'maximize': 1.0, 'max': 1.0, 'minimize': -1.0, 'min': -1.0}
    keys = [str(direction).strip().lower() for direction in directions]
    invalid = [direction for direction, key in zip(directions, keys) if key not in sign_of]
    if invalid:
        raise ValueError(
            f"Invalid direction(s) {invalid}; expected 'maximize' or 'minimize'"
        )
    signs = np.array([sign_of[key] for key in keys])

    # Get Pareto frontier (pass canonical keywords so aliases are honoured)
    canonical = ['minimize' if sign < 0 else 'maximize' for sign in signs]
    pareto_df = self.get_pareto_frontier(canonical)
    if len(pareto_df) == 0:
        return 0.0

    # Heavy dependencies are imported only once they are actually needed
    import torch
    from botorch.utils.multi_objective.hypervolume import Hypervolume

    # Convert to maximization form (BoTorch assumes maximization): flip the
    # sign of minimized objectives in both the points and the reference point
    Y_pareto = np.asarray(pareto_df[self.target_columns].values, dtype=float)
    Y_torch = torch.as_tensor(Y_pareto * signs, dtype=torch.double)
    ref_torch = torch.as_tensor(ref_point * signs, dtype=torch.double)

    # Compute hypervolume
    hv_calculator = Hypervolume(ref_point=ref_torch)
    hv = hv_calculator.compute(Y_torch)

    return float(hv)
|