alchemist-nrel 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- alchemist_core/__init__.py +2 -2
- alchemist_core/acquisition/botorch_acquisition.py +84 -126
- alchemist_core/data/experiment_manager.py +196 -20
- alchemist_core/models/botorch_model.py +292 -63
- alchemist_core/models/sklearn_model.py +175 -15
- alchemist_core/session.py +3532 -76
- alchemist_core/utils/__init__.py +3 -1
- alchemist_core/utils/acquisition_utils.py +60 -0
- alchemist_core/visualization/__init__.py +45 -0
- alchemist_core/visualization/helpers.py +130 -0
- alchemist_core/visualization/plots.py +1449 -0
- alchemist_nrel-0.3.2.dist-info/METADATA +185 -0
- {alchemist_nrel-0.3.0.dist-info → alchemist_nrel-0.3.2.dist-info}/RECORD +34 -29
- {alchemist_nrel-0.3.0.dist-info → alchemist_nrel-0.3.2.dist-info}/WHEEL +1 -1
- {alchemist_nrel-0.3.0.dist-info → alchemist_nrel-0.3.2.dist-info}/entry_points.txt +1 -1
- {alchemist_nrel-0.3.0.dist-info → alchemist_nrel-0.3.2.dist-info}/top_level.txt +0 -1
- api/example_client.py +7 -2
- api/main.py +3 -2
- api/models/requests.py +76 -1
- api/models/responses.py +102 -2
- api/routers/acquisition.py +25 -0
- api/routers/experiments.py +352 -11
- api/routers/sessions.py +195 -11
- api/routers/visualizations.py +6 -4
- api/routers/websocket.py +132 -0
- run_api.py → api/run_api.py +8 -7
- api/services/session_store.py +370 -71
- api/static/assets/index-B6Cf6s_b.css +1 -0
- api/static/assets/{index-C0_glioA.js → index-B7njvc9r.js} +223 -208
- api/static/index.html +2 -2
- ui/gpr_panel.py +11 -5
- ui/target_column_dialog.py +299 -0
- ui/ui.py +52 -5
- alchemist_core/models/ax_model.py +0 -159
- alchemist_nrel-0.3.0.dist-info/METADATA +0 -223
- api/static/assets/index-CB4V1LI5.css +0 -1
- {alchemist_nrel-0.3.0.dist-info → alchemist_nrel-0.3.2.dist-info}/licenses/LICENSE +0 -0
alchemist_core/__init__.py
CHANGED
@@ -27,10 +27,10 @@ Example:
     >>> # Get next experiment suggestion
     >>> next_point = session.suggest_next(acq_func="ei")
 
-Version: 0.3.0
+Version: 0.3.2
 """
 
-__version__ = "0.3.0"
+__version__ = "0.3.2"
 __author__ = "Caleb Coatney"
 __email__ = "caleb.coatney@nrel.gov"
alchemist_core/acquisition/botorch_acquisition.py
CHANGED

@@ -268,6 +268,7 @@ class BoTorchAcquisition(BaseAcquisition):
                 "batch_limit": batch_limit,
                 "maxiter": max_iter,
                 "ftol": 1e-3,  # More relaxed convergence criteria
+                "factr": None,  # Required when ftol is specified
             }
         else:
             # Standard parameters for other acquisition functions
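A note on the `"factr": None` addition: SciPy's legacy `fmin_l_bfgs_b` interface takes `factr`, while `scipy.optimize.minimize(method="L-BFGS-B")` takes `ftol`, with the documented relationship ftol = factr * numpy.finfo(float).eps; setting `factr` to None presumably keeps the option passthrough from forwarding both tolerances at once. A minimal standalone sketch (toy objective, not from this package) of passing the relaxed `ftol` to SciPy directly:

    import numpy as np
    from scipy.optimize import minimize

    def objective(x):
        # Simple quadratic bowl with minimum at (1, -2)
        return (x[0] - 1.0) ** 2 + (x[1] + 2.0) ** 2

    # "ftol" is the minimize() spelling of L-BFGS-B's convergence tolerance
    res = minimize(objective, x0=np.zeros(2), method="L-BFGS-B",
                   options={"ftol": 1e-3, "maxiter": 50})
    print(res.x)  # approximately [1.0, -2.0]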
@@ -311,6 +312,10 @@ class BoTorchAcquisition(BaseAcquisition):
             options=options,
         )
 
+        # Log the acquisition value found
+        acq_val = batch_acq_values.item() if batch_acq_values.numel() == 1 else batch_acq_values.max().item()
+        logger.info(f"Optimization found acquisition value: {acq_val:.4f}")
+
         # Get the best candidate(s)
         best_candidates = batch_candidates.detach().cpu()
 
@@ -532,7 +537,14 @@ class BoTorchAcquisition(BaseAcquisition):
         return self
 
     def find_optimum(self, model=None, maximize=None, random_state=None):
-        """…"""
+        """
+        Find the point where the model predicts the optimal value.
+
+        This uses the same approach as regret plot predictions: generate a grid
+        in the original variable space, predict using the model's standard pipeline,
+        and find the argmax/argmin. This ensures categorical variables are handled
+        correctly through proper encoding/decoding.
+        """
         if model is not None:
             self.model = model
 
@@ -542,135 +554,81 @@ class BoTorchAcquisition(BaseAcquisition):
         if random_state is not None:
             self.random_state = random_state
 
-        # …
+        # Generate prediction grid in ORIGINAL variable space (not encoded)
+        # This handles categorical variables correctly
+        n_grid_points = 10000  # Target number of grid points
+        grid = self._generate_prediction_grid(n_grid_points)
+
+        # Use model's predict method which handles encoding internally
+        # This is the same pipeline used by regret plot (correct approach)
+        means, stds = self.model.predict(grid, return_std=True)
+
+        # Find argmax or argmin
+        if self.maximize:
+            best_idx = np.argmax(means)
+        else:
+            best_idx = np.argmin(means)
 
-        # …
+        # Extract the optimal point (already in original variable space)
+        opt_point_df = grid.iloc[[best_idx]].reset_index(drop=True)
+
+        return {
+            'x_opt': opt_point_df,
+            'value': float(means[best_idx]),
+            'std': float(stds[best_idx])
+        }
 
-        …
+    def _generate_prediction_grid(self, n_grid_points: int) -> pd.DataFrame:
+        """
+        Generate grid of test points across search space for predictions.
 
-        … (most removed lines of the old random-search body are elided in the source view)
-            best_value = values[best_idx].item()
-
-            # Convert to numpy
-            best_candidate = best_x.cpu().numpy().reshape(1, -1)
-        except Exception as e:
-            logger.error(f"Error in random search optimization: {e}")
-            # Fallback to grid search
-            logger.info("Falling back to grid search...")
-
-            # Create a simple grid search
-            n_points = 10  # Points per dimension
-            grid_points = []
-
-            # Create grid for each dimension
-            for i, feature_name in enumerate(self.model.feature_names):
-                if feature_name in integer_variables:
-                    # For integer variables, create integer grid
-                    min_val = int(lower_bounds[i])
-                    max_val = int(upper_bounds[i])
-                    if max_val - min_val + 1 <= n_points:
-                        # If range is small, use all integer values
-                        grid_points.append(torch.arange(min_val, max_val + 1, dtype=torch.double))
-                    else:
-                        # If range is large, sample n_points integers
-                        step = max(1, (max_val - min_val) // (n_points - 1))
-                        values = torch.arange(min_val, max_val + 1, step, dtype=torch.double)
-                        grid_points.append(values[:n_points])
+        This creates a grid in the ORIGINAL variable space (with actual category
+        names, not encoded values), which is then properly encoded by the model's
+        predict() method.
+
+        Args:
+            n_grid_points: Target number of grid points (actual number depends on dimensionality)
+
+        Returns:
+            DataFrame with columns for each variable in original space
+        """
+        from itertools import product
+
+        grid_1d = []
+        var_names = []
+
+        variables = self.search_space_obj.variables
+        n_vars = len(variables)
+        n_per_dim = max(2, int(n_grid_points ** (1/n_vars)))
+
+        for var in variables:
+            var_names.append(var['name'])
+
+            if var['type'] == 'real':
+                # Continuous: linspace
+                grid_1d.append(np.linspace(var['min'], var['max'], n_per_dim))
+            elif var['type'] == 'integer':
+                # Integer: range of integers
+                n_integers = var['max'] - var['min'] + 1
+                if n_integers <= n_per_dim:
+                    # Use all integers if range is small
+                    grid_1d.append(np.arange(var['min'], var['max'] + 1))
                 else:
-                    # …
-
-            # Create meshgrid
-            meshgrid = torch.meshgrid(*grid_points, indexing='ij')
-            X_grid = torch.stack([x.reshape(-1) for x in meshgrid], dim=1)
-
-            # Evaluate model on grid
-            self.model.model.eval()
-            with torch.no_grad():
-                posterior = self.model.model.posterior(X_grid)
-                values = posterior.mean.squeeze()
-
-            # If minimizing, negate values
-            if not self.maximize:
-                values = -values
-
-            # Find the best value
-            best_idx = torch.argmax(values)
-            best_x = X_grid[best_idx]
-            best_value = values[best_idx].item()
-
-            # Convert to numpy
-            best_candidate = best_x.cpu().numpy().reshape(1, -1)
-
-        # Convert to dictionary and then to DataFrame
-        feature_names = self.model.original_feature_names
-        result = {}
-        for i, name in enumerate(feature_names):
-            value = best_candidate[0, i]
-
-            # If this is a categorical variable, convert back to original value
-            if name in categorical_variables:
-                # Find the original categorical value from the encoding
-                encoding = self.model.categorical_encodings.get(name, {})
-                inv_encoding = {v: k for k, v in encoding.items()}
-                if value in inv_encoding:
-                    value = inv_encoding[value]
-                elif int(value) in inv_encoding:
-                    value = inv_encoding[int(value)]
-            # If this is an integer variable, ensure it's an integer
-            elif name in integer_variables:
-                value = int(round(value))
+                    # Sample n_per_dim integers
+                    grid_1d.append(np.linspace(var['min'], var['max'], n_per_dim).astype(int))
+            elif var['type'] == 'categorical':
+                # Categorical: use ACTUAL category values (not encoded)
+                grid_1d.append(var['values'])
 
-        # Convert to DataFrame
-        opt_point_df = pd.DataFrame([result])
+        # Generate test points using Cartesian product
+        X_test_tuples = list(product(*grid_1d))
 
-        # …
+        # Convert to DataFrame with proper variable names and types
+        grid = pd.DataFrame(X_test_tuples, columns=var_names)
 
-        …
+        # Ensure correct dtypes for categorical variables
+        for var in variables:
+            if var['type'] == 'categorical':
+                grid[var['name']] = grid[var['name']].astype(str)
+
+        return grid
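To see why the new grid-based `find_optimum` works with categorical variables, here is a minimal self-contained sketch of the same recipe, using the variable-spec dicts visible in the diff; `fake_predict` and the variable names are illustrative stand-ins for the model's `predict(grid, return_std=True)` pipeline:

    from itertools import product
    import numpy as np
    import pandas as pd

    # Toy search space in the diff's variable-spec format
    variables = [
        {'name': 'temperature', 'type': 'real', 'min': 20.0, 'max': 100.0},
        {'name': 'cycles', 'type': 'integer', 'min': 1, 'max': 5},
        {'name': 'solvent', 'type': 'categorical', 'values': ['water', 'ethanol']},
    ]

    n_grid_points = 10000
    n_per_dim = max(2, int(n_grid_points ** (1 / len(variables))))  # 21 for 3 vars

    grid_1d, names = [], []
    for var in variables:
        names.append(var['name'])
        if var['type'] == 'real':
            grid_1d.append(np.linspace(var['min'], var['max'], n_per_dim))
        elif var['type'] == 'integer':
            n_ints = var['max'] - var['min'] + 1
            if n_ints <= n_per_dim:
                grid_1d.append(np.arange(var['min'], var['max'] + 1))
            else:
                grid_1d.append(np.linspace(var['min'], var['max'], n_per_dim).astype(int))
        else:
            # Categorical: actual labels, never encodings
            grid_1d.append(var['values'])

    # Cartesian product of the per-dimension grids, in original variable space
    grid = pd.DataFrame(list(product(*grid_1d)), columns=names)

    def fake_predict(df):
        # Stand-in for model.predict(grid, return_std=True)
        means = (-((df['temperature'].astype(float) - 60.0) ** 2) / 100.0
                 + df['cycles'].astype(int)
                 + (df['solvent'] == 'ethanol').astype(int))
        return means.to_numpy(), np.ones(len(df))

    means, stds = fake_predict(grid)
    best_idx = int(np.argmax(means))  # maximize
    print(grid.iloc[[best_idx]], means[best_idx])

With three variables and a 10,000-point budget, `n_per_dim` works out to 21, so the realized grid here has 21 x 5 x 2 = 210 rows; the actual grid size always depends on dimensionality, as the docstring notes.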
alchemist_core/data/experiment_manager.py
CHANGED

@@ -8,12 +8,20 @@ class ExperimentManager:
     """
     Class for storing and managing experimental data in a consistent way across backends.
     Provides methods for data access, saving/loading, and conversion to formats needed by different backends.
+
+    Supports both single-objective and multi-objective optimization:
+    - Single-objective: Uses single target column (default: 'Output', but configurable)
+    - Multi-objective: Uses multiple target columns specified in target_columns attribute
+
+    The target_columns parameter allows flexible column naming to support various CSV formats.
     """
-    def __init__(self, search_space=None):
+    def __init__(self, search_space=None, target_columns: Optional[List[str]] = None):
         self.df = pd.DataFrame()  # Raw experimental data
         self.search_space = search_space  # Reference to the search space
         self.filepath = None  # Path to saved experiment file
         self._current_iteration = 0  # Track current iteration for audit log
+        # Support flexible target column naming for both single and multi-objective
+        self.target_columns = target_columns or ['Output']  # Default to 'Output' for backward compatibility
 
     def set_search_space(self, search_space):
         """Set or update the search space reference."""
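A short usage sketch of the new constructor parameter (the column names are illustrative, and rows are assigned straight to `mgr.df` for brevity rather than going through the manager's add/load methods):

    import pandas as pd
    from alchemist_core.data.experiment_manager import ExperimentManager

    # A CSV whose target is named 'yield' now works without renaming columns
    mgr = ExperimentManager(target_columns=['yield'])
    mgr.df = pd.DataFrame({
        'temperature': [20.0, 40.0, 60.0],
        'yield': [0.31, 0.55, 0.72],
        'Iteration': [0, 1, 2],
    })

    X, y = mgr.get_features_and_target()  # X: temperature; y: the 'yield' Series
    print(list(X.columns), y.name)

    default_mgr = ExperimentManager()     # defaults to ['Output']
    default_mgr.df = mgr.df
    # default_mgr.get_features_and_target() would raise ValueError:
    # no 'Output' column, and the error lists the available columns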
@@ -35,9 +43,9 @@ class ExperimentManager:
         # Create a copy of the point_dict to avoid modifying the original
         new_point = point_dict.copy()
 
-        # Add output value if provided
+        # Add output value if provided (use first target column for single-objective)
         if output_value is not None:
-            new_point['Output'] = output_value
+            new_point[self.target_columns[0]] = output_value
 
         # Add noise value if provided
         if noise_value is not None:
@@ -45,16 +53,23 @@ class ExperimentManager:
 
         # Add iteration tracking
         if iteration is not None:
-            # Use provided iteration
+            # Use provided iteration explicitly
             new_point['Iteration'] = int(iteration)
-            # Keep _current_iteration in sync with the latest explicit iteration
-            try:
-                self._current_iteration = int(iteration)
-            except Exception:
-                pass
         else:
-            # …
+            # Auto-calculate next iteration based on existing data
+            # This ensures proper iteration tracking across all clients
+            if len(self.df) > 0 and 'Iteration' in self.df.columns:
+                max_iteration = int(self.df['Iteration'].max())
+                new_point['Iteration'] = max_iteration + 1
+            else:
+                # First experiment defaults to iteration 0
+                new_point['Iteration'] = 0
+
+        # Keep _current_iteration in sync with latest iteration for backward compatibility
+        try:
+            self._current_iteration = int(new_point['Iteration'])
+        except Exception:
+            pass
 
         # Add reason
         new_point['Reason'] = reason if reason is not None else 'Manual'
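The auto-increment rule in the new else branch, isolated as a sketch (`next_iteration` is a hypothetical helper, not a package method): the next iteration is max + 1 over the existing column, or 0 for an empty table.

    import pandas as pd

    def next_iteration(df: pd.DataFrame) -> int:
        # Mirrors the logic above: continue from the highest recorded iteration
        if len(df) > 0 and 'Iteration' in df.columns:
            return int(df['Iteration'].max()) + 1
        return 0

    print(next_iteration(pd.DataFrame()))                          # 0
    print(next_iteration(pd.DataFrame({'Iteration': [0, 1, 4]})))  # 5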
@@ -100,12 +115,20 @@ class ExperimentManager:
         Returns:
             X: Features DataFrame
             y: Target Series
+
+        Raises:
+            ValueError: If configured target column is not found in data
         """
-        …
+        target_col = self.target_columns[0]  # Use first target column for single-objective
+
+        if target_col not in self.df.columns:
+            raise ValueError(
+                f"DataFrame doesn't contain target column '{target_col}'. "
+                f"Available columns: {list(self.df.columns)}"
+            )
 
-        # Drop metadata columns (Output, Noise, Iteration, Reason)
-        metadata_cols = ['Output']
+        # Drop metadata columns (target, Noise, Iteration, Reason)
+        metadata_cols = self.target_columns.copy()
         if 'Noise' in self.df.columns:
             metadata_cols.append('Noise')
         if 'Iteration' in self.df.columns:
@@ -114,7 +137,7 @@ class ExperimentManager:
             metadata_cols.append('Reason')
 
         X = self.df.drop(columns=metadata_cols)
-        y = self.df['Output']
+        y = self.df[target_col]
         return X, y
 
     def get_features_target_and_noise(self) -> Tuple[pd.DataFrame, pd.Series, Optional[pd.Series]]:
@@ -125,12 +148,20 @@ class ExperimentManager:
             X: Features DataFrame
             y: Target Series
             noise: Noise Series if available, otherwise None
+
+        Raises:
+            ValueError: If configured target column is not found in data
         """
-        …
+        target_col = self.target_columns[0]  # Use first target column for single-objective
+
+        if target_col not in self.df.columns:
+            raise ValueError(
+                f"DataFrame doesn't contain target column '{target_col}'. "
+                f"Available columns: {list(self.df.columns)}"
+            )
 
         # Drop metadata columns
-        metadata_cols = ['Output']
+        metadata_cols = self.target_columns.copy()
         if 'Noise' in self.df.columns:
             metadata_cols.append('Noise')
         if 'Iteration' in self.df.columns:
@@ -139,7 +170,7 @@ class ExperimentManager:
             metadata_cols.append('Reason')
 
         X = self.df.drop(columns=metadata_cols)
-        y = self.df['Output']
+        y = self.df[target_col]
         noise = self.df['Noise'] if 'Noise' in self.df.columns else None
         return X, y, noise
 
@@ -217,3 +248,148 @@ class ExperimentManager:
 
     def __len__(self):
         return len(self.df)
+
+    def get_pareto_frontier(self, directions: Optional[List[str]] = None) -> pd.DataFrame:
+        """
+        Compute Pareto-optimal solutions from experiments with multiple objectives.
+
+        Uses BoTorch's fast non-dominated sorting algorithm to identify Pareto-optimal
+        points. Works with both single-objective (returns all data) and multi-objective
+        experiments.
+
+        Args:
+            directions: List of 'maximize' or 'minimize' for each target column.
+                If None, assumes all objectives are maximized.
+                Length must match number of target columns.
+
+        Returns:
+            DataFrame containing only Pareto-optimal experiments with all columns.
+
+        Raises:
+            ValueError: If directions length doesn't match target columns.
+            ValueError: If target columns contain missing data.
+
+        Example:
+            >>> # For 2 objectives: maximize yield, minimize cost
+            >>> pareto_df = exp_mgr.get_pareto_frontier(['maximize', 'minimize'])
+        """
+        import torch
+        from botorch.utils.multi_objective.pareto import is_non_dominated
+
+        if len(self.df) == 0:
+            return pd.DataFrame()
+
+        # Validate target columns exist
+        missing_cols = [col for col in self.target_columns if col not in self.df.columns]
+        if missing_cols:
+            raise ValueError(f"Target columns {missing_cols} not found in experiment data")
+
+        # Extract objective values
+        Y = self.df[self.target_columns].values
+
+        # Check for missing values
+        if pd.isna(Y).any():
+            raise ValueError("Target columns contain missing values (NaN). Cannot compute Pareto frontier.")
+
+        # Single objective case: return all data
+        if len(self.target_columns) == 1:
+            return self.df.copy()
+
+        # Set default directions if not provided
+        if directions is None:
+            directions = ['maximize'] * len(self.target_columns)
+
+        # Validate directions
+        if len(directions) != len(self.target_columns):
+            raise ValueError(
+                f"Number of directions ({len(directions)}) must match number of "
+                f"target columns ({len(self.target_columns)})"
+            )
+
+        # Convert objectives to maximization form (BoTorch assumes maximization)
+        Y_torch = torch.tensor(Y, dtype=torch.double)
+        for i, direction in enumerate(directions):
+            if direction.lower() == 'minimize':
+                Y_torch[:, i] = -Y_torch[:, i]
+
+        # Compute non-dominated mask
+        nd_mask = is_non_dominated(Y_torch, maximize=True, deduplicate=True)
+
+        # Return Pareto-optimal experiments
+        return self.df[nd_mask.numpy()].copy()
+
+    def compute_hypervolume(self, ref_point: Union[List[float], np.ndarray],
+                            directions: Optional[List[str]] = None) -> float:
+        """
+        Compute hypervolume indicator for multi-objective experiments.
+
+        The hypervolume measures the volume of objective space dominated by the
+        Pareto frontier relative to a reference point. Larger values indicate
+        better overall performance.
+
+        Args:
+            ref_point: Reference point (worst acceptable values) for each objective.
+                Must have same length as target_columns.
+                For maximization: should be below minimum observed values.
+                For minimization: should be above maximum observed values.
+            directions: List of 'maximize' or 'minimize' for each target column.
+                If None, assumes all objectives are maximized.
+
+        Returns:
+            Hypervolume value (float). Zero if no Pareto-optimal points exist.
+
+        Raises:
+            ValueError: If ref_point length doesn't match target columns.
+            ValueError: If target columns contain missing data.
+
+        Example:
+            >>> # For 2 objectives (maximize yield, minimize cost)
+            >>> # ref_point = [min_acceptable_yield, max_acceptable_cost]
+            >>> hv = exp_mgr.compute_hypervolume([50.0, 100.0], ['maximize', 'minimize'])
+        """
+        import torch
+        from botorch.utils.multi_objective.hypervolume import Hypervolume
+
+        if len(self.df) == 0:
+            return 0.0
+
+        # Single objective case: not meaningful
+        if len(self.target_columns) == 1:
+            raise ValueError(
+                "Hypervolume is only defined for multi-objective problems. "
+                "For single-objective, use best observed value instead."
+            )
+
+        # Validate ref_point
+        ref_point = np.array(ref_point)
+        if len(ref_point) != len(self.target_columns):
+            raise ValueError(
+                f"Reference point length ({len(ref_point)}) must match number of "
+                f"target columns ({len(self.target_columns)})"
+            )
+
+        # Get Pareto frontier
+        pareto_df = self.get_pareto_frontier(directions)
+        if len(pareto_df) == 0:
+            return 0.0
+
+        # Set default directions if not provided
+        if directions is None:
+            directions = ['maximize'] * len(self.target_columns)
+
+        # Extract Pareto objectives and convert to torch tensors
+        Y_pareto = pareto_df[self.target_columns].values
+        Y_torch = torch.tensor(Y_pareto, dtype=torch.double)
+        ref_torch = torch.tensor(ref_point, dtype=torch.double)
+
+        # Convert to maximization form (BoTorch assumes maximization)
+        for i, direction in enumerate(directions):
+            if direction.lower() == 'minimize':
+                Y_torch[:, i] = -Y_torch[:, i]
+                ref_torch[i] = -ref_torch[i]
+
+        # Compute hypervolume
+        hv_calculator = Hypervolume(ref_point=ref_torch)
+        hv = hv_calculator.compute(Y_torch)
+
+        return float(hv)
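A toy end-to-end check of the two new methods, using the same BoTorch routines the diff imports; the column names, values, and direct `mgr.df` assignment are illustrative only:

    import pandas as pd
    from alchemist_core.data.experiment_manager import ExperimentManager

    mgr = ExperimentManager(target_columns=['yield', 'cost'])
    mgr.df = pd.DataFrame({
        'temperature': [20, 40, 60, 80],
        'yield':       [0.3, 0.5, 0.7, 0.4],
        'cost':        [1.0, 2.0, 4.0, 3.0],
    })

    # (0.4, 3.0) is dominated by (0.5, 2.0): lower yield AND higher cost,
    # so three of the four rows survive non-dominated sorting
    pareto = mgr.get_pareto_frontier(directions=['maximize', 'minimize'])
    print(len(pareto))  # 3

    # Reference point: worst acceptable yield 0.0, worst acceptable cost 5.0.
    # In (yield, -cost) maximization form the dominated rectangles sum to
    # 0.7*1 + 0.5*2 + 0.3*1 = 2.0 for these points.
    hv = mgr.compute_hypervolume(ref_point=[0.0, 5.0],
                                 directions=['maximize', 'minimize'])
    print(hv)  # 2.0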