alchemist-nrel 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. alchemist_core/__init__.py +63 -0
  2. alchemist_core/acquisition/__init__.py +1 -0
  3. alchemist_core/acquisition/base_acquisition.py +17 -0
  4. alchemist_core/acquisition/botorch_acquisition.py +668 -0
  5. alchemist_core/acquisition/skopt_acquisition.py +330 -0
  6. alchemist_core/config.py +113 -0
  7. alchemist_core/data/__init__.py +10 -0
  8. alchemist_core/data/experiment_manager.py +155 -0
  9. alchemist_core/data/search_space.py +169 -0
  10. alchemist_core/events.py +211 -0
  11. alchemist_core/models/__init__.py +0 -0
  12. alchemist_core/models/ax_model.py +159 -0
  13. alchemist_core/models/base_model.py +81 -0
  14. alchemist_core/models/botorch_model.py +922 -0
  15. alchemist_core/models/sklearn_model.py +717 -0
  16. alchemist_core/session.py +603 -0
  17. alchemist_core/utils/__init__.py +7 -0
  18. alchemist_nrel-0.2.1.dist-info/METADATA +206 -0
  19. alchemist_nrel-0.2.1.dist-info/RECORD +54 -0
  20. alchemist_nrel-0.2.1.dist-info/WHEEL +5 -0
  21. alchemist_nrel-0.2.1.dist-info/entry_points.txt +2 -0
  22. alchemist_nrel-0.2.1.dist-info/licenses/LICENSE +13 -0
  23. alchemist_nrel-0.2.1.dist-info/top_level.txt +4 -0
  24. api/__init__.py +3 -0
  25. api/dependencies.py +43 -0
  26. api/example_client.py +192 -0
  27. api/main.py +117 -0
  28. api/middleware/__init__.py +17 -0
  29. api/middleware/error_handlers.py +127 -0
  30. api/models/__init__.py +52 -0
  31. api/models/requests.py +202 -0
  32. api/models/responses.py +292 -0
  33. api/routers/__init__.py +5 -0
  34. api/routers/acquisition.py +164 -0
  35. api/routers/experiments.py +154 -0
  36. api/routers/models.py +116 -0
  37. api/routers/sessions.py +146 -0
  38. api/routers/variables.py +317 -0
  39. api/routers/visualizations.py +557 -0
  40. api/services/__init__.py +5 -0
  41. api/services/session_store.py +291 -0
  42. main.py +8 -0
  43. ui/__init__.py +34 -0
  44. ui/acquisition_panel.py +878 -0
  45. ui/custom_widgets.py +105 -0
  46. ui/experiment_logger.py +205 -0
  47. ui/gpr_panel.py +613 -0
  48. ui/notifications.py +654 -0
  49. ui/pool_viz.py +240 -0
  50. ui/ui.py +1192 -0
  51. ui/ui_utils.py +25 -0
  52. ui/utils.py +33 -0
  53. ui/variables_setup.py +496 -0
  54. ui/visualizations.py +1424 -0
@@ -0,0 +1,668 @@
+ from typing import Dict, List, Optional, Union, Tuple, Any
+ import numpy as np
+ import pandas as pd
+ import torch
+ from botorch.acquisition.analytic import (
+     ExpectedImprovement,
+     LogExpectedImprovement,
+     ProbabilityOfImprovement,
+     LogProbabilityOfImprovement,
+     UpperConfidenceBound,
+ )
+ from botorch.acquisition.monte_carlo import (
+     qExpectedImprovement,
+     qUpperConfidenceBound,
+ )
+ # qNegIntegratedPosteriorVariance lives in the separate active_learning module
+ from botorch.acquisition.active_learning import qNegIntegratedPosteriorVariance
+ from botorch.sampling import SobolQMCNormalSampler
+ from botorch.optim import optimize_acqf, optimize_acqf_mixed
+ from alchemist_core.config import get_logger
+ from alchemist_core.data.search_space import SearchSpace
+ from alchemist_core.models.botorch_model import BoTorchModel
+ from .base_acquisition import BaseAcquisition
+
+ logger = get_logger(__name__)
+
+ class BoTorchAcquisition(BaseAcquisition):
+     """
+     Acquisition function implementation using BoTorch.
+
+     Supported acquisition functions:
+     - 'ei': Expected Improvement
+     - 'logei': Log Expected Improvement (numerically stable)
+     - 'pi': Probability of Improvement
+     - 'logpi': Log Probability of Improvement (numerically stable)
+     - 'ucb': Upper Confidence Bound
+     - 'qei': Batch Expected Improvement (for q > 1)
+     - 'qucb': Batch Upper Confidence Bound (for q > 1)
+     - 'qipv' or 'qnipv': q-Negative Integrated Posterior Variance (exploratory)
+     """
+
+     # Valid acquisition function names
+     VALID_ACQ_FUNCS = {
+         'ei', 'logei', 'pi', 'logpi', 'ucb',
+         'qei', 'qucb', 'qipv', 'qnipv',
+         'expectedimprovement', 'probabilityofimprovement', 'upperconfidencebound'
+     }
+
+     def __init__(
+         self,
+         search_space,
+         model=None,
+         acq_func='ucb',
+         maximize=True,
+         random_state=42,
+         acq_func_kwargs=None,
+         batch_size=1
+     ):
+         """
+         Initialize the BoTorch acquisition function.
+
+         Args:
+             search_space: The search space (SearchSpace object)
+             model: A trained model (BoTorchModel)
+             acq_func: Acquisition function type (see class docstring for options)
+             maximize: Whether to maximize (True) or minimize (False) the objective
+             random_state: Random state for reproducibility
+             acq_func_kwargs: Dictionary of additional arguments for the acquisition function
+             batch_size: Number of points to select at once (q)
+
+         Raises:
+             ValueError: If acq_func is not a valid acquisition function name
+         """
+         # Validate the acquisition function before proceeding
+         acq_func_lower = acq_func.lower()
+         if acq_func_lower not in self.VALID_ACQ_FUNCS:
+             valid_funcs = "', '".join(sorted(['ei', 'logei', 'pi', 'logpi', 'ucb', 'qei', 'qucb', 'qipv']))
+             raise ValueError(
+                 f"Invalid acquisition function '{acq_func}' for BoTorch backend. "
+                 f"Valid options are: '{valid_funcs}'"
+             )
+
+         self.search_space_obj = search_space
+         self.maximize = maximize
+         self.random_state = random_state
+         self.acq_func_name = acq_func_lower
+         self.batch_size = batch_size
+
+         # Process acquisition function kwargs
+         self.acq_func_kwargs = acq_func_kwargs or {}
+
+         # Set default values if not provided
+         if self.acq_func_name == 'ucb' and 'beta' not in self.acq_func_kwargs:
+             self.acq_func_kwargs['beta'] = 0.5  # Default UCB exploration parameter
+
+         if self.acq_func_name == 'qucb' and 'beta' not in self.acq_func_kwargs:
+             self.acq_func_kwargs['beta'] = 0.5  # Default qUCB exploration parameter
+
+         if self.acq_func_name in ['qei', 'qucb', 'qipv'] and 'mc_samples' not in self.acq_func_kwargs:
+             self.acq_func_kwargs['mc_samples'] = 128  # Default MC samples for batch methods
+
+         # Create the acquisition function if a model is provided
+         self.acq_function = None
+         self.model = None
+         if model is not None and isinstance(model, BoTorchModel):
+             self.update_model(model)
+
+     def update_model(self, model):
+         """Update the underlying model."""
+         if not isinstance(model, BoTorchModel):
+             raise ValueError("Model must be a BoTorchModel instance")
+
+         self.model = model
+
+         # Create the acquisition function based on the specified type
+         self._create_acquisition_function()
+
+     def _create_acquisition_function(self):
+         """Create the appropriate BoTorch acquisition function."""
+         if self.model is None or not hasattr(self.model, 'model') or not self.model.is_trained:
+             return
+
+         # Set torch seed for reproducibility
+         torch.manual_seed(self.random_state)
+
+         # Get the best observed value from the model.
+         # Important: use original-scale values, not transformed values, because
+         # the acquisition function optimization works in the original space.
+         if hasattr(self.model, 'model') and hasattr(self.model.model, 'train_targets'):
+             # Check if we have access to original-scale targets
+             if hasattr(self.model, 'Y_orig') and self.model.Y_orig is not None:
+                 # Use original-scale targets for the best_f calculation
+                 train_Y_orig = self.model.Y_orig.cpu().numpy() if torch.is_tensor(self.model.Y_orig) else self.model.Y_orig
+                 best_f = float(np.max(train_Y_orig) if self.maximize else np.min(train_Y_orig))
+             else:
+                 # Fallback: use train_targets (may be in transformed space)
+                 train_Y = self.model.model.train_targets.cpu().numpy()
+                 best_f = float(np.max(train_Y) if self.maximize else np.min(train_Y))
+             best_f = torch.tensor(best_f, dtype=torch.double)
+         else:
+             best_f = torch.tensor(0.0, dtype=torch.double)
+
+         # Create the appropriate acquisition function based on type
+         if self.acq_func_name == 'ei':
+             # Standard Expected Improvement
+             self.acq_function = ExpectedImprovement(
+                 model=self.model.model,
+                 best_f=best_f,
+                 maximize=self.maximize
+             )
+         elif self.acq_func_name == 'logei':
+             # Log Expected Improvement (numerically more stable)
+             self.acq_function = LogExpectedImprovement(
+                 model=self.model.model,
+                 best_f=best_f,
+                 maximize=self.maximize
+             )
+         elif self.acq_func_name == 'pi':
+             # Probability of Improvement
+             self.acq_function = ProbabilityOfImprovement(
+                 model=self.model.model,
+                 best_f=best_f,
+                 maximize=self.maximize
+             )
+         elif self.acq_func_name == 'logpi':
+             # Log Probability of Improvement
+             self.acq_function = LogProbabilityOfImprovement(
+                 model=self.model.model,
+                 best_f=best_f,
+                 maximize=self.maximize
+             )
+         elif self.acq_func_name == 'ucb':
+             # Upper Confidence Bound
+             beta = self.acq_func_kwargs.get('beta', 0.5)
+             self.acq_function = UpperConfidenceBound(
+                 model=self.model.model,
+                 beta=beta,
+                 maximize=self.maximize
+             )
+         elif self.acq_func_name == 'qei':
+             # Batch Expected Improvement
+             mc_samples = self.acq_func_kwargs.get('mc_samples', 128)
+             sampler = SobolQMCNormalSampler(sample_shape=torch.Size([mc_samples]), seed=self.random_state)
+
+             # qEI takes no maximize flag - it always maximizes; for minimization,
+             # the objectives should be negated when training the model
+             self.acq_function = qExpectedImprovement(
+                 model=self.model.model,
+                 best_f=best_f,
+                 sampler=sampler
+             )
+         elif self.acq_func_name == 'qucb':
+             # Batch Upper Confidence Bound
+             beta = self.acq_func_kwargs.get('beta', 0.5)
+             mc_samples = self.acq_func_kwargs.get('mc_samples', 128)
+             sampler = SobolQMCNormalSampler(sample_shape=torch.Size([mc_samples]), seed=self.random_state)
+             self.acq_function = qUpperConfidenceBound(
+                 model=self.model.model,
+                 beta=beta,
+                 sampler=sampler,
+                 # qUCB likewise takes no maximize flag
+             )
+         elif self.acq_func_name in ['qipv', 'qnipv']:
+             # q-Negative Integrated Posterior Variance (exploratory)
+             # Generate MC points for integration over the search space
+             bounds_tensor = self._get_bounds_from_search_space()
+             n_mc_points = self.acq_func_kwargs.get('n_mc_points', 500)  # Reduced default
+
+             # Draw MC points uniformly at random within the bounds
+             lower_bounds, upper_bounds = bounds_tensor[0], bounds_tensor[1]
+             mc_points = torch.rand(n_mc_points, len(lower_bounds), dtype=torch.double)
+             mc_points = mc_points * (upper_bounds - lower_bounds) + lower_bounds
+
+             # Create the Integrated Posterior Variance acquisition function
+             self.acq_function = qNegIntegratedPosteriorVariance(
+                 model=self.model.model,
+                 mc_points=mc_points,
+             )
+         else:
+             # Unreachable thanks to validation in __init__, but kept as a safeguard
+             raise ValueError(f"Unsupported acquisition function: {self.acq_func_name}")
+
+     def select_next(self, candidate_points=None):
+         """
+         Suggest the next experiment point(s) using BoTorch optimization.
+
+         Args:
+             candidate_points: Candidate points to evaluate (optional)
+
+         Returns:
+             Dictionary with the selected point, or a list of points when batch_size > 1
+         """
+         # Ensure we have an acquisition function
+         if self.acq_function is None:
+             self._create_acquisition_function()
+             if self.acq_function is None:
+                 raise ValueError("Could not create acquisition function - model not properly set")
+
+         # Get bounds from the search space
+         bounds_tensor = self._get_bounds_from_search_space()
+
+         # Identify categorical and integer variables
+         categorical_variables = []
+         integer_variables = []
+         if hasattr(self.search_space_obj, 'get_categorical_variables'):
+             categorical_variables = self.search_space_obj.get_categorical_variables()
+         if hasattr(self.search_space_obj, 'get_integer_variables'):
+             integer_variables = self.search_space_obj.get_integer_variables()
+
+         # Set torch seed for reproducibility
+         torch.manual_seed(self.random_state)
+
+         # If no candidates are provided, optimize the acquisition function
+         if candidate_points is None:
+             # Check whether we need batch or single-point optimization
+             q = self.batch_size
+
+             # Batch acquisition functions need a different optimization approach
+             is_batch_acq = self.acq_func_name.startswith('q')
+
+             # Adjust optimization parameters for qIPV to improve stability and performance
+             if self.acq_func_name == 'qipv':
+                 num_restarts = 20  # More restarts for qIPV
+                 raw_samples = 100  # Fewer samples per restart
+                 max_iter = 150  # Fewer iterations
+                 batch_limit = 5  # Standard batch limit
+                 options = {
+                     "batch_limit": batch_limit,
+                     "maxiter": max_iter,
+                     "ftol": 1e-3,  # More relaxed convergence criterion
+                 }
+             else:
+                 # Standard parameters for other acquisition functions
+                 num_restarts = 20 if is_batch_acq else 10
+                 raw_samples = 500 if is_batch_acq else 200
+                 max_iter = 300
+                 batch_limit = 5
+                 options = {"batch_limit": batch_limit, "maxiter": max_iter}
+
+             # Check if we have categorical variables
+             if categorical_variables and len(categorical_variables) > 0:
+                 # Get categorical dimensions and their possible values
+                 fixed_features_list = []
+
+                 # Map variable names to indices
+                 var_to_idx = {name: i for i, name in enumerate(self.model.feature_names)}
+
+                 # Identify which dimensions are categorical
+                 for var_name in categorical_variables:
+                     if var_name in var_to_idx:
+                         cat_idx = var_to_idx[var_name]
+
+                         # Get possible values for this categorical variable
+                         if var_name in self.model.categorical_encodings:
+                             cat_values = list(self.model.categorical_encodings[var_name].values())
+
+                             # Create a fixed_features entry for each possible value
+                             for val in cat_values:
+                                 fixed_features = {cat_idx: val}
+                                 fixed_features_list.append(fixed_features)
+
+                 try:
+                     # Use mixed optimization for categorical variables
+                     batch_candidates, batch_acq_values = optimize_acqf_mixed(
+                         acq_function=self.acq_function,
+                         bounds=bounds_tensor,
+                         q=q,
+                         num_restarts=num_restarts,
+                         raw_samples=raw_samples,
+                         fixed_features_list=fixed_features_list,
+                         options=options,
+                     )
+
+                     # Get the best candidate(s)
+                     best_candidates = batch_candidates.detach().cpu()
+
+                     # Apply integer constraints if needed
+                     if integer_variables:
+                         var_to_idx = {name: i for i, name in enumerate(self.model.feature_names)}
+                         for var_name in integer_variables:
+                             if var_name in var_to_idx:
+                                 idx = var_to_idx[var_name]
+                                 best_candidates[:, idx] = torch.round(best_candidates[:, idx])
+
+                     best_candidates = best_candidates.numpy()
+                 except Exception as e:
+                     logger.error(f"Error in optimize_acqf_mixed: {e}")
+                     # Fall back to standard optimization
+                     batch_candidates, batch_acq_values = optimize_acqf(
+                         acq_function=self.acq_function,
+                         bounds=bounds_tensor,
+                         q=q,
+                         num_restarts=num_restarts // 2,  # Reduced for fallback
+                         raw_samples=raw_samples // 2,  # Reduced for fallback
+                         options=options,
+                     )
+                     best_candidates = batch_candidates.detach().cpu()
+
+                     # Apply integer constraints if needed
+                     if integer_variables:
+                         var_to_idx = {name: i for i, name in enumerate(self.model.feature_names)}
+                         for var_name in integer_variables:
+                             if var_name in var_to_idx:
+                                 idx = var_to_idx[var_name]
+                                 best_candidates[:, idx] = torch.round(best_candidates[:, idx])
+
+                     best_candidates = best_candidates.numpy()
+             else:
+                 # For purely continuous variables
+                 batch_candidates, batch_acq_values = optimize_acqf(
+                     acq_function=self.acq_function,
+                     bounds=bounds_tensor,
+                     q=q,
+                     num_restarts=num_restarts,
+                     raw_samples=raw_samples,
+                     options=options,
+                 )
+
+                 best_candidates = batch_candidates.detach().cpu()
+
+                 # Apply integer constraints if needed
+                 if integer_variables:
+                     var_to_idx = {name: i for i, name in enumerate(self.model.feature_names)}
+                     for var_name in integer_variables:
+                         if var_name in var_to_idx:
+                             idx = var_to_idx[var_name]
+                             best_candidates[:, idx] = torch.round(best_candidates[:, idx])
+
+                 best_candidates = best_candidates.numpy()
+         else:
+             # If candidates are provided, evaluate them directly
+             if isinstance(candidate_points, np.ndarray):
+                 candidate_tensor = torch.tensor(candidate_points, dtype=torch.double)
+             elif isinstance(candidate_points, pd.DataFrame):
+                 # Encode categorical variables
+                 candidates_encoded = self.model._encode_categorical_data(candidate_points)
+                 candidate_tensor = torch.tensor(candidates_encoded.values, dtype=torch.double)
+             else:
+                 candidate_tensor = candidate_points  # Assume it is already a tensor
+
+             # Evaluate the acquisition function at the candidate points
+             with torch.no_grad():
+                 acq_values = self.acq_function(candidate_tensor.unsqueeze(1))
+
+             # Find the best candidate
+             best_idx = int(torch.argmax(acq_values))
+             best_candidate = candidate_points.iloc[best_idx] if isinstance(candidate_points, pd.DataFrame) else candidate_points[best_idx]
+
+             return best_candidate
+
+         # If we're returning batch results (q > 1)
+         if self.batch_size > 1:
+             result_points = []
+
+             # Convert each point in the batch to a dictionary keyed by feature name
+             for i in range(best_candidates.shape[0]):
+                 point_dict = {}
+                 for j, name in enumerate(self.model.feature_names):
+                     value = best_candidates[i, j]
+
+                     # If this is a categorical variable, convert back to the original value
+                     if name in categorical_variables:
+                         # Find the original categorical value from the encoding
+                         encoding = self.model.categorical_encodings.get(name, {})
+                         inv_encoding = {v: k for k, v in encoding.items()}
+                         if value in inv_encoding:
+                             value = inv_encoding[value]
+                         elif int(value) in inv_encoding:
+                             value = inv_encoding[int(value)]
+                     # If this is an integer variable, ensure it is an integer
+                     elif name in integer_variables:
+                         value = int(round(float(value)))
+
+                     point_dict[name] = value
+
+                 result_points.append(point_dict)
+
+             return result_points
+
+         # For single-point results (q = 1)
+         result = {}
+         for i, name in enumerate(self.model.feature_names):
+             value = best_candidates[0, i]
+
+             # If this is a categorical variable, convert back to the original value
+             if name in categorical_variables:
+                 # Find the original categorical value from the encoding
+                 encoding = self.model.categorical_encodings.get(name, {})
+                 inv_encoding = {v: k for k, v in encoding.items()}
+                 if value in inv_encoding:
+                     value = inv_encoding[value]
+                 elif int(value) in inv_encoding:
+                     value = inv_encoding[int(value)]
+             # If this is an integer variable, ensure it is an integer
+             elif name in integer_variables:
+                 value = int(round(float(value)))
+
+             result[name] = value
+
+         return result
+
+     def _get_bounds_from_search_space(self):
+         """Extract bounds from the search space."""
+         # First try the to_botorch_bounds method, if available
+         if hasattr(self.search_space_obj, 'to_botorch_bounds'):
+             bounds_tensor = self.search_space_obj.to_botorch_bounds()
+             if isinstance(bounds_tensor, torch.Tensor) and bounds_tensor.dim() == 2 and bounds_tensor.shape[0] == 2:
+                 return bounds_tensor
+
+         # Get feature names from the model to ensure proper ordering
+         if not hasattr(self.model, 'feature_names'):
+             raise ValueError("Model doesn't have a feature_names attribute")
+
+         feature_names = self.model.feature_names
+
+         # Get categorical variables
+         categorical_variables = []
+         if hasattr(self.search_space_obj, 'get_categorical_variables'):
+             categorical_variables = self.search_space_obj.get_categorical_variables()
+
+         # Extract bounds for each feature
+         lower_bounds = []
+         upper_bounds = []
+
+         if hasattr(self.search_space_obj, 'variables'):
+             # Create a map for quick lookup
+             var_dict = {var['name']: var for var in self.search_space_obj.variables}
+
+             for name in feature_names:
+                 if name in var_dict:
+                     var = var_dict[name]
+                     if var.get('type') == 'categorical':
+                         # For categorical variables, use the encoding range
+                         if hasattr(self.model, 'categorical_encodings') and name in self.model.categorical_encodings:
+                             encodings = self.model.categorical_encodings[name]
+                             lower_bounds.append(0.0)
+                             upper_bounds.append(float(max(encodings.values())))
+                         else:
+                             # Default fallback for categorical variables
+                             lower_bounds.append(0.0)
+                             upper_bounds.append(1.0)
+                     elif 'min' in var and 'max' in var:
+                         lower_bounds.append(float(var['min']))
+                         upper_bounds.append(float(var['max']))
+                     elif 'bounds' in var:
+                         lower_bounds.append(float(var['bounds'][0]))
+                         upper_bounds.append(float(var['bounds'][1]))
+                 else:
+                     # Default fallback if the variable is not found
+                     lower_bounds.append(0.0)
+                     upper_bounds.append(1.0)
+
+         # Validate bounds
+         if not lower_bounds or not upper_bounds:
+             raise ValueError("Could not extract bounds from search space")
+
+         if len(lower_bounds) != len(upper_bounds):
+             raise ValueError(f"Inconsistent bounds: got {len(lower_bounds)} lower bounds and {len(upper_bounds)} upper bounds")
+
+         if len(lower_bounds) != len(feature_names):
+             raise ValueError(f"Dimension mismatch: got {len(lower_bounds)} bounds but model expects {len(feature_names)} features")
+
+         return torch.tensor([lower_bounds, upper_bounds], dtype=torch.double)
+
+     def update(self, X=None, y=None):
+         """
+         Update the acquisition function with new observations.
+
+         Args:
+             X: Features of new observations
+             y: Target values of new observations
+         """
+         # For BoTorch we typically don't need to update the acquisition function
+         # explicitly, since a fresh one is created each time by update_model().
+         #
+         # The method is implemented anyway to satisfy the BaseAcquisition interface.
+
+         if X is not None and y is not None and hasattr(self.model, 'update'):
+             # If the model has an update method, use it
+             self.model.update(X, y)
+
+             # Recreate the acquisition function with the updated model
+             self._create_acquisition_function()
+
+         return self
+
+     def find_optimum(self, model=None, maximize=None, random_state=None):
+         """Find the point where the model predicts the optimal value."""
+         if model is not None:
+             self.model = model
+
+         if maximize is not None:
+             self.maximize = maximize
+
+         if random_state is not None:
+             self.random_state = random_state
+
+         # Get bounds from the search space and unpack them up front,
+         # so the grid-search fallback below can use them as well
+         bounds_tensor = self._get_bounds_from_search_space()
+         lower_bounds, upper_bounds = bounds_tensor[0], bounds_tensor[1]
+
+         # Identify categorical and integer variables
+         categorical_variables = []
+         integer_variables = []
+         if hasattr(self.search_space_obj, 'get_categorical_variables'):
+             categorical_variables = self.search_space_obj.get_categorical_variables()
+         if hasattr(self.search_space_obj, 'get_integer_variables'):
+             integer_variables = self.search_space_obj.get_integer_variables()
+
+         # Prepare for optimization
+         torch.manual_seed(self.random_state)
+
+         try:
+             # Use a simple randomized search instead of optimize_acqf;
+             # this avoids dimension issues in the more complex optimization
+             n_samples = 20000  # Large number of random samples
+             best_value = float('-inf') if self.maximize else float('inf')
+             best_x = None
+
+             # Generate random samples within bounds
+             X_samples = torch.rand(n_samples, len(lower_bounds), dtype=torch.double)
+             X_samples = X_samples * (upper_bounds - lower_bounds) + lower_bounds
+
+             # Round integer variables to the nearest integer
+             if integer_variables:
+                 for i, feature_name in enumerate(self.model.feature_names):
+                     if feature_name in integer_variables:
+                         X_samples[:, i] = torch.round(X_samples[:, i])
+
+             # Evaluate the model at all samples
+             self.model.model.eval()
+             with torch.no_grad():
+                 posterior = self.model.model.posterior(X_samples)
+                 values = posterior.mean.squeeze()
+
+             # If minimizing, negate values so the maximum can always be taken
+             if not self.maximize:
+                 values = -values
+
+             # Find the best value
+             best_idx = torch.argmax(values)
+             best_x = X_samples[best_idx]
+             best_value = values[best_idx].item()
+
+             # Convert to numpy
+             best_candidate = best_x.cpu().numpy().reshape(1, -1)
+         except Exception as e:
+             logger.error(f"Error in random search optimization: {e}")
+             # Fall back to grid search
+             logger.info("Falling back to grid search...")
+
+             # Create a simple grid search
+             n_points = 10  # Points per dimension
+             grid_points = []
+
+             # Create a grid for each dimension
+             for i, feature_name in enumerate(self.model.feature_names):
+                 if feature_name in integer_variables:
+                     # For integer variables, create an integer grid
+                     min_val = int(lower_bounds[i])
+                     max_val = int(upper_bounds[i])
+                     if max_val - min_val + 1 <= n_points:
+                         # If the range is small, use all integer values
+                         grid_points.append(torch.arange(min_val, max_val + 1, dtype=torch.double))
+                     else:
+                         # If the range is large, sample n_points integers
+                         step = max(1, (max_val - min_val) // (n_points - 1))
+                         values = torch.arange(min_val, max_val + 1, step, dtype=torch.double)
+                         grid_points.append(values[:n_points])
+                 else:
+                     # For continuous variables, use linspace
+                     grid_points.append(torch.linspace(
+                         lower_bounds[i], upper_bounds[i], n_points, dtype=torch.double
+                     ))
+
+             # Create the meshgrid
+             meshgrid = torch.meshgrid(*grid_points, indexing='ij')
+             X_grid = torch.stack([x.reshape(-1) for x in meshgrid], dim=1)
+
+             # Evaluate the model on the grid
+             self.model.model.eval()
+             with torch.no_grad():
+                 posterior = self.model.model.posterior(X_grid)
+                 values = posterior.mean.squeeze()
+
+             # If minimizing, negate values
+             if not self.maximize:
+                 values = -values
+
+             # Find the best value
+             best_idx = torch.argmax(values)
+             best_x = X_grid[best_idx]
+             best_value = values[best_idx].item()
+
+             # Convert to numpy
+             best_candidate = best_x.cpu().numpy().reshape(1, -1)
+
+         # Convert to a dictionary and then to a DataFrame
+         result = {}
+         for i, name in enumerate(self.model.feature_names):
+             value = best_candidate[0, i]
+
+             # If this is a categorical variable, convert back to the original value
+             if name in categorical_variables:
+                 # Find the original categorical value from the encoding
+                 encoding = self.model.categorical_encodings.get(name, {})
+                 inv_encoding = {v: k for k, v in encoding.items()}
+                 if value in inv_encoding:
+                     value = inv_encoding[value]
+                 elif int(value) in inv_encoding:
+                     value = inv_encoding[int(value)]
+             # If this is an integer variable, ensure it is an integer
+             elif name in integer_variables:
+                 value = int(round(value))
+
+             result[name] = value
+
+         # Convert to a DataFrame
+         opt_point_df = pd.DataFrame([result])
+
+         # Get the predicted value and std at the optimum
+         pred_mean, pred_std = self.model.predict_with_std(opt_point_df)
+
+         return {
+             'x_opt': opt_point_df,
+             'value': float(pred_mean[0]),
+             'std': float(pred_std[0])
+         }
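
For orientation, here is a minimal sketch of how this new acquisition class appears intended to be driven. The BoTorchAcquisition calls follow the signatures in the code above; the SearchSpace and BoTorchModel constructor arguments and the training step are assumptions, since those classes live in other files of this release (alchemist_core/data/search_space.py, alchemist_core/models/botorch_model.py) whose diffs are not reproduced here.

# Hypothetical driver; SearchSpace/BoTorchModel setup is assumed, not shown in this diff
from alchemist_core.data.search_space import SearchSpace
from alchemist_core.models.botorch_model import BoTorchModel
from alchemist_core.acquisition.botorch_acquisition import BoTorchAcquisition

search_space = SearchSpace(...)  # constructor defined in search_space.py (not shown)
model = BoTorchModel(...)        # constructor defined in botorch_model.py (not shown)
# ... fit the model on the experiments observed so far ...

# Single suggestion using numerically stable log-EI
acq = BoTorchAcquisition(search_space, model=model, acq_func='logei', maximize=True)
next_point = acq.select_next()  # dict mapping feature name -> suggested value

# Batch of four suggestions using Monte Carlo qEI (128 Sobol samples by default)
batch_acq = BoTorchAcquisition(search_space, model=model, acq_func='qei', batch_size=4)
next_batch = batch_acq.select_next()  # list of four dicts

# Location, mean, and standard deviation of the surrogate's predicted optimum
opt = batch_acq.find_optimum()
print(opt['x_opt'], opt['value'], opt['std'])

Note that, per the comments in _create_acquisition_function, 'qei' and 'qucb' always maximize: minimizing with the batch functions requires negating the objective when the model is trained.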