alchemist-nrel 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. alchemist_core/__init__.py +2 -2
  2. alchemist_core/acquisition/botorch_acquisition.py +84 -126
  3. alchemist_core/data/experiment_manager.py +196 -20
  4. alchemist_core/models/botorch_model.py +292 -63
  5. alchemist_core/models/sklearn_model.py +175 -15
  6. alchemist_core/session.py +3532 -76
  7. alchemist_core/utils/__init__.py +3 -1
  8. alchemist_core/utils/acquisition_utils.py +60 -0
  9. alchemist_core/visualization/__init__.py +45 -0
  10. alchemist_core/visualization/helpers.py +130 -0
  11. alchemist_core/visualization/plots.py +1449 -0
  12. alchemist_nrel-0.3.2.dist-info/METADATA +185 -0
  13. {alchemist_nrel-0.3.0.dist-info → alchemist_nrel-0.3.2.dist-info}/RECORD +34 -29
  14. {alchemist_nrel-0.3.0.dist-info → alchemist_nrel-0.3.2.dist-info}/WHEEL +1 -1
  15. {alchemist_nrel-0.3.0.dist-info → alchemist_nrel-0.3.2.dist-info}/entry_points.txt +1 -1
  16. {alchemist_nrel-0.3.0.dist-info → alchemist_nrel-0.3.2.dist-info}/top_level.txt +0 -1
  17. api/example_client.py +7 -2
  18. api/main.py +3 -2
  19. api/models/requests.py +76 -1
  20. api/models/responses.py +102 -2
  21. api/routers/acquisition.py +25 -0
  22. api/routers/experiments.py +352 -11
  23. api/routers/sessions.py +195 -11
  24. api/routers/visualizations.py +6 -4
  25. api/routers/websocket.py +132 -0
  26. run_api.py → api/run_api.py +8 -7
  27. api/services/session_store.py +370 -71
  28. api/static/assets/index-B6Cf6s_b.css +1 -0
  29. api/static/assets/{index-C0_glioA.js → index-B7njvc9r.js} +223 -208
  30. api/static/index.html +2 -2
  31. ui/gpr_panel.py +11 -5
  32. ui/target_column_dialog.py +299 -0
  33. ui/ui.py +52 -5
  34. alchemist_core/models/ax_model.py +0 -159
  35. alchemist_nrel-0.3.0.dist-info/METADATA +0 -223
  36. api/static/assets/index-CB4V1LI5.css +0 -1
  37. {alchemist_nrel-0.3.0.dist-info → alchemist_nrel-0.3.2.dist-info}/licenses/LICENSE +0 -0
alchemist_core/session.py CHANGED
@@ -4,7 +4,7 @@ Optimization Session API - High-level interface for Bayesian optimization workfl
  This module provides the main entry point for using ALchemist as a headless library.
  """

- from typing import Optional, Dict, Any, List, Tuple, Callable
+ from typing import Optional, Dict, Any, List, Tuple, Callable, Union, Literal
  import pandas as pd
  import numpy as np
  import json
@@ -16,6 +16,30 @@ from alchemist_core.events import EventEmitter
  from alchemist_core.config import get_logger
  from alchemist_core.audit_log import AuditLog, SessionMetadata, AuditEntry

+ # Optional matplotlib import for visualization methods
+ try:
+     import matplotlib.pyplot as plt
+     from matplotlib.figure import Figure
+     _HAS_MATPLOTLIB = True
+ except ImportError:
+     _HAS_MATPLOTLIB = False
+     Figure = None  # Type hint placeholder
+
+ # Import visualization functions (delegates to visualization module)
+ try:
+     from alchemist_core.visualization import (
+         create_parity_plot,
+         create_contour_plot,
+         create_slice_plot,
+         create_metrics_plot,
+         create_qq_plot,
+         create_calibration_plot,
+         check_matplotlib
+     )
+     _HAS_VISUALIZATION = True
+ except ImportError:
+     _HAS_VISUALIZATION = False
+
  logger = get_logger(__name__)


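Reviewer note: the guarded imports above move the matplotlib requirement from import time to call time, so headless installs of alchemist_core keep working. A minimal standalone sketch of the same pattern (illustrative only, not code from this package; plot_values is a hypothetical name):

# Sketch of the optional-dependency guard used above.
try:
    import matplotlib.pyplot as plt
    _HAS_MATPLOTLIB = True
except ImportError:
    _HAS_MATPLOTLIB = False

def plot_values(values):
    # Fail with an actionable message at call time, not at import time
    if not _HAS_MATPLOTLIB:
        raise ImportError(
            "matplotlib is required for plotting. Install with: pip install matplotlib"
        )
    fig, ax = plt.subplots()
    ax.plot(values)
    return fig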
@@ -31,23 +55,23 @@ class OptimizationSession:
      5. Iterate

      Example:
-         >>> from alchemist_core import OptimizationSession
-         >>>
-         >>> # Create session with search space
-         >>> session = OptimizationSession()
-         >>> session.add_variable('temperature', 'real', bounds=(300, 500))
-         >>> session.add_variable('pressure', 'real', bounds=(1, 10))
-         >>> session.add_variable('catalyst', 'categorical', categories=['A', 'B', 'C'])
-         >>>
-         >>> # Load experimental data
-         >>> session.load_data('experiments.csv', target_column='yield')
-         >>>
-         >>> # Train model
-         >>> session.train_model(backend='botorch', kernel='Matern')
-         >>>
-         >>> # Suggest next experiment
-         >>> next_point = session.suggest_next(strategy='EI', goal='maximize')
-         >>> print(next_point)
+         > from alchemist_core import OptimizationSession
+         >
+         > # Create session with search space
+         > session = OptimizationSession()
+         > session.add_variable('temperature', 'real', bounds=(300, 500))
+         > session.add_variable('pressure', 'real', bounds=(1, 10))
+         > session.add_variable('catalyst', 'categorical', categories=['A', 'B', 'C'])
+         >
+         > # Load experimental data
+         > session.load_data('experiments.csv', target_column='yield')
+         >
+         > # Train model
+         > session.train_model(backend='botorch', kernel='Matern')
+         >
+         > # Suggest next experiment
+         > next_point = session.suggest_next(strategy='EI', goal='maximize')
+         > print(next_point)
      """

      def __init__(self, search_space: Optional[SearchSpace] = None,
@@ -79,10 +103,16 @@ class OptimizationSession:
          self.model_backend = None
          self.acquisition = None

+         # Staged experiments (for workflow management)
+         self.staged_experiments = []  # List of experiment dicts awaiting evaluation
+         self.last_suggestions = []    # Most recent acquisition suggestions (for UI)
+
          # Configuration
          self.config = {
              'random_state': 42,
-             'verbose': True
+             'verbose': True,
+             'auto_train': False,       # Auto-train model after adding experiments
+             'auto_train_threshold': 5  # Minimum experiments before auto-train
          }

          logger.info(f"OptimizationSession initialized: {self.metadata.session_id}")
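The two auto-train keys are described only by their inline comments; a hedged usage sketch (the key names come from the diff above, but the exact auto-train trigger logic is not visible in this hunk):

from alchemist_core import OptimizationSession

session = OptimizationSession()
# Hypothetical values; semantics inferred from the config comments above:
# retrain automatically once at least 8 experiments have been added.
session.set_config(auto_train=True, auto_train_threshold=8)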
@@ -103,8 +133,8 @@ class OptimizationSession:
              - For 'categorical': categories=[list of values] or values=[list]

          Example:
-             >>> session.add_variable('temp', 'real', bounds=(300, 500))
-             >>> session.add_variable('catalyst', 'categorical', categories=['A', 'B'])
+             > session.add_variable('temp', 'real', bounds=(300, 500))
+             > session.add_variable('catalyst', 'categorical', categories=['A', 'B'])
          """
          # Convert user-friendly API to internal format
          params = kwargs.copy()
@@ -185,29 +215,62 @@ class OptimizationSession:
      # Data Management
      # ============================================================

-     def load_data(self, filepath: str, target_column: str = 'Output',
+     def load_data(self, filepath: str, target_columns: Union[str, List[str]] = 'Output',
                    noise_column: Optional[str] = None) -> None:
          """
          Load experimental data from CSV file.

          Args:
              filepath: Path to CSV file
-             target_column: Name of target/output column (default: 'Output')
+             target_columns: Target column name(s). Can be:
+                 - String for single-objective: 'yield'
+                 - List for multi-objective: ['yield', 'selectivity']
+                 Default: 'Output'
              noise_column: Optional column with measurement noise/uncertainty

-         Example:
-             >>> session.load_data('experiments.csv', target_column='yield')
+         Examples:
+             Single-objective:
+                 >>> session.load_data('experiments.csv', target_columns='yield')
+                 >>> session.load_data('experiments.csv', target_columns=['yield'])  # also works
+
+             Multi-objective (future):
+                 >>> session.load_data('experiments.csv', target_columns=['yield', 'selectivity'])
+
+         Note:
+             If the CSV doesn't have columns matching target_columns, an error will be raised.
+             Target columns will be preserved with their original names internally.
          """
          # Load the CSV
          import pandas as pd
          df = pd.read_csv(filepath)

-         # Rename target column to 'Output' if different
-         if target_column != 'Output' and target_column in df.columns:
-             df = df.rename(columns={target_column: 'Output'})
+         # Normalize target_columns to list
+         if isinstance(target_columns, str):
+             target_columns_list = [target_columns]
+         else:
+             target_columns_list = list(target_columns)
+
+         # Validate that all target columns exist
+         missing_cols = [col for col in target_columns_list if col not in df.columns]
+         if missing_cols:
+             raise ValueError(
+                 f"Target column(s) {missing_cols} not found in CSV file. "
+                 f"Available columns: {list(df.columns)}. "
+                 f"Please specify the correct target column name(s) using the target_columns parameter."
+             )
+
+         # Warn if 'Output' column exists but user specified different target(s)
+         if 'Output' in df.columns and 'Output' not in target_columns_list:
+             logger.warning(
+                 f"CSV contains 'Output' column but you specified {target_columns_list}. "
+                 f"Using {target_columns_list} as specified."
+             )

-         # Rename noise column to 'Noise' if specified
-         if noise_column and noise_column in df.columns:
+         # Store the target column names for ExperimentManager
+         target_col_internal = target_columns_list
+
+         # Rename noise column to 'Noise' if specified and different
+         if noise_column and noise_column in df.columns and noise_column != 'Noise':
              df = df.rename(columns={noise_column: 'Noise'})

          # Save to temporary file and load via ExperimentManager
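A short usage sketch of the new validation path (assumes a hypothetical experiments.csv whose target column is named 'yield'):

from alchemist_core import OptimizationSession

session = OptimizationSession()
session.add_variable('temperature', 'real', bounds=(300, 500))

# 0.3.0 silently renamed the target column; 0.3.2 validates it instead.
session.load_data('experiments.csv', target_columns='yield')

try:
    session.load_data('experiments.csv', target_columns='Yield')  # wrong case
except ValueError as err:
    print(err)  # message names the missing column and lists the available ones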
@@ -217,10 +280,12 @@ class OptimizationSession:
              temp_path = tmp.name

          try:
-             self.experiment_manager = ExperimentManager.from_csv(
-                 temp_path,
-                 self.search_space
+             # Create ExperimentManager with the specified target column(s)
+             self.experiment_manager = ExperimentManager(
+                 search_space=self.search_space,
+                 target_columns=target_col_internal
              )
+             self.experiment_manager.load_from_csv(temp_path)
          finally:
              # Clean up temp file
              import os
@@ -245,7 +310,7 @@ class OptimizationSession:
              reason: Reason for this experiment (e.g., 'Manual', 'Expected Improvement')

          Example:
-             >>> session.add_experiment(
+             > session.add_experiment(
              ...     inputs={'temperature': 350, 'catalyst': 'A'},
              ...     output=0.85,
              ...     reason='Manual'
@@ -288,6 +353,132 @@ class OptimizationSession:
              'feature_names': list(X.columns)
          }

+     # ============================================================
+     # Staged Experiments (Workflow Management)
+     # ============================================================
+
+     def add_staged_experiment(self, inputs: Dict[str, Any]) -> None:
+         """
+         Add an experiment to the staging area (awaiting evaluation).
+
+         Staged experiments are typically suggested by acquisition functions
+         but not yet evaluated. They can be retrieved, evaluated externally,
+         and then added to the dataset with add_experiment().
+
+         Args:
+             inputs: Dictionary mapping variable names to values
+
+         Example:
+             > # Generate suggestions and stage them
+             > suggestions = session.suggest_next(n_suggestions=3)
+             > for point in suggestions.to_dict('records'):
+             >     session.add_staged_experiment(point)
+             >
+             > # Later, evaluate and add
+             > staged = session.get_staged_experiments()
+             > for point in staged:
+             >     output = run_experiment(**point)
+             >     session.add_experiment(point, output=output)
+             > session.clear_staged_experiments()
+         """
+         self.staged_experiments.append(inputs)
+         logger.debug(f"Staged experiment: {inputs}")
+         self.events.emit('experiment_staged', {'inputs': inputs})
+
+     def get_staged_experiments(self) -> List[Dict[str, Any]]:
+         """
+         Get all staged experiments awaiting evaluation.
+
+         Returns:
+             List of experiment input dictionaries
+         """
+         return self.staged_experiments.copy()
+
+     def clear_staged_experiments(self) -> int:
+         """
+         Clear all staged experiments.
+
+         Returns:
+             Number of experiments cleared
+         """
+         count = len(self.staged_experiments)
+         self.staged_experiments.clear()
+         if count > 0:
+             logger.info(f"Cleared {count} staged experiments")
+             self.events.emit('staged_experiments_cleared', {'count': count})
+         return count
+
+     def move_staged_to_experiments(self, outputs: List[float],
+                                    noises: Optional[List[float]] = None,
+                                    iteration: Optional[int] = None,
+                                    reason: Optional[str] = None) -> int:
+         """
+         Evaluate staged experiments and add them to the dataset in batch.
+
+         Convenience method that pairs staged inputs with outputs and adds
+         them all to the experiment manager, then clears the staging area.
+
+         Args:
+             outputs: List of output values (must match length of staged experiments)
+             noises: Optional list of measurement uncertainties
+             iteration: Iteration number for all experiments (auto-assigned if None)
+             reason: Reason for these experiments (e.g., 'Expected Improvement')
+
+         Returns:
+             Number of experiments added
+
+         Example:
+             > # Stage some experiments
+             > session.add_staged_experiment({'x': 1.0, 'y': 2.0})
+             > session.add_staged_experiment({'x': 3.0, 'y': 4.0})
+             >
+             > # Evaluate them
+             > outputs = [run_experiment(**point) for point in session.get_staged_experiments()]
+             >
+             > # Add to dataset and clear staging
+             > session.move_staged_to_experiments(outputs, reason='LogEI')
+         """
+         if len(outputs) != len(self.staged_experiments):
+             raise ValueError(
+                 f"Number of outputs ({len(outputs)}) must match "
+                 f"number of staged experiments ({len(self.staged_experiments)})"
+             )
+
+         if noises is not None and len(noises) != len(self.staged_experiments):
+             raise ValueError(
+                 f"Number of noise values ({len(noises)}) must match "
+                 f"number of staged experiments ({len(self.staged_experiments)})"
+             )
+
+         # Add each experiment
+         for i, inputs in enumerate(self.staged_experiments):
+             noise = noises[i] if noises is not None else None
+
+             # Strip any metadata fields (prefixed with _) from inputs
+             # These are used for UI/workflow tracking but shouldn't be stored as variables
+             clean_inputs = {k: v for k, v in inputs.items() if not k.startswith('_')}
+
+             # Use per-experiment reason if stored in _reason, otherwise use batch reason
+             exp_reason = inputs.get('_reason', reason)
+
+             self.add_experiment(
+                 inputs=clean_inputs,
+                 output=outputs[i],
+                 noise=noise,
+                 iteration=iteration,
+                 reason=exp_reason
+             )
+
+         count = len(self.staged_experiments)
+         self.clear_staged_experiments()
+
+         logger.info(f"Moved {count} staged experiments to dataset")
+         return count
+
+     # ============================================================
+     # Initial Design Generation
+     # ============================================================
+
      def generate_initial_design(
          self,
          method: str = "lhs",
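Putting the staging API together, a sketch based on the docstrings above (run_experiment is a stand-in for a real measurement; the '_reason' key demonstrates the per-experiment metadata handling in move_staged_to_experiments):

# Stage two candidate points, tagging each with the acquisition that produced it.
# Underscore-prefixed keys are stripped before storage; '_reason' overrides the
# batch-level reason argument.
session.add_staged_experiment({'x': 1.0, 'y': 2.0, '_reason': 'LogEI'})
session.add_staged_experiment({'x': 3.0, 'y': 4.0, '_reason': 'qEI'})

def run_experiment(x, y):
    # Stand-in for an actual experiment
    return x + y

outputs = [
    run_experiment(**{k: v for k, v in point.items() if not k.startswith('_')})
    for point in session.get_staged_experiments()
]
session.move_staged_to_experiments(outputs)  # staging area is cleared afterwards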
@@ -320,16 +511,16 @@ class OptimizationSession:
              List of dictionaries with variable names and values (no outputs)

          Example:
-             >>> # Generate initial design
-             >>> points = session.generate_initial_design('lhs', n_points=10)
-             >>>
-             >>> # Run experiments and add results
-             >>> for point in points:
-             >>>     output = run_experiment(**point)  # Your experiment function
-             >>>     session.add_experiment(point, output=output)
-             >>>
-             >>> # Now ready to train model
-             >>> session.train_model()
+             > # Generate initial design
+             > points = session.generate_initial_design('lhs', n_points=10)
+             >
+             > # Run experiments and add results
+             > for point in points:
+             >     output = run_experiment(**point)  # Your experiment function
+             >     session.add_experiment(point, output=output)
+             >
+             > # Now ready to train model
+             > session.train_model()
          """
          if len(self.search_space.variables) == 0:
              raise ValueError(
@@ -389,8 +580,8 @@
              Dictionary with training results and hyperparameters

          Example:
-             >>> results = session.train_model(backend='botorch', kernel='Matern')
-             >>> print(results['metrics'])
+             > results = session.train_model(backend='botorch', kernel='Matern')
+             > print(results['metrics'])
          """
          df = self.experiment_manager.get_data()
          if df is None or df.empty:
@@ -410,6 +601,27 @@
          # Extract calibration_enabled before passing kwargs to model constructor
          calibration_enabled = kwargs.pop('calibration_enabled', False)

+         # Validate and map transform types based on backend
+         # BoTorch uses: 'normalize', 'standardize'
+         # Sklearn uses: 'minmax', 'standard', 'robust', 'none'
+         if self.model_backend == 'sklearn':
+             # Map BoTorch transform types to sklearn equivalents
+             transform_map = {
+                 'normalize': 'minmax',      # BoTorch normalize → sklearn minmax
+                 'standardize': 'standard',  # BoTorch standardize → sklearn standard
+                 'none': 'none'
+             }
+             if 'input_transform_type' in kwargs:
+                 original = kwargs['input_transform_type']
+                 kwargs['input_transform_type'] = transform_map.get(original, original)
+                 if original != kwargs['input_transform_type']:
+                     logger.debug(f"Mapped input transform '{original}' → '{kwargs['input_transform_type']}' for sklearn")
+             if 'output_transform_type' in kwargs:
+                 original = kwargs['output_transform_type']
+                 kwargs['output_transform_type'] = transform_map.get(original, original)
+                 if original != kwargs['output_transform_type']:
+                     logger.debug(f"Mapped output transform '{original}' → '{kwargs['output_transform_type']}' for sklearn")
+
          # Import appropriate model class
          if self.model_backend == 'sklearn':
              from alchemist_core.models.sklearn_model import SklearnModel
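The mapping means callers can pass BoTorch-style transform names regardless of backend; a hedged sketch:

# Both calls request the same preprocessing; on the sklearn backend the names
# are translated via transform_map ('normalize' -> 'minmax',
# 'standardize' -> 'standard') before reaching the model constructor.
session.train_model(backend='botorch', kernel='Matern',
                    input_transform_type='normalize',
                    output_transform_type='standardize')
session.train_model(backend='sklearn', kernel='Matern',
                    input_transform_type='normalize',
                    output_transform_type='standardize')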
@@ -428,6 +640,15 @@
          elif self.model_backend == 'botorch':
              from alchemist_core.models.botorch_model import BoTorchModel

+             # Apply sensible defaults for BoTorch if not explicitly overridden
+             # Input normalization and output standardization are critical for performance
+             if 'input_transform_type' not in kwargs:
+                 kwargs['input_transform_type'] = 'normalize'
+                 logger.debug("Auto-applying input normalization for BoTorch model")
+             if 'output_transform_type' not in kwargs:
+                 kwargs['output_transform_type'] = 'standardize'
+                 logger.debug("Auto-applying output standardization for BoTorch model")
+
              # Build kernel options - BoTorch uses 'cont_kernel_type' not 'kernel_type'
              kernel_options = {'cont_kernel_type': kernel}
              if kernel_params:
@@ -439,6 +660,18 @@
                      if k != 'nu':  # Already handled above
                          kernel_options[k] = v

+             # Identify categorical variable indices for BoTorch
+             # Only compute if not already provided in kwargs (e.g., from UI)
+             if 'cat_dims' not in kwargs:
+                 cat_dims = []
+                 categorical_var_names = self.search_space.get_categorical_variables()
+                 if categorical_var_names:
+                     # Get the column order from search space
+                     all_var_names = self.search_space.get_variable_names()
+                     cat_dims = [i for i, name in enumerate(all_var_names) if name in categorical_var_names]
+                     logger.debug(f"Categorical dimensions for BoTorch: {cat_dims} (variables: {categorical_var_names})")
+                 kwargs['cat_dims'] = cat_dims if cat_dims else None
+
              self.model = BoTorchModel(
                  kernel_options=kernel_options,
                  random_state=self.config['random_state'],
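A standalone sketch of the cat_dims derivation above (hypothetical variable names; the real code pulls them from SearchSpace):

# Positional indices of categorical variables within the model's column order.
all_var_names = ['temperature', 'pressure', 'catalyst']  # hypothetical order
categorical_var_names = ['catalyst']

cat_dims = [i for i, name in enumerate(all_var_names) if name in categorical_var_names]
print(cat_dims)  # [2] -- forwarded to BoTorchModel as cat_dims, or None if empty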
@@ -589,21 +822,57 @@
          Suggest next experiment(s) using acquisition function.

          Args:
-             strategy: Acquisition strategy ('EI', 'PI', 'UCB', 'qEI', etc.)
+             strategy: Acquisition strategy
+                 - 'EI': Expected Improvement
+                 - 'PI': Probability of Improvement
+                 - 'UCB': Upper Confidence Bound
+                 - 'LogEI': Log Expected Improvement (BoTorch only)
+                 - 'LogPI': Log Probability of Improvement (BoTorch only)
+                 - 'qEI', 'qUCB', 'qIPV': Batch acquisition (BoTorch only)
              goal: 'maximize' or 'minimize'
              n_suggestions: Number of suggestions (batch acquisition)
-             **kwargs: Strategy-specific parameters
+             **kwargs: Strategy-specific parameters:
+
+                 **Sklearn backend:**
+                 - xi (float): Exploration parameter for EI/PI (default: 0.01)
+                   Higher values favor exploration over exploitation
+                 - kappa (float): Exploration parameter for UCB (default: 1.96)
+                   Higher values favor exploration (typically 1.96 for 95% CI)
+
+                 **BoTorch backend:**
+                 - beta (float): Exploration parameter for UCB (default: 0.5)
+                   Trades off mean vs. variance (higher = more exploration)
+                 - mc_samples (int): Monte Carlo samples for batch acquisition (default: 128)

          Returns:
              DataFrame with suggested experiment(s)

-         Example:
-             >>> next_point = session.suggest_next(strategy='EI', goal='maximize')
-             >>> print(next_point)
+         Examples:
+             >>> # Expected Improvement with custom exploration
+             >>> next_point = session.suggest_next(strategy='EI', goal='maximize', xi=0.05)
+
+             >>> # Upper Confidence Bound with high exploration
+             >>> next_point = session.suggest_next(strategy='UCB', goal='maximize', kappa=2.5)
+
+             >>> # BoTorch UCB with beta parameter
+             >>> next_point = session.suggest_next(strategy='UCB', goal='maximize', beta=1.0)
          """
          if self.model is None:
              raise ValueError("No trained model available. Use train_model() first.")

+         # Validate and log kwargs
+         supported_kwargs = self._get_supported_kwargs(strategy, self.model_backend)
+         if kwargs:
+             unsupported = set(kwargs.keys()) - supported_kwargs
+             if unsupported:
+                 logger.warning(
+                     f"Unsupported parameters for {strategy} with {self.model_backend} backend: "
+                     f"{unsupported}. Supported parameters: {supported_kwargs or 'none'}"
+                 )
+             used_kwargs = {k: v for k, v in kwargs.items() if k in supported_kwargs}
+             if used_kwargs:
+                 logger.info(f"Using acquisition parameters: {used_kwargs}")
+
          # Import appropriate acquisition class
          if self.model_backend == 'sklearn':
              from alchemist_core.acquisition.skopt_acquisition import SkoptAcquisition
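With this validation in place, a mistyped or backend-mismatched parameter is dropped with a warning instead of failing silently. A hedged sketch (log text follows the f-strings above):

# 'kappa' is an skopt-style parameter; on the botorch backend it is not in the
# supported set for UCB, so it is logged as unsupported and ignored:
session.suggest_next(strategy='UCB', goal='maximize', kappa=2.5)

# 'beta' is supported for botorch UCB and is passed through:
session.suggest_next(strategy='UCB', goal='maximize', beta=1.0)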
@@ -629,10 +898,14 @@
                  search_space=self.search_space,
                  acq_func=strategy,
                  maximize=(goal.lower() == 'maximize'),
-                 batch_size=n_suggestions
+                 batch_size=n_suggestions,
+                 acq_func_kwargs=kwargs  # FIX: Pass kwargs to BoTorch acquisition!
              )

-         logger.info(f"Running acquisition: {strategy} ({goal})")
+         # Check if this is a pure exploration acquisition (doesn't use best_f)
+         is_exploratory = strategy.lower() in ['qnipv', 'qipv']
+         goal_desc = 'pure exploration' if is_exploratory else goal
+         logger.info(f"Running acquisition: {strategy} ({goal_desc})")
          self.events.emit('acquisition_started', {'strategy': strategy, 'goal': goal})

          # Get suggestion
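For the variance-reduction strategies the goal label is cosmetic; a sketch, assuming the backend accepts the 'qNIPV' spelling that _get_supported_kwargs recognizes:

# qNIPV selects points to shrink posterior variance; best_f and the goal
# argument do not steer it, and the log line reads '(pure exploration)'.
batch = session.suggest_next(strategy='qNIPV', n_suggestions=4, mc_samples=256)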
@@ -663,14 +936,119 @@
          logger.info(f"Suggested point: {suggestion_dict}")
          self.events.emit('acquisition_completed', {'suggestion': suggestion_dict})

-         # Cache suggestion info for audit log
+         # Store suggestions for UI/API access
+         self.last_suggestions = result_df.to_dict('records')
+
+         # Cache suggestion info for audit log and visualization
          self._last_acquisition_info = {
              'strategy': strategy,
              'goal': goal,
              'parameters': kwargs
          }
+         self._last_acq_func = strategy.lower()
+         self._last_goal = goal.lower()
+
+         return result_df
+
+     def _get_supported_kwargs(self, strategy: str, backend: str) -> set:
+         """
+         Return supported kwargs for given acquisition strategy and backend.
+
+         Args:
+             strategy: Acquisition strategy name
+             backend: Model backend ('sklearn' or 'botorch')
+
+         Returns:
+             Set of supported kwarg names
+         """
+         strategy_lower = strategy.lower()
+
+         if backend == 'sklearn':
+             if strategy_lower in ['ei', 'pi', 'expectedimprovement', 'probabilityofimprovement']:
+                 return {'xi'}
+             elif strategy_lower in ['ucb', 'lcb', 'upperconfidencebound', 'lowerconfidencebound']:
+                 return {'kappa'}
+             elif strategy_lower == 'gp_hedge':
+                 return {'xi', 'kappa'}
+         elif backend == 'botorch':
+             if strategy_lower in ['ei', 'logei', 'pi', 'logpi', 'expectedimprovement', 'probabilityofimprovement']:
+                 return set()  # No additional parameters for these
+             elif strategy_lower in ['ucb', 'upperconfidencebound']:
+                 return {'beta'}
+             elif strategy_lower in ['qei', 'qucb']:
+                 return {'mc_samples', 'beta'}
+             elif strategy_lower in ['qipv', 'qnipv']:
+                 return {'mc_samples', 'n_mc_points'}
+
+         return set()
+
+     def find_optimum(self, goal: str = 'maximize', n_grid_points: int = 10000) -> Dict[str, Any]:
+         """
+         Find the point where the model predicts the optimal value.
+
+         Uses a grid search approach to find the point with the best predicted
+         value (maximum or minimum) across the search space. This is useful for
+         identifying the model's predicted optimum independent of acquisition
+         function suggestions.
+
+         Args:
+             goal: 'maximize' or 'minimize' - which direction to optimize
+             n_grid_points: Target number of grid points for search (default: 10000)
+
+         Returns:
+             Dictionary with:
+             - 'x_opt': DataFrame with optimal point (single row)
+             - 'value': Predicted value at optimum
+             - 'std': Uncertainty (standard deviation) at optimum
+
+         Example:
+             >>> # Find predicted maximum
+             >>> result = session.find_optimum(goal='maximize')
+             >>> print(f"Optimum at: {result['x_opt']}")
+             >>> print(f"Predicted value: {result['value']:.2f} ± {result['std']:.2f}")
+
+             >>> # Find predicted minimum
+             >>> result = session.find_optimum(goal='minimize')
+
+             >>> # Use finer grid for more accuracy
+             >>> result = session.find_optimum(goal='maximize', n_grid_points=50000)
+
+         Note:
+             - Requires a trained model
+             - Uses the same grid-based approach as regret plot for consistency
+             - Handles categorical variables correctly through proper encoding
+             - Grid size is target value; actual number depends on dimensionality
+         """
+         if self.model is None:
+             raise ValueError("No trained model available. Use train_model() first.")
+
+         # Generate prediction grid in ORIGINAL variable space (not encoded)
+         grid = self._generate_prediction_grid(n_grid_points)
+
+         # Use model's predict method which handles encoding internally
+         means, stds = self.predict(grid)
+
+         # Find argmax or argmin
+         if goal.lower() == 'maximize':
+             best_idx = np.argmax(means)
+         else:
+             best_idx = np.argmin(means)
+
+         # Extract the optimal point (already in original variable space)
+         opt_point_df = grid.iloc[[best_idx]].reset_index(drop=True)
+
+         result = {
+             'x_opt': opt_point_df,
+             'value': float(means[best_idx]),
+             'std': float(stds[best_idx])
+         }

-         return result_df  # ============================================================
+         logger.info(f"Found optimum: {result['x_opt'].to_dict('records')[0]}")
+         logger.info(f"Predicted value: {result['value']:.4f} ± {result['std']:.4f}")
+
+         return result
+
+     # ============================================================
      # Predictions
      # ============================================================

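_generate_prediction_grid is referenced but not shown in this diff. A hypothetical sketch of such a helper, to illustrate why the realized grid size depends on dimensionality (all names here are illustrative, not the package's implementation):

import itertools
import numpy as np
import pandas as pd

def generate_prediction_grid(variables, n_grid_points=10000):
    # Hypothetical stand-in: spread the point budget across continuous
    # dimensions and enumerate categorical values exhaustively.
    cats = [v for v in variables if v['type'] == 'categorical']
    conts = [v for v in variables if v['type'] in ('real', 'integer')]
    n_cat_combos = int(np.prod([len(v['values']) for v in cats])) if cats else 1
    per_dim = max(2, round((n_grid_points / n_cat_combos) ** (1 / max(len(conts), 1))))
    axes = [np.linspace(v['min'], v['max'], per_dim) for v in conts]
    axes += [v['values'] for v in cats]
    names = [v['name'] for v in conts] + [v['name'] for v in cats]
    rows = list(itertools.product(*axes))
    return pd.DataFrame(rows, columns=names)

grid = generate_prediction_grid([
    {'name': 'temperature', 'type': 'real', 'min': 300, 'max': 500},
    {'name': 'catalyst', 'type': 'categorical', 'values': ['A', 'B']},
])
print(len(grid))  # ~n_grid_points, rounded by the per-dimension resolution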
@@ -685,11 +1063,11 @@ class OptimizationSession:
              Tuple of (predictions, uncertainties)

          Example:
-             >>> test_points = pd.DataFrame({
+             > test_points = pd.DataFrame({
              ...     'temperature': [350, 400],
              ...     'catalyst': ['A', 'B']
              ... })
-             >>> predictions, uncertainties = session.predict(test_points)
+             > predictions, uncertainties = session.predict(test_points)
          """
          if self.model is None:
              raise ValueError("No trained model available. Use train_model() first.")
@@ -722,9 +1100,9 @@
              callback: Callback function

          Example:
-             >>> def on_training_done(data):
+             > def on_training_done(data):
              ...     print(f"Training completed with R² = {data['metrics']['r2']}")
-             >>> session.on('training_completed', on_training_done)
+             > session.on('training_completed', on_training_done)
          """
          self.events.on(event, callback)

@@ -740,7 +1118,7 @@
              **kwargs: Configuration parameters to update

          Example:
-             >>> session.set_config(random_state=123, verbose=False)
+             > session.set_config(random_state=123, verbose=False)
          """
          self.config.update(kwargs)
          logger.info(f"Updated config: {kwargs}")
@@ -764,8 +1142,8 @@
              Created AuditEntry

          Example:
-             >>> session.add_experiment({'temp': 100, 'pressure': 5}, output=85.2)
-             >>> session.lock_data(notes="Initial screening dataset")
+             > session.add_experiment({'temp': 100, 'pressure': 5}, output=85.2)
+             > session.lock_data(notes="Initial screening dataset")
          """
          # Set search space in audit log (once)
          if self.audit_log.search_space_definition is None:
@@ -805,8 +1183,8 @@
              ValueError: If no model has been trained

          Example:
-             >>> session.train_model(backend='sklearn', kernel='matern')
-             >>> session.lock_model(notes="Best cross-validation performance")
+             > session.train_model(backend='sklearn', kernel='matern')
+             > session.lock_model(notes="Best cross-validation performance")
          """
          if self.model is None:
              raise ValueError("No trained model available. Use train_model() first.")
@@ -898,8 +1276,8 @@
              Created AuditEntry

          Example:
-             >>> suggestions = session.suggest_next(strategy='EI', n_suggestions=3)
-             >>> session.lock_acquisition(
+             > suggestions = session.suggest_next(strategy='EI', n_suggestions=3)
+             > session.lock_acquisition(
              ...     strategy='EI',
              ...     parameters={'xi': 0.01, 'goal': 'maximize'},
              ...     suggestions=suggestions,
@@ -967,7 +1345,7 @@
              filepath: Path to save session file (.json extension recommended)

          Example:
-             >>> session.save_session("~/ALchemist_Sessions/catalyst_study_nov2025.json")
+             > session.save_session("~/ALchemist_Sessions/catalyst_study_nov2025.json")
          """
          filepath = Path(filepath)

@@ -1054,19 +1432,68 @@

          return content

-     @staticmethod
-     def load_session(filepath: str, retrain_on_load: bool = True) -> 'OptimizationSession':
+     def load_session(self, filepath: str = None, retrain_on_load: bool = True) -> 'OptimizationSession':
          """
          Load session from JSON file.

+         This method works both as a static method (creating a new session) and as an
+         instance method (loading into existing session):
+
+         Static usage (returns new session):
+             > session = OptimizationSession.load_session("my_session.json")
+
+         Instance usage (loads into existing session):
+             > session = OptimizationSession()
+             > session.load_session("my_session.json")
+             > # session.experiment_manager.df is now populated
+
          Args:
-             filepath: Path to session file
+             filepath: Path to session file (required when called as static method,
+                 can be self when called as instance method)
+             retrain_on_load: Whether to retrain model if config exists (default: True)

          Returns:
-             OptimizationSession with restored state
+             OptimizationSession (new or modified instance)
+         """
+         # Detect if called as instance method or static method
+         # When called as static method: self is actually the filepath string
+         # When called as instance method: self is an OptimizationSession instance
+         if isinstance(self, OptimizationSession):
+             # Instance method: load into this session
+             if filepath is None:
+                 raise ValueError("filepath is required when calling as instance method")

-         Example:
-             >>> session = OptimizationSession.load_session("my_session.json")
+             # Load from static implementation
+             loaded_session = OptimizationSession._load_session_impl(filepath, retrain_on_load)
+
+             # Copy all attributes from loaded session to this instance
+             self.search_space = loaded_session.search_space
+             self.experiment_manager = loaded_session.experiment_manager
+             self.metadata = loaded_session.metadata
+             self.audit_log = loaded_session.audit_log
+             self.config = loaded_session.config
+             self.model = loaded_session.model
+             self.model_backend = loaded_session.model_backend
+             self.acquisition = loaded_session.acquisition
+             self.staged_experiments = loaded_session.staged_experiments
+             self.last_suggestions = loaded_session.last_suggestions
+
+             # Don't copy events emitter - keep the original
+             logger.info(f"Loaded session data into current instance from {filepath}")
+             self.events.emit('session_loaded', {'filepath': str(filepath)})
+
+             return self
+         else:
+             # Static method: self is actually the filepath, retrain_on_load is in filepath param
+             actual_filepath = self
+             actual_retrain = filepath if filepath is not None else True
+             return OptimizationSession._load_session_impl(actual_filepath, actual_retrain)
+
+     @staticmethod
+     def _load_session_impl(filepath: str, retrain_on_load: bool = True) -> 'OptimizationSession':
+         """
+         Internal implementation for loading session from file.
+         This always creates and returns a new session.
          """
          filepath = Path(filepath)

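The isinstance check on self is what lets one def serve both call styles; a minimal standalone sketch of the dispatch (illustrative class, not ALchemist code):

class Loader:
    def load(self, filepath=None):
        # Bound call: self is a Loader. Unbound call Loader.load("f.json"):
        # self receives the path string, so dispatch on its type.
        if isinstance(self, Loader):
            if filepath is None:
                raise ValueError("filepath is required")
            print(f"loading {filepath} into existing instance")
            return self
        return Loader._load_impl(self)

    @staticmethod
    def _load_impl(filepath):
        print(f"creating new instance from {filepath}")
        return Loader()

Loader.load("state.json")    # static-style: a new object is returned
Loader().load("state.json")  # instance-style: loads into the existing object

One consequence of this pattern: static-style callers must pass the path positionally, since it arrives through the self slot; Loader.load(filepath="state.json") would raise a TypeError.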
@@ -1156,7 +1583,7 @@
              tags: New tags (optional)

          Example:
-             >>> session.update_metadata(
+             > session.update_metadata(
              ...     name="Catalyst Screening - Final",
              ...     description="Optimized Pt/Pd ratios",
              ...     tags=["catalyst", "platinum", "palladium", "final"]
@@ -1188,8 +1615,3037 @@ class OptimizationSession:
              **kwargs: Configuration parameters to update

          Example:
-             >>> session.set_config(random_state=123, verbose=False)
+             > session.set_config(random_state=123, verbose=False)
          """
          self.config.update(kwargs)
          logger.info(f"Updated config: {kwargs}")
-
+
+
+     # ============================================================
+     # Visualization Methods (Notebook Support)
+     # ============================================================
+
+     def _check_matplotlib(self) -> None:
+         """Check if matplotlib is available for plotting."""
+         if _HAS_VISUALIZATION:
+             check_matplotlib()  # Use visualization module's check
+         elif not _HAS_MATPLOTLIB:
+             raise ImportError(
+                 "matplotlib is required for visualization methods. "
+                 "Install with: pip install matplotlib"
+             )
+
+     def _check_model_trained(self) -> None:
+         """Check if model is trained before plotting."""
+         if self.model is None:
+             raise ValueError(
+                 "Model not trained. Call train_model() before creating visualizations."
+             )
+
+     def _check_cv_results(self, use_calibrated: bool = False) -> Dict[str, np.ndarray]:
+         """
+         Get CV results from model, handling both calibrated and uncalibrated.
+
+         Args:
+             use_calibrated: Whether to use calibrated results if available
+
+         Returns:
+             Dictionary with y_true, y_pred, y_std arrays
+         """
+         self._check_model_trained()
+
+         # Check for calibrated results first if requested
+         if use_calibrated and hasattr(self.model, 'cv_cached_results_calibrated'):
+             if self.model.cv_cached_results_calibrated is not None:
+                 return self.model.cv_cached_results_calibrated
+
+         # Fall back to uncalibrated results
+         if hasattr(self.model, 'cv_cached_results'):
+             if self.model.cv_cached_results is not None:
+                 return self.model.cv_cached_results
+
+         raise ValueError(
+             "No CV results available. Model must be trained with cross-validation."
+         )
+
+     def plot_parity(
+         self,
+         use_calibrated: bool = False,
+         sigma_multiplier: float = 1.96,
+         figsize: Tuple[float, float] = (8, 6),
+         dpi: int = 100,
+         title: Optional[str] = None,
+         show_metrics: bool = True,
+         show_error_bars: bool = True
+     ) -> Figure:  # pyright: ignore[reportInvalidTypeForm]
+         """
+         Create parity plot of actual vs predicted values from cross-validation.
+
+         This plot shows how well the model's predictions match the actual experimental
+         values, with optional error bars indicating prediction uncertainty.
+
+         Args:
+             use_calibrated: Use calibrated uncertainty estimates if available
+             sigma_multiplier: Error bar size (1.96 = 95% CI, 1.0 = 68% CI, 2.58 = 99% CI)
+             figsize: Figure size as (width, height) in inches
+             dpi: Dots per inch for figure resolution
+             title: Custom title (default: auto-generated with metrics)
+             show_metrics: Include RMSE, MAE, R² in title
+             show_error_bars: Display uncertainty error bars
+
+         Returns:
+             matplotlib Figure object (displays inline in Jupyter)
+
+         Example:
+             >>> fig = session.plot_parity()
+             >>> fig.show()  # In notebooks, displays automatically
+
+             >>> # With custom styling
+             >>> fig = session.plot_parity(
+             ...     sigma_multiplier=2.58,  # 99% confidence interval
+             ...     figsize=(10, 8),
+             ...     dpi=150
+             ... )
+             >>> fig.savefig('parity.png', bbox_inches='tight')
+
+         Note:
+             Requires model to be trained with cross-validation (default behavior).
+             Error bars are only shown if model provides uncertainty estimates.
+         """
+         self._check_matplotlib()
+         self._check_model_trained()
+
+         # Get CV results
+         cv_results = self._check_cv_results(use_calibrated)
+         y_true = cv_results['y_true']
+         y_pred = cv_results['y_pred']
+         y_std = cv_results.get('y_std', None)
+
+         # Delegate to visualization module
+         fig, ax = create_parity_plot(
+             y_true=y_true,
+             y_pred=y_pred,
+             y_std=y_std,
+             sigma_multiplier=sigma_multiplier,
+             figsize=figsize,
+             dpi=dpi,
+             title=title,
+             show_metrics=show_metrics,
+             show_error_bars=show_error_bars
+         )
+
+         logger.info("Generated parity plot")
+         return fig
+
+     def plot_slice(
+         self,
+         x_var: str,
+         fixed_values: Optional[Dict[str, Any]] = None,
+         n_points: int = 100,
+         show_uncertainty: Union[bool, List[float]] = True,
+         show_experiments: bool = True,
+         figsize: Tuple[float, float] = (8, 6),
+         dpi: int = 100,
+         title: Optional[str] = None
+     ) -> Figure:  # pyright: ignore[reportInvalidTypeForm]
+         """
+         Create 1D slice plot showing model predictions along one variable.
+
+         Visualizes how the model's prediction changes as one variable is varied
+         while all other variables are held constant. Shows prediction mean and
+         optional uncertainty bands.
+
+         Args:
+             x_var: Variable name to vary along X axis (must be 'real' or 'integer')
+             fixed_values: Dict of {var_name: value} for other variables.
+                 If not provided, uses midpoint for real/integer,
+                 first category for categorical.
+             n_points: Number of points to evaluate along the slice
+             show_uncertainty: Show uncertainty bands. Can be:
+                 - True: Show ±1σ and ±2σ bands (default)
+                 - False: No uncertainty bands
+                 - List[float]: Custom sigma values, e.g., [1.0, 2.0, 3.0] for ±1σ, ±2σ, ±3σ
+             show_experiments: Plot experimental data points as scatter
+             figsize: Figure size as (width, height) in inches
+             dpi: Dots per inch for figure resolution
+             title: Custom title (default: auto-generated)
+
+         Returns:
+             matplotlib Figure object
+
+         Example:
+             >>> # With custom uncertainty bands (±1σ, ±2σ, ±3σ)
+             >>> fig = session.plot_slice(
+             ...     'temperature',
+             ...     fixed_values={'pressure': 5.0, 'catalyst': 'Pt'},
+             ...     show_uncertainty=[1.0, 2.0, 3.0]
+             ... )
+             >>> fig.savefig('slice.png', dpi=300)
+
+         Note:
+             - Model must be trained before plotting
+             - Uncertainty bands require model to support std predictions
+         """
+         self._check_matplotlib()
+         self._check_model_trained()
+
+         if fixed_values is None:
+             fixed_values = {}
+
+         # Get variable info
+         var_names = self.search_space.get_variable_names()
+         if x_var not in var_names:
+             raise ValueError(f"Variable '{x_var}' not in search space")
+
+         # Get x variable definition
+         x_var_def = next(v for v in self.search_space.variables if v['name'] == x_var)
+
+         if x_var_def['type'] not in ['real', 'integer']:
+             raise ValueError(f"Variable '{x_var}' must be 'real' or 'integer' type for slice plot")
+
+         # Create range for x variable
+         x_min, x_max = x_var_def['min'], x_var_def['max']
+         x_values = np.linspace(x_min, x_max, n_points)
+
+         # Build prediction data with fixed values
+         slice_data = {x_var: x_values}
+
+         for var in self.search_space.variables:
+             var_name = var['name']
+             if var_name == x_var:
+                 continue
+
+             if var_name in fixed_values:
+                 slice_data[var_name] = fixed_values[var_name]
+             else:
+                 # Use default value
+                 if var['type'] in ['real', 'integer']:
+                     slice_data[var_name] = (var['min'] + var['max']) / 2
+                 elif var['type'] == 'categorical':
+                     slice_data[var_name] = var['values'][0]
+
+         # Create DataFrame with correct column order
+         if hasattr(self.model, 'original_feature_names') and self.model.original_feature_names:
+             column_order = self.model.original_feature_names
+         else:
+             column_order = self.search_space.get_variable_names()
+
+         slice_df = pd.DataFrame(slice_data, columns=column_order)
+
+         # Get predictions with uncertainty
+         predictions, std = self.predict(slice_df)
+
+         # Prepare experimental data for plotting
+         exp_x = None
+         exp_y = None
+         if show_experiments and len(self.experiment_manager.df) > 0:
+             df = self.experiment_manager.df
+
+             # Filter points that match the fixed values
+             mask = pd.Series([True] * len(df))
+             for var_name, fixed_val in fixed_values.items():
+                 if var_name in df.columns:
+                     # For numerical values, allow small tolerance
+                     if isinstance(fixed_val, (int, float)):
+                         mask &= np.abs(df[var_name] - fixed_val) < 1e-6
+                     else:
+                         mask &= df[var_name] == fixed_val
+
+             if mask.any():
+                 filtered_df = df[mask]
+                 exp_x = filtered_df[x_var].values
+                 exp_y = filtered_df['Output'].values
+
+         # Generate title if not provided
+         if title is None:
+             if fixed_values:
+                 fixed_str = ', '.join([f'{k}={v}' for k, v in fixed_values.items()])
+                 title = f"1D Slice: {x_var}\n({fixed_str})"
+             else:
+                 title = f"1D Slice: {x_var}"
+
+         # Delegate to visualization module
+         # Handle show_uncertainty parameter conversion
+         sigma_bands = None
+         if show_uncertainty is not False:
+             if isinstance(show_uncertainty, bool):
+                 # Default: [1.0, 2.0]
+                 sigma_bands = [1.0, 2.0] if show_uncertainty else None
+             else:
+                 # Custom list of sigma values
+                 sigma_bands = show_uncertainty
+
+         fig, ax = create_slice_plot(
+             x_values=x_values,
+             predictions=predictions,
+             x_var=x_var,
+             std=std,
+             sigma_bands=sigma_bands,
+             exp_x=exp_x,
+             exp_y=exp_y,
+             figsize=figsize,
+             dpi=dpi,
+             title=title
+         )
+
+         logger.info(f"Generated 1D slice plot for {x_var}")
+         return fig
+
+     def plot_contour(
+         self,
+         x_var: str,
+         y_var: str,
+         fixed_values: Optional[Dict[str, Any]] = None,
+         grid_resolution: int = 50,
+         show_experiments: bool = True,
+         show_suggestions: bool = False,
+         cmap: str = 'viridis',
+         figsize: Tuple[float, float] = (8, 6),
+         dpi: int = 100,
+         title: Optional[str] = None
+     ) -> Figure:  # pyright: ignore[reportInvalidTypeForm]
+         """
+         Create 2D contour plot of model predictions over a variable space.
+
+         Visualizes the model's predicted response surface by varying two variables
+         while holding others constant. Useful for understanding variable interactions
+         and identifying optimal regions.
+
+         Args:
+             x_var: Variable name for X axis (must be 'real' type)
+             y_var: Variable name for Y axis (must be 'real' type)
+             fixed_values: Dict of {var_name: value} for other variables.
+                 If not provided, uses midpoint for real/integer,
+                 first category for categorical.
+             grid_resolution: Grid density (NxN points)
+             show_experiments: Plot experimental data points as scatter
+             show_suggestions: Plot last suggested points (if available)
+             cmap: Matplotlib colormap name (e.g., 'viridis', 'coolwarm', 'plasma')
+             figsize: Figure size as (width, height) in inches
+             dpi: Dots per inch for figure resolution
+             title: Custom title (default: "Contour Plot of Model Predictions")
+
+         Returns:
+             matplotlib Figure object (displays inline in Jupyter)
+
+         Example:
+             >>> # Basic contour plot
+             >>> fig = session.plot_contour('temperature', 'pressure')
+
+             >>> # With fixed values for other variables
+             >>> fig = session.plot_contour(
+             ...     'temperature', 'pressure',
+             ...     fixed_values={'catalyst': 'Pt', 'flow_rate': 50},
+             ...     cmap='coolwarm',
+             ...     grid_resolution=100
+             ... )
+             >>> fig.savefig('contour.png', dpi=300, bbox_inches='tight')
+
+         Note:
+             - Requires at least 2 'real' type variables
+             - Model must be trained before plotting
+             - Categorical variables are automatically encoded using model's encoding
+         """
+         self._check_matplotlib()
+         self._check_model_trained()
+
+         if fixed_values is None:
+             fixed_values = {}
+
+         # Get variable names
+         var_names = self.search_space.get_variable_names()
+
+         # Validate variables exist
+         if x_var not in var_names:
+             raise ValueError(f"Variable '{x_var}' not in search space")
+         if y_var not in var_names:
+             raise ValueError(f"Variable '{y_var}' not in search space")
+
+         # Get variable info (search_space.variables is a list)
+         x_var_info = next(v for v in self.search_space.variables if v['name'] == x_var)
+         y_var_info = next(v for v in self.search_space.variables if v['name'] == y_var)
+
+         if x_var_info['type'] != 'real':
+             raise ValueError(f"X variable '{x_var}' must be 'real' type, got '{x_var_info['type']}'")
+         if y_var_info['type'] != 'real':
+             raise ValueError(f"Y variable '{y_var}' must be 'real' type, got '{y_var_info['type']}'")
+
+         # Get bounds
+         x_bounds = (x_var_info['min'], x_var_info['max'])
+         y_bounds = (y_var_info['min'], y_var_info['max'])
+
+         # Create meshgrid
+         x = np.linspace(x_bounds[0], x_bounds[1], grid_resolution)
+         y = np.linspace(y_bounds[0], y_bounds[1], grid_resolution)
+         X_grid, Y_grid = np.meshgrid(x, y)
+
+         # Build prediction dataframe with ALL variables in proper order
+         # Start with grid variables
+         grid_data = {
+             x_var: X_grid.ravel(),
+             y_var: Y_grid.ravel()
+         }
+
+         # Add fixed values for other variables
+         for var in self.search_space.variables:
+             var_name = var['name']
+             if var_name in [x_var, y_var]:
+                 continue
+
+             if var_name in fixed_values:
+                 grid_data[var_name] = fixed_values[var_name]
+             else:
+                 # Use default value
+                 if var['type'] in ['real', 'integer']:
+                     grid_data[var_name] = (var['min'] + var['max']) / 2
+                 elif var['type'] == 'categorical':
+                     grid_data[var_name] = var['values'][0]
+
+         # Create DataFrame with columns in the same order as original training data
+         # This is critical for model preprocessing to work correctly
+         if hasattr(self.model, 'original_feature_names') and self.model.original_feature_names:
+             # Use the model's stored column order
+             column_order = self.model.original_feature_names
+         else:
+             # Fall back to search space order
+             column_order = self.search_space.get_variable_names()
+
+         grid_df = pd.DataFrame(grid_data, columns=column_order)
+
+         # Get predictions - use Session's predict method for consistency
+         predictions, _ = self.predict(grid_df)
+
+         # Reshape to grid
+         predictions_grid = predictions.reshape(X_grid.shape)
+
+         # Prepare experimental data for overlay
+         exp_x = None
+         exp_y = None
+         if show_experiments and not self.experiment_manager.df.empty:
+             exp_df = self.experiment_manager.df
+             if x_var in exp_df.columns and y_var in exp_df.columns:
+                 exp_x = exp_df[x_var].values
+                 exp_y = exp_df[y_var].values
+
+         # Prepare suggestion data for overlay
+         sugg_x = None
+         sugg_y = None
+         if show_suggestions and len(self.last_suggestions) > 0:
+             # last_suggestions is a DataFrame
+             if isinstance(self.last_suggestions, pd.DataFrame):
+                 sugg_df = self.last_suggestions
+             else:
+                 sugg_df = pd.DataFrame(self.last_suggestions)
+
+             if x_var in sugg_df.columns and y_var in sugg_df.columns:
+                 sugg_x = sugg_df[x_var].values
+                 sugg_y = sugg_df[y_var].values
+
+         # Delegate to visualization module
+         fig, ax, cbar = create_contour_plot(
+             x_grid=X_grid,
+             y_grid=Y_grid,
+             predictions_grid=predictions_grid,
+             x_var=x_var,
+             y_var=y_var,
+             exp_x=exp_x,
+             exp_y=exp_y,
+             suggest_x=sugg_x,
+             suggest_y=sugg_y,
+             cmap=cmap,
+             figsize=figsize,
+             dpi=dpi,
+             title=title or "Contour Plot of Model Predictions"
+         )
+
+         logger.info(f"Generated contour plot for {x_var} vs {y_var}")
+         # Return figure only for backwards compatibility (colorbar accessible via fig/ax)
+         return fig
+
+     def plot_voxel(
+         self,
+         x_var: str,
+         y_var: str,
+         z_var: str,
+         fixed_values: Optional[Dict[str, Any]] = None,
+         grid_resolution: int = 15,
+         show_experiments: bool = True,
+         show_suggestions: bool = False,
+         cmap: str = 'viridis',
+         alpha: float = 0.5,
+         use_log_scale: bool = False,
+         figsize: Tuple[float, float] = (10, 8),
+         dpi: int = 100,
+         title: Optional[str] = None
+     ) -> Figure:  # pyright: ignore[reportInvalidTypeForm]
+         """
+         Create 3D voxel plot of model predictions over a variable space.
+
+         Visualizes the model's predicted response surface by varying three variables
+         while holding others constant. Uses volumetric rendering to show the 3D
+         prediction landscape with adjustable transparency.
+
+         Args:
+             x_var: Variable name for X axis (must be 'real' or 'integer' type)
+             y_var: Variable name for Y axis (must be 'real' or 'integer' type)
+             z_var: Variable name for Z axis (must be 'real' or 'integer' type)
+             fixed_values: Dict of {var_name: value} for other variables.
+                 If not provided, uses midpoint for real/integer,
+                 first category for categorical.
+             grid_resolution: Grid density (NxNxN points, default: 15)
+                 Note: 15³ = 3375 points, scales as N³
+             show_experiments: Plot experimental data points as scatter
+             show_suggestions: Plot last suggested points (if available)
+             cmap: Matplotlib colormap name (e.g., 'viridis', 'coolwarm', 'plasma')
+             alpha: Transparency level (0.0=transparent, 1.0=opaque, default: 0.5)
+                 Lower values reveal interior structure better
+             use_log_scale: Use logarithmic color scale for values spanning orders of magnitude
+             figsize: Figure size as (width, height) in inches
+             dpi: Dots per inch for figure resolution
+             title: Custom title (default: "3D Voxel Plot of Model Predictions")
+
+         Returns:
+             matplotlib Figure object with 3D axes
+
+         Example:
+             >>> # Basic 3D voxel plot
+             >>> fig = session.plot_voxel('temperature', 'pressure', 'flow_rate')
+
+             >>> # With transparency to see interior
+             >>> fig = session.plot_voxel(
+             ...     'temperature', 'pressure', 'flow_rate',
+             ...     alpha=0.3,
+             ...     grid_resolution=20
+             ... )
+             >>> fig.savefig('voxel_plot.png', dpi=150, bbox_inches='tight')
+
+             >>> # With fixed values for other variables
+             >>> fig = session.plot_voxel(
+             ...     'temperature', 'pressure', 'flow_rate',
+             ...     fixed_values={'catalyst': 'Pt', 'pH': 7.0},
+             ...     cmap='coolwarm'
+             ... )
+
+         Raises:
+             ValueError: If search space doesn't have at least 3 continuous variables
+
+         Note:
+             - Requires at least 3 'real' or 'integer' type variables
+             - Model must be trained before plotting
+             - Computationally expensive: O(N³) evaluations
+             - Lower grid_resolution for faster rendering
+             - Use alpha < 0.5 to see interior structure
+             - Interactive rotation available in some backends (notebook)
+         """
+         self._check_matplotlib()
+         self._check_model_trained()
+
+         if fixed_values is None:
+             fixed_values = {}
+
+         # Get all variable names and check for continuous variables
+         var_names = self.search_space.get_variable_names()
+
+         # Count continuous variables (real or integer)
+         continuous_vars = []
+         for var in self.search_space.variables:
+             if var['type'] in ['real', 'integer']:
+                 continuous_vars.append(var['name'])
+
+         # Check if we have at least 3 continuous variables
+         if len(continuous_vars) < 3:
+             raise ValueError(
+                 f"3D voxel plot requires at least 3 continuous (real or integer) variables. "
+                 f"Found only {len(continuous_vars)}: {continuous_vars}. "
+                 f"Use plot_slice() for 1D or plot_contour() for 2D visualization instead."
+             )
+
+         # Validate that the requested variables exist and are continuous
+         for var_name, var_label in [(x_var, 'X'), (y_var, 'Y'), (z_var, 'Z')]:
+             if var_name not in var_names:
+                 raise ValueError(f"{var_label} variable '{var_name}' not in search space")
+
+             var_def = next(v for v in self.search_space.variables if v['name'] == var_name)
+             if var_def['type'] not in ['real', 'integer']:
+                 raise ValueError(
+                     f"{var_label} variable '{var_name}' must be 'real' or 'integer' type for voxel plot, "
+                     f"got '{var_def['type']}'"
+                 )
+
+         # Get variable definitions
+         x_var_def = next(v for v in self.search_space.variables if v['name'] == x_var)
+         y_var_def = next(v for v in self.search_space.variables if v['name'] == y_var)
+         z_var_def = next(v for v in self.search_space.variables if v['name'] == z_var)
+
+         # Get bounds
+         x_bounds = (x_var_def['min'], x_var_def['max'])
+         y_bounds = (y_var_def['min'], y_var_def['max'])
+         z_bounds = (z_var_def['min'], z_var_def['max'])
+
+         # Create 3D meshgrid
+         x = np.linspace(x_bounds[0], x_bounds[1], grid_resolution)
+         y = np.linspace(y_bounds[0], y_bounds[1], grid_resolution)
+         z = np.linspace(z_bounds[0], z_bounds[1], grid_resolution)
+         X_grid, Y_grid, Z_grid = np.meshgrid(x, y, z, indexing='ij')
+
+         # Build prediction dataframe with ALL variables in proper order
+         grid_data = {
+             x_var: X_grid.ravel(),
+             y_var: Y_grid.ravel(),
+             z_var: Z_grid.ravel()
+         }
+
+         # Add fixed values for other variables
+         for var in self.search_space.variables:
+             var_name = var['name']
+             if var_name in [x_var, y_var, z_var]:
+                 continue
+
+             if var_name in fixed_values:
+                 grid_data[var_name] = fixed_values[var_name]
+             else:
+                 # Use default value
+                 if var['type'] in ['real', 'integer']:
+                     grid_data[var_name] = (var['min'] + var['max']) / 2
+                 elif var['type'] == 'categorical':
+                     grid_data[var_name] = var['values'][0]
+
+         # Create DataFrame with columns in correct order
+         if hasattr(self.model, 'original_feature_names') and self.model.original_feature_names:
+             column_order = self.model.original_feature_names
+         else:
+             column_order = self.search_space.get_variable_names()
+
+         grid_df = pd.DataFrame(grid_data, columns=column_order)
+
+         # Get predictions
+         predictions, _ = self.predict(grid_df)
+
+         # Reshape to 3D grid
+         predictions_grid = predictions.reshape(X_grid.shape)
+
+         # Prepare experimental data for overlay
+         exp_x = None
+         exp_y = None
+         exp_z = None
+         if show_experiments and not self.experiment_manager.df.empty:
+             exp_df = self.experiment_manager.df
+             if x_var in exp_df.columns and y_var in exp_df.columns and z_var in exp_df.columns:
+                 exp_x = exp_df[x_var].values
+                 exp_y = exp_df[y_var].values
+                 exp_z = exp_df[z_var].values
+
+         # Prepare suggestion data for overlay
+         sugg_x = None
+         sugg_y = None
+         sugg_z = None
+         if show_suggestions and len(self.last_suggestions) > 0:
+             if isinstance(self.last_suggestions, pd.DataFrame):
+                 sugg_df = self.last_suggestions
+             else:
+                 sugg_df = pd.DataFrame(self.last_suggestions)
+
+             if x_var in sugg_df.columns and y_var in sugg_df.columns and z_var in sugg_df.columns:
+                 sugg_x = sugg_df[x_var].values
+                 sugg_y = sugg_df[y_var].values
+                 sugg_z = sugg_df[z_var].values
+
+         # Delegate to visualization module
+         from alchemist_core.visualization.plots import create_voxel_plot
+
+         fig, ax = create_voxel_plot(
+             x_grid=X_grid,
+             y_grid=Y_grid,
+             z_grid=Z_grid,
+             predictions_grid=predictions_grid,
+             x_var=x_var,
+             y_var=y_var,
+             z_var=z_var,
+             exp_x=exp_x,
+             exp_y=exp_y,
+             exp_z=exp_z,
+             suggest_x=sugg_x,
+             suggest_y=sugg_y,
+             suggest_z=sugg_z,
+             cmap=cmap,
+             alpha=alpha,
+             use_log_scale=use_log_scale,
+             figsize=figsize,
+             dpi=dpi,
2274
+ title=title or "3D Voxel Plot of Model Predictions"
2275
+ )
2276
+
2277
+ logger.info(f"Generated 3D voxel plot for {x_var} vs {y_var} vs {z_var}")
2278
+ return fig
2279
+
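The heart of `plot_voxel` is the flatten/predict/reshape round-trip over a cubic grid. A minimal standalone sketch of that round-trip, with `toy_predict` as a hypothetical stand-in for `session.predict`:

```python
import numpy as np
import pandas as pd

def toy_predict(df: pd.DataFrame) -> np.ndarray:
    # Hypothetical surrogate standing in for session.predict
    return np.sin(df["x"]) * np.cos(df["y"]) + 0.1 * df["z"]

n = 15  # grid_resolution; evaluation count grows as n**3 (15**3 = 3375)
X, Y, Z = np.meshgrid(*[np.linspace(0.0, 1.0, n)] * 3, indexing="ij")

# One row per grid point, a single batched prediction, then restore the cube
grid = pd.DataFrame({"x": X.ravel(), "y": Y.ravel(), "z": Z.ravel()})
pred = np.asarray(toy_predict(grid)).reshape(X.shape)
assert pred.shape == (n, n, n)
```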
+    def plot_metrics(
+        self,
+        metric: Literal['rmse', 'mae', 'r2', 'mape'] = 'rmse',
+        cv_splits: int = 5,
+        figsize: Tuple[float, float] = (8, 6),
+        dpi: int = 100,
+        use_cached: bool = True
+    ) -> Figure: # pyright: ignore[reportInvalidTypeForm]
+        """
+        Plot cross-validation metrics as a function of training set size.
+
+        Shows how model performance improves as more experimental data is added.
+        This evaluates the model at each training set size from 5 observations up to
+        the current total, providing insight into data efficiency and whether more
+        experiments are needed.
+
+        Args:
+            metric: Which metric to plot ('rmse', 'mae', 'r2', or 'mape')
+            cv_splits: Number of cross-validation folds (default: 5)
+            figsize: Figure size as (width, height) in inches
+            dpi: Dots per inch for figure resolution
+            use_cached: Use cached metrics if available (default: True)
+
+        Returns:
+            matplotlib Figure object
+
+        Example:
+            >>> # Plot RMSE vs number of experiments
+            >>> fig = session.plot_metrics('rmse')
+
+            >>> # Plot R² to see improvement
+            >>> fig = session.plot_metrics('r2')
+
+            >>> # Force recomputation of metrics
+            >>> fig = session.plot_metrics('rmse', use_cached=False)
+
+        Note:
+            Calls model.evaluate() if metrics are not cached, which can be computationally
+            expensive for large datasets. Set use_cached=False to force recomputation.
+        """
+        self._check_matplotlib()
+        self._check_model_trained()
+
+        # Need at least 5 observations for CV
+        n_total = len(self.experiment_manager.df)
+        if n_total < 5:
+            raise ValueError(f"Need at least 5 observations for metrics plot (have {n_total})")
+
+        # Check for cached metrics first
+        cache_key = f'_cached_cv_metrics_{cv_splits}'
+        if use_cached and hasattr(self.model, cache_key):
+            cv_metrics = getattr(self.model, cache_key)
+            logger.info(f"Using cached CV metrics for {metric.upper()}")
+        else:
+            # Call the model's evaluate method to get metrics over training sizes
+            logger.info(f"Computing {metric.upper()} over training set sizes (this may take a moment)...")
+            cv_metrics = self.model.evaluate(
+                self.experiment_manager,
+                cv_splits=cv_splits,
+                debug=False
+            )
+            # Cache the results
+            setattr(self.model, cache_key, cv_metrics)
+
+        # Extract the requested metric
+        metric_key_map = {
+            'rmse': 'RMSE',
+            'mae': 'MAE',
+            'r2': 'R²',
+            'mape': 'MAPE'
+        }
+
+        if metric not in metric_key_map:
+            raise ValueError(f"Unknown metric '{metric}'. Choose from: {list(metric_key_map.keys())}")
+
+        metric_key = metric_key_map[metric]
+        metric_values = cv_metrics.get(metric_key, [])
+
+        if not metric_values:
+            raise RuntimeError(f"Model did not return {metric_key} values from evaluate()")
+
+        # X-axis: training set sizes (starts at 5)
+        x_range = np.arange(5, len(metric_values) + 5)
+        metric_array = np.array(metric_values)
+
+        # Delegate to visualization module
+        fig, ax = create_metrics_plot(
+            training_sizes=x_range,
+            metric_values=metric_array,
+            metric_name=metric,
+            figsize=figsize,
+            dpi=dpi
+        )
+
+        logger.info(f"Generated {metric} metrics plot with {len(metric_values)} points")
+        return fig
+
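The cache guard above is a generic compute-once pattern: results are memoized as an attribute on the model object, keyed by `cv_splits`. A small sketch of the same pattern with purely illustrative names (`Holder`, `expensive_cv`):

```python
def get_or_compute(holder, cv_splits, expensive_cv):
    cache_key = f"_cached_cv_metrics_{cv_splits}"
    if hasattr(holder, cache_key):
        return getattr(holder, cache_key)  # cache hit: skip the expensive call
    result = expensive_cv(cv_splits)       # slow path, runs at most once per key
    setattr(holder, cache_key, result)
    return result

class Holder:
    pass

h = Holder()
metrics = get_or_compute(h, 5, lambda k: {"RMSE": [1.0, 0.8, 0.7]})
assert get_or_compute(h, 5, lambda k: None) is metrics  # second call is cached
```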
+    def plot_qq(
+        self,
+        use_calibrated: bool = False,
+        figsize: Tuple[float, float] = (8, 6),
+        dpi: int = 100,
+        title: Optional[str] = None
+    ) -> Figure: # pyright: ignore[reportInvalidTypeForm]
+        """
+        Create a Q-Q (quantile-quantile) plot for checking residual normality.
+
+        Visualizes whether the model's prediction errors (residuals) follow a normal
+        distribution. Points should lie close to the diagonal line if residuals are
+        normally distributed, which is an assumption of Gaussian Process models.
+
+        Args:
+            use_calibrated: Use calibrated uncertainty estimates if available
+            figsize: Figure size as (width, height) in inches
+            dpi: Dots per inch for figure resolution
+            title: Custom title (default: "Q-Q Plot: Residuals Normality Check")
+
+        Returns:
+            matplotlib Figure object
+
+        Example:
+            >>> # Check if residuals are normally distributed
+            >>> fig = session.plot_qq()
+            >>> fig.savefig('qq_plot.png')
+
+            >>> # Use calibrated predictions if available
+            >>> fig = session.plot_qq(use_calibrated=True)
+
+        Note:
+            - Requires model to be trained with cross-validation
+            - Significant deviations from the diagonal suggest non-normal residuals
+            - Useful for diagnosing model assumptions and identifying outliers
+        """
+        self._check_matplotlib()
+        self._check_model_trained()
+
+        # Get CV results
+        cv_results = self._check_cv_results(use_calibrated)
+        y_true = cv_results['y_true']
+        y_pred = cv_results['y_pred']
+        y_std = cv_results.get('y_std', None)
+
+        # Compute standardized residuals (z-scores)
+        residuals = y_true - y_pred
+        if y_std is not None and len(y_std) > 0:
+            z_scores = residuals / y_std
+        else:
+            # Fallback: standardize by the residual standard deviation
+            z_scores = residuals / np.std(residuals)
+
+        # Delegate to visualization module
+        fig, ax = create_qq_plot(
+            z_scores=z_scores,
+            figsize=figsize,
+            dpi=dpi,
+            title=title
+        )
+
+        logger.info("Generated Q-Q plot for residuals")
+        return fig
+
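For reference, the same construction can be reproduced with scipy alone: standardize the residuals by the predicted std, then compare them against theoretical normal quantiles. A sketch on synthetic data (all names below are illustrative):

```python
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

rng = np.random.default_rng(0)
y_true = rng.normal(size=200)
y_pred = y_true + rng.normal(scale=0.3, size=200)
y_std = np.full(200, 0.3)

z = (y_true - y_pred) / y_std  # ~N(0, 1) if the model is well calibrated
fig, ax = plt.subplots()
stats.probplot(z, dist="norm", plot=ax)  # points near the diagonal => normal residuals
```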
+    def plot_calibration(
+        self,
+        use_calibrated: bool = False,
+        n_bins: int = 10,
+        figsize: Tuple[float, float] = (8, 6),
+        dpi: int = 100,
+        title: Optional[str] = None
+    ) -> Figure: # pyright: ignore[reportInvalidTypeForm]
+        """
+        Create calibration plot showing reliability of uncertainty estimates.
+
+        Compares predicted confidence intervals to actual coverage. For well-calibrated
+        models, a 68% confidence interval should contain ~68% of true values, 95% should
+        contain ~95%, etc. This plot helps diagnose if the model's uncertainty estimates
+        are too narrow (overconfident) or too wide (underconfident).
+
+        Args:
+            use_calibrated: Use calibrated uncertainty estimates if available
+            n_bins: Number of bins for grouping predictions (default: 10)
+            figsize: Figure size as (width, height) in inches
+            dpi: Dots per inch for figure resolution
+            title: Custom title (default: "Calibration Plot: Uncertainty Reliability")
+
+        Returns:
+            matplotlib Figure object
+
+        Example:
+            >>> # Check if uncertainty estimates are reliable
+            >>> fig = session.plot_calibration()
+            >>> fig.savefig('calibration_plot.png')
+
+            >>> # With more bins for finer resolution
+            >>> fig = session.plot_calibration(n_bins=20)
+
+        Note:
+            - Requires model to be trained with cross-validation and provide uncertainties
+            - Points above diagonal = model is underconfident (intervals too wide)
+            - Points below diagonal = model is overconfident (intervals too narrow)
+            - Well-calibrated models have points close to the diagonal
+        """
+        self._check_matplotlib()
+        self._check_model_trained()
+
+        # Get CV results
+        cv_results = self._check_cv_results(use_calibrated)
+        y_true = cv_results['y_true']
+        y_pred = cv_results['y_pred']
+        y_std = cv_results.get('y_std', None)
+
+        if y_std is None:
+            raise ValueError(
+                "Model does not provide uncertainty estimates (y_std). "
+                "Calibration plot requires uncertainty predictions."
+            )
+
+        # Compute calibration curve data
+        from scipy import stats
+
+        # Compute empirical coverage for a range of nominal probabilities
+        nominal_probs = np.arange(0.10, 1.00, 0.05)
+        empirical_coverage = []
+
+        for prob in nominal_probs:
+            # Convert probability to sigma multiplier
+            sigma = stats.norm.ppf((1 + prob) / 2)
+
+            # Compute empirical coverage at this sigma level
+            lower_bound = y_pred - sigma * y_std
+            upper_bound = y_pred + sigma * y_std
+            within_interval = (y_true >= lower_bound) & (y_true <= upper_bound)
+            empirical_coverage.append(np.mean(within_interval))
+
+        empirical_coverage = np.array(empirical_coverage)
+
+        # Delegate to visualization module
+        fig, ax = create_calibration_plot(
+            nominal_probs=nominal_probs,
+            empirical_coverage=empirical_coverage,
+            figsize=figsize,
+            dpi=dpi,
+            title=title or "Calibration Plot: Uncertainty Reliability"
+        )
+
+        logger.info("Generated calibration plot for uncertainty estimates")
+        return fig
+
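A quick sanity check of the coverage computation: on synthetic data whose errors genuinely follow the stated standard deviation, the empirical coverage computed as above should land close to the nominal probability. A hedged sketch:

```python
import numpy as np
from scipy import stats

rng = np.random.default_rng(1)
y_pred = rng.uniform(size=5000)
y_std = np.full(5000, 0.2)
y_true = y_pred + rng.normal(scale=y_std)  # errors match the stated std exactly

for prob in (0.68, 0.95):
    sigma = stats.norm.ppf((1 + prob) / 2)  # ~0.99 for 68%, ~1.96 for 95%
    inside = np.abs(y_true - y_pred) <= sigma * y_std
    print(f"nominal {prob:.2f} -> empirical {inside.mean():.2f}")  # should roughly agree
```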
+    def plot_regret(
+        self,
+        goal: Literal['maximize', 'minimize'] = 'maximize',
+        include_predictions: bool = True,
+        show_cumulative: bool = False,
+        backend: Optional[str] = None,
+        kernel: Optional[str] = None,
+        n_grid_points: int = 1000,
+        sigma_bands: Optional[List[float]] = None,
+        start_iteration: int = 5,
+        reuse_hyperparameters: bool = True,
+        use_calibrated_uncertainty: bool = False,
+        figsize: Tuple[float, float] = (8, 6),
+        dpi: int = 100,
+        title: Optional[str] = None
+    ) -> Figure: # pyright: ignore[reportInvalidTypeForm]
+        """
+        Plot optimization progress (regret curve).
+
+        Shows the best value found as a function of iteration number. The curve
+        displays cumulative best results and all observed values, providing insight
+        into optimization convergence.
+
+        A flattening curve indicates the optimization is converging (no further
+        improvements being found). This is useful for determining when to stop
+        an optimization campaign.
+
+        Optionally overlays the model's predicted best value (max posterior mean)
+        with uncertainty bands, showing where the model believes the optimum lies.
+
+        Args:
+            goal: 'maximize' or 'minimize' - which direction to optimize
+            include_predictions: Whether to overlay max(posterior mean) with uncertainty bands
+            show_cumulative: Whether to plot the cumulative best (incumbent) curve (default: False)
+            backend: Model backend ('sklearn' or 'botorch'). Uses session default if None.
+            kernel: Kernel type ('RBF', 'Matern', etc.). Uses session default if None.
+            n_grid_points: Number of points to evaluate for finding max posterior mean
+            sigma_bands: List of sigma values for uncertainty bands (e.g., [1.0, 2.0])
+            start_iteration: First iteration to compute predictions (needs enough data)
+            reuse_hyperparameters: Reuse final model's hyperparameters (faster, default True)
+            use_calibrated_uncertainty: If True, apply calibration to uncertainties. If False,
+                use raw GP uncertainties. The default (False) is recommended for convergence
+                assessment, since raw uncertainties better reflect the model's internal
+                convergence. Set True for realistic prediction intervals that account for
+                model miscalibration.
+            figsize: Figure size as (width, height) in inches
+            dpi: Dots per inch for figure resolution
+            title: Custom plot title (auto-generated if None)
+
+        Returns:
+            matplotlib Figure object
+
+        Example:
+            >>> # For a maximization problem
+            >>> fig = session.plot_regret(goal='maximize')
+            >>> fig.savefig('optimization_progress.png')
+
+            >>> # With custom uncertainty bands (±1σ, ±2σ)
+            >>> fig = session.plot_regret(goal='maximize', sigma_bands=[1.0, 2.0])
+
+            >>> # For a minimization problem
+            >>> fig = session.plot_regret(goal='minimize')
+
+        Note:
+            - Requires at least 2 experiments
+            - Also known as "simple regret" or "incumbent trajectory"
+            - Best used to visualize overall optimization progress
+        """
+        self._check_matplotlib()
+
+        # Check we have experiments
+        n_exp = len(self.experiment_manager.df)
+        if n_exp < 2:
+            raise ValueError(f"Need at least 2 experiments for regret plot (have {n_exp})")
+
+        # Get observed values and create iteration array (1-based for user clarity)
+        # Use first target column (single-objective optimization)
+        target_col = self.experiment_manager.target_columns[0]
+        observed_values = self.experiment_manager.df[target_col].values
+        iterations = np.arange(1, n_exp + 1)  # 1-based: [1, 2, 3, ..., n]
+
+        # Compute posterior predictions if requested
+        predicted_means = None
+        predicted_stds = None
+
+        if include_predictions and n_exp >= start_iteration:
+            try:
+                predicted_means, predicted_stds = self._compute_posterior_predictions(
+                    goal=goal,
+                    backend=backend,
+                    kernel=kernel,
+                    n_grid_points=n_grid_points,
+                    start_iteration=start_iteration,
+                    reuse_hyperparameters=reuse_hyperparameters,
+                    use_calibrated_uncertainty=use_calibrated_uncertainty
+                )
+            except Exception as e:
+                logger.warning(f"Could not compute posterior predictions: {e}. Plotting observations only.")
+
+        # Import visualization function
+        from alchemist_core.visualization.plots import create_regret_plot
+
+        # Delegate to visualization module
+        fig, ax = create_regret_plot(
+            iterations=iterations,
+            observed_values=observed_values,
+            show_cumulative=show_cumulative,
+            goal=goal,
+            predicted_means=predicted_means,
+            predicted_stds=predicted_stds,
+            sigma_bands=sigma_bands,
+            figsize=figsize,
+            dpi=dpi,
+            title=title
+        )
+
+        logger.info(f"Generated regret plot with {n_exp} experiments")
+        return fig
+
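The "cumulative best" curve referenced here is just a running extremum of the observations; `np.maximum.accumulate` (or `np.minimum.accumulate` for minimization) computes it in one call. A minimal sketch with made-up observations:

```python
import numpy as np

observed = np.array([0.42, 0.55, 0.51, 0.70, 0.68, 0.71])
best_if_maximizing = np.maximum.accumulate(observed)  # goal='maximize'
best_if_minimizing = np.minimum.accumulate(observed)  # goal='minimize'
print(best_if_maximizing)  # [0.42 0.55 0.55 0.70 0.70 0.71]; a flat tail suggests convergence
```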
+    def _generate_prediction_grid(self, n_grid_points: int) -> pd.DataFrame:
+        """
+        Generate a grid of test points across the search space for predictions.
+
+        Args:
+            n_grid_points: Target number of grid points (the actual number depends on dimensionality)
+
+        Returns:
+            DataFrame with one column per variable
+        """
+        grid_1d = []
+        var_names = []
+
+        for var in self.search_space.variables:
+            var_names.append(var['name'])
+
+            if var['type'] == 'real':
+                # Continuous: linspace
+                n_per_dim = int(n_grid_points ** (1 / len(self.search_space.variables)))
+                grid_1d.append(np.linspace(var['min'], var['max'], n_per_dim))
+            elif var['type'] == 'integer':
+                # Integer: range of integers
+                n_per_dim = int(n_grid_points ** (1 / len(self.search_space.variables)))
+                grid_1d.append(np.linspace(var['min'], var['max'], n_per_dim).astype(int))
+            else:
+                # Categorical: use actual category values
+                grid_1d.append(var['values'])
+
+        # Generate test points using the Cartesian product
+        from itertools import product
+        X_test_tuples = list(product(*grid_1d))
+
+        # Convert to DataFrame with proper variable names and types
+        grid = pd.DataFrame(X_test_tuples, columns=var_names)
+
+        # Ensure correct dtypes for categorical variables
+        for var in self.search_space.variables:
+            if var['type'] == 'categorical':
+                grid[var['name']] = grid[var['name']].astype(str)
+
+        return grid
+
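The same Cartesian-product construction, shown standalone for a small mixed search space (variable names and bounds below are illustrative):

```python
from itertools import product

import numpy as np
import pandas as pd

axes = {
    "temperature": np.linspace(300.0, 400.0, 4),  # real: linspace over bounds
    "batch": np.linspace(1, 8, 4).astype(int),    # integer: rounded linspace
    "catalyst": ["Pt", "Pd"],                     # categorical: actual levels
}
grid = pd.DataFrame(list(product(*axes.values())), columns=list(axes))
print(len(grid))  # 4 * 4 * 2 = 32 rows, one per combination
```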
+    def _compute_posterior_predictions(
+        self,
+        goal: str,
+        backend: Optional[str],
+        kernel: Optional[str],
+        n_grid_points: int,
+        start_iteration: int,
+        reuse_hyperparameters: bool,
+        use_calibrated_uncertainty: bool
+    ) -> Tuple[np.ndarray, np.ndarray]:
+        """
+        Compute max(posterior mean) and the corresponding std at each iteration.
+
+        Helper method for the regret plot to overlay model predictions with uncertainty.
+
+        IMPORTANT: When reuse_hyperparameters=True, this uses the final model's
+        hyperparameters for ALL iterations by creating fresh GP models with those
+        hyperparameters and subsets of the data. This avoids numerical instability from
+        repeated MLE optimization.
+
+        Returns:
+            Tuple of (predicted_means, predicted_stds) arrays, same length as n_experiments
+        """
+        n_exp = len(self.experiment_manager.df)
+
+        # Initialize arrays (NaN for iterations before start_iteration)
+        predicted_means = np.full(n_exp, np.nan)
+        predicted_stds = np.full(n_exp, np.nan)
+
+        # Determine backend and kernel
+        if backend is None:
+            if self.model is None or not self.model.is_trained:
+                raise ValueError("No trained model in session. Train a model first or specify backend/kernel.")
+            backend = self.model_backend
+
+        if kernel is None:
+            if self.model is None or not self.model.is_trained:
+                raise ValueError("No trained model in session. Train a model first or specify backend/kernel.")
+            if backend == 'sklearn':
+                kernel = self.model.kernel_options.get('kernel_type', 'RBF')
+            elif backend == 'botorch':
+                # BoTorchModel stores kernel type in cont_kernel_type
+                kernel = getattr(self.model, 'cont_kernel_type', 'Matern')
+
+        # Extract optimized state_dict for botorch or kernel params for sklearn
+        optimized_state_dict = None
+        optimized_kernel_params = None
+        if reuse_hyperparameters and self.model is not None and self.model.is_trained:
+            if backend == 'sklearn':
+                optimized_kernel_params = self.model.optimized_kernel.get_params()
+            elif backend == 'botorch':
+                # Store the fitted state dict from the final model
+                optimized_state_dict = self.model.fitted_state_dict
+
+        # Generate grid for predictions
+        grid = self._generate_prediction_grid(n_grid_points)
+
+        # Get full dataset
+        full_df = self.experiment_manager.df
+        target_col = self.experiment_manager.target_columns[0]
+
+        # Suppress INFO logging for temp sessions to avoid spam
+        import logging
+        original_session_level = logger.level
+        original_model_level = logging.getLogger('alchemist_core.models.botorch_model').level
+        logger.setLevel(logging.WARNING)
+        logging.getLogger('alchemist_core.models.botorch_model').setLevel(logging.WARNING)
+
+        # Loop through iterations
+        for i in range(start_iteration, n_exp + 1):
+            try:
+                # Create a temporary session with a subset of the data
+                temp_session = OptimizationSession()
+
+                # Directly assign search space to avoid logging spam
+                temp_session.search_space = self.search_space
+                temp_session.experiment_manager.set_search_space(self.search_space)
+
+                # Add subset of experiments
+                for idx in range(i):
+                    row = full_df.iloc[idx]
+                    inputs = {var['name']: row[var['name']] for var in self.experiment_manager.search_space.variables}
+                    temp_session.add_experiment(inputs, output=row[target_col])
+
+                # Train model on subset using the SAME approach for all iterations
+                if backend == 'sklearn':
+                    # Create model instance
+                    from alchemist_core.models.sklearn_model import SklearnModel
+                    temp_model = SklearnModel(kernel_options={'kernel_type': kernel})
+
+                    if reuse_hyperparameters and optimized_kernel_params is not None:
+                        # Override n_restarts to disable optimization
+                        temp_model.n_restarts_optimizer = 0
+                        temp_model._custom_optimizer = None
+                        # Store the optimized kernel to use
+                        from sklearn.base import clone
+                        temp_model._reuse_kernel = clone(self.model.optimized_kernel)
+
+                    # Attach model and train
+                    temp_session.model = temp_model
+                    temp_session.model_backend = 'sklearn'
+
+                    # Train WITHOUT recomputing calibration (if reusing hyperparameters)
+                    if reuse_hyperparameters:
+                        temp_model.train(temp_session.experiment_manager, calibrate_uncertainty=False)
+                        # Transfer calibration factor from the final model
+                        if hasattr(self.model, 'calibration_factor'):
+                            temp_model.calibration_factor = self.model.calibration_factor
+                        # Enable calibration only if the user requested calibrated uncertainties
+                        temp_model.calibration_enabled = use_calibrated_uncertainty
+                    else:
+                        temp_model.train(temp_session.experiment_manager)
+
+                    # Verify the model was trained
+                    if not temp_model.is_trained:
+                        raise ValueError(f"Model training failed at iteration {i}")
+                    if temp_session.model is None:
+                        raise ValueError(f"temp_session.model is None after training at iteration {i}")
+
+                elif backend == 'botorch':
+                    # For BoTorch: create a fresh model and load the fitted hyperparameters
+                    from alchemist_core.models.botorch_model import BoTorchModel
+                    import torch
+
+                    # Create model instance with the same configuration as the original model
+                    kernel_opts = {'cont_kernel_type': kernel}
+                    if hasattr(self.model, 'matern_nu'):
+                        kernel_opts['matern_nu'] = self.model.matern_nu
+
+                    temp_model = BoTorchModel(
+                        kernel_options=kernel_opts,
+                        input_transform_type=self.model.input_transform_type if hasattr(self.model, 'input_transform_type') else 'normalize',
+                        output_transform_type=self.model.output_transform_type if hasattr(self.model, 'output_transform_type') else 'standardize'
+                    )
+
+                    # Train model on subset (this creates the GP with a subset of the data).
+                    # Disable calibration computation if reusing hyperparameters.
+                    if reuse_hyperparameters:
+                        temp_model.train(temp_session.experiment_manager, calibrate_uncertainty=False)
+                    else:
+                        temp_model.train(temp_session.experiment_manager)
+
+                    # Apply optimized hyperparameters from the final model to the trained subset model.
+                    # Only works for simple kernel structures (no categorical variables).
+                    if reuse_hyperparameters and optimized_state_dict is not None:
+                        try:
+                            with torch.no_grad():
+                                # Extract hyperparameters from the final model.
+                                # This only works for ScaleKernel(base_kernel), not AdditiveKernel.
+                                final_lengthscale = self.model.model.covar_module.base_kernel.lengthscale.detach().clone()
+                                final_outputscale = self.model.model.covar_module.outputscale.detach().clone()
+                                final_noise = self.model.model.likelihood.noise.detach().clone()
+
+                                # Set hyperparameters in the temp model (trained on the subset)
+                                temp_model.model.covar_module.base_kernel.lengthscale = final_lengthscale
+                                temp_model.model.covar_module.outputscale = final_outputscale
+                                temp_model.model.likelihood.noise = final_noise
+                        except AttributeError:
+                            # If the kernel structure is complex (e.g., has categorical variables),
+                            # skip hyperparameter reuse - fall back to each iteration's own optimization
+                            pass
+
+                    # Transfer calibration factor from the final model (even if hyperparameters
+                    # couldn't be transferred). This ensures the last iteration matches the
+                    # final model exactly.
+                    if reuse_hyperparameters and hasattr(self.model, 'calibration_factor'):
+                        temp_model.calibration_factor = self.model.calibration_factor
+                        # Enable calibration only if the user requested calibrated uncertainties
+                        temp_model.calibration_enabled = use_calibrated_uncertainty
+
+                    # Attach to session
+                    temp_session.model = temp_model
+                    temp_session.model_backend = 'botorch'
+
+                # Predict on grid using temp_session.predict (consistent for all iterations)
+                result = temp_session.predict(grid)
+                if result is None:
+                    raise ValueError(f"predict() returned None at iteration {i}")
+                means, stds = result
+
+                # Find the max mean (or min for minimization)
+                if goal.lower() == 'maximize':
+                    best_idx = np.argmax(means)
+                else:
+                    best_idx = np.argmin(means)
+
+                predicted_means[i - 1] = means[best_idx]
+                predicted_stds[i - 1] = stds[best_idx]
+
+            except Exception as e:
+                import traceback
+                logger.warning(f"Failed to compute predictions for iteration {i}: {e}")
+                logger.debug(traceback.format_exc())
+                # Leave as NaN
+
+        # Restore original logging levels
+        logger.setLevel(original_session_level)
+        logging.getLogger('alchemist_core.models.botorch_model').setLevel(original_model_level)
+
+        return predicted_means, predicted_stds
+
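The reuse_hyperparameters idea is easiest to see with plain scikit-learn, where `optimizer=None` keeps a kernel's (already optimized) parameters fixed during `fit`. This is an analogy to the sklearn branch above, not the BoTorch state-dict path:

```python
import numpy as np
from sklearn.base import clone
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF

rng = np.random.default_rng(2)
X = rng.uniform(size=(30, 2))
y = np.sin(6 * X[:, 0]) + 0.1 * rng.normal(size=30)

# Optimize hyperparameters once, on the full dataset
final = GaussianProcessRegressor(kernel=RBF(), normalize_y=True).fit(X, y)

max_posterior_mean = []
for i in range(5, len(X) + 1):
    # Refit on a growing prefix with the optimized kernel frozen
    gp = GaussianProcessRegressor(kernel=clone(final.kernel_), optimizer=None,
                                  normalize_y=True).fit(X[:i], y[:i])
    max_posterior_mean.append(gp.predict(X).max())  # fixed evaluation grid
```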
+    def plot_acquisition_slice(
+        self,
+        x_var: str,
+        acq_func: str = 'ei',
+        fixed_values: Optional[Dict[str, Any]] = None,
+        n_points: int = 100,
+        acq_func_kwargs: Optional[Dict[str, Any]] = None,
+        goal: str = 'maximize',
+        show_experiments: bool = True,
+        show_suggestions: bool = True,
+        figsize: Tuple[float, float] = (8, 6),
+        dpi: int = 100,
+        title: Optional[str] = None
+    ) -> Figure: # pyright: ignore[reportInvalidTypeForm]
+        """
+        Create 1D slice plot showing the acquisition function along one variable.
+
+        Visualizes how the acquisition function value changes as one variable is varied
+        while all other variables are held constant. This shows which regions along that
+        variable axis are most promising for the next experiment.
+
+        Args:
+            x_var: Variable name to vary along X axis (must be 'real' or 'integer')
+            acq_func: Acquisition function name ('ei', 'pi', 'ucb', 'logei', 'logpi')
+            fixed_values: Dict of {var_name: value} for other variables.
+                If not provided, uses midpoint for real/integer,
+                first category for categorical.
+            n_points: Number of points to evaluate along the slice
+            acq_func_kwargs: Additional acquisition parameters (xi, kappa, beta)
+            goal: 'maximize' or 'minimize' - optimization direction
+            show_experiments: Plot experimental data points as scatter
+            show_suggestions: Plot last suggested points (if available)
+            figsize: Figure size as (width, height) in inches
+            dpi: Dots per inch for figure resolution
+            title: Custom title (default: auto-generated)
+
+        Returns:
+            matplotlib Figure object
+
+        Example:
+            >>> # Visualize Expected Improvement along temperature
+            >>> fig = session.plot_acquisition_slice(
+            ...     'temperature',
+            ...     acq_func='ei',
+            ...     fixed_values={'pressure': 5.0, 'catalyst': 'Pt'}
+            ... )
+            >>> fig.savefig('acq_slice.png', dpi=300)
+
+            >>> # See where UCB is highest
+            >>> fig = session.plot_acquisition_slice(
+            ...     'pressure',
+            ...     acq_func='ucb',
+            ...     acq_func_kwargs={'beta': 0.5}
+            ... )
+
+        Note:
+            - Model must be trained before plotting
+            - Higher acquisition values indicate more promising regions
+            - Use this to understand where the algorithm wants to explore next
+        """
+        self._check_matplotlib()
+        self._check_model_trained()
+
+        from alchemist_core.utils.acquisition_utils import evaluate_acquisition
+        from alchemist_core.visualization.plots import create_slice_plot
+
+        if fixed_values is None:
+            fixed_values = {}
+
+        # Get variable info
+        var_names = self.search_space.get_variable_names()
+        if x_var not in var_names:
+            raise ValueError(f"Variable '{x_var}' not in search space")
+
+        # Get x variable definition
+        x_var_def = next(v for v in self.search_space.variables if v['name'] == x_var)
+
+        if x_var_def['type'] not in ['real', 'integer']:
+            raise ValueError(f"Variable '{x_var}' must be 'real' or 'integer' type for slice plot")
+
+        # Create range for x variable
+        x_min, x_max = x_var_def['min'], x_var_def['max']
+        x_values = np.linspace(x_min, x_max, n_points)
+
+        # Build acquisition evaluation grid
+        slice_data = {x_var: x_values}
+
+        for var in self.search_space.variables:
+            var_name = var['name']
+            if var_name == x_var:
+                continue
+
+            if var_name in fixed_values:
+                slice_data[var_name] = fixed_values[var_name]
+            else:
+                # Use default value
+                if var['type'] in ['real', 'integer']:
+                    slice_data[var_name] = (var['min'] + var['max']) / 2
+                elif var['type'] == 'categorical':
+                    slice_data[var_name] = var['values'][0]
+
+        # Create DataFrame with correct column order
+        if hasattr(self.model, 'original_feature_names') and self.model.original_feature_names:
+            column_order = self.model.original_feature_names
+        else:
+            column_order = self.search_space.get_variable_names()
+
+        slice_df = pd.DataFrame(slice_data, columns=column_order)
+
+        # Evaluate acquisition function
+        acq_values, _ = evaluate_acquisition(
+            self.model,
+            slice_df,
+            acq_func=acq_func,
+            acq_func_kwargs=acq_func_kwargs,
+            goal=goal
+        )
+
+        # Prepare experimental data for plotting; only x-positions are needed,
+        # since markers just show where experiments exist (no y-value)
+        exp_x = None
+        if show_experiments and len(self.experiment_manager.df) > 0:
+            df = self.experiment_manager.df
+
+            # Filter points that match the fixed values
+            mask = pd.Series([True] * len(df))
+            for var_name, fixed_val in fixed_values.items():
+                if var_name in df.columns:
+                    if isinstance(fixed_val, str):
+                        mask &= (df[var_name] == fixed_val)
+                    else:
+                        mask &= np.isclose(df[var_name], fixed_val, atol=1e-6)
+
+            if mask.any():
+                filtered_df = df[mask]
+                exp_x = filtered_df[x_var].values
+
+        # Prepare suggestion data
+        sugg_x = None
+        if show_suggestions and len(self.last_suggestions) > 0:
+            if isinstance(self.last_suggestions, pd.DataFrame):
+                sugg_df = self.last_suggestions
+            else:
+                sugg_df = pd.DataFrame(self.last_suggestions)
+
+            if x_var in sugg_df.columns:
+                sugg_x = sugg_df[x_var].values
+
+        # Generate title if not provided
+        if title is None:
+            acq_name = acq_func.upper()
+            if fixed_values:
+                fixed_str = ', '.join([f'{k}={v}' for k, v in fixed_values.items()])
+                title = f"Acquisition Function ({acq_name}): {x_var}\n({fixed_str})"
+            else:
+                title = f"Acquisition Function ({acq_name}): {x_var}"
+
+        # Use create_slice_plot but with acquisition values.
+        # Note: std is None since acquisition functions are deterministic.
+        fig, ax = create_slice_plot(
+            x_values=x_values,
+            predictions=acq_values,
+            x_var=x_var,
+            std=None,
+            sigma_bands=None,  # No uncertainty for acquisition
+            exp_x=exp_x,
+            exp_y=None,  # Don't show experiment y-values for acquisition
+            figsize=figsize,
+            dpi=dpi,
+            title=title,
+            prediction_label=acq_func.upper(),
+            line_color='darkgreen',
+            line_width=1.5
+        )
+
+        # Add green fill under the acquisition curve
+        ax.fill_between(x_values, 0, acq_values, alpha=0.3, color='green', zorder=0)
+
+        # Update y-label for acquisition
+        ax.set_ylabel(f'{acq_func.upper()} Value')
+
+        # Mark suggestions with star markers if present
+        if sugg_x is not None and len(sugg_x) > 0:
+            # Evaluate acquisition at suggested points
+            for i, sx in enumerate(sugg_x):
+                # Find the acquisition value at this x
+                idx = np.argmin(np.abs(x_values - sx))
+                sy = acq_values[idx]
+                label = 'Suggestion' if i == 0 else None  # Only label the first marker
+                ax.scatter([sx], [sy], color='black', s=102, marker='*', zorder=10, label=label)
+
+        logger.info(f"Generated acquisition slice plot for {x_var} using {acq_func}")
+        return fig
+
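For intuition about what the slice is plotting, one common closed form of Expected Improvement (for maximization) follows directly from the posterior mean and std; the package's `evaluate_acquisition` may differ in details such as the `xi` offset:

```python
import numpy as np
from scipy import stats

def expected_improvement(mu, sigma, best_f, xi=0.01):
    sigma = np.maximum(sigma, 1e-12)  # guard against division by zero
    z = (mu - best_f - xi) / sigma
    return (mu - best_f - xi) * stats.norm.cdf(z) + sigma * stats.norm.pdf(z)

mu = np.linspace(0.0, 1.0, 5)
sigma = np.full(5, 0.2)
print(expected_improvement(mu, sigma, best_f=0.8))  # grows with both mean and std
```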
+    def plot_acquisition_contour(
+        self,
+        x_var: str,
+        y_var: str,
+        acq_func: str = 'ei',
+        fixed_values: Optional[Dict[str, Any]] = None,
+        grid_resolution: int = 50,
+        acq_func_kwargs: Optional[Dict[str, Any]] = None,
+        goal: str = 'maximize',
+        show_experiments: bool = True,
+        show_suggestions: bool = True,
+        cmap: str = 'viridis',
+        use_log_scale: Optional[bool] = None,
+        figsize: Tuple[float, float] = (8, 6),
+        dpi: int = 100,
+        title: Optional[str] = None
+    ) -> Figure: # pyright: ignore[reportInvalidTypeForm]
+        """
+        Create 2D contour plot of the acquisition function over variable space.
+
+        Visualizes the acquisition function surface by varying two variables
+        while holding others constant. Shows "hot spots" where the algorithm
+        believes the next experiment should be conducted. Higher values indicate
+        more promising regions to explore.
+
+        Args:
+            x_var: Variable name for X axis (must be 'real' type)
+            y_var: Variable name for Y axis (must be 'real' type)
+            acq_func: Acquisition function name ('ei', 'pi', 'ucb', 'logei', 'logpi')
+            fixed_values: Dict of {var_name: value} for other variables.
+                If not provided, uses midpoint for real/integer,
+                first category for categorical.
+            grid_resolution: Grid density (NxN points)
+            acq_func_kwargs: Additional acquisition parameters (xi, kappa, beta)
+            goal: 'maximize' or 'minimize' - optimization direction
+            show_experiments: Plot experimental data points as scatter
+            show_suggestions: Plot last suggested points (if available)
+            cmap: Matplotlib colormap name (e.g., 'viridis', 'hot', 'plasma')
+            use_log_scale: Use logarithmic color scale (default: auto-enable for logei/logpi)
+            figsize: Figure size as (width, height) in inches
+            dpi: Dots per inch for figure resolution
+            title: Custom title (default: auto-generated)
+
+        Returns:
+            matplotlib Figure object
+
+        Example:
+            >>> # Visualize Expected Improvement surface
+            >>> fig = session.plot_acquisition_contour(
+            ...     'temperature', 'pressure',
+            ...     acq_func='ei'
+            ... )
+            >>> fig.savefig('acq_contour.png', dpi=300)
+
+            >>> # See UCB landscape with custom exploration
+            >>> fig = session.plot_acquisition_contour(
+            ...     'temperature', 'pressure',
+            ...     acq_func='ucb',
+            ...     acq_func_kwargs={'beta': 1.0},
+            ...     cmap='hot'
+            ... )
+
+        Note:
+            - Requires at least 2 'real' type variables
+            - Model must be trained before plotting
+            - Higher acquisition values = more promising regions
+            - Suggestions are overlaid to show why they were chosen
+        """
+        self._check_matplotlib()
+        self._check_model_trained()
+
+        from alchemist_core.utils.acquisition_utils import evaluate_acquisition
+        from alchemist_core.visualization.plots import create_contour_plot
+
+        if fixed_values is None:
+            fixed_values = {}
+
+        # Get variable names
+        var_names = self.search_space.get_variable_names()
+
+        # Validate variables exist
+        if x_var not in var_names:
+            raise ValueError(f"Variable '{x_var}' not in search space")
+        if y_var not in var_names:
+            raise ValueError(f"Variable '{y_var}' not in search space")
+
+        # Get variable info
+        x_var_info = next(v for v in self.search_space.variables if v['name'] == x_var)
+        y_var_info = next(v for v in self.search_space.variables if v['name'] == y_var)
+
+        if x_var_info['type'] != 'real':
+            raise ValueError(f"X variable '{x_var}' must be 'real' type, got '{x_var_info['type']}'")
+        if y_var_info['type'] != 'real':
+            raise ValueError(f"Y variable '{y_var}' must be 'real' type, got '{y_var_info['type']}'")
+
+        # Get bounds
+        x_bounds = (x_var_info['min'], x_var_info['max'])
+        y_bounds = (y_var_info['min'], y_var_info['max'])
+
+        # Create meshgrid
+        x = np.linspace(x_bounds[0], x_bounds[1], grid_resolution)
+        y = np.linspace(y_bounds[0], y_bounds[1], grid_resolution)
+        X_grid, Y_grid = np.meshgrid(x, y)
+
+        # Build acquisition evaluation grid
+        grid_data = {
+            x_var: X_grid.ravel(),
+            y_var: Y_grid.ravel()
+        }
+
+        # Add fixed values for other variables
+        for var in self.search_space.variables:
+            var_name = var['name']
+            if var_name in [x_var, y_var]:
+                continue
+
+            if var_name in fixed_values:
+                grid_data[var_name] = fixed_values[var_name]
+            else:
+                # Use default value
+                if var['type'] in ['real', 'integer']:
+                    grid_data[var_name] = (var['min'] + var['max']) / 2
+                elif var['type'] == 'categorical':
+                    grid_data[var_name] = var['values'][0]
+
+        # Create DataFrame with correct column order
+        if hasattr(self.model, 'original_feature_names') and self.model.original_feature_names:
+            column_order = self.model.original_feature_names
+        else:
+            column_order = self.search_space.get_variable_names()
+
+        grid_df = pd.DataFrame(grid_data, columns=column_order)
+
+        # Evaluate acquisition function
+        acq_values, _ = evaluate_acquisition(
+            self.model,
+            grid_df,
+            acq_func=acq_func,
+            acq_func_kwargs=acq_func_kwargs,
+            goal=goal
+        )
+
+        # Reshape to grid
+        acq_grid = acq_values.reshape(X_grid.shape)
+
+        # Prepare experimental data for overlay
+        exp_x = None
+        exp_y = None
+        if show_experiments and not self.experiment_manager.df.empty:
+            exp_df = self.experiment_manager.df
+            if x_var in exp_df.columns and y_var in exp_df.columns:
+                exp_x = exp_df[x_var].values
+                exp_y = exp_df[y_var].values
+
+        # Prepare suggestion data for overlay
+        sugg_x = None
+        sugg_y = None
+        if show_suggestions and len(self.last_suggestions) > 0:
+            if isinstance(self.last_suggestions, pd.DataFrame):
+                sugg_df = self.last_suggestions
+            else:
+                sugg_df = pd.DataFrame(self.last_suggestions)
+
+            if x_var in sugg_df.columns and y_var in sugg_df.columns:
+                sugg_x = sugg_df[x_var].values
+                sugg_y = sugg_df[y_var].values
+
+        # Auto-enable log scale for logei/logpi if not explicitly set
+        if use_log_scale is None:
+            use_log_scale = acq_func.lower() in ['logei', 'logpi']
+
+        # Generate title if not provided
+        if title is None:
+            acq_name = acq_func.upper()
+            title = f"Acquisition Function ({acq_name}): {x_var} vs {y_var}"
+
+        # Delegate to visualization module
+        fig, ax, cbar = create_contour_plot(
+            x_grid=X_grid,
+            y_grid=Y_grid,
+            predictions_grid=acq_grid,
+            x_var=x_var,
+            y_var=y_var,
+            exp_x=exp_x,
+            exp_y=exp_y,
+            suggest_x=sugg_x,
+            suggest_y=sugg_y,
+            cmap=cmap,  # honor the caller's colormap (default 'viridis')
+            use_log_scale=use_log_scale,
+            figsize=figsize,
+            dpi=dpi,
+            title=title
+        )
+
+        # Update colorbar label for acquisition
+        cbar.set_label(f'{acq_func.upper()} Value', rotation=270, labelpad=20)
+
+        logger.info(f"Generated acquisition contour plot for {x_var} vs {y_var} using {acq_func}")
+        return fig
+
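The `beta` knob in the UCB example above trades the posterior mean off against uncertainty; larger values favor exploration. A sketch of one common parameterization (the package's exact form may differ):

```python
import numpy as np

def ucb(mu: np.ndarray, sigma: np.ndarray, beta: float = 1.0) -> np.ndarray:
    return mu + np.sqrt(beta) * sigma  # one common UCB parameterization

mu = np.array([0.7, 0.5])
sigma = np.array([0.05, 0.30])
print(ucb(mu, sigma, beta=0.5))  # exploit: the high-mean point wins
print(ucb(mu, sigma, beta=4.0))  # explore: the high-sigma point wins
```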
+    def plot_uncertainty_contour(
+        self,
+        x_var: str,
+        y_var: str,
+        fixed_values: Optional[Dict[str, Any]] = None,
+        grid_resolution: int = 50,
+        show_experiments: bool = True,
+        show_suggestions: bool = False,
+        cmap: str = 'Reds',
+        figsize: Tuple[float, float] = (8, 6),
+        dpi: int = 100,
+        title: Optional[str] = None
+    ) -> Figure: # pyright: ignore[reportInvalidTypeForm]
+        """
+        Create 2D contour plot of posterior uncertainty over a variable space.
+
+        Visualizes where the model is most uncertain about predictions, showing
+        regions that may benefit from additional sampling. Higher values indicate
+        greater uncertainty (standard deviation).
+
+        Args:
+            x_var: Variable name for X axis (must be 'real' type)
+            y_var: Variable name for Y axis (must be 'real' type)
+            fixed_values: Dict of {var_name: value} for other variables.
+                If not provided, uses midpoint for real/integer,
+                first category for categorical.
+            grid_resolution: Grid density (NxN points)
+            show_experiments: Plot experimental data points as scatter
+            show_suggestions: Plot last suggested points (if available)
+            cmap: Matplotlib colormap name (default: 'Reds' - darker = more uncertain)
+            figsize: Figure size as (width, height) in inches
+            dpi: Dots per inch for figure resolution
+            title: Custom title (default: auto-generated)
+
+        Returns:
+            matplotlib Figure object
+
+        Example:
+            >>> # Visualize uncertainty landscape
+            >>> fig = session.plot_uncertainty_contour('temperature', 'pressure')
+
+            >>> # Custom colormap
+            >>> fig = session.plot_uncertainty_contour(
+            ...     'temperature', 'pressure',
+            ...     cmap='YlOrRd',
+            ...     grid_resolution=100
+            ... )
+            >>> fig.savefig('uncertainty_contour.png', dpi=300)
+
+        Note:
+            - Requires at least 2 'real' type variables
+            - Model must be trained and support std predictions
+            - High uncertainty near data gaps is expected
+            - Useful for planning exploration strategies
+        """
+        self._check_matplotlib()
+        self._check_model_trained()
+
+        from alchemist_core.visualization.plots import create_uncertainty_contour_plot
+
+        if fixed_values is None:
+            fixed_values = {}
+
+        # Get variable names
+        var_names = self.search_space.get_variable_names()
+
+        # Validate variables exist
+        if x_var not in var_names:
+            raise ValueError(f"Variable '{x_var}' not in search space")
+        if y_var not in var_names:
+            raise ValueError(f"Variable '{y_var}' not in search space")
+
+        # Get variable info
+        x_var_info = next(v for v in self.search_space.variables if v['name'] == x_var)
+        y_var_info = next(v for v in self.search_space.variables if v['name'] == y_var)
+
+        if x_var_info['type'] != 'real':
+            raise ValueError(f"X variable '{x_var}' must be 'real' type, got '{x_var_info['type']}'")
+        if y_var_info['type'] != 'real':
+            raise ValueError(f"Y variable '{y_var}' must be 'real' type, got '{y_var_info['type']}'")
+
+        # Get bounds
+        x_bounds = (x_var_info['min'], x_var_info['max'])
+        y_bounds = (y_var_info['min'], y_var_info['max'])
+
+        # Create meshgrid
+        x = np.linspace(x_bounds[0], x_bounds[1], grid_resolution)
+        y = np.linspace(y_bounds[0], y_bounds[1], grid_resolution)
+        X_grid, Y_grid = np.meshgrid(x, y)
+
+        # Build prediction grid
+        grid_data = {
+            x_var: X_grid.ravel(),
+            y_var: Y_grid.ravel()
+        }
+
+        # Add fixed values for other variables
+        for var in self.search_space.variables:
+            var_name = var['name']
+            if var_name in [x_var, y_var]:
+                continue
+
+            if var_name in fixed_values:
+                grid_data[var_name] = fixed_values[var_name]
+            else:
+                # Use default value
+                if var['type'] in ['real', 'integer']:
+                    grid_data[var_name] = (var['min'] + var['max']) / 2
+                elif var['type'] == 'categorical':
+                    grid_data[var_name] = var['values'][0]
+
+        # Create DataFrame with correct column order
+        if hasattr(self.model, 'original_feature_names') and self.model.original_feature_names:
+            column_order = self.model.original_feature_names
+        else:
+            column_order = self.search_space.get_variable_names()
+
+        grid_df = pd.DataFrame(grid_data, columns=column_order)
+
+        # Get predictions with uncertainty
+        _, std = self.predict(grid_df)
+
+        # Reshape to grid
+        uncertainty_grid = std.reshape(X_grid.shape)
+
+        # Prepare experimental data for overlay
+        exp_x = None
+        exp_y = None
+        if show_experiments and not self.experiment_manager.df.empty:
+            exp_df = self.experiment_manager.df
+            if x_var in exp_df.columns and y_var in exp_df.columns:
+                exp_x = exp_df[x_var].values
+                exp_y = exp_df[y_var].values
+
+        # Prepare suggestion data for overlay
+        sugg_x = None
+        sugg_y = None
+        if show_suggestions and len(self.last_suggestions) > 0:
+            if isinstance(self.last_suggestions, pd.DataFrame):
+                sugg_df = self.last_suggestions
+            else:
+                sugg_df = pd.DataFrame(self.last_suggestions)
+
+            if x_var in sugg_df.columns and y_var in sugg_df.columns:
+                sugg_x = sugg_df[x_var].values
+                sugg_y = sugg_df[y_var].values
+
+        # Generate title if not provided
+        if title is None:
+            title = f"Posterior Uncertainty: {x_var} vs {y_var}"
+
+        # Delegate to visualization module
+        fig, ax, cbar = create_uncertainty_contour_plot(
+            x_grid=X_grid,
+            y_grid=Y_grid,
+            uncertainty_grid=uncertainty_grid,
+            x_var=x_var,
+            y_var=y_var,
+            exp_x=exp_x,
+            exp_y=exp_y,
+            suggest_x=sugg_x,
+            suggest_y=sugg_y,
+            cmap=cmap,
+            figsize=figsize,
+            dpi=dpi,
+            title=title
+        )
+
+        logger.info(f"Generated uncertainty contour plot for {x_var} vs {y_var}")
+        return fig
+
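A common follow-up to an uncertainty map is pure exploration: sample next where the posterior std is largest. A sketch of recovering that grid point with `np.unravel_index` (note the axis order of the default `meshgrid` indexing used above):

```python
import numpy as np

# Stand-in for std.reshape(X_grid.shape) computed above
uncertainty_grid = np.random.default_rng(3).random((50, 50))

i, j = np.unravel_index(np.argmax(uncertainty_grid), uncertainty_grid.shape)
x = np.linspace(0.0, 1.0, 50)
y = np.linspace(0.0, 1.0, 50)
# With default meshgrid indexing ('xy'), rows follow y and columns follow x
most_uncertain_point = (x[j], y[i])
```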
+    def plot_uncertainty_voxel(
+        self,
+        x_var: str,
+        y_var: str,
+        z_var: str,
+        fixed_values: Optional[Dict[str, Any]] = None,
+        grid_resolution: int = 15,
+        show_experiments: bool = True,
+        show_suggestions: bool = False,
+        cmap: str = 'Reds',
+        alpha: float = 0.5,
+        figsize: Tuple[float, float] = (10, 8),
+        dpi: int = 100,
+        title: Optional[str] = None
+    ) -> Figure: # pyright: ignore[reportInvalidTypeForm]
+        """
+        Create 3D voxel plot of posterior uncertainty over variable space.
+
+        Visualizes where the model is most uncertain in 3D, helping identify
+        under-explored regions that may benefit from additional sampling.
+        Higher values indicate greater uncertainty (standard deviation).
+
+        Args:
+            x_var: Variable name for X axis (must be 'real' or 'integer' type)
+            y_var: Variable name for Y axis (must be 'real' or 'integer' type)
+            z_var: Variable name for Z axis (must be 'real' or 'integer' type)
+            fixed_values: Dict of {var_name: value} for other variables.
+                If not provided, uses midpoint for real/integer,
+                first category for categorical.
+            grid_resolution: Grid density (NxNxN points, default: 15)
+            show_experiments: Plot experimental data points as scatter
+            show_suggestions: Plot last suggested points (if available)
+            cmap: Matplotlib colormap name (default: 'Reds')
+            alpha: Transparency level (0=transparent, 1=opaque)
+            figsize: Figure size as (width, height) in inches
+            dpi: Dots per inch for figure resolution
+            title: Custom title (default: auto-generated)
+
+        Returns:
+            matplotlib Figure object with 3D axes
+
+        Example:
+            >>> # Visualize uncertainty in 3D
+            >>> fig = session.plot_uncertainty_voxel('temperature', 'pressure', 'flow_rate')
+
+            >>> # With transparency to see interior
+            >>> fig = session.plot_uncertainty_voxel(
+            ...     'temperature', 'pressure', 'flow_rate',
+            ...     alpha=0.3,
+            ...     grid_resolution=20
+            ... )
+            >>> fig.savefig('uncertainty_voxel.png', dpi=150)
+
+        Raises:
+            ValueError: If search space doesn't have at least 3 continuous variables
+
+        Note:
+            - Requires at least 3 'real' or 'integer' type variables
+            - Model must be trained and support std predictions
+            - Computationally expensive: O(N³) evaluations
+            - Useful for planning exploration in 3D space
+        """
+        self._check_matplotlib()
+        self._check_model_trained()
+
+        from alchemist_core.visualization.plots import create_uncertainty_voxel_plot
+
+        if fixed_values is None:
+            fixed_values = {}
+
+        # Get all variable names
+        var_names = self.search_space.get_variable_names()
+
+        # Validate that the requested variables exist and are continuous
+        for var_name, var_label in [(x_var, 'X'), (y_var, 'Y'), (z_var, 'Z')]:
+            if var_name not in var_names:
+                raise ValueError(f"{var_label} variable '{var_name}' not in search space")
+
+            var_def = next(v for v in self.search_space.variables if v['name'] == var_name)
+            if var_def['type'] not in ['real', 'integer']:
+                raise ValueError(
+                    f"{var_label} variable '{var_name}' must be 'real' or 'integer' type for voxel plot, "
+                    f"got '{var_def['type']}'"
+                )
+
+        # Get variable definitions
+        x_var_def = next(v for v in self.search_space.variables if v['name'] == x_var)
+        y_var_def = next(v for v in self.search_space.variables if v['name'] == y_var)
+        z_var_def = next(v for v in self.search_space.variables if v['name'] == z_var)
+
+        # Get bounds
+        x_bounds = (x_var_def['min'], x_var_def['max'])
+        y_bounds = (y_var_def['min'], y_var_def['max'])
+        z_bounds = (z_var_def['min'], z_var_def['max'])
+
+        # Create 3D meshgrid
+        x = np.linspace(x_bounds[0], x_bounds[1], grid_resolution)
+        y = np.linspace(y_bounds[0], y_bounds[1], grid_resolution)
+        z = np.linspace(z_bounds[0], z_bounds[1], grid_resolution)
+        X_grid, Y_grid, Z_grid = np.meshgrid(x, y, z, indexing='ij')
+
+        # Build prediction grid
+        grid_data = {
+            x_var: X_grid.ravel(),
+            y_var: Y_grid.ravel(),
+            z_var: Z_grid.ravel()
+        }
+
+        # Add fixed values for other variables
+        for var in self.search_space.variables:
+            var_name = var['name']
+            if var_name in [x_var, y_var, z_var]:
+                continue
+
+            if var_name in fixed_values:
+                grid_data[var_name] = fixed_values[var_name]
+            else:
+                # Use default value
+                if var['type'] in ['real', 'integer']:
+                    grid_data[var_name] = (var['min'] + var['max']) / 2
+                elif var['type'] == 'categorical':
+                    grid_data[var_name] = var['values'][0]
+
+        # Create DataFrame with correct column order
+        if hasattr(self.model, 'original_feature_names') and self.model.original_feature_names:
+            column_order = self.model.original_feature_names
+        else:
+            column_order = self.search_space.get_variable_names()
+
+        grid_df = pd.DataFrame(grid_data, columns=column_order)
+
+        # Get predictions with uncertainty
+        _, std = self.predict(grid_df)
+
+        # Reshape to 3D grid
+        uncertainty_grid = std.reshape(X_grid.shape)
+
+        # Prepare experimental data for overlay
+        exp_x = None
+        exp_y = None
+        exp_z = None
+        if show_experiments and not self.experiment_manager.df.empty:
+            exp_df = self.experiment_manager.df
+            if x_var in exp_df.columns and y_var in exp_df.columns and z_var in exp_df.columns:
+                exp_x = exp_df[x_var].values
+                exp_y = exp_df[y_var].values
+                exp_z = exp_df[z_var].values
+
+        # Prepare suggestion data for overlay
+        sugg_x = None
+        sugg_y = None
+        sugg_z = None
+        if show_suggestions and len(self.last_suggestions) > 0:
+            if isinstance(self.last_suggestions, pd.DataFrame):
+                sugg_df = self.last_suggestions
+            else:
+                sugg_df = pd.DataFrame(self.last_suggestions)
+
+            if x_var in sugg_df.columns and y_var in sugg_df.columns and z_var in sugg_df.columns:
+                sugg_x = sugg_df[x_var].values
+                sugg_y = sugg_df[y_var].values
+                sugg_z = sugg_df[z_var].values
+
+        # Generate title if not provided
+        if title is None:
+            title = f"3D Posterior Uncertainty: {x_var} vs {y_var} vs {z_var}"
+
+        # Delegate to visualization module
+        fig, ax = create_uncertainty_voxel_plot(
+            x_grid=X_grid,
+            y_grid=Y_grid,
+            z_grid=Z_grid,
+            uncertainty_grid=uncertainty_grid,
+            x_var=x_var,
+            y_var=y_var,
+            z_var=z_var,
+            exp_x=exp_x,
+            exp_y=exp_y,
+            exp_z=exp_z,
+            suggest_x=sugg_x,
+            suggest_y=sugg_y,
+            suggest_z=sugg_z,
+            cmap=cmap,
+            alpha=alpha,
+            figsize=figsize,
+            dpi=dpi,
+            title=title
+        )
+
+        logger.info(f"Generated 3D uncertainty voxel plot for {x_var} vs {y_var} vs {z_var}")
+        return fig
+
+    def plot_acquisition_voxel(
+        self,
+        x_var: str,
+        y_var: str,
+        z_var: str,
+        acq_func: str = 'ei',
+        fixed_values: Optional[Dict[str, Any]] = None,
+        grid_resolution: int = 15,
+        acq_func_kwargs: Optional[Dict[str, Any]] = None,
+        goal: str = 'maximize',
+        show_experiments: bool = True,
+        show_suggestions: bool = True,
+        cmap: str = 'hot',
+        alpha: float = 0.5,
+        use_log_scale: Optional[bool] = None,
+        figsize: Tuple[float, float] = (10, 8),
+        dpi: int = 100,
+        title: Optional[str] = None
+    ) -> Figure:  # pyright: ignore[reportInvalidTypeForm]
+        """
+        Create 3D voxel plot of acquisition function over variable space.
+
+        Visualizes the acquisition function in 3D, showing "hot spots" where
+        the optimization algorithm believes the next experiment should be conducted.
+        Higher values indicate more promising regions.
+
+        Args:
+            x_var: Variable name for X axis (must be 'real' or 'integer' type)
+            y_var: Variable name for Y axis (must be 'real' or 'integer' type)
+            z_var: Variable name for Z axis (must be 'real' or 'integer' type)
+            acq_func: Acquisition function name ('ei', 'pi', 'ucb', 'logei', 'logpi')
+            fixed_values: Dict of {var_name: value} for other variables.
+                If not provided, uses midpoint for real/integer,
+                first category for categorical.
+            grid_resolution: Grid density (NxNxN points, default: 15)
+            acq_func_kwargs: Additional acquisition parameters (xi, kappa, beta)
+            goal: 'maximize' or 'minimize' - optimization direction
+            show_experiments: Plot experimental data points as scatter
+            show_suggestions: Plot last suggested points (if available)
+            cmap: Matplotlib colormap name (default: 'hot')
+            alpha: Transparency level (0=transparent, 1=opaque)
+            use_log_scale: Use logarithmic color scale (default: auto for logei/logpi)
+            figsize: Figure size as (width, height) in inches
+            dpi: Dots per inch for figure resolution
+            title: Custom title (default: auto-generated)
+
+        Returns:
+            matplotlib Figure object with 3D axes
+
+        Example:
+            >>> # Visualize Expected Improvement in 3D
+            >>> fig = session.plot_acquisition_voxel(
+            ...     'temperature', 'pressure', 'flow_rate',
+            ...     acq_func='ei'
+            ... )
+
+            >>> # UCB with custom exploration
+            >>> fig = session.plot_acquisition_voxel(
+            ...     'temperature', 'pressure', 'flow_rate',
+            ...     acq_func='ucb',
+            ...     acq_func_kwargs={'beta': 1.0},
+            ...     alpha=0.3
+            ... )
+            >>> fig.savefig('acq_voxel.png', dpi=150)
+
+        Raises:
+            ValueError: If x_var, y_var, or z_var is missing from the search space
+                or is not a continuous ('real'/'integer') variable
+
+        Note:
+            - Requires at least 3 'real' or 'integer' type variables
+            - Model must be trained before plotting
+            - Computationally expensive: O(N³) evaluations
+            - Higher values = more promising for next experiment
+            - Suggestions should align with high-value regions
+        """
+        self._check_matplotlib()
+        self._check_model_trained()
+
+        from alchemist_core.utils.acquisition_utils import evaluate_acquisition
+        from alchemist_core.visualization.plots import create_acquisition_voxel_plot
+
+        if fixed_values is None:
+            fixed_values = {}
+
+        # Get all variable names
+        var_names = self.search_space.get_variable_names()
+
+        # Validate that the requested variables exist and are continuous
+        for var_name, var_label in [(x_var, 'X'), (y_var, 'Y'), (z_var, 'Z')]:
+            if var_name not in var_names:
+                raise ValueError(f"{var_label} variable '{var_name}' not in search space")
+
+            var_def = next(v for v in self.search_space.variables if v['name'] == var_name)
+            if var_def['type'] not in ['real', 'integer']:
+                raise ValueError(
+                    f"{var_label} variable '{var_name}' must be 'real' or 'integer' type for voxel plot, "
+                    f"got '{var_def['type']}'"
+                )
+
+        # Get variable definitions
+        x_var_def = next(v for v in self.search_space.variables if v['name'] == x_var)
+        y_var_def = next(v for v in self.search_space.variables if v['name'] == y_var)
+        z_var_def = next(v for v in self.search_space.variables if v['name'] == z_var)
+
+        # Get bounds
+        x_bounds = (x_var_def['min'], x_var_def['max'])
+        y_bounds = (y_var_def['min'], y_var_def['max'])
+        z_bounds = (z_var_def['min'], z_var_def['max'])
+
+        # Create 3D meshgrid
+        x = np.linspace(x_bounds[0], x_bounds[1], grid_resolution)
+        y = np.linspace(y_bounds[0], y_bounds[1], grid_resolution)
+        z = np.linspace(z_bounds[0], z_bounds[1], grid_resolution)
+        X_grid, Y_grid, Z_grid = np.meshgrid(x, y, z, indexing='ij')
+
+        # Build acquisition evaluation grid
+        grid_data = {
+            x_var: X_grid.ravel(),
+            y_var: Y_grid.ravel(),
+            z_var: Z_grid.ravel()
+        }
+
+        # Add fixed values for other variables
+        for var in self.search_space.variables:
+            var_name = var['name']
+            if var_name in [x_var, y_var, z_var]:
+                continue
+
+            if var_name in fixed_values:
+                grid_data[var_name] = fixed_values[var_name]
+            else:
+                # Use default value
+                if var['type'] in ['real', 'integer']:
+                    grid_data[var_name] = (var['min'] + var['max']) / 2
+                elif var['type'] == 'categorical':
+                    grid_data[var_name] = var['values'][0]
+
+        # Create DataFrame with correct column order
+        if hasattr(self.model, 'original_feature_names') and self.model.original_feature_names:
+            column_order = self.model.original_feature_names
+        else:
+            column_order = self.search_space.get_variable_names()
+
+        grid_df = pd.DataFrame(grid_data, columns=column_order)
+
+        # Evaluate acquisition function
+        acq_values, _ = evaluate_acquisition(
+            self.model,
+            grid_df,
+            acq_func=acq_func,
+            acq_func_kwargs=acq_func_kwargs,
+            goal=goal
+        )
+
+        # Reshape to 3D grid
+        acquisition_grid = acq_values.reshape(X_grid.shape)
+
+        # Prepare experimental data for overlay
+        exp_x = None
+        exp_y = None
+        exp_z = None
+        if show_experiments and not self.experiment_manager.df.empty:
+            exp_df = self.experiment_manager.df
+            if x_var in exp_df.columns and y_var in exp_df.columns and z_var in exp_df.columns:
+                exp_x = exp_df[x_var].values
+                exp_y = exp_df[y_var].values
+                exp_z = exp_df[z_var].values
+
+        # Prepare suggestion data for overlay
+        sugg_x = None
+        sugg_y = None
+        sugg_z = None
+        if show_suggestions and len(self.last_suggestions) > 0:
+            if isinstance(self.last_suggestions, pd.DataFrame):
+                sugg_df = self.last_suggestions
+            else:
+                sugg_df = pd.DataFrame(self.last_suggestions)
+
+            if x_var in sugg_df.columns and y_var in sugg_df.columns and z_var in sugg_df.columns:
+                sugg_x = sugg_df[x_var].values
+                sugg_y = sugg_df[y_var].values
+                sugg_z = sugg_df[z_var].values
+
+        # Auto-enable log scale for logei/logpi if not explicitly set
+        if use_log_scale is None:
+            use_log_scale = acq_func.lower() in ['logei', 'logpi']
+
+        # Generate title if not provided
+        if title is None:
+            acq_name = acq_func.upper()
+            title = f"3D Acquisition Function ({acq_name}): {x_var} vs {y_var} vs {z_var}"
+
+        # Delegate to visualization module
+        fig, ax = create_acquisition_voxel_plot(
+            x_grid=X_grid,
+            y_grid=Y_grid,
+            z_grid=Z_grid,
+            acquisition_grid=acquisition_grid,
+            x_var=x_var,
+            y_var=y_var,
+            z_var=z_var,
+            exp_x=exp_x,
+            exp_y=exp_y,
+            exp_z=exp_z,
+            suggest_x=sugg_x,
+            suggest_y=sugg_y,
+            suggest_z=sugg_z,
+            cmap=cmap,
+            alpha=alpha,
+            use_log_scale=use_log_scale,
+            figsize=figsize,
+            dpi=dpi,
+            title=title
+        )
+
+        logger.info(f"Generated 3D acquisition voxel plot for {x_var} vs {y_var} vs {z_var} using {acq_func}")
+        return fig
+
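The `evaluate_acquisition` helper from `alchemist_core.utils.acquisition_utils` can also be driven directly when a custom view is needed. A sketch, assuming a session with a trained model and the `(model, grid_df, acq_func=..., acq_func_kwargs=..., goal=...)` call pattern used above, with the second return value ignored as in the method body; the variable names are illustrative:

```python
import numpy as np
import pandas as pd
from alchemist_core.utils.acquisition_utils import evaluate_acquisition

# Assumes `session` is an OptimizationSession with a trained model, and that
# the DataFrame columns match the model's training column order.
grid_df = pd.DataFrame({
    'temperature': np.linspace(20, 100, 50),  # hypothetical swept variable
    'pressure': 3.0,                          # pinned at a fixed value
})

acq_values, _ = evaluate_acquisition(
    session.model,
    grid_df,
    acq_func='ucb',
    acq_func_kwargs={'beta': 2.0},  # more exploration than the default
    goal='maximize',
)
best_row = int(np.argmax(acq_values))
print(f"max UCB on slice: {acq_values.max():.4f} at row {best_row}")
```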
+    def plot_suggested_next(
+        self,
+        x_var: str,
+        y_var: Optional[str] = None,
+        z_var: Optional[str] = None,
+        acq_func: Optional[str] = None,
+        fixed_values: Optional[Dict[str, Any]] = None,
+        suggestion_index: int = 0,
+        n_points: int = 100,
+        grid_resolution: int = 50,
+        show_uncertainty: Optional[Union[bool, List[float]]] = [1.0, 2.0],
+        show_experiments: bool = True,
+        acq_func_kwargs: Optional[Dict[str, Any]] = None,
+        goal: Optional[str] = None,
+        figsize: Tuple[float, float] = (10, 12),
+        dpi: int = 100,
+        title_prefix: Optional[str] = None
+    ) -> Figure:  # pyright: ignore[reportInvalidTypeForm]
+        """
+        Create visualization of suggested next experiment with posterior and acquisition.
+
+        This creates a stacked subplot showing:
+        - Top: Posterior mean prediction (slice/contour/voxel)
+        - Bottom: Acquisition function with suggested point marked
+
+        The fixed values for non-varying dimensions are automatically extracted from
+        the suggested point coordinates, making it easy to visualize why that point
+        was chosen.
+
+        Args:
+            x_var: Variable name for X axis (required)
+            y_var: Variable name for Y axis (optional, creates 2D plot if provided)
+            z_var: Variable name for Z axis (optional, creates 3D plot if provided with y_var)
+            acq_func: Acquisition function used (if None, extracts from last run or defaults to 'ei')
+            fixed_values: Override automatic fixed values from suggestion (optional)
+            suggestion_index: Which suggestion to visualize if multiple
+                (default: 0 = first point in the most recent batch)
+            n_points: Points to evaluate for 1D slice (default: 100)
+            grid_resolution: Grid density for 2D/3D plots (default: 50)
+            show_uncertainty: For posterior plot - True, False, or list of sigma values (e.g., [1.0, 2.0])
+            show_experiments: Overlay experimental data points
+            acq_func_kwargs: Additional acquisition parameters (xi, kappa, beta)
+            goal: 'maximize' or 'minimize' (if None, uses session's last goal)
+            figsize: Figure size as (width, height) in inches
+            dpi: Dots per inch
+            title_prefix: Custom prefix for titles (default: auto-generated)
+
+        Returns:
+            matplotlib Figure object with 2 subplots
+
+        Example:
+            >>> # After running suggest_next()
+            >>> session.suggest_next(strategy='ei')
+            >>>
+            >>> # Visualize the suggestion in 1D
+            >>> fig = session.plot_suggested_next('temperature')
+            >>>
+            >>> # Visualize in 2D
+            >>> fig = session.plot_suggested_next('temperature', 'pressure')
+            >>>
+            >>> # Visualize in 3D
+            >>> fig = session.plot_suggested_next('temperature', 'pressure', 'time')
+            >>> fig.savefig('suggestion_3d.png', dpi=300)
+
+        Note:
+            - Must call suggest_next() before using this function
+            - Automatically extracts fixed values from the suggested point
+            - Creates intuitive visualization showing why the point was chosen
+        """
+        self._check_matplotlib()
+        self._check_model_trained()
+
+        # Check if we have suggestions (avoid truthiness check, which raises for DataFrames)
+        if self.last_suggestions is None or len(self.last_suggestions) == 0:
+            raise ValueError("No suggestions available. Call suggest_next() first.")
+
+        # Get the suggestion to visualize
+        if isinstance(self.last_suggestions, pd.DataFrame):
+            sugg_df = self.last_suggestions
+        else:
+            sugg_df = pd.DataFrame(self.last_suggestions)
+
+        if suggestion_index >= len(sugg_df):
+            raise ValueError(f"Suggestion index {suggestion_index} out of range (have {len(sugg_df)} suggestions)")
+
+        suggestion = sugg_df.iloc[suggestion_index].to_dict()
+
+        # Determine plot dimensionality
+        if z_var is not None and y_var is None:
+            raise ValueError("Must provide y_var if z_var is specified")
+
+        is_1d = (y_var is None)
+        is_2d = (y_var is not None and z_var is None)
+        is_3d = (z_var is not None)
+
+        # Cap 3D resolution to prevent kernel crashes
+        if is_3d and grid_resolution > 30:
+            logger.warning(f"3D voxel resolution capped at 30 (requested {grid_resolution})")
+            grid_resolution = 30
+
+        # Get variable names for the plot
+        plot_vars = [x_var]
+        if y_var is not None:
+            plot_vars.append(y_var)
+        if z_var is not None:
+            plot_vars.append(z_var)
+
+        # Extract fixed values from suggestion (for non-varying dimensions)
+        if fixed_values is None:
+            fixed_values = {}
+            for var_name in self.search_space.get_variable_names():
+                if var_name not in plot_vars and var_name in suggestion:
+                    fixed_values[var_name] = suggestion[var_name]
+
+        # Get acquisition function and goal from last run if not specified
+        if acq_func is None:
+            # Try to get from last acquisition run
+            if hasattr(self, '_last_acq_func'):
+                acq_func = self._last_acq_func
+            else:
+                acq_func = 'ei'  # Default fallback
+
+        if goal is None:
+            if hasattr(self, '_last_goal'):
+                goal = self._last_goal
+            else:
+                goal = 'maximize'  # Default fallback
+
+        # Create figure with 2 subplots (stacked vertically)
+        fig, (ax1, ax2) = plt.subplots(2, 1, figsize=figsize, dpi=dpi)
+
+        # Generate titles
+        if title_prefix is None:
+            title_prefix = "Suggested Next Experiment"
+
+        # Format fixed values with smart rounding (2 decimals for floats, no .00 for integers)
+        def format_value(v):
+            if isinstance(v, float):
+                # Round to 2 decimals, but strip trailing zeros
+                rounded = round(v, 2)
+                # Check if it's effectively an integer
+                if rounded == int(rounded):
+                    return str(int(rounded))
+                return f"{rounded:.2f}".rstrip('0').rstrip('.')
+            return str(v)
+
+        fixed_str = ', '.join([f'{k}={format_value(v)}' for k, v in fixed_values.items()])
+
+        # Plot 1: Posterior Mean
+        if is_1d:
+            # 1D slice plot
+            x_var_def = next(v for v in self.search_space.variables if v['name'] == x_var)
+            x_values = np.linspace(x_var_def['min'], x_var_def['max'], n_points)
+
+            # Build grid
+            grid_data = {x_var: x_values}
+
+            for var in self.search_space.variables:
+                var_name = var['name']
+                if var_name == x_var:
+                    continue
+
+                if var_name in fixed_values:
+                    grid_data[var_name] = fixed_values[var_name]
+                else:
+                    if var['type'] in ['real', 'integer']:
+                        grid_data[var_name] = (var['min'] + var['max']) / 2
+                    elif var['type'] == 'categorical':
+                        grid_data[var_name] = var['values'][0]
+
+            # Create DataFrame with correct column order
+            if hasattr(self.model, 'original_feature_names') and self.model.original_feature_names:
+                column_order = self.model.original_feature_names
+            else:
+                column_order = self.search_space.get_variable_names()
+
+            grid_df = pd.DataFrame(grid_data, columns=column_order)
+
+            # Get predictions
+            predictions, std = self.predict(grid_df)
+
+            # Prepare experiment overlay
+            exp_x, exp_y = None, None
+            if show_experiments and not self.experiment_manager.df.empty:
+                df = self.experiment_manager.df
+                mask = pd.Series(True, index=df.index)  # align with df's index
+                for var_name, fixed_val in fixed_values.items():
+                    if var_name in df.columns:
+                        if isinstance(fixed_val, str):
+                            mask &= (df[var_name] == fixed_val)
+                        else:
+                            mask &= np.isclose(df[var_name], fixed_val, atol=1e-6)
+                if mask.any():
+                    filtered_df = df[mask]
+                    exp_x = filtered_df[x_var].values
+                    exp_y = filtered_df[self.experiment_manager.target_columns[0]].values
+
+            # Determine sigma bands
+            sigma_bands = None
+            if show_uncertainty is not None:
+                if isinstance(show_uncertainty, bool):
+                    sigma_bands = [1.0, 2.0] if show_uncertainty else None
+                else:
+                    sigma_bands = show_uncertainty
+
+            from alchemist_core.visualization.plots import create_slice_plot
+            create_slice_plot(
+                x_values=x_values,
+                predictions=predictions,
+                x_var=x_var,
+                std=std,
+                sigma_bands=sigma_bands,
+                exp_x=exp_x,
+                exp_y=exp_y,
+                title=f"{title_prefix} - Posterior Mean\n({fixed_str})" if fixed_str else f"{title_prefix} - Posterior Mean",
+                ax=ax1
+            )
+
+            # Mark the suggested point on posterior plot
+            sugg_x = suggestion[x_var]
+            sugg_y_pred, _ = self.predict(pd.DataFrame([suggestion]))
+            ax1.scatter([sugg_x], sugg_y_pred, color='black', s=102, marker='*', zorder=10,
+                        linewidths=1.5, label='Suggested')
+            ax1.legend()
+
+        elif is_2d:
+            # 2D contour plot
+            x_var_def = next(v for v in self.search_space.variables if v['name'] == x_var)
+            y_var_def = next(v for v in self.search_space.variables if v['name'] == y_var)
+
+            x_values = np.linspace(x_var_def['min'], x_var_def['max'], grid_resolution)
+            y_values = np.linspace(y_var_def['min'], y_var_def['max'], grid_resolution)
+            X_grid, Y_grid = np.meshgrid(x_values, y_values)
+
+            grid_data = {
+                x_var: X_grid.ravel(),
+                y_var: Y_grid.ravel()
+            }
+
+            for var in self.search_space.variables:
+                var_name = var['name']
+                if var_name in [x_var, y_var]:
+                    continue
+
+                if var_name in fixed_values:
+                    grid_data[var_name] = fixed_values[var_name]
+                else:
+                    if var['type'] in ['real', 'integer']:
+                        grid_data[var_name] = (var['min'] + var['max']) / 2
+                    elif var['type'] == 'categorical':
+                        grid_data[var_name] = var['values'][0]
+
+            if hasattr(self.model, 'original_feature_names') and self.model.original_feature_names:
+                column_order = self.model.original_feature_names
+            else:
+                column_order = self.search_space.get_variable_names()
+
+            grid_df = pd.DataFrame(grid_data, columns=column_order)
+
+            predictions, _ = self.predict(grid_df)
+            prediction_grid = predictions.reshape(X_grid.shape)
+
+            # Prepare overlays
+            exp_x, exp_y = None, None
+            if show_experiments and not self.experiment_manager.df.empty:
+                exp_df = self.experiment_manager.df
+                if x_var in exp_df.columns and y_var in exp_df.columns:
+                    exp_x = exp_df[x_var].values
+                    exp_y = exp_df[y_var].values
+
+            from alchemist_core.visualization.plots import create_contour_plot
+            _, _, _ = create_contour_plot(
+                x_grid=X_grid,
+                y_grid=Y_grid,
+                predictions_grid=prediction_grid,
+                x_var=x_var,
+                y_var=y_var,
+                exp_x=exp_x,
+                exp_y=exp_y,
+                suggest_x=None,
+                suggest_y=None,
+                title=f"{title_prefix} - Posterior Mean\n({fixed_str})" if fixed_str else f"{title_prefix} - Posterior Mean",
+                ax=ax1
+            )
+
+            # Mark the suggested point
+            sugg_x = suggestion[x_var]
+            sugg_y = suggestion[y_var]
+            ax1.scatter([sugg_x], [sugg_y], color='black', s=102, marker='*', zorder=10,
+                        linewidths=1.5, label='Suggested')
+            ax1.legend()
+
+        else:  # 3D
+            # 3D voxel plot
+            x_var_def = next(v for v in self.search_space.variables if v['name'] == x_var)
+            y_var_def = next(v for v in self.search_space.variables if v['name'] == y_var)
+            z_var_def = next(v for v in self.search_space.variables if v['name'] == z_var)
+
+            x_values = np.linspace(x_var_def['min'], x_var_def['max'], grid_resolution)
+            y_values = np.linspace(y_var_def['min'], y_var_def['max'], grid_resolution)
+            z_values = np.linspace(z_var_def['min'], z_var_def['max'], grid_resolution)
+            X_grid, Y_grid, Z_grid = np.meshgrid(x_values, y_values, z_values, indexing='ij')
+
+            grid_data = {
+                x_var: X_grid.ravel(),
+                y_var: Y_grid.ravel(),
+                z_var: Z_grid.ravel()
+            }
+
+            for var in self.search_space.variables:
+                var_name = var['name']
+                if var_name in [x_var, y_var, z_var]:
+                    continue
+
+                if var_name in fixed_values:
+                    grid_data[var_name] = fixed_values[var_name]
+                else:
+                    if var['type'] in ['real', 'integer']:
+                        grid_data[var_name] = (var['min'] + var['max']) / 2
+                    elif var['type'] == 'categorical':
+                        grid_data[var_name] = var['values'][0]
+
+            if hasattr(self.model, 'original_feature_names') and self.model.original_feature_names:
+                column_order = self.model.original_feature_names
+            else:
+                column_order = self.search_space.get_variable_names()
+
+            grid_df = pd.DataFrame(grid_data, columns=column_order)
+
+            predictions, _ = self.predict(grid_df)
+            prediction_grid = predictions.reshape(X_grid.shape)
+
+            # Prepare overlays
+            exp_x, exp_y, exp_z = None, None, None
+            if show_experiments and not self.experiment_manager.df.empty:
+                exp_df = self.experiment_manager.df
+                if all(v in exp_df.columns for v in [x_var, y_var, z_var]):
+                    exp_x = exp_df[x_var].values
+                    exp_y = exp_df[y_var].values
+                    exp_z = exp_df[z_var].values
+
+            # Note: create_voxel_plot doesn't support an ax parameter yet, so the
+            # 3D posterior can't be drawn into this subplot; note the limitation.
+            logger.warning("3D voxel plots for suggestions not yet fully supported with subplots")
+            ax1.text(0.5, 0.5, "3D voxel posterior visualization\n(use plot_voxel separately)",
+                     ha='center', va='center', transform=ax1.transAxes)
+            ax1.axis('off')
+
+        # Plot 2: Acquisition Function
+        if is_1d:
+            # 1D acquisition slice
+            from alchemist_core.utils.acquisition_utils import evaluate_acquisition
+            from alchemist_core.visualization.plots import create_slice_plot
+
+            x_var_def = next(v for v in self.search_space.variables if v['name'] == x_var)
+            x_values = np.linspace(x_var_def['min'], x_var_def['max'], n_points)
+
+            grid_data = {x_var: x_values}
+
+            for var in self.search_space.variables:
+                var_name = var['name']
+                if var_name == x_var:
+                    continue
+
+                if var_name in fixed_values:
+                    grid_data[var_name] = fixed_values[var_name]
+                else:
+                    if var['type'] in ['real', 'integer']:
+                        grid_data[var_name] = (var['min'] + var['max']) / 2
+                    elif var['type'] == 'categorical':
+                        grid_data[var_name] = var['values'][0]
+
+            if hasattr(self.model, 'original_feature_names') and self.model.original_feature_names:
+                column_order = self.model.original_feature_names
+            else:
+                column_order = self.search_space.get_variable_names()
+
+            grid_df = pd.DataFrame(grid_data, columns=column_order)
+
+            acq_values, _ = evaluate_acquisition(
+                self.model,
+                grid_df,
+                acq_func=acq_func,
+                acq_func_kwargs=acq_func_kwargs,
+                goal=goal
+            )
+
+            create_slice_plot(
+                x_values=x_values,
+                predictions=acq_values,
+                x_var=x_var,
+                std=None,
+                sigma_bands=None,
+                exp_x=None,
+                exp_y=None,
+                title=None,  # No title for acquisition subplot
+                ax=ax2,
+                prediction_label=acq_func.upper(),
+                line_color='darkgreen',
+                line_width=1.5
+            )
+
+            # Add green fill under acquisition curve
+            ax2.fill_between(x_values, 0, acq_values, alpha=0.3, color='green', zorder=0)
+
+            ax2.set_ylabel(f'{acq_func.upper()} Value')
+
+            # Mark the suggested point
+            sugg_x = suggestion[x_var]
+            # Evaluate acquisition at the suggested point
+            sugg_acq, _ = evaluate_acquisition(
+                self.model,
+                pd.DataFrame([suggestion]),
+                acq_func=acq_func,
+                acq_func_kwargs=acq_func_kwargs,
+                goal=goal
+            )
+            ax2.scatter([sugg_x], sugg_acq, color='black', s=102, marker='*', zorder=10,
+                        linewidths=1.5, label=f'{acq_func.upper()} (suggested)')
+            ax2.legend()
+
+        elif is_2d:
+            # 2D acquisition contour
+            from alchemist_core.utils.acquisition_utils import evaluate_acquisition
+            from alchemist_core.visualization.plots import create_contour_plot
+
+            x_var_def = next(v for v in self.search_space.variables if v['name'] == x_var)
+            y_var_def = next(v for v in self.search_space.variables if v['name'] == y_var)
+
+            x_values = np.linspace(x_var_def['min'], x_var_def['max'], grid_resolution)
+            y_values = np.linspace(y_var_def['min'], y_var_def['max'], grid_resolution)
+            X_grid, Y_grid = np.meshgrid(x_values, y_values)
+
+            grid_data = {
+                x_var: X_grid.ravel(),
+                y_var: Y_grid.ravel()
+            }
+
+            for var in self.search_space.variables:
+                var_name = var['name']
+                if var_name in [x_var, y_var]:
+                    continue
+
+                if var_name in fixed_values:
+                    grid_data[var_name] = fixed_values[var_name]
+                else:
+                    if var['type'] in ['real', 'integer']:
+                        grid_data[var_name] = (var['min'] + var['max']) / 2
+                    elif var['type'] == 'categorical':
+                        grid_data[var_name] = var['values'][0]
+
+            if hasattr(self.model, 'original_feature_names') and self.model.original_feature_names:
+                column_order = self.model.original_feature_names
+            else:
+                column_order = self.search_space.get_variable_names()
+
+            grid_df = pd.DataFrame(grid_data, columns=column_order)
+
+            acq_values, _ = evaluate_acquisition(
+                self.model,
+                grid_df,
+                acq_func=acq_func,
+                acq_func_kwargs=acq_func_kwargs,
+                goal=goal
+            )
+            acquisition_grid = acq_values.reshape(X_grid.shape)
+
+            _, _, _ = create_contour_plot(
+                x_grid=X_grid,
+                y_grid=Y_grid,
+                predictions_grid=acquisition_grid,
+                x_var=x_var,
+                y_var=y_var,
+                exp_x=None,
+                exp_y=None,
+                suggest_x=None,
+                suggest_y=None,
+                cmap='Greens',  # Green colormap for acquisition
+                title=None,  # No title for acquisition subplot
+                ax=ax2
+            )
+
+            # Mark the suggested point
+            sugg_x = suggestion[x_var]
+            sugg_y = suggestion[y_var]
+            ax2.scatter([sugg_x], [sugg_y], color='black', s=102, marker='*', zorder=10,
+                        linewidths=1.5, label=f'{acq_func.upper()} (suggested)')
+            ax2.legend()
+
+        else:  # 3D
+            # 3D acquisition voxel
+            logger.warning("3D voxel plots for acquisition not yet fully supported with subplots")
+            ax2.text(0.5, 0.5, "3D voxel acquisition visualization\n(use plot_acquisition_voxel separately)",
+                     ha='center', va='center', transform=ax2.transAxes)
+            ax2.axis('off')
+
+        plt.tight_layout()
+
+        logger.info(f"Generated suggested next experiment visualization ({len(plot_vars)}D)")
+        return fig
+
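A minimal usage sketch for the method above, showing the `fixed_values` override for a non-plotted variable; it assumes a session that already has data and a trained model, and the variable names are illustrative:

```python
# Generate a suggestion, then visualize why it was chosen in 2D.
session.suggest_next(strategy='ei')
fig = session.plot_suggested_next(
    'temperature', 'pressure',
    fixed_values={'flow_rate': 2.5},  # override the coordinate extracted from the suggestion
    grid_resolution=40,
)
fig.savefig('suggested_next.png', dpi=150)
```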
+    def plot_probability_of_improvement(
+        self,
+        goal: Literal['maximize', 'minimize'] = 'maximize',
+        backend: Optional[str] = None,
+        kernel: Optional[str] = None,
+        n_grid_points: int = 1000,
+        start_iteration: int = 5,
+        reuse_hyperparameters: bool = True,
+        xi: float = 0.01,
+        figsize: Tuple[float, float] = (8, 6),
+        dpi: int = 100,
+        title: Optional[str] = None
+    ) -> Figure:  # pyright: ignore[reportInvalidTypeForm]
+        """
+        Plot maximum probability of improvement over optimization iterations.
+
+        Retroactively computes how the probability of finding a better solution
+        evolved during optimization. At each iteration:
+        1. Trains GP on observations up to that point (reusing hyperparameters)
+        2. Computes PI across the search space using native acquisition functions
+        3. Records the maximum PI value
+
+        Uses native PI implementations:
+        - sklearn backend: skopt.acquisition.gaussian_pi
+        - botorch backend: botorch.acquisition.ProbabilityOfImprovement
+
+        A decreasing max(PI) indicates the optimization is converging, with
+        little potential for improvement remaining.
+
+        Args:
+            goal: 'maximize' or 'minimize' - optimization direction
+            backend: Model backend to use (defaults to session's model_backend)
+            kernel: Kernel type for GP (defaults to session's kernel type)
+            n_grid_points: Number of points to sample search space
+            start_iteration: Minimum observations before computing PI (default: 5)
+            reuse_hyperparameters: If True, use final model's optimized hyperparameters
+                for all iterations (much faster, recommended)
+            xi: PI parameter controlling improvement threshold (default: 0.01)
+            figsize: Figure size as (width, height) in inches
+            dpi: Dots per inch for figure resolution
+            title: Custom plot title (auto-generated if None)
+
+        Returns:
+            matplotlib Figure object
+
+        Example:
+            >>> # After running optimization
+            >>> fig = session.plot_probability_of_improvement(goal='maximize')
+            >>> fig.savefig('pi_convergence.png')
+
+        Note:
+            - Requires at least `start_iteration` experiments
+            - Use fewer n_grid_points for faster computation
+            - PI values near 0 suggest little room for improvement
+            - Reusing hyperparameters (default) is much faster and usually sufficient
+            - Uses rigorous acquisition function implementations (not approximations)
+        """
+        self._check_matplotlib()
+
+        # Check we have enough experiments
+        n_exp = len(self.experiment_manager.df)
+        if n_exp < start_iteration:
+            raise ValueError(
+                f"Need at least {start_iteration} experiments for PI plot "
+                f"(have {n_exp}). Lower start_iteration if needed."
+            )
+
+        # Default to session's model configuration if not specified
+        if backend is None:
+            if self.model_backend is None:
+                raise ValueError(
+                    "No backend specified and session has no trained model. "
+                    "Either train a model first or specify backend parameter."
+                )
+            backend = self.model_backend
+
+        if kernel is None:
+            if self.model is None:
+                raise ValueError(
+                    "No kernel specified and session has no trained model. "
+                    "Either train a model first or specify kernel parameter."
+                )
+            # Extract kernel type from trained model
+            if self.model_backend == 'sklearn' and hasattr(self.model, 'optimized_kernel'):
+                # sklearn model
+                kernel_obj = self.model.optimized_kernel
+                if 'RBF' in str(type(kernel_obj)):
+                    kernel = 'RBF'
+                elif 'Matern' in str(type(kernel_obj)):
+                    kernel = 'Matern'
+                elif 'RationalQuadratic' in str(type(kernel_obj)):
+                    kernel = 'RationalQuadratic'
+                else:
+                    kernel = 'RBF'  # fallback
+            elif self.model_backend == 'botorch' and hasattr(self.model, 'cont_kernel_type'):
+                # botorch model - use the stored kernel type
+                kernel = self.model.cont_kernel_type
+            else:
+                # Final fallback if we can't determine kernel
+                kernel = 'Matern'
+
+        # Get optimized hyperparameters if reusing them
+        optimized_kernel_params = None
+        if reuse_hyperparameters and self.model is not None:
+            if backend.lower() == 'sklearn' and hasattr(self.model, 'optimized_kernel'):
+                # Extract the optimized kernel parameters
+                optimized_kernel_params = self.model.optimized_kernel
+                logger.info("Reusing optimized kernel hyperparameters from trained model")
+            # Note: botorch hyperparameter reuse would go here if needed
+
+        # Get data
+        X_all, y_all = self.experiment_manager.get_features_and_target()
+
+        # Generate grid of test points across search space
+        X_test = self._generate_prediction_grid(n_grid_points)
+
+        logger.info(f"Computing PI convergence from iteration {start_iteration} to {n_exp}...")
+        logger.info(f"Using {len(X_test)} test points across search space")
+        logger.info(f"Using native PI acquisition functions (xi={xi})")
+        if reuse_hyperparameters and optimized_kernel_params is not None:
+            logger.info("Using optimized hyperparameters from final model (faster)")
+        else:
+            logger.info("Optimizing hyperparameters at each iteration (slower but more accurate)")
+
+        # Compute max PI at each iteration
+        iterations = []
+        max_pi_values = []
+
+        for i in range(start_iteration, n_exp + 1):
+            # Get data up to iteration i
+            X_train = X_all.iloc[:i]
+            y_train = y_all[:i]
+
+            # Create temporary session for this iteration
+            temp_session = OptimizationSession(
+                search_space=self.search_space,
+                experiment_manager=ExperimentManager(search_space=self.search_space)
+            )
+            temp_session.experiment_manager.df = self.experiment_manager.df.iloc[:i].copy()
+
+            # Train model with optimized hyperparameters if available
+            try:
+                if reuse_hyperparameters and optimized_kernel_params is not None and backend.lower() == 'sklearn':
+                    # For sklearn: directly access model and set optimized kernel
+                    from alchemist_core.models.sklearn_model import SklearnModel
+
+                    # Create model instance with kernel options
+                    model_kwargs = {
+                        'kernel_options': {'kernel_type': kernel},
+                        'n_restarts_optimizer': 0  # Don't optimize since we're using fixed hyperparameters
+                    }
+                    temp_model = SklearnModel(**model_kwargs)
+
+                    # Preprocess data
+                    X_processed, y_processed = temp_model._preprocess_data(temp_session.experiment_manager)
+
+                    # Import sklearn's GP
+                    from sklearn.gaussian_process import GaussianProcessRegressor
+
+                    # Create GP with the optimized kernel and optimizer=None to keep it fixed
+                    gp_params = {
+                        'kernel': optimized_kernel_params,
+                        'optimizer': None,  # Keep hyperparameters fixed
+                        'random_state': temp_model.random_state
+                    }
+
+                    # Only add alpha if we have noise values
+                    if temp_model.alpha is not None:
+                        gp_params['alpha'] = temp_model.alpha
+
+                    temp_model.model = GaussianProcessRegressor(**gp_params)
+
+                    # Fit model (only computes GP weights, not hyperparameters)
+                    temp_model.model.fit(X_processed, y_processed)
+                    temp_model._is_trained = True
+
+                    # Set the model in the session
+                    temp_session.model = temp_model
+                    temp_session.model_backend = 'sklearn'
+                else:
+                    # Standard training with hyperparameter optimization
+                    temp_session.train_model(backend=backend, kernel=kernel)
+            except Exception as e:
+                logger.warning(f"Failed to train model at iteration {i}: {e}")
+                continue
+
+            # Compute PI using native acquisition functions
+            try:
+                if backend.lower() == 'sklearn':
+                    # Use skopt's gaussian_pi function
+                    from skopt.acquisition import gaussian_pi
+
+                    # skopt's gaussian_pi assumes minimization, so for a
+                    # maximization goal pass the negated incumbent as y_opt
+                    if goal.lower() == 'maximize':
+                        y_opt = -y_train.max()
+                    else:
+                        y_opt = y_train.min()
+
+                    # Preprocess X_test using the model's preprocessing pipeline
+                    # This handles categorical encoding and scaling
+                    X_test_processed = temp_session.model._preprocess_X(X_test)
+
+                    # Compute PI for all test points using skopt's implementation
+                    # Note: gaussian_pi expects model with predict(X, return_std=True)
+                    pi_values = gaussian_pi(
+                        X=X_test_processed,
+                        model=temp_session.model.model,  # sklearn GP model
+                        y_opt=y_opt,
+                        xi=xi
+                    )
+
+                    max_pi = float(np.max(pi_values))
+
+                elif backend.lower() == 'botorch':
+                    # Use BoTorch's ProbabilityOfImprovement
+                    import torch
+                    from botorch.acquisition import ProbabilityOfImprovement
+
+                    # Determine best value seen so far
+                    if goal.lower() == 'maximize':
+                        best_f = float(y_train.max())
+                    else:
+                        best_f = float(y_train.min())
+
+                    # Encode categorical variables if present
+                    X_test_encoded = temp_session.model._encode_categorical_data(X_test)
+
+                    # Convert to torch tensor
+                    X_tensor = torch.from_numpy(X_test_encoded.values).to(
+                        dtype=temp_session.model.model.train_inputs[0].dtype,
+                        device=temp_session.model.model.train_inputs[0].device
+                    )
+
+                    # Create PI acquisition function (direction set by goal)
+                    pi_acq = ProbabilityOfImprovement(
+                        model=temp_session.model.model,
+                        best_f=best_f,
+                        maximize=(goal.lower() == 'maximize')
+                    )
+
+                    # Evaluate PI on all test points
+                    temp_session.model.model.eval()
+                    with torch.no_grad():
+                        pi_values = pi_acq(X_tensor.unsqueeze(-2))  # Add q-batch dimension (q=1)
+
+                    max_pi = float(pi_values.max().item())
+
+                else:
+                    raise ValueError(f"Unknown backend: {backend}")
+
+            except Exception as e:
+                logger.warning(f"Failed to compute PI at iteration {i}: {e}")
+                import traceback
+                logger.debug(traceback.format_exc())
+                continue
+
+            # Record max PI
+            iterations.append(i)
+            max_pi_values.append(max_pi)
+
+            if i % 5 == 0 or i == n_exp:
+                logger.info(f"  Iteration {i}/{n_exp}: max(PI) = {max_pi:.4f}")
+
+        if not iterations:
+            raise RuntimeError("Failed to compute PI for any iterations")
+
+        # Import visualization function
+        from alchemist_core.visualization.plots import create_probability_of_improvement_plot
+
+        # Create plot
+        fig, ax = create_probability_of_improvement_plot(
+            iterations=np.array(iterations),
+            max_pi_values=np.array(max_pi_values),
+            figsize=figsize,
+            dpi=dpi,
+            title=title
+        )
+
+        logger.info(f"Generated PI convergence plot with {len(iterations)} points")
+        return fig
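For reference, the quantity tracked per iteration is the classic probability of improvement: for a GP posterior with mean mu(x) and std sigma(x), under the minimization convention, PI(x) = Phi((y_opt - mu(x) - xi) / sigma(x)). A NumPy/SciPy sketch of that closed form, illustrating the math rather than the skopt/BoTorch code paths used above:

```python
import numpy as np
from scipy.stats import norm

def probability_of_improvement(mu, sigma, y_opt, xi=0.01):
    """Closed-form PI for a Gaussian posterior, minimization convention.

    mu, sigma: posterior mean/std arrays at the candidate points
    y_opt: best (lowest) observed objective value
    xi: improvement margin, matching the method's `xi` parameter
    """
    sigma = np.maximum(sigma, 1e-12)  # guard against zero posterior variance
    z = (y_opt - mu - xi) / sigma
    return norm.cdf(z)

# Toy check: a point predicted well below the incumbent has PI near 1
mu = np.array([0.5, 1.0, 2.0])
sigma = np.array([0.2, 0.2, 0.2])
print(probability_of_improvement(mu, sigma, y_opt=1.0))  # ~[0.99, 0.48, ~0.0]
```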