alchemist_nrel-0.3.0-py3-none-any.whl → alchemist_nrel-0.3.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -268,6 +268,7 @@ class BoTorchAcquisition(BaseAcquisition):
268
268
  "batch_limit": batch_limit,
269
269
  "maxiter": max_iter,
270
270
  "ftol": 1e-3, # More relaxed convergence criteria
271
+ "factr": None, # Required when ftol is specified
271
272
  }
272
273
  else:
273
274
  # Standard parameters for other acquisition functions
@@ -45,16 +45,23 @@ class ExperimentManager:
45
45
 
46
46
  # Add iteration tracking
47
47
  if iteration is not None:
48
- # Use provided iteration and ensure _current_iteration reflects it
48
+ # Use provided iteration explicitly
49
49
  new_point['Iteration'] = int(iteration)
50
- # Keep _current_iteration in sync with the latest explicit iteration
51
- try:
52
- self._current_iteration = int(iteration)
53
- except Exception:
54
- pass
55
50
  else:
56
- # Use current iteration (doesn't increment until lock_acquisition)
57
- new_point['Iteration'] = int(self._current_iteration)
51
+ # Auto-calculate next iteration based on existing data
52
+ # This ensures proper iteration tracking across all clients
53
+ if len(self.df) > 0 and 'Iteration' in self.df.columns:
54
+ max_iteration = int(self.df['Iteration'].max())
55
+ new_point['Iteration'] = max_iteration + 1
56
+ else:
57
+ # First experiment defaults to iteration 0
58
+ new_point['Iteration'] = 0
59
+
60
+ # Keep _current_iteration in sync with latest iteration for backward compatibility
61
+ try:
62
+ self._current_iteration = int(new_point['Iteration'])
63
+ except Exception:
64
+ pass
58
65
 
59
66
  # Add reason
60
67
  new_point['Reason'] = reason if reason is not None else 'Manual'
@@ -396,8 +396,12 @@ class SklearnModel(BaseModel):
396
396
  if return_std:
397
397
  pred_mean, pred_std = predictions
398
398
 
399
+ # Safety check: replace invalid/negative std with small positive value
400
+ # Sklearn GP can produce negative variances due to numerical issues
401
+ pred_std = np.maximum(pred_std, 1e-6)
402
+
399
403
  # Apply calibration to standard deviation if enabled
400
- if self.calibration_enabled:
404
+ if self.calibration_enabled and np.isfinite(self.calibration_factor):
401
405
  pred_std = pred_std * self.calibration_factor
402
406
 
403
407
  # Inverse transform the mean predictions
@@ -636,11 +640,35 @@ class SklearnModel(BaseModel):
636
640
  y_pred = self.cv_cached_results['y_pred']
637
641
  y_std = self.cv_cached_results['y_std']
638
642
 
643
+ # Check for numerical issues (zero/negative variances)
644
+ if np.any(y_std <= 0) or np.any(~np.isfinite(y_std)):
645
+ logger.warning("Sklearn GP produced invalid uncertainties (zero/negative/inf). Disabling calibration.")
646
+ self.calibration_enabled = False
647
+ self.calibration_factor = 1.0
648
+ return
649
+
639
650
  # Compute standardized residuals (z-scores)
640
- z_scores = (y_true - y_pred) / y_std
651
+ # Add small epsilon to avoid division by zero
652
+ epsilon = 1e-10
653
+ z_scores = (y_true - y_pred) / (y_std + epsilon)
654
+
655
+ # Check for numerical validity
656
+ if not np.all(np.isfinite(z_scores)):
657
+ logger.warning("Z-scores contain NaN/inf. Disabling calibration.")
658
+ self.calibration_enabled = False
659
+ self.calibration_factor = 1.0
660
+ return
641
661
 
642
662
  # Calibration factor = std(z)
643
663
  self.calibration_factor = np.std(z_scores, ddof=1)
664
+
665
+ # Final check for valid calibration factor
666
+ if not np.isfinite(self.calibration_factor) or self.calibration_factor <= 0:
667
+ logger.warning(f"Invalid calibration factor: {self.calibration_factor}. Disabling calibration.")
668
+ self.calibration_enabled = False
669
+ self.calibration_factor = 1.0
670
+ return
671
+
644
672
  self.calibration_enabled = True
645
673
 
646
674
  # Create calibrated copy of CV results for plotting
alchemist_core/session.py CHANGED
@@ -31,23 +31,23 @@ class OptimizationSession:
31
31
  5. Iterate
32
32
 
33
33
  Example:
34
- >>> from alchemist_core import OptimizationSession
35
- >>>
36
- >>> # Create session with search space
37
- >>> session = OptimizationSession()
38
- >>> session.add_variable('temperature', 'real', bounds=(300, 500))
39
- >>> session.add_variable('pressure', 'real', bounds=(1, 10))
40
- >>> session.add_variable('catalyst', 'categorical', categories=['A', 'B', 'C'])
41
- >>>
42
- >>> # Load experimental data
43
- >>> session.load_data('experiments.csv', target_column='yield')
44
- >>>
45
- >>> # Train model
46
- >>> session.train_model(backend='botorch', kernel='Matern')
47
- >>>
48
- >>> # Suggest next experiment
49
- >>> next_point = session.suggest_next(strategy='EI', goal='maximize')
50
- >>> print(next_point)
34
+ > from alchemist_core import OptimizationSession
35
+ >
36
+ > # Create session with search space
37
+ > session = OptimizationSession()
38
+ > session.add_variable('temperature', 'real', bounds=(300, 500))
39
+ > session.add_variable('pressure', 'real', bounds=(1, 10))
40
+ > session.add_variable('catalyst', 'categorical', categories=['A', 'B', 'C'])
41
+ >
42
+ > # Load experimental data
43
+ > session.load_data('experiments.csv', target_column='yield')
44
+ >
45
+ > # Train model
46
+ > session.train_model(backend='botorch', kernel='Matern')
47
+ >
48
+ > # Suggest next experiment
49
+ > next_point = session.suggest_next(strategy='EI', goal='maximize')
50
+ > print(next_point)
51
51
  """
52
52
 
53
53
  def __init__(self, search_space: Optional[SearchSpace] = None,
@@ -79,10 +79,16 @@ class OptimizationSession:
79
79
  self.model_backend = None
80
80
  self.acquisition = None
81
81
 
82
+ # Staged experiments (for workflow management)
83
+ self.staged_experiments = [] # List of experiment dicts awaiting evaluation
84
+ self.last_suggestions = [] # Most recent acquisition suggestions (for UI)
85
+
82
86
  # Configuration
83
87
  self.config = {
84
88
  'random_state': 42,
85
- 'verbose': True
89
+ 'verbose': True,
90
+ 'auto_train': False, # Auto-train model after adding experiments
91
+ 'auto_train_threshold': 5 # Minimum experiments before auto-train
86
92
  }
87
93
 
88
94
  logger.info(f"OptimizationSession initialized: {self.metadata.session_id}")
@@ -103,8 +109,8 @@ class OptimizationSession:
103
109
  - For 'categorical': categories=[list of values] or values=[list]
104
110
 
105
111
  Example:
106
- >>> session.add_variable('temp', 'real', bounds=(300, 500))
107
- >>> session.add_variable('catalyst', 'categorical', categories=['A', 'B'])
112
+ > session.add_variable('temp', 'real', bounds=(300, 500))
113
+ > session.add_variable('catalyst', 'categorical', categories=['A', 'B'])
108
114
  """
109
115
  # Convert user-friendly API to internal format
110
116
  params = kwargs.copy()
@@ -196,7 +202,7 @@ class OptimizationSession:
196
202
  noise_column: Optional column with measurement noise/uncertainty
197
203
 
198
204
  Example:
199
- >>> session.load_data('experiments.csv', target_column='yield')
205
+ > session.load_data('experiments.csv', target_column='yield')
200
206
  """
201
207
  # Load the CSV
202
208
  import pandas as pd
@@ -245,7 +251,7 @@ class OptimizationSession:
245
251
  reason: Reason for this experiment (e.g., 'Manual', 'Expected Improvement')
246
252
 
247
253
  Example:
248
- >>> session.add_experiment(
254
+ > session.add_experiment(
249
255
  ... inputs={'temperature': 350, 'catalyst': 'A'},
250
256
  ... output=0.85,
251
257
  ... reason='Manual'
@@ -288,6 +294,124 @@ class OptimizationSession:
288
294
  'feature_names': list(X.columns)
289
295
  }
290
296
 
297
+ # ============================================================
298
+ # Staged Experiments (Workflow Management)
299
+ # ============================================================
300
+
301
+ def add_staged_experiment(self, inputs: Dict[str, Any]) -> None:
302
+ """
303
+ Add an experiment to the staging area (awaiting evaluation).
304
+
305
+ Staged experiments are typically suggested by acquisition functions
306
+ but not yet evaluated. They can be retrieved, evaluated externally,
307
+ and then added to the dataset with add_experiment().
308
+
309
+ Args:
310
+ inputs: Dictionary mapping variable names to values
311
+
312
+ Example:
313
+ > # Generate suggestions and stage them
314
+ > suggestions = session.suggest_next(n_suggestions=3)
315
+ > for point in suggestions.to_dict('records'):
316
+ > session.add_staged_experiment(point)
317
+ >
318
+ > # Later, evaluate and add
319
+ > staged = session.get_staged_experiments()
320
+ > for point in staged:
321
+ > output = run_experiment(**point)
322
+ > session.add_experiment(point, output=output)
323
+ > session.clear_staged_experiments()
324
+ """
325
+ self.staged_experiments.append(inputs)
326
+ logger.debug(f"Staged experiment: {inputs}")
327
+ self.events.emit('experiment_staged', {'inputs': inputs})
328
+
329
+ def get_staged_experiments(self) -> List[Dict[str, Any]]:
330
+ """
331
+ Get all staged experiments awaiting evaluation.
332
+
333
+ Returns:
334
+ List of experiment input dictionaries
335
+ """
336
+ return self.staged_experiments.copy()
337
+
338
+ def clear_staged_experiments(self) -> int:
339
+ """
340
+ Clear all staged experiments.
341
+
342
+ Returns:
343
+ Number of experiments cleared
344
+ """
345
+ count = len(self.staged_experiments)
346
+ self.staged_experiments.clear()
347
+ if count > 0:
348
+ logger.info(f"Cleared {count} staged experiments")
349
+ self.events.emit('staged_experiments_cleared', {'count': count})
350
+ return count
351
+
352
+ def move_staged_to_experiments(self, outputs: List[float],
353
+ noises: Optional[List[float]] = None,
354
+ iteration: Optional[int] = None,
355
+ reason: Optional[str] = None) -> int:
356
+ """
357
+ Evaluate staged experiments and add them to the dataset in batch.
358
+
359
+ Convenience method that pairs staged inputs with outputs and adds
360
+ them all to the experiment manager, then clears the staging area.
361
+
362
+ Args:
363
+ outputs: List of output values (must match length of staged experiments)
364
+ noises: Optional list of measurement uncertainties
365
+ iteration: Iteration number for all experiments (auto-assigned if None)
366
+ reason: Reason for these experiments (e.g., 'Expected Improvement')
367
+
368
+ Returns:
369
+ Number of experiments added
370
+
371
+ Example:
372
+ > # Stage some experiments
373
+ > session.add_staged_experiment({'x': 1.0, 'y': 2.0})
374
+ > session.add_staged_experiment({'x': 3.0, 'y': 4.0})
375
+ >
376
+ > # Evaluate them
377
+ > outputs = [run_experiment(**point) for point in session.get_staged_experiments()]
378
+ >
379
+ > # Add to dataset and clear staging
380
+ > session.move_staged_to_experiments(outputs, reason='LogEI')
381
+ """
382
+ if len(outputs) != len(self.staged_experiments):
383
+ raise ValueError(
384
+ f"Number of outputs ({len(outputs)}) must match "
385
+ f"number of staged experiments ({len(self.staged_experiments)})"
386
+ )
387
+
388
+ if noises is not None and len(noises) != len(self.staged_experiments):
389
+ raise ValueError(
390
+ f"Number of noise values ({len(noises)}) must match "
391
+ f"number of staged experiments ({len(self.staged_experiments)})"
392
+ )
393
+
394
+ # Add each experiment
395
+ for i, inputs in enumerate(self.staged_experiments):
396
+ noise = noises[i] if noises is not None else None
397
+ self.add_experiment(
398
+ inputs=inputs,
399
+ output=outputs[i],
400
+ noise=noise,
401
+ iteration=iteration,
402
+ reason=reason
403
+ )
404
+
405
+ count = len(self.staged_experiments)
406
+ self.clear_staged_experiments()
407
+
408
+ logger.info(f"Moved {count} staged experiments to dataset")
409
+ return count
410
+
411
+ # ============================================================
412
+ # Initial Design Generation
413
+ # ============================================================
414
+
291
415
  def generate_initial_design(
292
416
  self,
293
417
  method: str = "lhs",
@@ -320,16 +444,16 @@ class OptimizationSession:
320
444
  List of dictionaries with variable names and values (no outputs)
321
445
 
322
446
  Example:
323
- >>> # Generate initial design
324
- >>> points = session.generate_initial_design('lhs', n_points=10)
325
- >>>
326
- >>> # Run experiments and add results
327
- >>> for point in points:
328
- >>> output = run_experiment(**point) # Your experiment function
329
- >>> session.add_experiment(point, output=output)
330
- >>>
331
- >>> # Now ready to train model
332
- >>> session.train_model()
447
+ > # Generate initial design
448
+ > points = session.generate_initial_design('lhs', n_points=10)
449
+ >
450
+ > # Run experiments and add results
451
+ > for point in points:
452
+ > output = run_experiment(**point) # Your experiment function
453
+ > session.add_experiment(point, output=output)
454
+ >
455
+ > # Now ready to train model
456
+ > session.train_model()
333
457
  """
334
458
  if len(self.search_space.variables) == 0:
335
459
  raise ValueError(
@@ -389,8 +513,8 @@ class OptimizationSession:
389
513
  Dictionary with training results and hyperparameters
390
514
 
391
515
  Example:
392
- >>> results = session.train_model(backend='botorch', kernel='Matern')
393
- >>> print(results['metrics'])
516
+ > results = session.train_model(backend='botorch', kernel='Matern')
517
+ > print(results['metrics'])
394
518
  """
395
519
  df = self.experiment_manager.get_data()
396
520
  if df is None or df.empty:
@@ -410,6 +534,27 @@ class OptimizationSession:
410
534
  # Extract calibration_enabled before passing kwargs to model constructor
411
535
  calibration_enabled = kwargs.pop('calibration_enabled', False)
412
536
 
537
+ # Validate and map transform types based on backend
538
+ # BoTorch uses: 'normalize', 'standardize'
539
+ # Sklearn uses: 'minmax', 'standard', 'robust', 'none'
540
+ if self.model_backend == 'sklearn':
541
+ # Map BoTorch transform types to sklearn equivalents
542
+ transform_map = {
543
+ 'normalize': 'minmax', # BoTorch normalize → sklearn minmax
544
+ 'standardize': 'standard', # BoTorch standardize → sklearn standard
545
+ 'none': 'none'
546
+ }
547
+ if 'input_transform_type' in kwargs:
548
+ original = kwargs['input_transform_type']
549
+ kwargs['input_transform_type'] = transform_map.get(original, original)
550
+ if original != kwargs['input_transform_type']:
551
+ logger.debug(f"Mapped input transform '{original}' → '{kwargs['input_transform_type']}' for sklearn")
552
+ if 'output_transform_type' in kwargs:
553
+ original = kwargs['output_transform_type']
554
+ kwargs['output_transform_type'] = transform_map.get(original, original)
555
+ if original != kwargs['output_transform_type']:
556
+ logger.debug(f"Mapped output transform '{original}' → '{kwargs['output_transform_type']}' for sklearn")
557
+
413
558
  # Import appropriate model class
414
559
  if self.model_backend == 'sklearn':
415
560
  from alchemist_core.models.sklearn_model import SklearnModel
@@ -428,6 +573,15 @@ class OptimizationSession:
428
573
  elif self.model_backend == 'botorch':
429
574
  from alchemist_core.models.botorch_model import BoTorchModel
430
575
 
576
+ # Apply sensible defaults for BoTorch if not explicitly overridden
577
+ # Input normalization and output standardization are critical for performance
578
+ if 'input_transform_type' not in kwargs:
579
+ kwargs['input_transform_type'] = 'normalize'
580
+ logger.debug("Auto-applying input normalization for BoTorch model")
581
+ if 'output_transform_type' not in kwargs:
582
+ kwargs['output_transform_type'] = 'standardize'
583
+ logger.debug("Auto-applying output standardization for BoTorch model")
584
+
431
585
  # Build kernel options - BoTorch uses 'cont_kernel_type' not 'kernel_type'
432
586
  kernel_options = {'cont_kernel_type': kernel}
433
587
  if kernel_params:
@@ -598,8 +752,8 @@ class OptimizationSession:
598
752
  DataFrame with suggested experiment(s)
599
753
 
600
754
  Example:
601
- >>> next_point = session.suggest_next(strategy='EI', goal='maximize')
602
- >>> print(next_point)
755
+ > next_point = session.suggest_next(strategy='EI', goal='maximize')
756
+ > print(next_point)
603
757
  """
604
758
  if self.model is None:
605
759
  raise ValueError("No trained model available. Use train_model() first.")
@@ -663,6 +817,9 @@ class OptimizationSession:
663
817
  logger.info(f"Suggested point: {suggestion_dict}")
664
818
  self.events.emit('acquisition_completed', {'suggestion': suggestion_dict})
665
819
 
820
+ # Store suggestions for UI/API access
821
+ self.last_suggestions = result_df.to_dict('records')
822
+
666
823
  # Cache suggestion info for audit log
667
824
  self._last_acquisition_info = {
668
825
  'strategy': strategy,
@@ -685,11 +842,11 @@ class OptimizationSession:
685
842
  Tuple of (predictions, uncertainties)
686
843
 
687
844
  Example:
688
- >>> test_points = pd.DataFrame({
845
+ > test_points = pd.DataFrame({
689
846
  ... 'temperature': [350, 400],
690
847
  ... 'catalyst': ['A', 'B']
691
848
  ... })
692
- >>> predictions, uncertainties = session.predict(test_points)
849
+ > predictions, uncertainties = session.predict(test_points)
693
850
  """
694
851
  if self.model is None:
695
852
  raise ValueError("No trained model available. Use train_model() first.")
@@ -722,9 +879,9 @@ class OptimizationSession:
722
879
  callback: Callback function
723
880
 
724
881
  Example:
725
- >>> def on_training_done(data):
882
+ > def on_training_done(data):
726
883
  ... print(f"Training completed with R² = {data['metrics']['r2']}")
727
- >>> session.on('training_completed', on_training_done)
884
+ > session.on('training_completed', on_training_done)
728
885
  """
729
886
  self.events.on(event, callback)
730
887
 
@@ -740,7 +897,7 @@ class OptimizationSession:
740
897
  **kwargs: Configuration parameters to update
741
898
 
742
899
  Example:
743
- >>> session.set_config(random_state=123, verbose=False)
900
+ > session.set_config(random_state=123, verbose=False)
744
901
  """
745
902
  self.config.update(kwargs)
746
903
  logger.info(f"Updated config: {kwargs}")
@@ -764,8 +921,8 @@ class OptimizationSession:
764
921
  Created AuditEntry
765
922
 
766
923
  Example:
767
- >>> session.add_experiment({'temp': 100, 'pressure': 5}, output=85.2)
768
- >>> session.lock_data(notes="Initial screening dataset")
924
+ > session.add_experiment({'temp': 100, 'pressure': 5}, output=85.2)
925
+ > session.lock_data(notes="Initial screening dataset")
769
926
  """
770
927
  # Set search space in audit log (once)
771
928
  if self.audit_log.search_space_definition is None:
@@ -805,8 +962,8 @@ class OptimizationSession:
805
962
  ValueError: If no model has been trained
806
963
 
807
964
  Example:
808
- >>> session.train_model(backend='sklearn', kernel='matern')
809
- >>> session.lock_model(notes="Best cross-validation performance")
965
+ > session.train_model(backend='sklearn', kernel='matern')
966
+ > session.lock_model(notes="Best cross-validation performance")
810
967
  """
811
968
  if self.model is None:
812
969
  raise ValueError("No trained model available. Use train_model() first.")
@@ -898,8 +1055,8 @@ class OptimizationSession:
898
1055
  Created AuditEntry
899
1056
 
900
1057
  Example:
901
- >>> suggestions = session.suggest_next(strategy='EI', n_suggestions=3)
902
- >>> session.lock_acquisition(
1058
+ > suggestions = session.suggest_next(strategy='EI', n_suggestions=3)
1059
+ > session.lock_acquisition(
903
1060
  ... strategy='EI',
904
1061
  ... parameters={'xi': 0.01, 'goal': 'maximize'},
905
1062
  ... suggestions=suggestions,
@@ -967,7 +1124,7 @@ class OptimizationSession:
967
1124
  filepath: Path to save session file (.json extension recommended)
968
1125
 
969
1126
  Example:
970
- >>> session.save_session("~/ALchemist_Sessions/catalyst_study_nov2025.json")
1127
+ > session.save_session("~/ALchemist_Sessions/catalyst_study_nov2025.json")
971
1128
  """
972
1129
  filepath = Path(filepath)
973
1130
 
@@ -1066,7 +1223,7 @@ class OptimizationSession:
1066
1223
  OptimizationSession with restored state
1067
1224
 
1068
1225
  Example:
1069
- >>> session = OptimizationSession.load_session("my_session.json")
1226
+ > session = OptimizationSession.load_session("my_session.json")
1070
1227
  """
1071
1228
  filepath = Path(filepath)
1072
1229
 
@@ -1156,7 +1313,7 @@ class OptimizationSession:
1156
1313
  tags: New tags (optional)
1157
1314
 
1158
1315
  Example:
1159
- >>> session.update_metadata(
1316
+ > session.update_metadata(
1160
1317
  ... name="Catalyst Screening - Final",
1161
1318
  ... description="Optimized Pt/Pd ratios",
1162
1319
  ... tags=["catalyst", "platinum", "palladium", "final"]
@@ -1188,7 +1345,7 @@ class OptimizationSession:
1188
1345
  **kwargs: Configuration parameters to update
1189
1346
 
1190
1347
  Example:
1191
- >>> session.set_config(random_state=123, verbose=False)
1348
+ > session.set_config(random_state=123, verbose=False)
1192
1349
  """
1193
1350
  self.config.update(kwargs)
1194
1351
  logger.info(f"Updated config: {kwargs}")