alchemist-nrel 0.3.1__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. alchemist_core/__init__.py +2 -2
  2. alchemist_core/acquisition/botorch_acquisition.py +83 -126
  3. alchemist_core/data/experiment_manager.py +181 -12
  4. alchemist_core/models/botorch_model.py +292 -63
  5. alchemist_core/models/sklearn_model.py +145 -13
  6. alchemist_core/session.py +3330 -31
  7. alchemist_core/utils/__init__.py +3 -1
  8. alchemist_core/utils/acquisition_utils.py +60 -0
  9. alchemist_core/visualization/__init__.py +45 -0
  10. alchemist_core/visualization/helpers.py +130 -0
  11. alchemist_core/visualization/plots.py +1449 -0
  12. {alchemist_nrel-0.3.1.dist-info → alchemist_nrel-0.3.2.dist-info}/METADATA +13 -13
  13. {alchemist_nrel-0.3.1.dist-info → alchemist_nrel-0.3.2.dist-info}/RECORD +31 -26
  14. {alchemist_nrel-0.3.1.dist-info → alchemist_nrel-0.3.2.dist-info}/WHEEL +1 -1
  15. api/main.py +1 -1
  16. api/models/requests.py +52 -0
  17. api/models/responses.py +79 -2
  18. api/routers/experiments.py +333 -8
  19. api/routers/sessions.py +84 -9
  20. api/routers/visualizations.py +6 -4
  21. api/routers/websocket.py +2 -2
  22. api/services/session_store.py +295 -71
  23. api/static/assets/index-B6Cf6s_b.css +1 -0
  24. api/static/assets/{index-DWfIKU9j.js → index-B7njvc9r.js} +201 -196
  25. api/static/index.html +2 -2
  26. ui/gpr_panel.py +11 -5
  27. ui/target_column_dialog.py +299 -0
  28. ui/ui.py +52 -5
  29. api/static/assets/index-sMIa_1hV.css +0 -1
  30. {alchemist_nrel-0.3.1.dist-info → alchemist_nrel-0.3.2.dist-info}/entry_points.txt +0 -0
  31. {alchemist_nrel-0.3.1.dist-info → alchemist_nrel-0.3.2.dist-info}/licenses/LICENSE +0 -0
  32. {alchemist_nrel-0.3.1.dist-info → alchemist_nrel-0.3.2.dist-info}/top_level.txt +0 -0
@@ -4,6 +4,7 @@ from alchemist_core.data.experiment_manager import ExperimentManager
4
4
  from alchemist_core.config import get_logger
5
5
  import numpy as np
6
6
  import pandas as pd
7
+ from typing import Union, Tuple, Optional
7
8
  from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
8
9
  from sklearn.model_selection import KFold, cross_validate, train_test_split
9
10
  from sklearn.preprocessing import OneHotEncoder, StandardScaler, MinMaxScaler, RobustScaler
@@ -322,7 +323,13 @@ class SklearnModel(BaseModel):
322
323
  self.X_orig = X_orig # Store original data for contour generation
323
324
 
324
325
  X, y = self._preprocess_data(experiment_manager)
325
- self.kernel = self._build_kernel(X)
326
+
327
+ # Check if we should reuse a pre-optimized kernel
328
+ if hasattr(self, '_reuse_kernel') and self._reuse_kernel is not None:
329
+ self.kernel = self._reuse_kernel
330
+ logger.info("Reusing pre-optimized kernel hyperparameters")
331
+ else:
332
+ self.kernel = self._build_kernel(X)
326
333
 
327
334
  # Create base parameters dictionary
328
335
  params = {
@@ -463,13 +470,16 @@ class SklearnModel(BaseModel):
463
470
  subset_X_train = X_train.iloc[:i]
464
471
  subset_y_train = y_train.iloc[:i]
465
472
 
466
- # Use the ALREADY FITTED scalers (fit_scalers=False)
467
- X_processed = self._preprocess_subset(subset_X_train, categorical_variables, fit_scalers=False)
468
- y_processed = self._scale_output(subset_y_train.values.reshape(-1, 1), fit_scaler=False).ravel()
473
+ # Fit scalers on this subset
474
+ X_processed = self._preprocess_subset(subset_X_train, categorical_variables, fit_scalers=True)
475
+ y_processed = self._scale_output(subset_y_train.values.reshape(-1, 1), fit_scaler=True).ravel()
469
476
 
470
- # Create model with optimized hyperparameters but no re-optimization
477
+ # Build kernel for this subset's dimensionality
478
+ subset_kernel = self._build_kernel(X_processed)
479
+
480
+ # Create model with subset-specific kernel but no re-optimization
471
481
  eval_model = GaussianProcessRegressor(
472
- kernel=self.optimized_kernel,
482
+ kernel=subset_kernel,
473
483
  optimizer=None, # Don't re-optimize
474
484
  random_state=self.random_state
475
485
  )
@@ -515,13 +525,16 @@ class SklearnModel(BaseModel):
515
525
  X_test_fold = subset_X.iloc[test_idx]
516
526
  y_test_fold = subset_y.iloc[test_idx]
517
527
 
518
- # Use the ALREADY FITTED scalers (fit_scalers=False) - same scalers for all folds
519
- X_train_processed = self._preprocess_subset(X_train_fold, categorical_variables, fit_scalers=False)
520
- y_train_processed = self._scale_output(y_train_fold.values.reshape(-1, 1), fit_scaler=False).ravel()
528
+ # Fit scalers on this fold's training data
529
+ X_train_processed = self._preprocess_subset(X_train_fold, categorical_variables, fit_scalers=True)
530
+ y_train_processed = self._scale_output(y_train_fold.values.reshape(-1, 1), fit_scaler=True).ravel()
531
+
532
+ # Build kernel for this fold's dimensionality
533
+ fold_kernel = self._build_kernel(X_train_processed)
521
534
 
522
- # Create model with optimized hyperparameters but no re-optimization
535
+ # Create model with fold-specific kernel but no re-optimization
523
536
  eval_model = GaussianProcessRegressor(
524
- kernel=self.optimized_kernel,
537
+ kernel=fold_kernel,
525
538
  optimizer=None, # Don't re-optimize
526
539
  random_state=self.random_state
527
540
  )
@@ -590,9 +603,13 @@ class SklearnModel(BaseModel):
590
603
  X_train_processed = self._preprocess_subset(X_train_fold, categorical_variables, fit_scalers=True)
591
604
  y_train_processed = self._scale_output(y_train_fold.values.reshape(-1, 1), fit_scaler=True).ravel()
592
605
 
593
- # Create model with optimized hyperparameters but no re-optimization
606
+ # Create a kernel for this fold's dimensionality
607
+ # (categories might differ between folds, changing feature count)
608
+ fold_kernel = self._build_kernel(X_train_processed)
609
+
610
+ # Create model with fold-specific kernel but no re-optimization
594
611
  cv_model = GaussianProcessRegressor(
595
- kernel=self.optimized_kernel,
612
+ kernel=fold_kernel,
596
613
  optimizer=None, # Don't re-optimize
597
614
  random_state=self.random_state
598
615
  )
@@ -781,3 +798,118 @@ class SklearnModel(BaseModel):
781
798
  Z = predictions.reshape(X.shape)
782
799
 
783
800
  return X, Y, Z
801
def evaluate_acquisition(
    self,
    X: Union[pd.DataFrame, np.ndarray],
    acq_func: str = 'ucb',
    acq_func_kwargs: Optional[dict] = None,
    maximize: bool = True
) -> Tuple[np.ndarray, None]:
    """
    Evaluate an acquisition function at given points.

    For maximization the values are computed in closed form from the GP
    posterior mean/std, because skopt's ``gaussian_ei``/``gaussian_pi``/
    ``gaussian_lcb`` helpers assume MINIMIZATION. For minimization the
    skopt helpers are used directly. skopt is imported lazily only on the
    minimization paths, so it remains an optional dependency when
    maximizing.

    Args:
        X: Points to evaluate (DataFrame or array with shape (n, d)).
        acq_func: Acquisition function name ('ei', 'pi', 'ucb/lcb').
        acq_func_kwargs: Additional parameters (e.g. {'xi': 0.01, 'kappa': 1.96}).
        maximize: Whether we're maximizing (True) or minimizing (False).

    Returns:
        Tuple of (acq_values, None) - None because acq functions are
        deterministic (there is no uncertainty to report).

    Raises:
        ValueError: If the model is not trained, or ``acq_func`` is not a
            recognized acquisition function name.

    Example:
        >>> points = pd.DataFrame({'temp': [300, 350, 400], 'pressure': [1, 2, 3]})
        >>> acq_vals, _ = model.evaluate_acquisition(points, acq_func='ei', maximize=True)
    """
    if not self.is_trained:
        raise ValueError("Model must be trained before evaluating acquisition functions.")

    # Convert input to the model's preprocessed feature space.
    X_processed = self._preprocess_X(X)

    # Incumbent best observed value (in scaled target space).
    y_opt = np.max(self.y_train_) if maximize else np.min(self.y_train_)

    # Parse kwargs with defaults.
    if acq_func_kwargs is None:
        acq_func_kwargs = {}
    xi = acq_func_kwargs.get('xi', 0.01)
    kappa = acq_func_kwargs.get('kappa', 1.96)

    acq_func_lower = acq_func.lower()

    if acq_func_lower in ('ei', 'expectedimprovement'):
        if maximize:
            # For maximization: EI = E[max(f(x) - f(x_best) - xi, 0)]
            # where f(x_best) = y_opt = max(y_train)
            from scipy.stats import norm
            mu, std = self.model.predict(X_processed, return_std=True)
            improve = mu - y_opt + xi  # Improvement over current max
            z = improve / (std + 1e-9)  # Guard against zero predictive std
            acq_values = improve * norm.cdf(z) + std * norm.pdf(z)
        else:
            # For minimization: use gaussian_ei directly.
            from skopt.acquisition import gaussian_ei
            acq_values = gaussian_ei(
                X_processed,
                self.model,
                y_opt=y_opt,
                xi=xi,
                return_grad=False
            )

    elif acq_func_lower in ('pi', 'probabilityofimprovement'):
        if maximize:
            # For maximization: PI = P(f(x) > f(x_best) + xi)
            from scipy.stats import norm
            mu, std = self.model.predict(X_processed, return_std=True)
            z = (mu - y_opt + xi) / (std + 1e-9)
            acq_values = norm.cdf(z)
        else:
            # For minimization: use gaussian_pi directly.
            from skopt.acquisition import gaussian_pi
            acq_values = gaussian_pi(
                X_processed,
                self.model,
                y_opt=y_opt,
                xi=xi,
                return_grad=False
            )

    elif acq_func_lower in ('ucb', 'lcb', 'upperconfidencebound', 'lowerconfidencebound'):
        if maximize:
            # For maximization: UCB = mean + kappa*std (higher is better).
            mu, std = self.model.predict(X_processed, return_std=True)
            acq_values = mu + kappa * std
        else:
            # For minimization: LCB = mean - kappa*std (lower is better).
            from skopt.acquisition import gaussian_lcb
            acq_values = gaussian_lcb(
                X_processed,
                self.model,
                kappa=kappa,
                return_grad=False
            )
    else:
        raise ValueError(
            f"Unknown acquisition function '{acq_func}' for sklearn backend. "
            f"Valid options are: 'ei', 'pi', 'ucb/lcb'"
        )

    # Ensure output is a flat 1D array.
    acq_values = np.asarray(acq_values)
    if acq_values.ndim > 1:
        acq_values = acq_values.ravel()

    return acq_values, None