alchemist-nrel 0.3.1__py3-none-any.whl → 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- alchemist_core/__init__.py +2 -2
- alchemist_core/acquisition/botorch_acquisition.py +83 -126
- alchemist_core/data/experiment_manager.py +181 -12
- alchemist_core/models/botorch_model.py +292 -63
- alchemist_core/models/sklearn_model.py +145 -13
- alchemist_core/session.py +3330 -31
- alchemist_core/utils/__init__.py +3 -1
- alchemist_core/utils/acquisition_utils.py +60 -0
- alchemist_core/visualization/__init__.py +45 -0
- alchemist_core/visualization/helpers.py +130 -0
- alchemist_core/visualization/plots.py +1449 -0
- {alchemist_nrel-0.3.1.dist-info → alchemist_nrel-0.3.2.dist-info}/METADATA +13 -13
- {alchemist_nrel-0.3.1.dist-info → alchemist_nrel-0.3.2.dist-info}/RECORD +31 -26
- {alchemist_nrel-0.3.1.dist-info → alchemist_nrel-0.3.2.dist-info}/WHEEL +1 -1
- api/main.py +1 -1
- api/models/requests.py +52 -0
- api/models/responses.py +79 -2
- api/routers/experiments.py +333 -8
- api/routers/sessions.py +84 -9
- api/routers/visualizations.py +6 -4
- api/routers/websocket.py +2 -2
- api/services/session_store.py +295 -71
- api/static/assets/index-B6Cf6s_b.css +1 -0
- api/static/assets/{index-DWfIKU9j.js → index-B7njvc9r.js} +201 -196
- api/static/index.html +2 -2
- ui/gpr_panel.py +11 -5
- ui/target_column_dialog.py +299 -0
- ui/ui.py +52 -5
- api/static/assets/index-sMIa_1hV.css +0 -1
- {alchemist_nrel-0.3.1.dist-info → alchemist_nrel-0.3.2.dist-info}/entry_points.txt +0 -0
- {alchemist_nrel-0.3.1.dist-info → alchemist_nrel-0.3.2.dist-info}/licenses/LICENSE +0 -0
- {alchemist_nrel-0.3.1.dist-info → alchemist_nrel-0.3.2.dist-info}/top_level.txt +0 -0
|
@@ -4,6 +4,7 @@ from alchemist_core.data.experiment_manager import ExperimentManager
|
|
|
4
4
|
from alchemist_core.config import get_logger
|
|
5
5
|
import numpy as np
|
|
6
6
|
import pandas as pd
|
|
7
|
+
from typing import Union, Tuple, Optional
|
|
7
8
|
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
|
|
8
9
|
from sklearn.model_selection import KFold, cross_validate, train_test_split
|
|
9
10
|
from sklearn.preprocessing import OneHotEncoder, StandardScaler, MinMaxScaler, RobustScaler
|
|
@@ -322,7 +323,13 @@ class SklearnModel(BaseModel):
|
|
|
322
323
|
self.X_orig = X_orig # Store original data for contour generation
|
|
323
324
|
|
|
324
325
|
X, y = self._preprocess_data(experiment_manager)
|
|
325
|
-
|
|
326
|
+
|
|
327
|
+
# Check if we should reuse a pre-optimized kernel
|
|
328
|
+
if hasattr(self, '_reuse_kernel') and self._reuse_kernel is not None:
|
|
329
|
+
self.kernel = self._reuse_kernel
|
|
330
|
+
logger.info("Reusing pre-optimized kernel hyperparameters")
|
|
331
|
+
else:
|
|
332
|
+
self.kernel = self._build_kernel(X)
|
|
326
333
|
|
|
327
334
|
# Create base parameters dictionary
|
|
328
335
|
params = {
|
|
@@ -463,13 +470,16 @@ class SklearnModel(BaseModel):
|
|
|
463
470
|
subset_X_train = X_train.iloc[:i]
|
|
464
471
|
subset_y_train = y_train.iloc[:i]
|
|
465
472
|
|
|
466
|
-
#
|
|
467
|
-
X_processed = self._preprocess_subset(subset_X_train, categorical_variables, fit_scalers=
|
|
468
|
-
y_processed = self._scale_output(subset_y_train.values.reshape(-1, 1), fit_scaler=
|
|
473
|
+
# Fit scalers on this subset
|
|
474
|
+
X_processed = self._preprocess_subset(subset_X_train, categorical_variables, fit_scalers=True)
|
|
475
|
+
y_processed = self._scale_output(subset_y_train.values.reshape(-1, 1), fit_scaler=True).ravel()
|
|
469
476
|
|
|
470
|
-
#
|
|
477
|
+
# Build kernel for this subset's dimensionality
|
|
478
|
+
subset_kernel = self._build_kernel(X_processed)
|
|
479
|
+
|
|
480
|
+
# Create model with subset-specific kernel but no re-optimization
|
|
471
481
|
eval_model = GaussianProcessRegressor(
|
|
472
|
-
kernel=
|
|
482
|
+
kernel=subset_kernel,
|
|
473
483
|
optimizer=None, # Don't re-optimize
|
|
474
484
|
random_state=self.random_state
|
|
475
485
|
)
|
|
@@ -515,13 +525,16 @@ class SklearnModel(BaseModel):
|
|
|
515
525
|
X_test_fold = subset_X.iloc[test_idx]
|
|
516
526
|
y_test_fold = subset_y.iloc[test_idx]
|
|
517
527
|
|
|
518
|
-
#
|
|
519
|
-
X_train_processed = self._preprocess_subset(X_train_fold, categorical_variables, fit_scalers=
|
|
520
|
-
y_train_processed = self._scale_output(y_train_fold.values.reshape(-1, 1), fit_scaler=
|
|
528
|
+
# Fit scalers on this fold's training data
|
|
529
|
+
X_train_processed = self._preprocess_subset(X_train_fold, categorical_variables, fit_scalers=True)
|
|
530
|
+
y_train_processed = self._scale_output(y_train_fold.values.reshape(-1, 1), fit_scaler=True).ravel()
|
|
531
|
+
|
|
532
|
+
# Build kernel for this fold's dimensionality
|
|
533
|
+
fold_kernel = self._build_kernel(X_train_processed)
|
|
521
534
|
|
|
522
|
-
# Create model with
|
|
535
|
+
# Create model with fold-specific kernel but no re-optimization
|
|
523
536
|
eval_model = GaussianProcessRegressor(
|
|
524
|
-
kernel=
|
|
537
|
+
kernel=fold_kernel,
|
|
525
538
|
optimizer=None, # Don't re-optimize
|
|
526
539
|
random_state=self.random_state
|
|
527
540
|
)
|
|
@@ -590,9 +603,13 @@ class SklearnModel(BaseModel):
|
|
|
590
603
|
X_train_processed = self._preprocess_subset(X_train_fold, categorical_variables, fit_scalers=True)
|
|
591
604
|
y_train_processed = self._scale_output(y_train_fold.values.reshape(-1, 1), fit_scaler=True).ravel()
|
|
592
605
|
|
|
593
|
-
# Create
|
|
606
|
+
# Create a kernel for this fold's dimensionality
|
|
607
|
+
# (categories might differ between folds, changing feature count)
|
|
608
|
+
fold_kernel = self._build_kernel(X_train_processed)
|
|
609
|
+
|
|
610
|
+
# Create model with fold-specific kernel but no re-optimization
|
|
594
611
|
cv_model = GaussianProcessRegressor(
|
|
595
|
-
kernel=
|
|
612
|
+
kernel=fold_kernel,
|
|
596
613
|
optimizer=None, # Don't re-optimize
|
|
597
614
|
random_state=self.random_state
|
|
598
615
|
)
|
|
@@ -781,3 +798,118 @@ class SklearnModel(BaseModel):
|
|
|
781
798
|
Z = predictions.reshape(X.shape)
|
|
782
799
|
|
|
783
800
|
return X, Y, Z
|
|
801
|
+
|
|
802
|
+
def evaluate_acquisition(
    self,
    X: Union[pd.DataFrame, np.ndarray],
    acq_func: str = 'ucb',
    acq_func_kwargs: Optional[dict] = None,
    maximize: bool = True
) -> Tuple[np.ndarray, None]:
    """
    Evaluate an acquisition function at the given points.

    Minimization delegates to skopt's ``gaussian_ei`` / ``gaussian_pi`` /
    ``gaussian_lcb``. Maximization is computed directly from the model's
    predictive mean and standard deviation, mirroring the skopt formulas
    with the sign of the improvement flipped.

    Args:
        X: Points to evaluate (DataFrame or array with shape (n, d)).
            Passed through ``self._preprocess_X`` before prediction.
        acq_func: Acquisition function name (case-insensitive): 'ei',
            'pi', 'ucb'/'lcb', or the long forms 'expectedimprovement',
            'probabilityofimprovement', 'upperconfidencebound',
            'lowerconfidencebound'.
        acq_func_kwargs: Additional parameters. ``xi`` (default 0.01) is
            the improvement margin for EI/PI; ``kappa`` (default 1.96) is
            the exploration weight for UCB/LCB.
        maximize: Whether we're maximizing (True) or minimizing (False).

    Returns:
        Tuple of (acq_values, None). The second element is always None
        because the acquisition functions are deterministic.

    Raises:
        ValueError: If the model has not been trained, or ``acq_func`` is
            not one of the supported names.

    Example:
        >>> points = pd.DataFrame({'temp': [300, 350, 400], 'pressure': [1, 2, 3]})
        >>> acq_vals, _ = model.evaluate_acquisition(points, acq_func='ei', maximize=True)
    """
    if not self.is_trained:
        raise ValueError("Model must be trained before evaluating acquisition functions.")

    # Convert input to the representation the fitted model expects
    X_processed = self._preprocess_X(X)

    # Incumbent value taken from self.y_train_: best observed target in
    # the optimization direction.
    # NOTE(review): the original comment says these targets are in scaled
    # space -- confirm against where y_train_ is assigned.
    y_opt = np.max(self.y_train_) if maximize else np.min(self.y_train_)

    acq_func_lower = acq_func.lower()

    # Parse kwargs with defaults
    if acq_func_kwargs is None:
        acq_func_kwargs = {}
    xi = acq_func_kwargs.get('xi', 0.01)
    kappa = acq_func_kwargs.get('kappa', 1.96)

    # NOTE: skopt's gaussian_ei/pi/lcb are designed for MINIMIZATION, so
    # the maximization variants are computed here from mu/std instead.
    # skopt is imported lazily so the maximize paths do not require it.
    if acq_func_lower in ('ei', 'expectedimprovement'):
        if maximize:
            # EI = E[max(f(x) - f(x_best) - xi, 0)], f(x_best) = max(y_train)
            from scipy.stats import norm

            mu, std = self.model.predict(X_processed, return_std=True)
            # xi is SUBTRACTED: a larger margin demands a larger gain over
            # the incumbent (mirror of skopt's `y_opt - xi - mu`).
            improve = mu - y_opt - xi
            z = improve / (std + 1e-9)  # Avoid division by zero
            acq_values = improve * norm.cdf(z) + std * norm.pdf(z)
        else:
            from skopt.acquisition import gaussian_ei

            acq_values = gaussian_ei(
                X_processed,
                self.model,
                y_opt=y_opt,
                xi=xi,
                return_grad=False
            )

    elif acq_func_lower in ('pi', 'probabilityofimprovement'):
        if maximize:
            # PI = P(f(x) > f(x_best) + xi)
            from scipy.stats import norm

            mu, std = self.model.predict(X_processed, return_std=True)
            improve = mu - y_opt - xi
            z = improve / (std + 1e-9)  # Avoid division by zero
            acq_values = norm.cdf(z)
        else:
            from skopt.acquisition import gaussian_pi

            acq_values = gaussian_pi(
                X_processed,
                self.model,
                y_opt=y_opt,
                xi=xi,
                return_grad=False
            )

    elif acq_func_lower in ('ucb', 'lcb', 'upperconfidencebound', 'lowerconfidencebound'):
        # For maximization: UCB = mean + kappa*std (higher is better)
        # For minimization: LCB = mean - kappa*std (lower is better)
        if maximize:
            mu, std = self.model.predict(X_processed, return_std=True)
            acq_values = mu + kappa * std
        else:
            from skopt.acquisition import gaussian_lcb

            acq_values = gaussian_lcb(
                X_processed,
                self.model,
                kappa=kappa,
                return_grad=False
            )
    else:
        raise ValueError(
            f"Unknown acquisition function '{acq_func}' for sklearn backend. "
            f"Valid options are: 'ei', 'pi', 'ucb/lcb'"
        )

    # Ensure output is 1D array
    if acq_values.ndim > 1:
        acq_values = acq_values.ravel()

    return acq_values, None
|