alchemist_nrel-0.2.1-py3-none-any.whl → alchemist_nrel-0.3.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. alchemist_core/__init__.py +14 -7
  2. alchemist_core/acquisition/botorch_acquisition.py +15 -6
  3. alchemist_core/audit_log.py +594 -0
  4. alchemist_core/data/experiment_manager.py +76 -5
  5. alchemist_core/models/botorch_model.py +6 -4
  6. alchemist_core/models/sklearn_model.py +74 -8
  7. alchemist_core/session.py +788 -39
  8. alchemist_core/utils/doe.py +200 -0
  9. alchemist_nrel-0.3.1.dist-info/METADATA +185 -0
  10. alchemist_nrel-0.3.1.dist-info/RECORD +66 -0
  11. {alchemist_nrel-0.2.1.dist-info → alchemist_nrel-0.3.1.dist-info}/entry_points.txt +1 -0
  12. api/example_client.py +7 -2
  13. api/main.py +21 -4
  14. api/models/requests.py +95 -1
  15. api/models/responses.py +167 -0
  16. api/routers/acquisition.py +25 -0
  17. api/routers/experiments.py +134 -6
  18. api/routers/sessions.py +438 -10
  19. api/routers/visualizations.py +10 -5
  20. api/routers/websocket.py +132 -0
  21. api/run_api.py +56 -0
  22. api/services/session_store.py +285 -54
  23. api/static/NEW_ICON.ico +0 -0
  24. api/static/NEW_ICON.png +0 -0
  25. api/static/NEW_LOGO_DARK.png +0 -0
  26. api/static/NEW_LOGO_LIGHT.png +0 -0
  27. api/static/assets/api-vcoXEqyq.js +1 -0
  28. api/static/assets/index-DWfIKU9j.js +4094 -0
  29. api/static/assets/index-sMIa_1hV.css +1 -0
  30. api/static/index.html +14 -0
  31. api/static/vite.svg +1 -0
  32. ui/gpr_panel.py +7 -2
  33. ui/notifications.py +197 -10
  34. ui/ui.py +1117 -68
  35. ui/variables_setup.py +47 -2
  36. ui/visualizations.py +60 -3
  37. alchemist_core/models/ax_model.py +0 -159
  38. alchemist_nrel-0.2.1.dist-info/METADATA +0 -206
  39. alchemist_nrel-0.2.1.dist-info/RECORD +0 -54
  40. {alchemist_nrel-0.2.1.dist-info → alchemist_nrel-0.3.1.dist-info}/WHEEL +0 -0
  41. {alchemist_nrel-0.2.1.dist-info → alchemist_nrel-0.3.1.dist-info}/licenses/LICENSE +0 -0
  42. {alchemist_nrel-0.2.1.dist-info → alchemist_nrel-0.3.1.dist-info}/top_level.txt +0 -0
alchemist_core/data/experiment_manager.py
@@ -13,13 +13,15 @@ class ExperimentManager:
         self.df = pd.DataFrame() # Raw experimental data
         self.search_space = search_space # Reference to the search space
         self.filepath = None # Path to saved experiment file
+        self._current_iteration = 0 # Track current iteration for audit log
 
     def set_search_space(self, search_space):
         """Set or update the search space reference."""
         self.search_space = search_space
 
     def add_experiment(self, point_dict: Dict[str, Union[float, str, int]], output_value: Optional[float] = None,
-                       noise_value: Optional[float] = None):
+                       noise_value: Optional[float] = None, iteration: Optional[int] = None,
+                       reason: Optional[str] = None):
         """
         Add a single experiment point.
 
@@ -27,6 +29,8 @@ class ExperimentManager:
             point_dict: Dictionary with variable names as keys and values
             output_value: The experiment output/target value (if known)
             noise_value: Optional observation noise/uncertainty value for regularization
+            iteration: Iteration number (auto-assigned if None)
+            reason: Reason for this experiment (e.g., 'Initial Design (LHS)', 'Expected Improvement')
         """
         # Create a copy of the point_dict to avoid modifying the original
         new_point = point_dict.copy()
@@ -38,6 +42,29 @@ class ExperimentManager:
         # Add noise value if provided
         if noise_value is not None:
             new_point['Noise'] = noise_value
+
+        # Add iteration tracking
+        if iteration is not None:
+            # Use provided iteration explicitly
+            new_point['Iteration'] = int(iteration)
+        else:
+            # Auto-calculate next iteration based on existing data
+            # This ensures proper iteration tracking across all clients
+            if len(self.df) > 0 and 'Iteration' in self.df.columns:
+                max_iteration = int(self.df['Iteration'].max())
+                new_point['Iteration'] = max_iteration + 1
+            else:
+                # First experiment defaults to iteration 0
+                new_point['Iteration'] = 0
+
+        # Keep _current_iteration in sync with latest iteration for backward compatibility
+        try:
+            self._current_iteration = int(new_point['Iteration'])
+        except Exception:
+            pass
+
+        # Add reason
+        new_point['Reason'] = reason if reason is not None else 'Manual'
 
         # Convert to DataFrame and append
         new_df = pd.DataFrame([new_point])
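To illustrate the new bookkeeping, here is a minimal sketch (not part of the diff) of how add_experiment now assigns iterations and reasons; the no-argument ExperimentManager() constructor and the variable name 'temp' are assumptions for the example, not taken from the diff.

from alchemist_core.data.experiment_manager import ExperimentManager

mgr = ExperimentManager()  # assumed constructible without a search space
mgr.add_experiment({'temp': 300.0}, output_value=1.2)  # Iteration 0, Reason 'Manual'
mgr.add_experiment({'temp': 310.0}, output_value=1.5)  # auto-assigned Iteration 1 (max + 1)
mgr.add_experiment({'temp': 320.0}, output_value=1.7,
                   iteration=1, reason='Expected Improvement')  # explicit iteration is kept
print(mgr.df[['Iteration', 'Reason']])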
@@ -52,6 +79,20 @@ class ExperimentManager:
         if missing_cols:
             raise ValueError(f"DataFrame is missing required columns: {missing_cols}")
 
+        # Ensure each row has an Iteration value; default to current iteration
+        if 'Iteration' not in data_df.columns:
+            data_df = data_df.copy()
+            data_df['Iteration'] = int(self._current_iteration)
+        else:
+            # Fill missing iterations with current iteration
+            data_df = data_df.copy()
+            data_df['Iteration'] = pd.to_numeric(data_df['Iteration'], errors='coerce').fillna(self._current_iteration).astype(int)
+            # Update _current_iteration to the max iteration present
+            if len(data_df) > 0:
+                max_iter = int(data_df['Iteration'].max())
+                if max_iter > self._current_iteration:
+                    self._current_iteration = max_iter
+
         # Append the data
         self.df = pd.concat([self.df, data_df], ignore_index=True)
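The coercion on the Iteration column is plain pandas; a standalone sketch of its behavior on a mixed column (the values and the current_iteration stand-in are illustrative):

import pandas as pd

data_df = pd.DataFrame({'x': [1.0, 2.0, 3.0], 'Iteration': [0, None, 'bad']})
current_iteration = 4  # stand-in for self._current_iteration
data_df['Iteration'] = (pd.to_numeric(data_df['Iteration'], errors='coerce')
                        .fillna(current_iteration)
                        .astype(int))
print(data_df['Iteration'].tolist())  # [0, 4, 4]: missing/non-numeric rows fall back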
 
@@ -69,8 +110,17 @@ class ExperimentManager:
         """
         if 'Output' not in self.df.columns:
             raise ValueError("DataFrame doesn't contain 'Output' column")
-
-        X = self.df.drop(columns=['Output'] + (['Noise'] if 'Noise' in self.df.columns else []))
+
+        # Drop metadata columns (Output, Noise, Iteration, Reason)
+        metadata_cols = ['Output']
+        if 'Noise' in self.df.columns:
+            metadata_cols.append('Noise')
+        if 'Iteration' in self.df.columns:
+            metadata_cols.append('Iteration')
+        if 'Reason' in self.df.columns:
+            metadata_cols.append('Reason')
+
+        X = self.df.drop(columns=metadata_cols)
         y = self.df['Output']
         return X, y
 
@@ -85,8 +135,17 @@ class ExperimentManager:
         """
         if 'Output' not in self.df.columns:
             raise ValueError("DataFrame doesn't contain 'Output' column")
-
-        X = self.df.drop(columns=['Output'] + (['Noise'] if 'Noise' in self.df.columns else []))
+
+        # Drop metadata columns
+        metadata_cols = ['Output']
+        if 'Noise' in self.df.columns:
+            metadata_cols.append('Noise')
+        if 'Iteration' in self.df.columns:
+            metadata_cols.append('Iteration')
+        if 'Reason' in self.df.columns:
+            metadata_cols.append('Reason')
+
+        X = self.df.drop(columns=metadata_cols)
         y = self.df['Output']
         noise = self.df['Noise'] if 'Noise' in self.df.columns else None
         return X, y, noise
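Both accessors now separate true input variables from bookkeeping columns. A sketch of the resulting split on a hypothetical experiments table (column names other than the four metadata columns are invented for illustration):

import pandas as pd

df = pd.DataFrame({'temp': [300.0, 310.0], 'Output': [1.2, 1.5],
                   'Noise': [0.01, 0.02], 'Iteration': [0, 1],
                   'Reason': ['Manual', 'Expected Improvement']})
metadata_cols = [c for c in ('Output', 'Noise', 'Iteration', 'Reason') if c in df.columns]
X = df.drop(columns=metadata_cols)
y = df['Output']
noise = df['Noise'] if 'Noise' in df.columns else None
print(list(X.columns))  # ['temp']: only input variables reach the model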
@@ -129,6 +188,18 @@ class ExperimentManager:
             print("Warning: Noise column contains non-numeric values. Converting to default noise level.")
             self.df['Noise'] = 1e-10 # Default small noise
 
+        # Initialize iteration tracking from data
+        if 'Iteration' in self.df.columns:
+            self._current_iteration = int(self.df['Iteration'].max())
+        else:
+            # Add iteration column if missing (legacy data)
+            self.df['Iteration'] = 0
+            self._current_iteration = 0
+
+        # Add reason column if missing (legacy data)
+        if 'Reason' not in self.df.columns:
+            self.df['Reason'] = 'Initial Design'
+
         return self
 
     @classmethod
alchemist_core/models/botorch_model.py
@@ -485,8 +485,10 @@ class BoTorchModel(BaseModel):
                 outcome_transform=fold_outcome_transform
             )
 
-            # Load the trained state - this keeps the hyperparameters without retraining
-            fold_model.load_state_dict(self.fitted_state_dict, strict=False)
+            # Train the fold model from scratch (don't load state_dict to avoid dimension mismatches)
+            # This is necessary because folds may have different categorical values or data shapes
+            mll = ExactMarginalLogLikelihood(fold_model.likelihood, fold_model)
+            fit_gpytorch_mll(mll)
 
             # Make predictions on test fold
             fold_model.eval()
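Each fold is now refit with GPyTorch's exact marginal log likelihood instead of reusing the full model's state_dict. A minimal, self-contained sketch of that fitting pattern, using a plain SingleTaskGP and toy tensors rather than alchemist's own model construction and transforms:

import torch
from botorch.models import SingleTaskGP
from botorch.fit import fit_gpytorch_mll
from gpytorch.mlls import ExactMarginalLogLikelihood

# Toy fold data; the real code builds fold_model from the package's transforms
train_X = torch.rand(20, 2, dtype=torch.double)
train_Y = torch.sin(train_X.sum(dim=-1, keepdim=True))

fold_model = SingleTaskGP(train_X, train_Y)
mll = ExactMarginalLogLikelihood(fold_model.likelihood, fold_model)
fit_gpytorch_mll(mll)  # re-optimizes hyperparameters on this fold only
fold_model.eval()      # ready for posterior predictions on the held-out fold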
@@ -720,8 +722,8 @@ class BoTorchModel(BaseModel):
         y_vals = torch.linspace(y_range[0], y_range[1], 100)
         X, Y = torch.meshgrid(x_vals, y_vals, indexing='ij')
 
-        # Total dimensions in the model
-        input_dim = len(self.feature_names) if self.feature_names else 4
+        # Total dimensions in the model (use original_feature_names to match actual input dimensions)
+        input_dim = len(self.original_feature_names) if self.original_feature_names else 2
 
         # Create placeholder tensors for all dimensions
         grid_tensors = []
alchemist_core/models/sklearn_model.py
@@ -85,9 +85,30 @@ class SklearnModel(BaseModel):
     def _build_kernel(self, X):
         """Build the kernel using training data X to initialize length scales."""
         kernel_type = self.kernel_options.get("kernel_type", "RBF")
-        # Compute initial length scales as the mean of the data along each dimension.
-        ls_init = np.mean(X, axis=0)
-        ls_bounds = [(1e-5, l * 1e5) for l in ls_init]
+        # Compute initial length scales from the data.
+        # Use standard deviation (positive) as a robust length-scale initializer.
+        try:
+            ls_init = np.std(X, axis=0)
+            ls_init = np.array(ls_init, dtype=float)
+            # Replace non-finite or non-positive values with sensible defaults
+            bad_mask = ~np.isfinite(ls_init) | (ls_init <= 0)
+            if np.any(bad_mask):
+                logger.debug("Replacing non-finite or non-positive length-scales with 1.0")
+                ls_init[bad_mask] = 1.0
+
+            # Build finite, positive bounds for each length-scale
+            ls_bounds = []
+            for l in ls_init:
+                # Protect against extremely small or non-finite upper bounds
+                upper = float(l * 1e5) if np.isfinite(l) else 1e5
+                if not np.isfinite(upper) or upper <= 1e-8:
+                    upper = 1e3
+                ls_bounds.append((1e-5, upper))
+        except Exception as e:
+            logger.warning(f"Failed to compute sensible length-scales from data: {e}. Using safe defaults.")
+            n_dims = X.shape[1] if hasattr(X, 'shape') else 1
+            ls_init = np.ones(n_dims, dtype=float)
+            ls_bounds = [(1e-5, 1e5) for _ in range(n_dims)]
         constant = C()
         if kernel_type == "RBF":
             kernel = constant * RBF(length_scale=ls_init, length_scale_bounds=ls_bounds)
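The motivation for the switch: np.mean can return zero or negative values (e.g., for centered data), which are invalid RBF length scales and yield non-finite optimizer bounds, whereas np.std is non-negative. A sketch of the failure mode and the std-based fix (the bounds expression here is simplified relative to the diff):

import numpy as np
from sklearn.gaussian_process.kernels import RBF

X = np.array([[-1.0, 0.5], [1.0, -0.5], [0.0, 0.0]])  # zero-mean inputs
print(np.mean(X, axis=0))  # [0. 0.]: invalid as length scales (must be > 0)

ls_init = np.std(X, axis=0)
ls_init[~np.isfinite(ls_init) | (ls_init <= 0)] = 1.0  # guard degenerate dimensions
ls_bounds = [(1e-5, max(float(l) * 1e5, 1e3)) for l in ls_init]
kernel = RBF(length_scale=ls_init, length_scale_bounds=ls_bounds)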
@@ -317,12 +338,29 @@ class SklearnModel(BaseModel):
 
         # Create model with appropriate parameters
         self.model = GaussianProcessRegressor(**params)
-
+
         # Store the raw training data for possible reuse with skopt
         self.X_train_ = X
         self.y_train_ = y
-
-        self.model.fit(X, y)
+
+        # Fit the model, but be defensive: if sklearn complains about non-finite
+        # bounds when n_restarts_optimizer>0, retry with no restarts.
+        try:
+            self.model.fit(X, y)
+        except ValueError as e:
+            msg = str(e)
+            if 'requires that all bounds are finite' in msg or 'bounds' in msg.lower():
+                logger.warning("GaussianProcessRegressor failed due to non-finite bounds. "
+                               "Retrying without optimizer restarts (n_restarts_optimizer=0).")
+                # Retry with safer parameters
+                safe_params = params.copy()
+                safe_params['n_restarts_optimizer'] = 0
+                safe_params['optimizer'] = None
+                self.model = GaussianProcessRegressor(**safe_params)
+                self.model.fit(X, y)
+            else:
+                # Re-raise other value errors
+                raise
         self.optimized_kernel = self.model.kernel_
         self._is_trained = True
 
@@ -358,8 +396,12 @@ class SklearnModel(BaseModel):
         if return_std:
             pred_mean, pred_std = predictions
 
+            # Safety check: replace invalid/negative std with small positive value
+            # Sklearn GP can produce negative variances due to numerical issues
+            pred_std = np.maximum(pred_std, 1e-6)
+
             # Apply calibration to standard deviation if enabled
-            if self.calibration_enabled:
+            if self.calibration_enabled and np.isfinite(self.calibration_factor):
                 pred_std = pred_std * self.calibration_factor
 
         # Inverse transform the mean predictions
@@ -598,11 +640,35 @@ class SklearnModel(BaseModel):
         y_pred = self.cv_cached_results['y_pred']
         y_std = self.cv_cached_results['y_std']
 
+        # Check for numerical issues (zero/negative variances)
+        if np.any(y_std <= 0) or np.any(~np.isfinite(y_std)):
+            logger.warning("Sklearn GP produced invalid uncertainties (zero/negative/inf). Disabling calibration.")
+            self.calibration_enabled = False
+            self.calibration_factor = 1.0
+            return
+
         # Compute standardized residuals (z-scores)
-        z_scores = (y_true - y_pred) / y_std
+        # Add small epsilon to avoid division by zero
+        epsilon = 1e-10
+        z_scores = (y_true - y_pred) / (y_std + epsilon)
+
+        # Check for numerical validity
+        if not np.all(np.isfinite(z_scores)):
+            logger.warning("Z-scores contain NaN/inf. Disabling calibration.")
+            self.calibration_enabled = False
+            self.calibration_factor = 1.0
+            return
 
         # Calibration factor = std(z)
         self.calibration_factor = np.std(z_scores, ddof=1)
+
+        # Final check for valid calibration factor
+        if not np.isfinite(self.calibration_factor) or self.calibration_factor <= 0:
+            logger.warning(f"Invalid calibration factor: {self.calibration_factor}. Disabling calibration.")
+            self.calibration_enabled = False
+            self.calibration_factor = 1.0
+            return
 
         self.calibration_enabled = True
 
         # Create calibrated copy of CV results for plotting
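The calibration factor is the sample standard deviation of the cross-validation z-scores: a value above 1 means the GP's predicted uncertainties were too small, and multiplying pred_std by the factor widens them accordingly. A synthetic sketch of the computation (all data here is generated, not from the package):

import numpy as np

rng = np.random.default_rng(0)
y_true = rng.normal(size=200)
y_pred = y_true + rng.normal(scale=0.2, size=200)  # residuals ~ N(0, 0.2)
y_std = np.full(200, 0.1)                          # overconfident by roughly 2x

z_scores = (y_true - y_pred) / (y_std + 1e-10)
calibration_factor = np.std(z_scores, ddof=1)
print(round(calibration_factor, 1))  # ~2.0: predicted std should be doubled
calibrated_std = y_std * calibration_factor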