alchemist-nrel 0.2.1__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. alchemist_core/__init__.py +14 -7
  2. alchemist_core/acquisition/botorch_acquisition.py +15 -6
  3. alchemist_core/audit_log.py +594 -0
  4. alchemist_core/data/experiment_manager.py +76 -5
  5. alchemist_core/models/botorch_model.py +6 -4
  6. alchemist_core/models/sklearn_model.py +74 -8
  7. alchemist_core/session.py +788 -39
  8. alchemist_core/utils/doe.py +200 -0
  9. alchemist_nrel-0.3.1.dist-info/METADATA +185 -0
  10. alchemist_nrel-0.3.1.dist-info/RECORD +66 -0
  11. {alchemist_nrel-0.2.1.dist-info → alchemist_nrel-0.3.1.dist-info}/entry_points.txt +1 -0
  12. api/example_client.py +7 -2
  13. api/main.py +21 -4
  14. api/models/requests.py +95 -1
  15. api/models/responses.py +167 -0
  16. api/routers/acquisition.py +25 -0
  17. api/routers/experiments.py +134 -6
  18. api/routers/sessions.py +438 -10
  19. api/routers/visualizations.py +10 -5
  20. api/routers/websocket.py +132 -0
  21. api/run_api.py +56 -0
  22. api/services/session_store.py +285 -54
  23. api/static/NEW_ICON.ico +0 -0
  24. api/static/NEW_ICON.png +0 -0
  25. api/static/NEW_LOGO_DARK.png +0 -0
  26. api/static/NEW_LOGO_LIGHT.png +0 -0
  27. api/static/assets/api-vcoXEqyq.js +1 -0
  28. api/static/assets/index-DWfIKU9j.js +4094 -0
  29. api/static/assets/index-sMIa_1hV.css +1 -0
  30. api/static/index.html +14 -0
  31. api/static/vite.svg +1 -0
  32. ui/gpr_panel.py +7 -2
  33. ui/notifications.py +197 -10
  34. ui/ui.py +1117 -68
  35. ui/variables_setup.py +47 -2
  36. ui/visualizations.py +60 -3
  37. alchemist_core/models/ax_model.py +0 -159
  38. alchemist_nrel-0.2.1.dist-info/METADATA +0 -206
  39. alchemist_nrel-0.2.1.dist-info/RECORD +0 -54
  40. {alchemist_nrel-0.2.1.dist-info → alchemist_nrel-0.3.1.dist-info}/WHEEL +0 -0
  41. {alchemist_nrel-0.2.1.dist-info → alchemist_nrel-0.3.1.dist-info}/licenses/LICENSE +0 -0
  42. {alchemist_nrel-0.2.1.dist-info → alchemist_nrel-0.3.1.dist-info}/top_level.txt +0 -0
alchemist_core/session.py CHANGED
@@ -7,10 +7,14 @@ This module provides the main entry point for using ALchemist as a headless libr
7
7
  from typing import Optional, Dict, Any, List, Tuple, Callable
8
8
  import pandas as pd
9
9
  import numpy as np
10
+ import json
11
+ import hashlib
12
+ from pathlib import Path
10
13
  from alchemist_core.data.search_space import SearchSpace
11
14
  from alchemist_core.data.experiment_manager import ExperimentManager
12
15
  from alchemist_core.events import EventEmitter
13
16
  from alchemist_core.config import get_logger
17
+ from alchemist_core.audit_log import AuditLog, SessionMetadata, AuditEntry
14
18
 
15
19
  logger = get_logger(__name__)
16
20
 
@@ -27,28 +31,29 @@ class OptimizationSession:
27
31
  5. Iterate
28
32
 
29
33
  Example:
30
- >>> from alchemist_core import OptimizationSession
31
- >>>
32
- >>> # Create session with search space
33
- >>> session = OptimizationSession()
34
- >>> session.add_variable('temperature', 'real', bounds=(300, 500))
35
- >>> session.add_variable('pressure', 'real', bounds=(1, 10))
36
- >>> session.add_variable('catalyst', 'categorical', categories=['A', 'B', 'C'])
37
- >>>
38
- >>> # Load experimental data
39
- >>> session.load_data('experiments.csv', target_column='yield')
40
- >>>
41
- >>> # Train model
42
- >>> session.train_model(backend='botorch', kernel='Matern')
43
- >>>
44
- >>> # Suggest next experiment
45
- >>> next_point = session.suggest_next(strategy='EI', goal='maximize')
46
- >>> print(next_point)
34
+ > from alchemist_core import OptimizationSession
35
+ >
36
+ > # Create session with search space
37
+ > session = OptimizationSession()
38
+ > session.add_variable('temperature', 'real', bounds=(300, 500))
39
+ > session.add_variable('pressure', 'real', bounds=(1, 10))
40
+ > session.add_variable('catalyst', 'categorical', categories=['A', 'B', 'C'])
41
+ >
42
+ > # Load experimental data
43
+ > session.load_data('experiments.csv', target_column='yield')
44
+ >
45
+ > # Train model
46
+ > session.train_model(backend='botorch', kernel='Matern')
47
+ >
48
+ > # Suggest next experiment
49
+ > next_point = session.suggest_next(strategy='EI', goal='maximize')
50
+ > print(next_point)
47
51
  """
48
52
 
49
53
  def __init__(self, search_space: Optional[SearchSpace] = None,
50
54
  experiment_manager: Optional[ExperimentManager] = None,
51
- event_emitter: Optional[EventEmitter] = None):
55
+ event_emitter: Optional[EventEmitter] = None,
56
+ session_metadata: Optional[SessionMetadata] = None):
52
57
  """
53
58
  Initialize optimization session.
54
59
 
@@ -56,11 +61,16 @@ class OptimizationSession:
56
61
  search_space: Pre-configured SearchSpace object (optional)
57
62
  experiment_manager: Pre-configured ExperimentManager (optional)
58
63
  event_emitter: EventEmitter for progress notifications (optional)
64
+ session_metadata: Pre-configured session metadata (optional)
59
65
  """
60
66
  self.search_space = search_space if search_space is not None else SearchSpace()
61
67
  self.experiment_manager = experiment_manager if experiment_manager is not None else ExperimentManager()
62
68
  self.events = event_emitter if event_emitter is not None else EventEmitter()
63
69
 
70
+ # Session metadata and audit log
71
+ self.metadata = session_metadata if session_metadata is not None else SessionMetadata.create()
72
+ self.audit_log = AuditLog()
73
+
64
74
  # Link search_space to experiment_manager
65
75
  self.experiment_manager.set_search_space(self.search_space)
66
76
 
@@ -69,13 +79,19 @@ class OptimizationSession:
69
79
  self.model_backend = None
70
80
  self.acquisition = None
71
81
 
82
+ # Staged experiments (for workflow management)
83
+ self.staged_experiments = [] # List of experiment dicts awaiting evaluation
84
+ self.last_suggestions = [] # Most recent acquisition suggestions (for UI)
85
+
72
86
  # Configuration
73
87
  self.config = {
74
88
  'random_state': 42,
75
- 'verbose': True
89
+ 'verbose': True,
90
+ 'auto_train': False, # Auto-train model after adding experiments
91
+ 'auto_train_threshold': 5 # Minimum experiments before auto-train
76
92
  }
77
93
 
78
- logger.info("OptimizationSession initialized")
94
+ logger.info(f"OptimizationSession initialized: {self.metadata.session_id}")
79
95
 
80
96
  # ============================================================
81
97
  # Search Space Management
@@ -93,8 +109,8 @@ class OptimizationSession:
93
109
  - For 'categorical': categories=[list of values] or values=[list]
94
110
 
95
111
  Example:
96
- >>> session.add_variable('temp', 'real', bounds=(300, 500))
97
- >>> session.add_variable('catalyst', 'categorical', categories=['A', 'B'])
112
+ > session.add_variable('temp', 'real', bounds=(300, 500))
113
+ > session.add_variable('catalyst', 'categorical', categories=['A', 'B'])
98
114
  """
99
115
  # Convert user-friendly API to internal format
100
116
  params = kwargs.copy()
@@ -186,7 +202,7 @@ class OptimizationSession:
186
202
  noise_column: Optional column with measurement noise/uncertainty
187
203
 
188
204
  Example:
189
- >>> session.load_data('experiments.csv', target_column='yield')
205
+ > session.load_data('experiments.csv', target_column='yield')
190
206
  """
191
207
  # Load the CSV
192
208
  import pandas as pd
@@ -222,7 +238,8 @@ class OptimizationSession:
222
238
  self.events.emit('data_loaded', {'n_experiments': n_experiments, 'filepath': filepath})
223
239
 
224
240
  def add_experiment(self, inputs: Dict[str, Any], output: float,
225
- noise: Optional[float] = None) -> None:
241
+ noise: Optional[float] = None, iteration: Optional[int] = None,
242
+ reason: Optional[str] = None) -> None:
226
243
  """
227
244
  Add a single experiment to the dataset.
228
245
 
@@ -230,18 +247,23 @@ class OptimizationSession:
230
247
  inputs: Dictionary mapping variable names to values
231
248
  output: Target/output value
232
249
  noise: Optional measurement uncertainty
250
+ iteration: Iteration number (auto-assigned if None)
251
+ reason: Reason for this experiment (e.g., 'Manual', 'Expected Improvement')
233
252
 
234
253
  Example:
235
- >>> session.add_experiment(
254
+ > session.add_experiment(
236
255
  ... inputs={'temperature': 350, 'catalyst': 'A'},
237
- ... output=0.85
256
+ ... output=0.85,
257
+ ... reason='Manual'
238
258
  ... )
239
259
  """
240
260
  # Use ExperimentManager's add_experiment method
241
261
  self.experiment_manager.add_experiment(
242
262
  point_dict=inputs,
243
263
  output_value=output,
244
- noise_value=noise
264
+ noise_value=noise,
265
+ iteration=iteration,
266
+ reason=reason
245
267
  )
246
268
 
247
269
  logger.info(f"Added experiment: {inputs} → {output}")
@@ -272,6 +294,206 @@ class OptimizationSession:
272
294
  'feature_names': list(X.columns)
273
295
  }
274
296
 
297
+ # ============================================================
298
+ # Staged Experiments (Workflow Management)
299
+ # ============================================================
300
+
301
+ def add_staged_experiment(self, inputs: Dict[str, Any]) -> None:
302
+ """
303
+ Add an experiment to the staging area (awaiting evaluation).
304
+
305
+ Staged experiments are typically suggested by acquisition functions
306
+ but not yet evaluated. They can be retrieved, evaluated externally,
307
+ and then added to the dataset with add_experiment().
308
+
309
+ Args:
310
+ inputs: Dictionary mapping variable names to values
311
+
312
+ Example:
313
+ > # Generate suggestions and stage them
314
+ > suggestions = session.suggest_next(n_suggestions=3)
315
+ > for point in suggestions.to_dict('records'):
316
+ > session.add_staged_experiment(point)
317
+ >
318
+ > # Later, evaluate and add
319
+ > staged = session.get_staged_experiments()
320
+ > for point in staged:
321
+ > output = run_experiment(**point)
322
+ > session.add_experiment(point, output=output)
323
+ > session.clear_staged_experiments()
324
+ """
325
+ self.staged_experiments.append(inputs)
326
+ logger.debug(f"Staged experiment: {inputs}")
327
+ self.events.emit('experiment_staged', {'inputs': inputs})
328
+
329
+ def get_staged_experiments(self) -> List[Dict[str, Any]]:
330
+ """
331
+ Get all staged experiments awaiting evaluation.
332
+
333
+ Returns:
334
+ List of experiment input dictionaries
335
+ """
336
+ return self.staged_experiments.copy()
337
+
338
+ def clear_staged_experiments(self) -> int:
339
+ """
340
+ Clear all staged experiments.
341
+
342
+ Returns:
343
+ Number of experiments cleared
344
+ """
345
+ count = len(self.staged_experiments)
346
+ self.staged_experiments.clear()
347
+ if count > 0:
348
+ logger.info(f"Cleared {count} staged experiments")
349
+ self.events.emit('staged_experiments_cleared', {'count': count})
350
+ return count
351
+
352
+ def move_staged_to_experiments(self, outputs: List[float],
353
+ noises: Optional[List[float]] = None,
354
+ iteration: Optional[int] = None,
355
+ reason: Optional[str] = None) -> int:
356
+ """
357
+ Evaluate staged experiments and add them to the dataset in batch.
358
+
359
+ Convenience method that pairs staged inputs with outputs and adds
360
+ them all to the experiment manager, then clears the staging area.
361
+
362
+ Args:
363
+ outputs: List of output values (must match length of staged experiments)
364
+ noises: Optional list of measurement uncertainties
365
+ iteration: Iteration number for all experiments (auto-assigned if None)
366
+ reason: Reason for these experiments (e.g., 'Expected Improvement')
367
+
368
+ Returns:
369
+ Number of experiments added
370
+
371
+ Example:
372
+ > # Stage some experiments
373
+ > session.add_staged_experiment({'x': 1.0, 'y': 2.0})
374
+ > session.add_staged_experiment({'x': 3.0, 'y': 4.0})
375
+ >
376
+ > # Evaluate them
377
+ > outputs = [run_experiment(**point) for point in session.get_staged_experiments()]
378
+ >
379
+ > # Add to dataset and clear staging
380
+ > session.move_staged_to_experiments(outputs, reason='LogEI')
381
+ """
382
+ if len(outputs) != len(self.staged_experiments):
383
+ raise ValueError(
384
+ f"Number of outputs ({len(outputs)}) must match "
385
+ f"number of staged experiments ({len(self.staged_experiments)})"
386
+ )
387
+
388
+ if noises is not None and len(noises) != len(self.staged_experiments):
389
+ raise ValueError(
390
+ f"Number of noise values ({len(noises)}) must match "
391
+ f"number of staged experiments ({len(self.staged_experiments)})"
392
+ )
393
+
394
+ # Add each experiment
395
+ for i, inputs in enumerate(self.staged_experiments):
396
+ noise = noises[i] if noises is not None else None
397
+ self.add_experiment(
398
+ inputs=inputs,
399
+ output=outputs[i],
400
+ noise=noise,
401
+ iteration=iteration,
402
+ reason=reason
403
+ )
404
+
405
+ count = len(self.staged_experiments)
406
+ self.clear_staged_experiments()
407
+
408
+ logger.info(f"Moved {count} staged experiments to dataset")
409
+ return count
410
+
411
+ # ============================================================
412
+ # Initial Design Generation
413
+ # ============================================================
414
+
415
+ def generate_initial_design(
416
+ self,
417
+ method: str = "lhs",
418
+ n_points: int = 10,
419
+ random_seed: Optional[int] = None,
420
+ **kwargs
421
+ ) -> List[Dict[str, Any]]:
422
+ """
423
+ Generate initial experimental design (Design of Experiments).
424
+
425
+ Creates a set of experimental conditions to evaluate before starting
426
+ Bayesian optimization. This does NOT add the experiments to the session -
427
+ you must evaluate them and add the results using add_experiment().
428
+
429
+ Supported methods:
430
+ - 'random': Uniform random sampling
431
+ - 'lhs': Latin Hypercube Sampling (recommended, good space-filling properties)
432
+ - 'sobol': Sobol quasi-random sequences (low discrepancy)
433
+ - 'halton': Halton sequences
434
+ - 'hammersly': Hammersly sequences (low discrepancy)
435
+
436
+ Args:
437
+ method: Sampling strategy to use
438
+ n_points: Number of points to generate
439
+ random_seed: Random seed for reproducibility
440
+ **kwargs: Additional method-specific parameters:
441
+ - lhs_criterion: For LHS method ("maximin", "correlation", "ratio")
442
+
443
+ Returns:
444
+ List of dictionaries with variable names and values (no outputs)
445
+
446
+ Example:
447
+ > # Generate initial design
448
+ > points = session.generate_initial_design('lhs', n_points=10)
449
+ >
450
+ > # Run experiments and add results
451
+ > for point in points:
452
+ > output = run_experiment(**point) # Your experiment function
453
+ > session.add_experiment(point, output=output)
454
+ >
455
+ > # Now ready to train model
456
+ > session.train_model()
457
+ """
458
+ if len(self.search_space.variables) == 0:
459
+ raise ValueError(
460
+ "No variables defined in search space. "
461
+ "Use add_variable() to define variables before generating initial design."
462
+ )
463
+
464
+ from alchemist_core.utils.doe import generate_initial_design
465
+
466
+ points = generate_initial_design(
467
+ search_space=self.search_space,
468
+ method=method,
469
+ n_points=n_points,
470
+ random_seed=random_seed,
471
+ **kwargs
472
+ )
473
+
474
+ # Store sampler info in config for audit trail
475
+ self.config['initial_design_method'] = method
476
+ self.config['initial_design_n_points'] = len(points)
477
+
478
+ logger.info(f"Generated {len(points)} initial design points using {method} method")
479
+ self.events.emit('initial_design_generated', {
480
+ 'method': method,
481
+ 'n_points': len(points)
482
+ })
483
+
484
+ # Add a lightweight audit data_locked entry for the initial design metadata
485
+ try:
486
+ extra = {'initial_design_method': method, 'initial_design_n_points': len(points)}
487
+ # Create an empty dataframe snapshot of the planned points
488
+ import pandas as pd
489
+ planned_df = pd.DataFrame(points)
490
+ self.audit_log.lock_data(planned_df, notes=f"Initial design ({method})", extra_parameters=extra)
491
+ except Exception:
492
+ # Audit logging should not block design generation
493
+ logger.debug("Failed to add initial design to audit log")
494
+
495
+ return points
496
+
275
497
  # ============================================================
276
498
  # Model Training
277
499
  # ============================================================
@@ -291,8 +513,8 @@ class OptimizationSession:
291
513
  Dictionary with training results and hyperparameters
292
514
 
293
515
  Example:
294
- >>> results = session.train_model(backend='botorch', kernel='Matern')
295
- >>> print(results['metrics'])
516
+ > results = session.train_model(backend='botorch', kernel='Matern')
517
+ > print(results['metrics'])
296
518
  """
297
519
  df = self.experiment_manager.get_data()
298
520
  if df is None or df.empty:
@@ -312,6 +534,27 @@ class OptimizationSession:
312
534
  # Extract calibration_enabled before passing kwargs to model constructor
313
535
  calibration_enabled = kwargs.pop('calibration_enabled', False)
314
536
 
537
+ # Validate and map transform types based on backend
538
+ # BoTorch uses: 'normalize', 'standardize'
539
+ # Sklearn uses: 'minmax', 'standard', 'robust', 'none'
540
+ if self.model_backend == 'sklearn':
541
+ # Map BoTorch transform types to sklearn equivalents
542
+ transform_map = {
543
+ 'normalize': 'minmax', # BoTorch normalize → sklearn minmax
544
+ 'standardize': 'standard', # BoTorch standardize → sklearn standard
545
+ 'none': 'none'
546
+ }
547
+ if 'input_transform_type' in kwargs:
548
+ original = kwargs['input_transform_type']
549
+ kwargs['input_transform_type'] = transform_map.get(original, original)
550
+ if original != kwargs['input_transform_type']:
551
+ logger.debug(f"Mapped input transform '{original}' → '{kwargs['input_transform_type']}' for sklearn")
552
+ if 'output_transform_type' in kwargs:
553
+ original = kwargs['output_transform_type']
554
+ kwargs['output_transform_type'] = transform_map.get(original, original)
555
+ if original != kwargs['output_transform_type']:
556
+ logger.debug(f"Mapped output transform '{original}' → '{kwargs['output_transform_type']}' for sklearn")
557
+
315
558
  # Import appropriate model class
316
559
  if self.model_backend == 'sklearn':
317
560
  from alchemist_core.models.sklearn_model import SklearnModel
@@ -330,6 +573,15 @@ class OptimizationSession:
330
573
  elif self.model_backend == 'botorch':
331
574
  from alchemist_core.models.botorch_model import BoTorchModel
332
575
 
576
+ # Apply sensible defaults for BoTorch if not explicitly overridden
577
+ # Input normalization and output standardization are critical for performance
578
+ if 'input_transform_type' not in kwargs:
579
+ kwargs['input_transform_type'] = 'normalize'
580
+ logger.debug("Auto-applying input normalization for BoTorch model")
581
+ if 'output_transform_type' not in kwargs:
582
+ kwargs['output_transform_type'] = 'standardize'
583
+ logger.debug("Auto-applying output standardization for BoTorch model")
584
+
333
585
  # Build kernel options - BoTorch uses 'cont_kernel_type' not 'kernel_type'
334
586
  kernel_options = {'cont_kernel_type': kernel}
335
587
  if kernel_params:
@@ -437,8 +689,45 @@ class OptimizationSession:
437
689
  # Convert complex objects to their string representation
438
690
  json_hyperparams[key] = str(value)
439
691
 
692
+ # Extract kernel name and parameters
693
+ kernel_name = 'unknown'
694
+ if self.model_backend == 'sklearn':
695
+ # First try kernel_options
696
+ if hasattr(self.model, 'kernel_options') and 'kernel_type' in self.model.kernel_options:
697
+ kernel_name = self.model.kernel_options['kernel_type']
698
+ # Add nu parameter for Matern kernels
699
+ if kernel_name == 'Matern' and 'matern_nu' in self.model.kernel_options:
700
+ json_hyperparams['matern_nu'] = self.model.kernel_options['matern_nu']
701
+ # Then try trained kernel
702
+ elif hasattr(self.model, 'model') and hasattr(self.model.model, 'kernel_'):
703
+ kernel_obj = self.model.model.kernel_
704
+ # Navigate through Product/Sum kernels to find base kernel
705
+ if hasattr(kernel_obj, 'k2'): # Product kernel (Constant * BaseKernel)
706
+ base_kernel = kernel_obj.k2
707
+ else:
708
+ base_kernel = kernel_obj
709
+
710
+ kernel_class = type(base_kernel).__name__
711
+ if 'Matern' in kernel_class:
712
+ kernel_name = 'Matern'
713
+ # Extract nu parameter if available
714
+ if hasattr(base_kernel, 'nu'):
715
+ json_hyperparams['matern_nu'] = float(base_kernel.nu)
716
+ elif 'RBF' in kernel_class:
717
+ kernel_name = 'RBF'
718
+ elif 'RationalQuadratic' in kernel_class:
719
+ kernel_name = 'RationalQuadratic'
720
+ else:
721
+ kernel_name = kernel_class
722
+ elif self.model_backend == 'botorch':
723
+ if hasattr(self.model, 'cont_kernel_type'):
724
+ kernel_name = self.model.cont_kernel_type
725
+ elif 'kernel_type' in json_hyperparams:
726
+ kernel_name = json_hyperparams['kernel_type']
727
+
440
728
  return {
441
729
  'backend': self.model_backend,
730
+ 'kernel': kernel_name,
442
731
  'hyperparameters': json_hyperparams,
443
732
  'metrics': metrics,
444
733
  'is_trained': True
@@ -463,8 +752,8 @@ class OptimizationSession:
463
752
  DataFrame with suggested experiment(s)
464
753
 
465
754
  Example:
466
- >>> next_point = session.suggest_next(strategy='EI', goal='maximize')
467
- >>> print(next_point)
755
+ > next_point = session.suggest_next(strategy='EI', goal='maximize')
756
+ > print(next_point)
468
757
  """
469
758
  if self.model is None:
470
759
  raise ValueError("No trained model available. Use train_model() first.")
@@ -478,7 +767,8 @@ class OptimizationSession:
478
767
  model=self.model, # Pass the full SklearnModel wrapper, not just .model
479
768
  acq_func=strategy.lower(),
480
769
  maximize=(goal.lower() == 'maximize'),
481
- random_state=self.config['random_state']
770
+ random_state=self.config['random_state'],
771
+ acq_func_kwargs=kwargs # Pass xi, kappa, etc. to acquisition function
482
772
  )
483
773
 
484
774
  # Update acquisition with existing experimental data (un-encoded)
@@ -527,6 +817,16 @@ class OptimizationSession:
527
817
  logger.info(f"Suggested point: {suggestion_dict}")
528
818
  self.events.emit('acquisition_completed', {'suggestion': suggestion_dict})
529
819
 
820
+ # Store suggestions for UI/API access
821
+ self.last_suggestions = result_df.to_dict('records')
822
+
823
+ # Cache suggestion info for audit log
824
+ self._last_acquisition_info = {
825
+ 'strategy': strategy,
826
+ 'goal': goal,
827
+ 'parameters': kwargs
828
+ }
829
+
530
830
  return result_df # ============================================================
531
831
  # Predictions
532
832
  # ============================================================
@@ -542,11 +842,11 @@ class OptimizationSession:
542
842
  Tuple of (predictions, uncertainties)
543
843
 
544
844
  Example:
545
- >>> test_points = pd.DataFrame({
845
+ > test_points = pd.DataFrame({
546
846
  ... 'temperature': [350, 400],
547
847
  ... 'catalyst': ['A', 'B']
548
848
  ... })
549
- >>> predictions, uncertainties = session.predict(test_points)
849
+ > predictions, uncertainties = session.predict(test_points)
550
850
  """
551
851
  if self.model is None:
552
852
  raise ValueError("No trained model available. Use train_model() first.")
@@ -579,9 +879,9 @@ class OptimizationSession:
579
879
  callback: Callback function
580
880
 
581
881
  Example:
582
- >>> def on_training_done(data):
882
+ > def on_training_done(data):
583
883
  ... print(f"Training completed with R² = {data['metrics']['r2']}")
584
- >>> session.on('training_completed', on_training_done)
884
+ > session.on('training_completed', on_training_done)
585
885
  """
586
886
  self.events.on(event, callback)
587
887
 
@@ -594,10 +894,459 @@ class OptimizationSession:
594
894
  Update session configuration.
595
895
 
596
896
  Args:
597
- **kwargs: Configuration parameters (random_state, verbose, etc.)
897
+ **kwargs: Configuration parameters to update
898
+
899
+ Example:
900
+ > session.set_config(random_state=123, verbose=False)
901
+ """
902
+ self.config.update(kwargs)
903
+ logger.info(f"Updated config: {kwargs}")
904
+
905
+ # ============================================================
906
+ # Audit Log & Session Management
907
+ # ============================================================
908
+
909
+ def lock_data(self, notes: str = "", extra_parameters: Optional[Dict[str, Any]] = None) -> AuditEntry:
910
+ """
911
+ Lock in current experimental data configuration.
912
+
913
+ Creates an immutable audit log entry capturing the current data state.
914
+ This should be called when you're satisfied with your experimental dataset
915
+ and ready to proceed with modeling.
916
+
917
+ Args:
918
+ notes: Optional user notes about this data configuration
919
+
920
+ Returns:
921
+ Created AuditEntry
922
+
923
+ Example:
924
+ > session.add_experiment({'temp': 100, 'pressure': 5}, output=85.2)
925
+ > session.lock_data(notes="Initial screening dataset")
926
+ """
927
+ # Set search space in audit log (once)
928
+ if self.audit_log.search_space_definition is None:
929
+ self.audit_log.set_search_space(self.search_space.variables)
930
+
931
+ # Get current experimental data
932
+ df = self.experiment_manager.get_data()
933
+
934
+ # Lock data in audit log
935
+ entry = self.audit_log.lock_data(
936
+ experiment_data=df,
937
+ notes=notes,
938
+ extra_parameters=extra_parameters
939
+ )
940
+
941
+ self.metadata.update_modified()
942
+ logger.info(f"Locked data: {len(df)} experiments")
943
+ self.events.emit('data_locked', {'entry': entry.to_dict()})
944
+
945
+ return entry
946
+
947
+ def lock_model(self, notes: str = "") -> AuditEntry:
948
+ """
949
+ Lock in current trained model configuration.
950
+
951
+ Creates an immutable audit log entry capturing the trained model state.
952
+ This should be called when you're satisfied with your model performance
953
+ and ready to use it for acquisition.
954
+
955
+ Args:
956
+ notes: Optional user notes about this model
957
+
958
+ Returns:
959
+ Created AuditEntry
960
+
961
+ Raises:
962
+ ValueError: If no model has been trained
963
+
964
+ Example:
965
+ > session.train_model(backend='sklearn', kernel='matern')
966
+ > session.lock_model(notes="Best cross-validation performance")
967
+ """
968
+ if self.model is None:
969
+ raise ValueError("No trained model available. Use train_model() first.")
970
+
971
+ # Set search space in audit log (once)
972
+ if self.audit_log.search_space_definition is None:
973
+ self.audit_log.set_search_space(self.search_space.variables)
974
+
975
+ # Get model info
976
+ model_info = self.get_model_summary()
977
+
978
+ # Extract hyperparameters
979
+ hyperparameters = model_info.get('hyperparameters', {})
980
+
981
+ # Get kernel name from model_info (which extracts it properly)
982
+ kernel_name = model_info.get('kernel', 'unknown')
983
+
984
+ # Get CV metrics if available - use model_info metrics which are already populated
985
+ cv_metrics = model_info.get('metrics', None)
986
+ if cv_metrics and all(k in cv_metrics for k in ['rmse', 'r2']):
987
+ # Metrics already in correct format from get_model_summary
988
+ pass
989
+ elif hasattr(self.model, 'cv_cached_results') and self.model.cv_cached_results:
990
+ # Fallback to direct access
991
+ cv_metrics = {
992
+ 'rmse': float(self.model.cv_cached_results.get('rmse', 0)),
993
+ 'r2': float(self.model.cv_cached_results.get('r2', 0)),
994
+ 'mae': float(self.model.cv_cached_results.get('mae', 0))
995
+ }
996
+ else:
997
+ cv_metrics = None
998
+
999
+ # Get current iteration number
1000
+ # Use the next iteration number for the model lock so model+acquisition share the same iteration
1001
+ iteration = self.experiment_manager._current_iteration + 1
1002
+
1003
+ # Include scaler information if available in hyperparameters
1004
+ try:
1005
+ if hasattr(self.model, 'input_transform_type'):
1006
+ hyperparameters['input_transform_type'] = self.model.input_transform_type
1007
+ if hasattr(self.model, 'output_transform_type'):
1008
+ hyperparameters['output_transform_type'] = self.model.output_transform_type
1009
+ except Exception:
1010
+ pass
1011
+
1012
+ # Try to extract Matern nu for sklearn models if not already present
1013
+ try:
1014
+ if self.model_backend == 'sklearn' and 'matern_nu' not in hyperparameters:
1015
+ # Try to navigate fitted kernel object for sklearn GaussianProcessRegressor
1016
+ if hasattr(self.model, 'model') and hasattr(self.model.model, 'kernel_'):
1017
+ kernel_obj = self.model.model.kernel_
1018
+ base_kernel = getattr(kernel_obj, 'k2', kernel_obj)
1019
+ if hasattr(base_kernel, 'nu'):
1020
+ hyperparameters['matern_nu'] = float(base_kernel.nu)
1021
+ except Exception:
1022
+ pass
1023
+
1024
+ entry = self.audit_log.lock_model(
1025
+ backend=self.model_backend,
1026
+ kernel=kernel_name,
1027
+ hyperparameters=hyperparameters,
1028
+ cv_metrics=cv_metrics,
1029
+ iteration=iteration,
1030
+ notes=notes
1031
+ )
1032
+
1033
+ self.metadata.update_modified()
1034
+ logger.info(f"Locked model: {self.model_backend}/{model_info.get('kernel')}, iteration {iteration}")
1035
+ self.events.emit('model_locked', {'entry': entry.to_dict()})
1036
+
1037
+ return entry
1038
+
1039
+ def lock_acquisition(self, strategy: str, parameters: Dict[str, Any],
1040
+ suggestions: List[Dict[str, Any]], notes: str = "") -> AuditEntry:
1041
+ """
1042
+ Lock in acquisition function decision and suggested experiments.
1043
+
1044
+ Creates an immutable audit log entry capturing the acquisition decision.
1045
+ This should be called when you've reviewed the suggestions and are ready
1046
+ to run the recommended experiments.
1047
+
1048
+ Args:
1049
+ strategy: Acquisition strategy name ('EI', 'PI', 'UCB', etc.)
1050
+ parameters: Acquisition function parameters (xi, kappa, etc.)
1051
+ suggestions: List of suggested experiment dictionaries
1052
+ notes: Optional user notes about this decision
1053
+
1054
+ Returns:
1055
+ Created AuditEntry
1056
+
1057
+ Example:
1058
+ > suggestions = session.suggest_next(strategy='EI', n_suggestions=3)
1059
+ > session.lock_acquisition(
1060
+ ... strategy='EI',
1061
+ ... parameters={'xi': 0.01, 'goal': 'maximize'},
1062
+ ... suggestions=suggestions,
1063
+ ... notes="Top 3 candidates for next batch"
1064
+ ... )
1065
+ """
1066
+ # Set search space in audit log (once)
1067
+ if self.audit_log.search_space_definition is None:
1068
+ self.audit_log.set_search_space(self.search_space.variables)
1069
+
1070
+ # Increment iteration counter first so this acquisition is logged as the next iteration
1071
+ self.experiment_manager._current_iteration += 1
1072
+ iteration = self.experiment_manager._current_iteration
1073
+
1074
+ entry = self.audit_log.lock_acquisition(
1075
+ strategy=strategy,
1076
+ parameters=parameters,
1077
+ suggestions=suggestions,
1078
+ iteration=iteration,
1079
+ notes=notes
1080
+ )
1081
+
1082
+ self.metadata.update_modified()
1083
+ logger.info(f"Locked acquisition: {strategy}, {len(suggestions)} suggestions")
1084
+ self.events.emit('acquisition_locked', {'entry': entry.to_dict()})
1085
+
1086
+ return entry
1087
+
1088
+ def get_audit_log(self) -> List[Dict[str, Any]]:
1089
+ """
1090
+ Get complete audit log as list of dictionaries.
1091
+
1092
+ Returns:
1093
+ List of audit entry dictionaries
1094
+ """
1095
+ return self.audit_log.to_dict()
1096
+
1097
def export_audit_markdown(self) -> str:
    """Render the audit log as publication-ready markdown.

    Session metadata (name, tags, author, ...) is forwarded to the
    exporter so user-entered details appear in the report.  If the
    metadata cannot be serialized, the report is produced without it.

    Returns:
        Markdown-formatted audit trail.
    """
    # Best effort: a metadata serialization failure must not block export.
    try:
        meta = self.metadata.to_dict()
    except Exception:
        meta = None

    return self.audit_log.to_markdown(session_metadata=meta)
1111
+
1112
def save_session(self, filepath: str):
    """
    Save complete session state to JSON file.

    Saves all session data including:
    - Session metadata (name, description, tags)
    - Search space definition
    - Experimental data
    - Trained model state (if available)
    - Complete audit log

    Args:
        filepath: Path to save session file (.json extension recommended)

    Example:
        > session.save_session("~/ALchemist_Sessions/catalyst_study_nov2025.json")
    """
    filepath = Path(filepath)

    # Fetch the experimental data once; it is reused for both the audit-log
    # snapshot and the serialized experiment records below.
    current_data = self.experiment_manager.get_data()

    # Update audit log's experimental data snapshot to reflect current state.
    # This ensures the data table in the audit log markdown is always up-to-date.
    if current_data is not None and len(current_data) > 0:
        self.audit_log.experiment_data = current_data.copy()

    # Guard against get_data() returning None (no experiments yet): the
    # previous unconditional .to_dict() call would have raised here.
    if current_data is not None:
        experiment_records = current_data.to_dict(orient='records')
    else:
        experiment_records = []

    # Prepare session data
    session_data = {
        'version': '1.0.0',
        'metadata': self.metadata.to_dict(),
        'audit_log': self.audit_log.to_dict(),
        'search_space': {
            'variables': self.search_space.variables
        },
        'experiments': {
            'data': experiment_records,
            'n_total': len(self.experiment_manager.df)
        },
        'config': self.config
    }

    # Add model state if available
    if self.model is not None:
        model_info = self.get_model_summary()

        # Get kernel name from model_info which properly extracts it
        kernel_name = model_info.get('kernel', 'unknown')

        # Extract backend-specific kernel parameters if available
        kernel_params = {}
        if self.model_backend == 'sklearn' and hasattr(self.model, 'model'):
            kernel_obj = self.model.model.kernel
            # Extract kernel-specific parameters
            if hasattr(kernel_obj, 'get_params'):
                kernel_params = kernel_obj.get_params()
        elif self.model_backend == 'botorch':
            # For BoTorch, parameters are in hyperparameters
            hyperparams = model_info.get('hyperparameters', {})
            if 'matern_nu' in hyperparams:
                kernel_params['nu'] = hyperparams['matern_nu']

        session_data['model_config'] = {
            'backend': self.model_backend,
            'kernel': kernel_name,
            'kernel_params': kernel_params,
            'hyperparameters': model_info.get('hyperparameters', {}),
            'metrics': model_info.get('metrics', {})
        }

    # Create parent directory if needed
    filepath.parent.mkdir(parents=True, exist_ok=True)

    # default=str stringifies non-JSON-serializable values (timestamps, numpy scalars)
    with open(filepath, 'w') as f:
        json.dump(session_data, f, indent=2, default=str)

    self.metadata.update_modified()
    logger.info(f"Saved session to {filepath}")
    self.events.emit('session_saved', {'filepath': str(filepath)})
1190
+
1191
def export_session_json(self) -> str:
    """Serialize the session to a JSON string.

    Internally writes to a temporary file via save_session() and reads it
    back, so the caller observes no lasting filesystem side-effects.

    Returns:
        JSON string of the complete session state.
    """
    import tempfile
    from pathlib import Path

    with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as handle:
        temp_location = handle.name

    # Reuse save_session() so both code paths emit identical JSON.
    self.save_session(temp_location)

    try:
        with open(temp_location, 'r') as reader:
            serialized = reader.read()
    finally:
        # Always remove the temporary file, even if reading failed.
        Path(temp_location).unlink(missing_ok=True)

    return serialized
1213
+
1214
@staticmethod
def load_session(filepath: str, retrain_on_load: bool = True) -> 'OptimizationSession':
    """
    Load session from JSON file.

    Restores metadata, audit log, search space, experimental data and
    config in that order, then optionally retrains the saved model.

    Args:
        filepath: Path to session file
        retrain_on_load: If True and a model configuration was saved,
            retrain the model on the restored data. A failed retrain is
            logged and emitted as 'model_retrain_failed', not raised.

    Returns:
        OptimizationSession with restored state

    Example:
        > session = OptimizationSession.load_session("my_session.json")
    """
    filepath = Path(filepath)

    with open(filepath, 'r') as f:
        session_data = json.load(f)

    # Check version compatibility (only major version 1.x is known-good)
    version = session_data.get('version', '1.0.0')
    if not version.startswith('1.'):
        logger.warning(f"Session file version {version} may not be fully compatible")

    # Create a fresh session and restore each saved section into it
    session = OptimizationSession()

    # Restore metadata
    if 'metadata' in session_data:
        session.metadata = SessionMetadata.from_dict(session_data['metadata'])

    # Restore audit log
    if 'audit_log' in session_data:
        session.audit_log.from_dict(session_data['audit_log'])

    # Restore search space
    if 'search_space' in session_data:
        for var in session_data['search_space']['variables']:
            session.search_space.add_variable(
                var['name'],
                var['type'],
                # Forward every remaining key (bounds, categories, ...) as kwargs
                **{k: v for k, v in var.items() if k not in ['name', 'type']}
            )

    # Restore experimental data
    if 'experiments' in session_data and session_data['experiments']['data']:
        df = pd.DataFrame(session_data['experiments']['data'])

        # Metadata columns to exclude from inputs
        metadata_cols = {'Output', 'Noise', 'Iteration', 'Reason'}

        # Re-add experiments one by one so normal bookkeeping applies
        for _, row in df.iterrows():
            # Only include actual input variables, not metadata
            inputs = {col: row[col] for col in df.columns if col not in metadata_cols}
            output = row.get('Output')
            # NaN (missing) metadata values are normalized to None
            noise = row.get('Noise') if pd.notna(row.get('Noise')) else None
            iteration = row.get('Iteration') if pd.notna(row.get('Iteration')) else None
            reason = row.get('Reason') if pd.notna(row.get('Reason')) else None

            session.add_experiment(inputs, output, noise=noise, iteration=iteration, reason=reason)

    # Restore config
    if 'config' in session_data:
        session.config.update(session_data['config'])

    # Auto-retrain model if configuration exists (optional)
    if 'model_config' in session_data and retrain_on_load:
        model_config = session_data['model_config']
        logger.info(f"Auto-retraining model: {model_config['backend']} with {model_config.get('kernel', 'default')} kernel")

        try:
            # Trigger model training with saved configuration
            session.train_model(
                backend=model_config['backend'],
                kernel=model_config.get('kernel', 'Matern'),
                kernel_params=model_config.get('kernel_params', {})
            )
            logger.info("Model retrained successfully")
            session.events.emit('model_retrained', {'backend': model_config['backend']})
        except Exception as e:
            # Best effort: a retrain failure should not prevent loading
            logger.warning(f"Failed to retrain model: {e}")
            session.events.emit('model_retrain_failed', {'error': str(e)})

    logger.info(f"Loaded session from {filepath}")
    session.events.emit('session_loaded', {'filepath': str(filepath)})

    return session
1302
+
1303
def update_metadata(self, name: Optional[str] = None,
                   description: Optional[str] = None,
                   tags: Optional[List[str]] = None,
                   author: Optional[str] = None):
    """
    Update session metadata.

    Only fields passed as non-None are changed; the modified timestamp
    is always refreshed and a 'metadata_updated' event is emitted.

    Args:
        name: New session name (optional)
        description: New description (optional)
        tags: New tags (optional)
        author: New author name (optional; stored via setattr so older
            metadata objects without the field still work)

    Example:
        > session.update_metadata(
        ...     name="Catalyst Screening - Final",
        ...     description="Optimized Pt/Pd ratios",
        ...     tags=["catalyst", "platinum", "palladium", "final"]
        ... )
    """
    if name is not None:
        self.metadata.name = name
    if description is not None:
        self.metadata.description = description
    if author is not None:
        # Backwards compatible: store author if provided
        setattr(self.metadata, 'author', author)
    if tags is not None:
        self.metadata.tags = tags

    self.metadata.update_modified()
    logger.info("Updated session metadata")
    self.events.emit('metadata_updated', self.metadata.to_dict())
1335
+
1336
+ # ============================================================
1337
+ # Legacy Configuration
1338
+ # ============================================================
1339
+
1340
def set_config(self, **kwargs) -> None:
    """Merge the given keyword arguments into the session configuration.

    Args:
        **kwargs: Configuration parameters to update

    Example:
        > session.set_config(random_state=123, verbose=False)
    """
    self.config.update(kwargs)
    logger.info(f"Updated config: {kwargs}")
1352
+