alchemist-nrel 0.2.1__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- alchemist_core/__init__.py +14 -7
- alchemist_core/acquisition/botorch_acquisition.py +14 -6
- alchemist_core/audit_log.py +594 -0
- alchemist_core/data/experiment_manager.py +69 -5
- alchemist_core/models/botorch_model.py +6 -4
- alchemist_core/models/sklearn_model.py +44 -6
- alchemist_core/session.py +600 -8
- alchemist_core/utils/doe.py +200 -0
- {alchemist_nrel-0.2.1.dist-info → alchemist_nrel-0.3.0.dist-info}/METADATA +57 -40
- alchemist_nrel-0.3.0.dist-info/RECORD +66 -0
- {alchemist_nrel-0.2.1.dist-info → alchemist_nrel-0.3.0.dist-info}/entry_points.txt +1 -0
- {alchemist_nrel-0.2.1.dist-info → alchemist_nrel-0.3.0.dist-info}/top_level.txt +1 -0
- api/main.py +19 -3
- api/models/requests.py +71 -0
- api/models/responses.py +144 -0
- api/routers/experiments.py +117 -5
- api/routers/sessions.py +329 -10
- api/routers/visualizations.py +10 -5
- api/services/session_store.py +210 -54
- api/static/NEW_ICON.ico +0 -0
- api/static/NEW_ICON.png +0 -0
- api/static/NEW_LOGO_DARK.png +0 -0
- api/static/NEW_LOGO_LIGHT.png +0 -0
- api/static/assets/api-vcoXEqyq.js +1 -0
- api/static/assets/index-C0_glioA.js +4084 -0
- api/static/assets/index-CB4V1LI5.css +1 -0
- api/static/index.html +14 -0
- api/static/vite.svg +1 -0
- run_api.py +55 -0
- ui/gpr_panel.py +7 -2
- ui/notifications.py +197 -10
- ui/ui.py +1117 -68
- ui/variables_setup.py +47 -2
- ui/visualizations.py +60 -3
- alchemist_nrel-0.2.1.dist-info/RECORD +0 -54
- {alchemist_nrel-0.2.1.dist-info → alchemist_nrel-0.3.0.dist-info}/WHEEL +0 -0
- {alchemist_nrel-0.2.1.dist-info → alchemist_nrel-0.3.0.dist-info}/licenses/LICENSE +0 -0
alchemist_core/session.py
CHANGED
|
@@ -7,10 +7,14 @@ This module provides the main entry point for using ALchemist as a headless libr
|
|
|
7
7
|
from typing import Optional, Dict, Any, List, Tuple, Callable
|
|
8
8
|
import pandas as pd
|
|
9
9
|
import numpy as np
|
|
10
|
+
import json
|
|
11
|
+
import hashlib
|
|
12
|
+
from pathlib import Path
|
|
10
13
|
from alchemist_core.data.search_space import SearchSpace
|
|
11
14
|
from alchemist_core.data.experiment_manager import ExperimentManager
|
|
12
15
|
from alchemist_core.events import EventEmitter
|
|
13
16
|
from alchemist_core.config import get_logger
|
|
17
|
+
from alchemist_core.audit_log import AuditLog, SessionMetadata, AuditEntry
|
|
14
18
|
|
|
15
19
|
logger = get_logger(__name__)
|
|
16
20
|
|
|
@@ -48,7 +52,8 @@ class OptimizationSession:
|
|
|
48
52
|
|
|
49
53
|
def __init__(self, search_space: Optional[SearchSpace] = None,
|
|
50
54
|
experiment_manager: Optional[ExperimentManager] = None,
|
|
51
|
-
event_emitter: Optional[EventEmitter] = None
|
|
55
|
+
event_emitter: Optional[EventEmitter] = None,
|
|
56
|
+
session_metadata: Optional[SessionMetadata] = None):
|
|
52
57
|
"""
|
|
53
58
|
Initialize optimization session.
|
|
54
59
|
|
|
@@ -56,11 +61,16 @@ class OptimizationSession:
|
|
|
56
61
|
search_space: Pre-configured SearchSpace object (optional)
|
|
57
62
|
experiment_manager: Pre-configured ExperimentManager (optional)
|
|
58
63
|
event_emitter: EventEmitter for progress notifications (optional)
|
|
64
|
+
session_metadata: Pre-configured session metadata (optional)
|
|
59
65
|
"""
|
|
60
66
|
self.search_space = search_space if search_space is not None else SearchSpace()
|
|
61
67
|
self.experiment_manager = experiment_manager if experiment_manager is not None else ExperimentManager()
|
|
62
68
|
self.events = event_emitter if event_emitter is not None else EventEmitter()
|
|
63
69
|
|
|
70
|
+
# Session metadata and audit log
|
|
71
|
+
self.metadata = session_metadata if session_metadata is not None else SessionMetadata.create()
|
|
72
|
+
self.audit_log = AuditLog()
|
|
73
|
+
|
|
64
74
|
# Link search_space to experiment_manager
|
|
65
75
|
self.experiment_manager.set_search_space(self.search_space)
|
|
66
76
|
|
|
@@ -75,7 +85,7 @@ class OptimizationSession:
|
|
|
75
85
|
'verbose': True
|
|
76
86
|
}
|
|
77
87
|
|
|
78
|
-
logger.info("OptimizationSession initialized")
|
|
88
|
+
logger.info(f"OptimizationSession initialized: {self.metadata.session_id}")
|
|
79
89
|
|
|
80
90
|
# ============================================================
|
|
81
91
|
# Search Space Management
|
|
@@ -222,7 +232,8 @@ class OptimizationSession:
|
|
|
222
232
|
self.events.emit('data_loaded', {'n_experiments': n_experiments, 'filepath': filepath})
|
|
223
233
|
|
|
224
234
|
def add_experiment(self, inputs: Dict[str, Any], output: float,
|
|
225
|
-
noise: Optional[float] = None
|
|
235
|
+
noise: Optional[float] = None, iteration: Optional[int] = None,
|
|
236
|
+
reason: Optional[str] = None) -> None:
|
|
226
237
|
"""
|
|
227
238
|
Add a single experiment to the dataset.
|
|
228
239
|
|
|
@@ -230,18 +241,23 @@ class OptimizationSession:
|
|
|
230
241
|
inputs: Dictionary mapping variable names to values
|
|
231
242
|
output: Target/output value
|
|
232
243
|
noise: Optional measurement uncertainty
|
|
244
|
+
iteration: Iteration number (auto-assigned if None)
|
|
245
|
+
reason: Reason for this experiment (e.g., 'Manual', 'Expected Improvement')
|
|
233
246
|
|
|
234
247
|
Example:
|
|
235
248
|
>>> session.add_experiment(
|
|
236
249
|
... inputs={'temperature': 350, 'catalyst': 'A'},
|
|
237
|
-
... output=0.85
|
|
250
|
+
... output=0.85,
|
|
251
|
+
... reason='Manual'
|
|
238
252
|
... )
|
|
239
253
|
"""
|
|
240
254
|
# Use ExperimentManager's add_experiment method
|
|
241
255
|
self.experiment_manager.add_experiment(
|
|
242
256
|
point_dict=inputs,
|
|
243
257
|
output_value=output,
|
|
244
|
-
noise_value=noise
|
|
258
|
+
noise_value=noise,
|
|
259
|
+
iteration=iteration,
|
|
260
|
+
reason=reason
|
|
245
261
|
)
|
|
246
262
|
|
|
247
263
|
logger.info(f"Added experiment: {inputs} → {output}")
|
|
@@ -272,6 +288,88 @@ class OptimizationSession:
|
|
|
272
288
|
'feature_names': list(X.columns)
|
|
273
289
|
}
|
|
274
290
|
|
|
291
|
+
def generate_initial_design(
|
|
292
|
+
self,
|
|
293
|
+
method: str = "lhs",
|
|
294
|
+
n_points: int = 10,
|
|
295
|
+
random_seed: Optional[int] = None,
|
|
296
|
+
**kwargs
|
|
297
|
+
) -> List[Dict[str, Any]]:
|
|
298
|
+
"""
|
|
299
|
+
Generate initial experimental design (Design of Experiments).
|
|
300
|
+
|
|
301
|
+
Creates a set of experimental conditions to evaluate before starting
|
|
302
|
+
Bayesian optimization. This does NOT add the experiments to the session -
|
|
303
|
+
you must evaluate them and add the results using add_experiment().
|
|
304
|
+
|
|
305
|
+
Supported methods:
|
|
306
|
+
- 'random': Uniform random sampling
|
|
307
|
+
- 'lhs': Latin Hypercube Sampling (recommended, good space-filling properties)
|
|
308
|
+
- 'sobol': Sobol quasi-random sequences (low discrepancy)
|
|
309
|
+
- 'halton': Halton sequences
|
|
310
|
+
- 'hammersly': Hammersly sequences (low discrepancy)
|
|
311
|
+
|
|
312
|
+
Args:
|
|
313
|
+
method: Sampling strategy to use
|
|
314
|
+
n_points: Number of points to generate
|
|
315
|
+
random_seed: Random seed for reproducibility
|
|
316
|
+
**kwargs: Additional method-specific parameters:
|
|
317
|
+
- lhs_criterion: For LHS method ("maximin", "correlation", "ratio")
|
|
318
|
+
|
|
319
|
+
Returns:
|
|
320
|
+
List of dictionaries with variable names and values (no outputs)
|
|
321
|
+
|
|
322
|
+
Example:
|
|
323
|
+
>>> # Generate initial design
|
|
324
|
+
>>> points = session.generate_initial_design('lhs', n_points=10)
|
|
325
|
+
>>>
|
|
326
|
+
>>> # Run experiments and add results
|
|
327
|
+
>>> for point in points:
|
|
328
|
+
>>> output = run_experiment(**point) # Your experiment function
|
|
329
|
+
>>> session.add_experiment(point, output=output)
|
|
330
|
+
>>>
|
|
331
|
+
>>> # Now ready to train model
|
|
332
|
+
>>> session.train_model()
|
|
333
|
+
"""
|
|
334
|
+
if len(self.search_space.variables) == 0:
|
|
335
|
+
raise ValueError(
|
|
336
|
+
"No variables defined in search space. "
|
|
337
|
+
"Use add_variable() to define variables before generating initial design."
|
|
338
|
+
)
|
|
339
|
+
|
|
340
|
+
from alchemist_core.utils.doe import generate_initial_design
|
|
341
|
+
|
|
342
|
+
points = generate_initial_design(
|
|
343
|
+
search_space=self.search_space,
|
|
344
|
+
method=method,
|
|
345
|
+
n_points=n_points,
|
|
346
|
+
random_seed=random_seed,
|
|
347
|
+
**kwargs
|
|
348
|
+
)
|
|
349
|
+
|
|
350
|
+
# Store sampler info in config for audit trail
|
|
351
|
+
self.config['initial_design_method'] = method
|
|
352
|
+
self.config['initial_design_n_points'] = len(points)
|
|
353
|
+
|
|
354
|
+
logger.info(f"Generated {len(points)} initial design points using {method} method")
|
|
355
|
+
self.events.emit('initial_design_generated', {
|
|
356
|
+
'method': method,
|
|
357
|
+
'n_points': len(points)
|
|
358
|
+
})
|
|
359
|
+
|
|
360
|
+
# Add a lightweight audit data_locked entry for the initial design metadata
|
|
361
|
+
try:
|
|
362
|
+
extra = {'initial_design_method': method, 'initial_design_n_points': len(points)}
|
|
363
|
+
# Create an empty dataframe snapshot of the planned points
|
|
364
|
+
import pandas as pd
|
|
365
|
+
planned_df = pd.DataFrame(points)
|
|
366
|
+
self.audit_log.lock_data(planned_df, notes=f"Initial design ({method})", extra_parameters=extra)
|
|
367
|
+
except Exception:
|
|
368
|
+
# Audit logging should not block design generation
|
|
369
|
+
logger.debug("Failed to add initial design to audit log")
|
|
370
|
+
|
|
371
|
+
return points
|
|
372
|
+
|
|
275
373
|
# ============================================================
|
|
276
374
|
# Model Training
|
|
277
375
|
# ============================================================
|
|
@@ -437,8 +535,45 @@ class OptimizationSession:
|
|
|
437
535
|
# Convert complex objects to their string representation
|
|
438
536
|
json_hyperparams[key] = str(value)
|
|
439
537
|
|
|
538
|
+
# Extract kernel name and parameters
|
|
539
|
+
kernel_name = 'unknown'
|
|
540
|
+
if self.model_backend == 'sklearn':
|
|
541
|
+
# First try kernel_options
|
|
542
|
+
if hasattr(self.model, 'kernel_options') and 'kernel_type' in self.model.kernel_options:
|
|
543
|
+
kernel_name = self.model.kernel_options['kernel_type']
|
|
544
|
+
# Add nu parameter for Matern kernels
|
|
545
|
+
if kernel_name == 'Matern' and 'matern_nu' in self.model.kernel_options:
|
|
546
|
+
json_hyperparams['matern_nu'] = self.model.kernel_options['matern_nu']
|
|
547
|
+
# Then try trained kernel
|
|
548
|
+
elif hasattr(self.model, 'model') and hasattr(self.model.model, 'kernel_'):
|
|
549
|
+
kernel_obj = self.model.model.kernel_
|
|
550
|
+
# Navigate through Product/Sum kernels to find base kernel
|
|
551
|
+
if hasattr(kernel_obj, 'k2'): # Product kernel (Constant * BaseKernel)
|
|
552
|
+
base_kernel = kernel_obj.k2
|
|
553
|
+
else:
|
|
554
|
+
base_kernel = kernel_obj
|
|
555
|
+
|
|
556
|
+
kernel_class = type(base_kernel).__name__
|
|
557
|
+
if 'Matern' in kernel_class:
|
|
558
|
+
kernel_name = 'Matern'
|
|
559
|
+
# Extract nu parameter if available
|
|
560
|
+
if hasattr(base_kernel, 'nu'):
|
|
561
|
+
json_hyperparams['matern_nu'] = float(base_kernel.nu)
|
|
562
|
+
elif 'RBF' in kernel_class:
|
|
563
|
+
kernel_name = 'RBF'
|
|
564
|
+
elif 'RationalQuadratic' in kernel_class:
|
|
565
|
+
kernel_name = 'RationalQuadratic'
|
|
566
|
+
else:
|
|
567
|
+
kernel_name = kernel_class
|
|
568
|
+
elif self.model_backend == 'botorch':
|
|
569
|
+
if hasattr(self.model, 'cont_kernel_type'):
|
|
570
|
+
kernel_name = self.model.cont_kernel_type
|
|
571
|
+
elif 'kernel_type' in json_hyperparams:
|
|
572
|
+
kernel_name = json_hyperparams['kernel_type']
|
|
573
|
+
|
|
440
574
|
return {
|
|
441
575
|
'backend': self.model_backend,
|
|
576
|
+
'kernel': kernel_name,
|
|
442
577
|
'hyperparameters': json_hyperparams,
|
|
443
578
|
'metrics': metrics,
|
|
444
579
|
'is_trained': True
|
|
@@ -478,7 +613,8 @@ class OptimizationSession:
|
|
|
478
613
|
model=self.model, # Pass the full SklearnModel wrapper, not just .model
|
|
479
614
|
acq_func=strategy.lower(),
|
|
480
615
|
maximize=(goal.lower() == 'maximize'),
|
|
481
|
-
random_state=self.config['random_state']
|
|
616
|
+
random_state=self.config['random_state'],
|
|
617
|
+
acq_func_kwargs=kwargs # Pass xi, kappa, etc. to acquisition function
|
|
482
618
|
)
|
|
483
619
|
|
|
484
620
|
# Update acquisition with existing experimental data (un-encoded)
|
|
@@ -527,6 +663,13 @@ class OptimizationSession:
|
|
|
527
663
|
logger.info(f"Suggested point: {suggestion_dict}")
|
|
528
664
|
self.events.emit('acquisition_completed', {'suggestion': suggestion_dict})
|
|
529
665
|
|
|
666
|
+
# Cache suggestion info for audit log
|
|
667
|
+
self._last_acquisition_info = {
|
|
668
|
+
'strategy': strategy,
|
|
669
|
+
'goal': goal,
|
|
670
|
+
'parameters': kwargs
|
|
671
|
+
}
|
|
672
|
+
|
|
530
673
|
return result_df # ============================================================
|
|
531
674
|
# Predictions
|
|
532
675
|
# ============================================================
|
|
@@ -594,10 +737,459 @@ class OptimizationSession:
|
|
|
594
737
|
Update session configuration.
|
|
595
738
|
|
|
596
739
|
Args:
|
|
597
|
-
**kwargs: Configuration parameters
|
|
740
|
+
**kwargs: Configuration parameters to update
|
|
598
741
|
|
|
599
742
|
Example:
|
|
600
743
|
>>> session.set_config(random_state=123, verbose=False)
|
|
601
744
|
"""
|
|
602
745
|
self.config.update(kwargs)
|
|
603
|
-
logger.info(f"Updated
|
|
746
|
+
logger.info(f"Updated config: {kwargs}")
|
|
747
|
+
|
|
748
|
+
# ============================================================
|
|
749
|
+
# Audit Log & Session Management
|
|
750
|
+
# ============================================================
|
|
751
|
+
|
|
752
|
+
def lock_data(self, notes: str = "", extra_parameters: Optional[Dict[str, Any]] = None) -> AuditEntry:
|
|
753
|
+
"""
|
|
754
|
+
Lock in current experimental data configuration.
|
|
755
|
+
|
|
756
|
+
Creates an immutable audit log entry capturing the current data state.
|
|
757
|
+
This should be called when you're satisfied with your experimental dataset
|
|
758
|
+
and ready to proceed with modeling.
|
|
759
|
+
|
|
760
|
+
Args:
|
|
761
|
+
notes: Optional user notes about this data configuration
|
|
762
|
+
|
|
763
|
+
Returns:
|
|
764
|
+
Created AuditEntry
|
|
765
|
+
|
|
766
|
+
Example:
|
|
767
|
+
>>> session.add_experiment({'temp': 100, 'pressure': 5}, output=85.2)
|
|
768
|
+
>>> session.lock_data(notes="Initial screening dataset")
|
|
769
|
+
"""
|
|
770
|
+
# Set search space in audit log (once)
|
|
771
|
+
if self.audit_log.search_space_definition is None:
|
|
772
|
+
self.audit_log.set_search_space(self.search_space.variables)
|
|
773
|
+
|
|
774
|
+
# Get current experimental data
|
|
775
|
+
df = self.experiment_manager.get_data()
|
|
776
|
+
|
|
777
|
+
# Lock data in audit log
|
|
778
|
+
entry = self.audit_log.lock_data(
|
|
779
|
+
experiment_data=df,
|
|
780
|
+
notes=notes,
|
|
781
|
+
extra_parameters=extra_parameters
|
|
782
|
+
)
|
|
783
|
+
|
|
784
|
+
self.metadata.update_modified()
|
|
785
|
+
logger.info(f"Locked data: {len(df)} experiments")
|
|
786
|
+
self.events.emit('data_locked', {'entry': entry.to_dict()})
|
|
787
|
+
|
|
788
|
+
return entry
|
|
789
|
+
|
|
790
|
+
def lock_model(self, notes: str = "") -> AuditEntry:
|
|
791
|
+
"""
|
|
792
|
+
Lock in current trained model configuration.
|
|
793
|
+
|
|
794
|
+
Creates an immutable audit log entry capturing the trained model state.
|
|
795
|
+
This should be called when you're satisfied with your model performance
|
|
796
|
+
and ready to use it for acquisition.
|
|
797
|
+
|
|
798
|
+
Args:
|
|
799
|
+
notes: Optional user notes about this model
|
|
800
|
+
|
|
801
|
+
Returns:
|
|
802
|
+
Created AuditEntry
|
|
803
|
+
|
|
804
|
+
Raises:
|
|
805
|
+
ValueError: If no model has been trained
|
|
806
|
+
|
|
807
|
+
Example:
|
|
808
|
+
>>> session.train_model(backend='sklearn', kernel='matern')
|
|
809
|
+
>>> session.lock_model(notes="Best cross-validation performance")
|
|
810
|
+
"""
|
|
811
|
+
if self.model is None:
|
|
812
|
+
raise ValueError("No trained model available. Use train_model() first.")
|
|
813
|
+
|
|
814
|
+
# Set search space in audit log (once)
|
|
815
|
+
if self.audit_log.search_space_definition is None:
|
|
816
|
+
self.audit_log.set_search_space(self.search_space.variables)
|
|
817
|
+
|
|
818
|
+
# Get model info
|
|
819
|
+
model_info = self.get_model_summary()
|
|
820
|
+
|
|
821
|
+
# Extract hyperparameters
|
|
822
|
+
hyperparameters = model_info.get('hyperparameters', {})
|
|
823
|
+
|
|
824
|
+
# Get kernel name from model_info (which extracts it properly)
|
|
825
|
+
kernel_name = model_info.get('kernel', 'unknown')
|
|
826
|
+
|
|
827
|
+
# Get CV metrics if available - use model_info metrics which are already populated
|
|
828
|
+
cv_metrics = model_info.get('metrics', None)
|
|
829
|
+
if cv_metrics and all(k in cv_metrics for k in ['rmse', 'r2']):
|
|
830
|
+
# Metrics already in correct format from get_model_summary
|
|
831
|
+
pass
|
|
832
|
+
elif hasattr(self.model, 'cv_cached_results') and self.model.cv_cached_results:
|
|
833
|
+
# Fallback to direct access
|
|
834
|
+
cv_metrics = {
|
|
835
|
+
'rmse': float(self.model.cv_cached_results.get('rmse', 0)),
|
|
836
|
+
'r2': float(self.model.cv_cached_results.get('r2', 0)),
|
|
837
|
+
'mae': float(self.model.cv_cached_results.get('mae', 0))
|
|
838
|
+
}
|
|
839
|
+
else:
|
|
840
|
+
cv_metrics = None
|
|
841
|
+
|
|
842
|
+
# Get current iteration number
|
|
843
|
+
# Use the next iteration number for the model lock so model+acquisition share the same iteration
|
|
844
|
+
iteration = self.experiment_manager._current_iteration + 1
|
|
845
|
+
|
|
846
|
+
# Include scaler information if available in hyperparameters
|
|
847
|
+
try:
|
|
848
|
+
if hasattr(self.model, 'input_transform_type'):
|
|
849
|
+
hyperparameters['input_transform_type'] = self.model.input_transform_type
|
|
850
|
+
if hasattr(self.model, 'output_transform_type'):
|
|
851
|
+
hyperparameters['output_transform_type'] = self.model.output_transform_type
|
|
852
|
+
except Exception:
|
|
853
|
+
pass
|
|
854
|
+
|
|
855
|
+
# Try to extract Matern nu for sklearn models if not already present
|
|
856
|
+
try:
|
|
857
|
+
if self.model_backend == 'sklearn' and 'matern_nu' not in hyperparameters:
|
|
858
|
+
# Try to navigate fitted kernel object for sklearn GaussianProcessRegressor
|
|
859
|
+
if hasattr(self.model, 'model') and hasattr(self.model.model, 'kernel_'):
|
|
860
|
+
kernel_obj = self.model.model.kernel_
|
|
861
|
+
base_kernel = getattr(kernel_obj, 'k2', kernel_obj)
|
|
862
|
+
if hasattr(base_kernel, 'nu'):
|
|
863
|
+
hyperparameters['matern_nu'] = float(base_kernel.nu)
|
|
864
|
+
except Exception:
|
|
865
|
+
pass
|
|
866
|
+
|
|
867
|
+
entry = self.audit_log.lock_model(
|
|
868
|
+
backend=self.model_backend,
|
|
869
|
+
kernel=kernel_name,
|
|
870
|
+
hyperparameters=hyperparameters,
|
|
871
|
+
cv_metrics=cv_metrics,
|
|
872
|
+
iteration=iteration,
|
|
873
|
+
notes=notes
|
|
874
|
+
)
|
|
875
|
+
|
|
876
|
+
self.metadata.update_modified()
|
|
877
|
+
logger.info(f"Locked model: {self.model_backend}/{model_info.get('kernel')}, iteration {iteration}")
|
|
878
|
+
self.events.emit('model_locked', {'entry': entry.to_dict()})
|
|
879
|
+
|
|
880
|
+
return entry
|
|
881
|
+
|
|
882
|
+
def lock_acquisition(self, strategy: str, parameters: Dict[str, Any],
|
|
883
|
+
suggestions: List[Dict[str, Any]], notes: str = "") -> AuditEntry:
|
|
884
|
+
"""
|
|
885
|
+
Lock in acquisition function decision and suggested experiments.
|
|
886
|
+
|
|
887
|
+
Creates an immutable audit log entry capturing the acquisition decision.
|
|
888
|
+
This should be called when you've reviewed the suggestions and are ready
|
|
889
|
+
to run the recommended experiments.
|
|
890
|
+
|
|
891
|
+
Args:
|
|
892
|
+
strategy: Acquisition strategy name ('EI', 'PI', 'UCB', etc.)
|
|
893
|
+
parameters: Acquisition function parameters (xi, kappa, etc.)
|
|
894
|
+
suggestions: List of suggested experiment dictionaries
|
|
895
|
+
notes: Optional user notes about this decision
|
|
896
|
+
|
|
897
|
+
Returns:
|
|
898
|
+
Created AuditEntry
|
|
899
|
+
|
|
900
|
+
Example:
|
|
901
|
+
>>> suggestions = session.suggest_next(strategy='EI', n_suggestions=3)
|
|
902
|
+
>>> session.lock_acquisition(
|
|
903
|
+
... strategy='EI',
|
|
904
|
+
... parameters={'xi': 0.01, 'goal': 'maximize'},
|
|
905
|
+
... suggestions=suggestions,
|
|
906
|
+
... notes="Top 3 candidates for next batch"
|
|
907
|
+
... )
|
|
908
|
+
"""
|
|
909
|
+
# Set search space in audit log (once)
|
|
910
|
+
if self.audit_log.search_space_definition is None:
|
|
911
|
+
self.audit_log.set_search_space(self.search_space.variables)
|
|
912
|
+
|
|
913
|
+
# Increment iteration counter first so this acquisition is logged as the next iteration
|
|
914
|
+
self.experiment_manager._current_iteration += 1
|
|
915
|
+
iteration = self.experiment_manager._current_iteration
|
|
916
|
+
|
|
917
|
+
entry = self.audit_log.lock_acquisition(
|
|
918
|
+
strategy=strategy,
|
|
919
|
+
parameters=parameters,
|
|
920
|
+
suggestions=suggestions,
|
|
921
|
+
iteration=iteration,
|
|
922
|
+
notes=notes
|
|
923
|
+
)
|
|
924
|
+
|
|
925
|
+
self.metadata.update_modified()
|
|
926
|
+
logger.info(f"Locked acquisition: {strategy}, {len(suggestions)} suggestions")
|
|
927
|
+
self.events.emit('acquisition_locked', {'entry': entry.to_dict()})
|
|
928
|
+
|
|
929
|
+
return entry
|
|
930
|
+
|
|
931
|
+
def get_audit_log(self) -> List[Dict[str, Any]]:
|
|
932
|
+
"""
|
|
933
|
+
Get complete audit log as list of dictionaries.
|
|
934
|
+
|
|
935
|
+
Returns:
|
|
936
|
+
List of audit entry dictionaries
|
|
937
|
+
"""
|
|
938
|
+
return self.audit_log.to_dict()
|
|
939
|
+
|
|
940
|
+
def export_audit_markdown(self) -> str:
|
|
941
|
+
"""
|
|
942
|
+
Export audit log as markdown for publications.
|
|
943
|
+
|
|
944
|
+
Returns:
|
|
945
|
+
Markdown-formatted audit trail
|
|
946
|
+
"""
|
|
947
|
+
# Pass session metadata to markdown exporter so user-entered metadata appears
|
|
948
|
+
try:
|
|
949
|
+
metadata_dict = self.metadata.to_dict()
|
|
950
|
+
except Exception:
|
|
951
|
+
metadata_dict = None
|
|
952
|
+
|
|
953
|
+
return self.audit_log.to_markdown(session_metadata=metadata_dict)
|
|
954
|
+
|
|
955
|
+
def save_session(self, filepath: str):
|
|
956
|
+
"""
|
|
957
|
+
Save complete session state to JSON file.
|
|
958
|
+
|
|
959
|
+
Saves all session data including:
|
|
960
|
+
- Session metadata (name, description, tags)
|
|
961
|
+
- Search space definition
|
|
962
|
+
- Experimental data
|
|
963
|
+
- Trained model state (if available)
|
|
964
|
+
- Complete audit log
|
|
965
|
+
|
|
966
|
+
Args:
|
|
967
|
+
filepath: Path to save session file (.json extension recommended)
|
|
968
|
+
|
|
969
|
+
Example:
|
|
970
|
+
>>> session.save_session("~/ALchemist_Sessions/catalyst_study_nov2025.json")
|
|
971
|
+
"""
|
|
972
|
+
filepath = Path(filepath)
|
|
973
|
+
|
|
974
|
+
# Update audit log's experimental data snapshot to reflect current state
|
|
975
|
+
# This ensures the data table in the audit log markdown is always up-to-date
|
|
976
|
+
current_data = self.experiment_manager.get_data()
|
|
977
|
+
if current_data is not None and len(current_data) > 0:
|
|
978
|
+
self.audit_log.experiment_data = current_data.copy()
|
|
979
|
+
|
|
980
|
+
# Prepare session data
|
|
981
|
+
session_data = {
|
|
982
|
+
'version': '1.0.0',
|
|
983
|
+
'metadata': self.metadata.to_dict(),
|
|
984
|
+
'audit_log': self.audit_log.to_dict(),
|
|
985
|
+
'search_space': {
|
|
986
|
+
'variables': self.search_space.variables
|
|
987
|
+
},
|
|
988
|
+
'experiments': {
|
|
989
|
+
'data': self.experiment_manager.get_data().to_dict(orient='records'),
|
|
990
|
+
'n_total': len(self.experiment_manager.df)
|
|
991
|
+
},
|
|
992
|
+
'config': self.config
|
|
993
|
+
}
|
|
994
|
+
|
|
995
|
+
# Add model state if available
|
|
996
|
+
if self.model is not None:
|
|
997
|
+
model_info = self.get_model_summary()
|
|
998
|
+
|
|
999
|
+
# Get kernel name from model_info which properly extracts it
|
|
1000
|
+
kernel_name = model_info.get('kernel', 'unknown')
|
|
1001
|
+
|
|
1002
|
+
# Extract kernel parameters if available
|
|
1003
|
+
kernel_params = {}
|
|
1004
|
+
if self.model_backend == 'sklearn' and hasattr(self.model, 'model'):
|
|
1005
|
+
kernel_obj = self.model.model.kernel
|
|
1006
|
+
# Extract kernel-specific parameters
|
|
1007
|
+
if hasattr(kernel_obj, 'get_params'):
|
|
1008
|
+
kernel_params = kernel_obj.get_params()
|
|
1009
|
+
elif self.model_backend == 'botorch':
|
|
1010
|
+
# For BoTorch, parameters are in hyperparameters
|
|
1011
|
+
hyperparams = model_info.get('hyperparameters', {})
|
|
1012
|
+
if 'matern_nu' in hyperparams:
|
|
1013
|
+
kernel_params['nu'] = hyperparams['matern_nu']
|
|
1014
|
+
|
|
1015
|
+
session_data['model_config'] = {
|
|
1016
|
+
'backend': self.model_backend,
|
|
1017
|
+
'kernel': kernel_name,
|
|
1018
|
+
'kernel_params': kernel_params,
|
|
1019
|
+
'hyperparameters': model_info.get('hyperparameters', {}),
|
|
1020
|
+
'metrics': model_info.get('metrics', {})
|
|
1021
|
+
}
|
|
1022
|
+
|
|
1023
|
+
# Create directory if needed
|
|
1024
|
+
filepath.parent.mkdir(parents=True, exist_ok=True)
|
|
1025
|
+
|
|
1026
|
+
# Write JSON
|
|
1027
|
+
with open(filepath, 'w') as f:
|
|
1028
|
+
json.dump(session_data, f, indent=2, default=str)
|
|
1029
|
+
|
|
1030
|
+
self.metadata.update_modified()
|
|
1031
|
+
logger.info(f"Saved session to {filepath}")
|
|
1032
|
+
self.events.emit('session_saved', {'filepath': str(filepath)})
|
|
1033
|
+
|
|
1034
|
+
def export_session_json(self) -> str:
|
|
1035
|
+
"""
|
|
1036
|
+
Export current session state as a JSON string (no filesystem side-effects for caller).
|
|
1037
|
+
|
|
1038
|
+
Returns:
|
|
1039
|
+
JSON string of session data
|
|
1040
|
+
"""
|
|
1041
|
+
import tempfile
|
|
1042
|
+
from pathlib import Path
|
|
1043
|
+
|
|
1044
|
+
with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as tmp:
|
|
1045
|
+
tmp_path = tmp.name
|
|
1046
|
+
# Use existing save_session logic to write a complete JSON
|
|
1047
|
+
self.save_session(tmp_path)
|
|
1048
|
+
|
|
1049
|
+
try:
|
|
1050
|
+
with open(tmp_path, 'r') as f:
|
|
1051
|
+
content = f.read()
|
|
1052
|
+
finally:
|
|
1053
|
+
Path(tmp_path).unlink(missing_ok=True)
|
|
1054
|
+
|
|
1055
|
+
return content
|
|
1056
|
+
|
|
1057
|
+
@staticmethod
|
|
1058
|
+
def load_session(filepath: str, retrain_on_load: bool = True) -> 'OptimizationSession':
|
|
1059
|
+
"""
|
|
1060
|
+
Load session from JSON file.
|
|
1061
|
+
|
|
1062
|
+
Args:
|
|
1063
|
+
filepath: Path to session file
|
|
1064
|
+
|
|
1065
|
+
Returns:
|
|
1066
|
+
OptimizationSession with restored state
|
|
1067
|
+
|
|
1068
|
+
Example:
|
|
1069
|
+
>>> session = OptimizationSession.load_session("my_session.json")
|
|
1070
|
+
"""
|
|
1071
|
+
filepath = Path(filepath)
|
|
1072
|
+
|
|
1073
|
+
with open(filepath, 'r') as f:
|
|
1074
|
+
session_data = json.load(f)
|
|
1075
|
+
|
|
1076
|
+
# Check version compatibility
|
|
1077
|
+
version = session_data.get('version', '1.0.0')
|
|
1078
|
+
if not version.startswith('1.'):
|
|
1079
|
+
logger.warning(f"Session file version {version} may not be fully compatible")
|
|
1080
|
+
|
|
1081
|
+
# Create session
|
|
1082
|
+
session = OptimizationSession()
|
|
1083
|
+
|
|
1084
|
+
# Restore metadata
|
|
1085
|
+
if 'metadata' in session_data:
|
|
1086
|
+
session.metadata = SessionMetadata.from_dict(session_data['metadata'])
|
|
1087
|
+
|
|
1088
|
+
# Restore audit log
|
|
1089
|
+
if 'audit_log' in session_data:
|
|
1090
|
+
session.audit_log.from_dict(session_data['audit_log'])
|
|
1091
|
+
|
|
1092
|
+
# Restore search space
|
|
1093
|
+
if 'search_space' in session_data:
|
|
1094
|
+
for var in session_data['search_space']['variables']:
|
|
1095
|
+
session.search_space.add_variable(
|
|
1096
|
+
var['name'],
|
|
1097
|
+
var['type'],
|
|
1098
|
+
**{k: v for k, v in var.items() if k not in ['name', 'type']}
|
|
1099
|
+
)
|
|
1100
|
+
|
|
1101
|
+
# Restore experimental data
|
|
1102
|
+
if 'experiments' in session_data and session_data['experiments']['data']:
|
|
1103
|
+
df = pd.DataFrame(session_data['experiments']['data'])
|
|
1104
|
+
|
|
1105
|
+
# Metadata columns to exclude from inputs
|
|
1106
|
+
metadata_cols = {'Output', 'Noise', 'Iteration', 'Reason'}
|
|
1107
|
+
|
|
1108
|
+
# Add experiments one by one
|
|
1109
|
+
for _, row in df.iterrows():
|
|
1110
|
+
# Only include actual input variables, not metadata
|
|
1111
|
+
inputs = {col: row[col] for col in df.columns if col not in metadata_cols}
|
|
1112
|
+
output = row.get('Output')
|
|
1113
|
+
noise = row.get('Noise') if pd.notna(row.get('Noise')) else None
|
|
1114
|
+
iteration = row.get('Iteration') if pd.notna(row.get('Iteration')) else None
|
|
1115
|
+
reason = row.get('Reason') if pd.notna(row.get('Reason')) else None
|
|
1116
|
+
|
|
1117
|
+
session.add_experiment(inputs, output, noise=noise, iteration=iteration, reason=reason)
|
|
1118
|
+
|
|
1119
|
+
# Restore config
|
|
1120
|
+
if 'config' in session_data:
|
|
1121
|
+
session.config.update(session_data['config'])
|
|
1122
|
+
|
|
1123
|
+
# Auto-retrain model if configuration exists (optional)
|
|
1124
|
+
if 'model_config' in session_data and retrain_on_load:
|
|
1125
|
+
model_config = session_data['model_config']
|
|
1126
|
+
logger.info(f"Auto-retraining model: {model_config['backend']} with {model_config.get('kernel', 'default')} kernel")
|
|
1127
|
+
|
|
1128
|
+
try:
|
|
1129
|
+
# Trigger model training with saved configuration
|
|
1130
|
+
session.train_model(
|
|
1131
|
+
backend=model_config['backend'],
|
|
1132
|
+
kernel=model_config.get('kernel', 'Matern'),
|
|
1133
|
+
kernel_params=model_config.get('kernel_params', {})
|
|
1134
|
+
)
|
|
1135
|
+
logger.info("Model retrained successfully")
|
|
1136
|
+
session.events.emit('model_retrained', {'backend': model_config['backend']})
|
|
1137
|
+
except Exception as e:
|
|
1138
|
+
logger.warning(f"Failed to retrain model: {e}")
|
|
1139
|
+
session.events.emit('model_retrain_failed', {'error': str(e)})
|
|
1140
|
+
|
|
1141
|
+
logger.info(f"Loaded session from {filepath}")
|
|
1142
|
+
session.events.emit('session_loaded', {'filepath': str(filepath)})
|
|
1143
|
+
|
|
1144
|
+
return session
|
|
1145
|
+
|
|
1146
|
+
def update_metadata(self, name: Optional[str] = None,
|
|
1147
|
+
description: Optional[str] = None,
|
|
1148
|
+
tags: Optional[List[str]] = None,
|
|
1149
|
+
author: Optional[str] = None):
|
|
1150
|
+
"""
|
|
1151
|
+
Update session metadata.
|
|
1152
|
+
|
|
1153
|
+
Args:
|
|
1154
|
+
name: New session name (optional)
|
|
1155
|
+
description: New description (optional)
|
|
1156
|
+
tags: New tags (optional)
|
|
1157
|
+
|
|
1158
|
+
Example:
|
|
1159
|
+
>>> session.update_metadata(
|
|
1160
|
+
... name="Catalyst Screening - Final",
|
|
1161
|
+
... description="Optimized Pt/Pd ratios",
|
|
1162
|
+
... tags=["catalyst", "platinum", "palladium", "final"]
|
|
1163
|
+
... )
|
|
1164
|
+
"""
|
|
1165
|
+
if name is not None:
|
|
1166
|
+
self.metadata.name = name
|
|
1167
|
+
if description is not None:
|
|
1168
|
+
self.metadata.description = description
|
|
1169
|
+
if author is not None:
|
|
1170
|
+
# Backwards compatible: store author if provided
|
|
1171
|
+
setattr(self.metadata, 'author', author)
|
|
1172
|
+
if tags is not None:
|
|
1173
|
+
self.metadata.tags = tags
|
|
1174
|
+
|
|
1175
|
+
self.metadata.update_modified()
|
|
1176
|
+
logger.info("Updated session metadata")
|
|
1177
|
+
self.events.emit('metadata_updated', self.metadata.to_dict())
|
|
1178
|
+
|
|
1179
|
+
# ============================================================
|
|
1180
|
+
# Legacy Configuration
|
|
1181
|
+
# ============================================================
|
|
1182
|
+
|
|
1183
|
+
def set_config(self, **kwargs) -> None:
|
|
1184
|
+
"""
|
|
1185
|
+
Update session configuration.
|
|
1186
|
+
|
|
1187
|
+
Args:
|
|
1188
|
+
**kwargs: Configuration parameters to update
|
|
1189
|
+
|
|
1190
|
+
Example:
|
|
1191
|
+
>>> session.set_config(random_state=123, verbose=False)
|
|
1192
|
+
"""
|
|
1193
|
+
self.config.update(kwargs)
|
|
1194
|
+
logger.info(f"Updated config: {kwargs}")
|
|
1195
|
+
|