alchemist-nrel 0.2.1__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- alchemist_core/__init__.py +14 -7
- alchemist_core/acquisition/botorch_acquisition.py +15 -6
- alchemist_core/audit_log.py +594 -0
- alchemist_core/data/experiment_manager.py +76 -5
- alchemist_core/models/botorch_model.py +6 -4
- alchemist_core/models/sklearn_model.py +74 -8
- alchemist_core/session.py +788 -39
- alchemist_core/utils/doe.py +200 -0
- alchemist_nrel-0.3.1.dist-info/METADATA +185 -0
- alchemist_nrel-0.3.1.dist-info/RECORD +66 -0
- {alchemist_nrel-0.2.1.dist-info → alchemist_nrel-0.3.1.dist-info}/entry_points.txt +1 -0
- api/example_client.py +7 -2
- api/main.py +21 -4
- api/models/requests.py +95 -1
- api/models/responses.py +167 -0
- api/routers/acquisition.py +25 -0
- api/routers/experiments.py +134 -6
- api/routers/sessions.py +438 -10
- api/routers/visualizations.py +10 -5
- api/routers/websocket.py +132 -0
- api/run_api.py +56 -0
- api/services/session_store.py +285 -54
- api/static/NEW_ICON.ico +0 -0
- api/static/NEW_ICON.png +0 -0
- api/static/NEW_LOGO_DARK.png +0 -0
- api/static/NEW_LOGO_LIGHT.png +0 -0
- api/static/assets/api-vcoXEqyq.js +1 -0
- api/static/assets/index-DWfIKU9j.js +4094 -0
- api/static/assets/index-sMIa_1hV.css +1 -0
- api/static/index.html +14 -0
- api/static/vite.svg +1 -0
- ui/gpr_panel.py +7 -2
- ui/notifications.py +197 -10
- ui/ui.py +1117 -68
- ui/variables_setup.py +47 -2
- ui/visualizations.py +60 -3
- alchemist_core/models/ax_model.py +0 -159
- alchemist_nrel-0.2.1.dist-info/METADATA +0 -206
- alchemist_nrel-0.2.1.dist-info/RECORD +0 -54
- {alchemist_nrel-0.2.1.dist-info → alchemist_nrel-0.3.1.dist-info}/WHEEL +0 -0
- {alchemist_nrel-0.2.1.dist-info → alchemist_nrel-0.3.1.dist-info}/licenses/LICENSE +0 -0
- {alchemist_nrel-0.2.1.dist-info → alchemist_nrel-0.3.1.dist-info}/top_level.txt +0 -0
alchemist_core/session.py
CHANGED
@@ -7,10 +7,14 @@ This module provides the main entry point for using ALchemist as a headless libr
 from typing import Optional, Dict, Any, List, Tuple, Callable
 import pandas as pd
 import numpy as np
+import json
+import hashlib
+from pathlib import Path
 from alchemist_core.data.search_space import SearchSpace
 from alchemist_core.data.experiment_manager import ExperimentManager
 from alchemist_core.events import EventEmitter
 from alchemist_core.config import get_logger
+from alchemist_core.audit_log import AuditLog, SessionMetadata, AuditEntry

 logger = get_logger(__name__)

@@ -27,28 +31,29 @@ class OptimizationSession:
     5. Iterate

     Example:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        > from alchemist_core import OptimizationSession
+        >
+        > # Create session with search space
+        > session = OptimizationSession()
+        > session.add_variable('temperature', 'real', bounds=(300, 500))
+        > session.add_variable('pressure', 'real', bounds=(1, 10))
+        > session.add_variable('catalyst', 'categorical', categories=['A', 'B', 'C'])
+        >
+        > # Load experimental data
+        > session.load_data('experiments.csv', target_column='yield')
+        >
+        > # Train model
+        > session.train_model(backend='botorch', kernel='Matern')
+        >
+        > # Suggest next experiment
+        > next_point = session.suggest_next(strategy='EI', goal='maximize')
+        > print(next_point)
     """

     def __init__(self, search_space: Optional[SearchSpace] = None,
                  experiment_manager: Optional[ExperimentManager] = None,
-                 event_emitter: Optional[EventEmitter] = None
+                 event_emitter: Optional[EventEmitter] = None,
+                 session_metadata: Optional[SessionMetadata] = None):
         """
         Initialize optimization session.

@@ -56,11 +61,16 @@ class OptimizationSession:
             search_space: Pre-configured SearchSpace object (optional)
             experiment_manager: Pre-configured ExperimentManager (optional)
             event_emitter: EventEmitter for progress notifications (optional)
+            session_metadata: Pre-configured session metadata (optional)
         """
         self.search_space = search_space if search_space is not None else SearchSpace()
         self.experiment_manager = experiment_manager if experiment_manager is not None else ExperimentManager()
         self.events = event_emitter if event_emitter is not None else EventEmitter()

+        # Session metadata and audit log
+        self.metadata = session_metadata if session_metadata is not None else SessionMetadata.create()
+        self.audit_log = AuditLog()
+
         # Link search_space to experiment_manager
         self.experiment_manager.set_search_space(self.search_space)

@@ -69,13 +79,19 @@ class OptimizationSession:
         self.model_backend = None
         self.acquisition = None

+        # Staged experiments (for workflow management)
+        self.staged_experiments = []  # List of experiment dicts awaiting evaluation
+        self.last_suggestions = []  # Most recent acquisition suggestions (for UI)
+
         # Configuration
         self.config = {
             'random_state': 42,
-            'verbose': True
+            'verbose': True,
+            'auto_train': False,  # Auto-train model after adding experiments
+            'auto_train_threshold': 5  # Minimum experiments before auto-train
         }

-        logger.info("OptimizationSession initialized")
+        logger.info(f"OptimizationSession initialized: {self.metadata.session_id}")

         # ============================================================
         # Search Space Management
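The constructor now accepts pre-built session metadata, and the default config gains auto-train keys. A minimal sketch of how this could be wired up, assuming `SessionMetadata.create()` and `set_config()` behave as shown elsewhere in this diff:

```python
# Hedged sketch of the 0.3.1 constructor and config additions shown above.
from alchemist_core import OptimizationSession
from alchemist_core.audit_log import SessionMetadata

metadata = SessionMetadata.create()                 # new metadata object with a session_id
session = OptimizationSession(session_metadata=metadata)

# 'auto_train' and 'auto_train_threshold' are new config defaults in 0.3.1
session.set_config(auto_train=True, auto_train_threshold=5)
```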
@@ -93,8 +109,8 @@ class OptimizationSession:
             - For 'categorical': categories=[list of values] or values=[list]

         Example:
-
-
+            > session.add_variable('temp', 'real', bounds=(300, 500))
+            > session.add_variable('catalyst', 'categorical', categories=['A', 'B'])
         """
         # Convert user-friendly API to internal format
         params = kwargs.copy()
@@ -186,7 +202,7 @@ class OptimizationSession:
             noise_column: Optional column with measurement noise/uncertainty

         Example:
-
+            > session.load_data('experiments.csv', target_column='yield')
         """
         # Load the CSV
         import pandas as pd
@@ -222,7 +238,8 @@ class OptimizationSession:
         self.events.emit('data_loaded', {'n_experiments': n_experiments, 'filepath': filepath})

     def add_experiment(self, inputs: Dict[str, Any], output: float,
-                       noise: Optional[float] = None
+                       noise: Optional[float] = None, iteration: Optional[int] = None,
+                       reason: Optional[str] = None) -> None:
         """
         Add a single experiment to the dataset.

@@ -230,18 +247,23 @@ class OptimizationSession:
             inputs: Dictionary mapping variable names to values
             output: Target/output value
             noise: Optional measurement uncertainty
+            iteration: Iteration number (auto-assigned if None)
+            reason: Reason for this experiment (e.g., 'Manual', 'Expected Improvement')

         Example:
-
+            > session.add_experiment(
             ...     inputs={'temperature': 350, 'catalyst': 'A'},
-            ...     output=0.85
+            ...     output=0.85,
+            ...     reason='Manual'
             ... )
         """
         # Use ExperimentManager's add_experiment method
         self.experiment_manager.add_experiment(
             point_dict=inputs,
             output_value=output,
-            noise_value=noise
+            noise_value=noise,
+            iteration=iteration,
+            reason=reason
         )

         logger.info(f"Added experiment: {inputs} → {output}")
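A short sketch of the extended `add_experiment()` call with the new `iteration` and `reason` arguments (values are illustrative):

```python
# Record one evaluated point with provenance for the audit trail.
session.add_experiment(
    inputs={'temperature': 350, 'catalyst': 'A'},
    output=0.85,
    noise=0.02,                     # optional measurement uncertainty
    iteration=1,                    # auto-assigned if omitted
    reason='Expected Improvement',  # why this point was run
)
```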
@@ -272,6 +294,206 @@ class OptimizationSession:
         'feature_names': list(X.columns)
         }

+    # ============================================================
+    # Staged Experiments (Workflow Management)
+    # ============================================================
+
+    def add_staged_experiment(self, inputs: Dict[str, Any]) -> None:
+        """
+        Add an experiment to the staging area (awaiting evaluation).
+
+        Staged experiments are typically suggested by acquisition functions
+        but not yet evaluated. They can be retrieved, evaluated externally,
+        and then added to the dataset with add_experiment().
+
+        Args:
+            inputs: Dictionary mapping variable names to values
+
+        Example:
+            > # Generate suggestions and stage them
+            > suggestions = session.suggest_next(n_suggestions=3)
+            > for point in suggestions.to_dict('records'):
+            >     session.add_staged_experiment(point)
+            >
+            > # Later, evaluate and add
+            > staged = session.get_staged_experiments()
+            > for point in staged:
+            >     output = run_experiment(**point)
+            >     session.add_experiment(point, output=output)
+            > session.clear_staged_experiments()
+        """
+        self.staged_experiments.append(inputs)
+        logger.debug(f"Staged experiment: {inputs}")
+        self.events.emit('experiment_staged', {'inputs': inputs})
+
+    def get_staged_experiments(self) -> List[Dict[str, Any]]:
+        """
+        Get all staged experiments awaiting evaluation.
+
+        Returns:
+            List of experiment input dictionaries
+        """
+        return self.staged_experiments.copy()
+
+    def clear_staged_experiments(self) -> int:
+        """
+        Clear all staged experiments.
+
+        Returns:
+            Number of experiments cleared
+        """
+        count = len(self.staged_experiments)
+        self.staged_experiments.clear()
+        if count > 0:
+            logger.info(f"Cleared {count} staged experiments")
+            self.events.emit('staged_experiments_cleared', {'count': count})
+        return count
+
+    def move_staged_to_experiments(self, outputs: List[float],
+                                   noises: Optional[List[float]] = None,
+                                   iteration: Optional[int] = None,
+                                   reason: Optional[str] = None) -> int:
+        """
+        Evaluate staged experiments and add them to the dataset in batch.
+
+        Convenience method that pairs staged inputs with outputs and adds
+        them all to the experiment manager, then clears the staging area.
+
+        Args:
+            outputs: List of output values (must match length of staged experiments)
+            noises: Optional list of measurement uncertainties
+            iteration: Iteration number for all experiments (auto-assigned if None)
+            reason: Reason for these experiments (e.g., 'Expected Improvement')
+
+        Returns:
+            Number of experiments added
+
+        Example:
+            > # Stage some experiments
+            > session.add_staged_experiment({'x': 1.0, 'y': 2.0})
+            > session.add_staged_experiment({'x': 3.0, 'y': 4.0})
+            >
+            > # Evaluate them
+            > outputs = [run_experiment(**point) for point in session.get_staged_experiments()]
+            >
+            > # Add to dataset and clear staging
+            > session.move_staged_to_experiments(outputs, reason='LogEI')
+        """
+        if len(outputs) != len(self.staged_experiments):
+            raise ValueError(
+                f"Number of outputs ({len(outputs)}) must match "
+                f"number of staged experiments ({len(self.staged_experiments)})"
+            )
+
+        if noises is not None and len(noises) != len(self.staged_experiments):
+            raise ValueError(
+                f"Number of noise values ({len(noises)}) must match "
+                f"number of staged experiments ({len(self.staged_experiments)})"
+            )
+
+        # Add each experiment
+        for i, inputs in enumerate(self.staged_experiments):
+            noise = noises[i] if noises is not None else None
+            self.add_experiment(
+                inputs=inputs,
+                output=outputs[i],
+                noise=noise,
+                iteration=iteration,
+                reason=reason
+            )
+
+        count = len(self.staged_experiments)
+        self.clear_staged_experiments()
+
+        logger.info(f"Moved {count} staged experiments to dataset")
+        return count
+
+    # ============================================================
+    # Initial Design Generation
+    # ============================================================
+
+    def generate_initial_design(
+        self,
+        method: str = "lhs",
+        n_points: int = 10,
+        random_seed: Optional[int] = None,
+        **kwargs
+    ) -> List[Dict[str, Any]]:
+        """
+        Generate initial experimental design (Design of Experiments).
+
+        Creates a set of experimental conditions to evaluate before starting
+        Bayesian optimization. This does NOT add the experiments to the session -
+        you must evaluate them and add the results using add_experiment().
+
+        Supported methods:
+        - 'random': Uniform random sampling
+        - 'lhs': Latin Hypercube Sampling (recommended, good space-filling properties)
+        - 'sobol': Sobol quasi-random sequences (low discrepancy)
+        - 'halton': Halton sequences
+        - 'hammersly': Hammersly sequences (low discrepancy)
+
+        Args:
+            method: Sampling strategy to use
+            n_points: Number of points to generate
+            random_seed: Random seed for reproducibility
+            **kwargs: Additional method-specific parameters:
+                - lhs_criterion: For LHS method ("maximin", "correlation", "ratio")
+
+        Returns:
+            List of dictionaries with variable names and values (no outputs)
+
+        Example:
+            > # Generate initial design
+            > points = session.generate_initial_design('lhs', n_points=10)
+            >
+            > # Run experiments and add results
+            > for point in points:
+            >     output = run_experiment(**point)  # Your experiment function
+            >     session.add_experiment(point, output=output)
+            >
+            > # Now ready to train model
+            > session.train_model()
+        """
+        if len(self.search_space.variables) == 0:
+            raise ValueError(
+                "No variables defined in search space. "
+                "Use add_variable() to define variables before generating initial design."
+            )
+
+        from alchemist_core.utils.doe import generate_initial_design
+
+        points = generate_initial_design(
+            search_space=self.search_space,
+            method=method,
+            n_points=n_points,
+            random_seed=random_seed,
+            **kwargs
+        )
+
+        # Store sampler info in config for audit trail
+        self.config['initial_design_method'] = method
+        self.config['initial_design_n_points'] = len(points)
+
+        logger.info(f"Generated {len(points)} initial design points using {method} method")
+        self.events.emit('initial_design_generated', {
+            'method': method,
+            'n_points': len(points)
+        })
+
+        # Add a lightweight audit data_locked entry for the initial design metadata
+        try:
+            extra = {'initial_design_method': method, 'initial_design_n_points': len(points)}
+            # Create an empty dataframe snapshot of the planned points
+            import pandas as pd
+            planned_df = pd.DataFrame(points)
+            self.audit_log.lock_data(planned_df, notes=f"Initial design ({method})", extra_parameters=extra)
+        except Exception:
+            # Audit logging should not block design generation
+            logger.debug("Failed to add initial design to audit log")
+
+        return points
+
     # ============================================================
     # Model Training
     # ============================================================
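Taken together, the staging and DOE methods added above suggest a workflow like the following sketch; `run_experiment` stands in for the user's own evaluation function and is not part of the package:

```python
# Hedged sketch of the new DOE + staging workflow in 0.3.1.
points = session.generate_initial_design(method='lhs', n_points=8, random_seed=42)

for point in points:
    session.add_staged_experiment(point)       # queue without evaluating yet

# Evaluate the queued conditions externally (run_experiment is hypothetical)
outputs = [run_experiment(**p) for p in session.get_staged_experiments()]

# Pair staged inputs with results, add them in one batch, clear the staging area
session.move_staged_to_experiments(outputs, reason='Initial design (LHS)')
```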
@@ -291,8 +513,8 @@ class OptimizationSession:
             Dictionary with training results and hyperparameters

         Example:
-
-
+            > results = session.train_model(backend='botorch', kernel='Matern')
+            > print(results['metrics'])
         """
         df = self.experiment_manager.get_data()
         if df is None or df.empty:
|
|
|
312
534
|
# Extract calibration_enabled before passing kwargs to model constructor
|
|
313
535
|
calibration_enabled = kwargs.pop('calibration_enabled', False)
|
|
314
536
|
|
|
537
|
+
# Validate and map transform types based on backend
|
|
538
|
+
# BoTorch uses: 'normalize', 'standardize'
|
|
539
|
+
# Sklearn uses: 'minmax', 'standard', 'robust', 'none'
|
|
540
|
+
if self.model_backend == 'sklearn':
|
|
541
|
+
# Map BoTorch transform types to sklearn equivalents
|
|
542
|
+
transform_map = {
|
|
543
|
+
'normalize': 'minmax', # BoTorch normalize → sklearn minmax
|
|
544
|
+
'standardize': 'standard', # BoTorch standardize → sklearn standard
|
|
545
|
+
'none': 'none'
|
|
546
|
+
}
|
|
547
|
+
if 'input_transform_type' in kwargs:
|
|
548
|
+
original = kwargs['input_transform_type']
|
|
549
|
+
kwargs['input_transform_type'] = transform_map.get(original, original)
|
|
550
|
+
if original != kwargs['input_transform_type']:
|
|
551
|
+
logger.debug(f"Mapped input transform '{original}' → '{kwargs['input_transform_type']}' for sklearn")
|
|
552
|
+
if 'output_transform_type' in kwargs:
|
|
553
|
+
original = kwargs['output_transform_type']
|
|
554
|
+
kwargs['output_transform_type'] = transform_map.get(original, original)
|
|
555
|
+
if original != kwargs['output_transform_type']:
|
|
556
|
+
logger.debug(f"Mapped output transform '{original}' → '{kwargs['output_transform_type']}' for sklearn")
|
|
557
|
+
|
|
315
558
|
# Import appropriate model class
|
|
316
559
|
if self.model_backend == 'sklearn':
|
|
317
560
|
from alchemist_core.models.sklearn_model import SklearnModel
|
|
@@ -330,6 +573,15 @@ class OptimizationSession:
|
|
|
330
573
|
elif self.model_backend == 'botorch':
|
|
331
574
|
from alchemist_core.models.botorch_model import BoTorchModel
|
|
332
575
|
|
|
576
|
+
# Apply sensible defaults for BoTorch if not explicitly overridden
|
|
577
|
+
# Input normalization and output standardization are critical for performance
|
|
578
|
+
if 'input_transform_type' not in kwargs:
|
|
579
|
+
kwargs['input_transform_type'] = 'normalize'
|
|
580
|
+
logger.debug("Auto-applying input normalization for BoTorch model")
|
|
581
|
+
if 'output_transform_type' not in kwargs:
|
|
582
|
+
kwargs['output_transform_type'] = 'standardize'
|
|
583
|
+
logger.debug("Auto-applying output standardization for BoTorch model")
|
|
584
|
+
|
|
333
585
|
# Build kernel options - BoTorch uses 'cont_kernel_type' not 'kernel_type'
|
|
334
586
|
kernel_options = {'cont_kernel_type': kernel}
|
|
335
587
|
if kernel_params:
|
|
@@ -437,8 +689,45 @@ class OptimizationSession:
|
|
|
437
689
|
# Convert complex objects to their string representation
|
|
438
690
|
json_hyperparams[key] = str(value)
|
|
439
691
|
|
|
692
|
+
# Extract kernel name and parameters
|
|
693
|
+
kernel_name = 'unknown'
|
|
694
|
+
if self.model_backend == 'sklearn':
|
|
695
|
+
# First try kernel_options
|
|
696
|
+
if hasattr(self.model, 'kernel_options') and 'kernel_type' in self.model.kernel_options:
|
|
697
|
+
kernel_name = self.model.kernel_options['kernel_type']
|
|
698
|
+
# Add nu parameter for Matern kernels
|
|
699
|
+
if kernel_name == 'Matern' and 'matern_nu' in self.model.kernel_options:
|
|
700
|
+
json_hyperparams['matern_nu'] = self.model.kernel_options['matern_nu']
|
|
701
|
+
# Then try trained kernel
|
|
702
|
+
elif hasattr(self.model, 'model') and hasattr(self.model.model, 'kernel_'):
|
|
703
|
+
kernel_obj = self.model.model.kernel_
|
|
704
|
+
# Navigate through Product/Sum kernels to find base kernel
|
|
705
|
+
if hasattr(kernel_obj, 'k2'): # Product kernel (Constant * BaseKernel)
|
|
706
|
+
base_kernel = kernel_obj.k2
|
|
707
|
+
else:
|
|
708
|
+
base_kernel = kernel_obj
|
|
709
|
+
|
|
710
|
+
kernel_class = type(base_kernel).__name__
|
|
711
|
+
if 'Matern' in kernel_class:
|
|
712
|
+
kernel_name = 'Matern'
|
|
713
|
+
# Extract nu parameter if available
|
|
714
|
+
if hasattr(base_kernel, 'nu'):
|
|
715
|
+
json_hyperparams['matern_nu'] = float(base_kernel.nu)
|
|
716
|
+
elif 'RBF' in kernel_class:
|
|
717
|
+
kernel_name = 'RBF'
|
|
718
|
+
elif 'RationalQuadratic' in kernel_class:
|
|
719
|
+
kernel_name = 'RationalQuadratic'
|
|
720
|
+
else:
|
|
721
|
+
kernel_name = kernel_class
|
|
722
|
+
elif self.model_backend == 'botorch':
|
|
723
|
+
if hasattr(self.model, 'cont_kernel_type'):
|
|
724
|
+
kernel_name = self.model.cont_kernel_type
|
|
725
|
+
elif 'kernel_type' in json_hyperparams:
|
|
726
|
+
kernel_name = json_hyperparams['kernel_type']
|
|
727
|
+
|
|
440
728
|
return {
|
|
441
729
|
'backend': self.model_backend,
|
|
730
|
+
'kernel': kernel_name,
|
|
442
731
|
'hyperparameters': json_hyperparams,
|
|
443
732
|
'metrics': metrics,
|
|
444
733
|
'is_trained': True
|
|
@@ -463,8 +752,8 @@ class OptimizationSession:
|
|
|
463
752
|
DataFrame with suggested experiment(s)
|
|
464
753
|
|
|
465
754
|
Example:
|
|
466
|
-
|
|
467
|
-
|
|
755
|
+
> next_point = session.suggest_next(strategy='EI', goal='maximize')
|
|
756
|
+
> print(next_point)
|
|
468
757
|
"""
|
|
469
758
|
if self.model is None:
|
|
470
759
|
raise ValueError("No trained model available. Use train_model() first.")
|
|
@@ -478,7 +767,8 @@ class OptimizationSession:
|
|
|
478
767
|
model=self.model, # Pass the full SklearnModel wrapper, not just .model
|
|
479
768
|
acq_func=strategy.lower(),
|
|
480
769
|
maximize=(goal.lower() == 'maximize'),
|
|
481
|
-
random_state=self.config['random_state']
|
|
770
|
+
random_state=self.config['random_state'],
|
|
771
|
+
acq_func_kwargs=kwargs # Pass xi, kappa, etc. to acquisition function
|
|
482
772
|
)
|
|
483
773
|
|
|
484
774
|
# Update acquisition with existing experimental data (un-encoded)
|
|
@@ -527,6 +817,16 @@ class OptimizationSession:
|
|
|
527
817
|
logger.info(f"Suggested point: {suggestion_dict}")
|
|
528
818
|
self.events.emit('acquisition_completed', {'suggestion': suggestion_dict})
|
|
529
819
|
|
|
820
|
+
# Store suggestions for UI/API access
|
|
821
|
+
self.last_suggestions = result_df.to_dict('records')
|
|
822
|
+
|
|
823
|
+
# Cache suggestion info for audit log
|
|
824
|
+
self._last_acquisition_info = {
|
|
825
|
+
'strategy': strategy,
|
|
826
|
+
'goal': goal,
|
|
827
|
+
'parameters': kwargs
|
|
828
|
+
}
|
|
829
|
+
|
|
530
830
|
return result_df # ============================================================
|
|
531
831
|
# Predictions
|
|
532
832
|
# ============================================================
|
|
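The cached suggestions can then be read back without re-running acquisition; a brief sketch:

```python
# session.last_suggestions holds the most recent suggestions as list-of-dict records.
suggestions_df = session.suggest_next(strategy='EI', goal='maximize')
print(session.last_suggestions)   # same points, ready for staging or the audit log
```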
@@ -542,11 +842,11 @@ class OptimizationSession:
             Tuple of (predictions, uncertainties)

         Example:
-
+            > test_points = pd.DataFrame({
             ...     'temperature': [350, 400],
             ...     'catalyst': ['A', 'B']
             ... })
-
+            > predictions, uncertainties = session.predict(test_points)
         """
         if self.model is None:
             raise ValueError("No trained model available. Use train_model() first.")
@@ -579,9 +879,9 @@ class OptimizationSession:
             callback: Callback function

         Example:
-
+            > def on_training_done(data):
             ...     print(f"Training completed with R² = {data['metrics']['r2']}")
-
+            > session.on('training_completed', on_training_done)
         """
         self.events.on(event, callback)

@@ -594,10 +894,459 @@ class OptimizationSession:
         Update session configuration.

         Args:
-            **kwargs: Configuration parameters
+            **kwargs: Configuration parameters to update
+
+        Example:
+            > session.set_config(random_state=123, verbose=False)
+        """
+        self.config.update(kwargs)
+        logger.info(f"Updated config: {kwargs}")
+
+    # ============================================================
+    # Audit Log & Session Management
+    # ============================================================
+
+    def lock_data(self, notes: str = "", extra_parameters: Optional[Dict[str, Any]] = None) -> AuditEntry:
+        """
+        Lock in current experimental data configuration.
+
+        Creates an immutable audit log entry capturing the current data state.
+        This should be called when you're satisfied with your experimental dataset
+        and ready to proceed with modeling.
+
+        Args:
+            notes: Optional user notes about this data configuration
+
+        Returns:
+            Created AuditEntry
+
+        Example:
+            > session.add_experiment({'temp': 100, 'pressure': 5}, output=85.2)
+            > session.lock_data(notes="Initial screening dataset")
+        """
+        # Set search space in audit log (once)
+        if self.audit_log.search_space_definition is None:
+            self.audit_log.set_search_space(self.search_space.variables)
+
+        # Get current experimental data
+        df = self.experiment_manager.get_data()
+
+        # Lock data in audit log
+        entry = self.audit_log.lock_data(
+            experiment_data=df,
+            notes=notes,
+            extra_parameters=extra_parameters
+        )
+
+        self.metadata.update_modified()
+        logger.info(f"Locked data: {len(df)} experiments")
+        self.events.emit('data_locked', {'entry': entry.to_dict()})
+
+        return entry
+
+    def lock_model(self, notes: str = "") -> AuditEntry:
+        """
+        Lock in current trained model configuration.
+
+        Creates an immutable audit log entry capturing the trained model state.
+        This should be called when you're satisfied with your model performance
+        and ready to use it for acquisition.
+
+        Args:
+            notes: Optional user notes about this model
+
+        Returns:
+            Created AuditEntry
+
+        Raises:
+            ValueError: If no model has been trained
+
+        Example:
+            > session.train_model(backend='sklearn', kernel='matern')
+            > session.lock_model(notes="Best cross-validation performance")
+        """
+        if self.model is None:
+            raise ValueError("No trained model available. Use train_model() first.")
+
+        # Set search space in audit log (once)
+        if self.audit_log.search_space_definition is None:
+            self.audit_log.set_search_space(self.search_space.variables)
+
+        # Get model info
+        model_info = self.get_model_summary()
+
+        # Extract hyperparameters
+        hyperparameters = model_info.get('hyperparameters', {})
+
+        # Get kernel name from model_info (which extracts it properly)
+        kernel_name = model_info.get('kernel', 'unknown')
+
+        # Get CV metrics if available - use model_info metrics which are already populated
+        cv_metrics = model_info.get('metrics', None)
+        if cv_metrics and all(k in cv_metrics for k in ['rmse', 'r2']):
+            # Metrics already in correct format from get_model_summary
+            pass
+        elif hasattr(self.model, 'cv_cached_results') and self.model.cv_cached_results:
+            # Fallback to direct access
+            cv_metrics = {
+                'rmse': float(self.model.cv_cached_results.get('rmse', 0)),
+                'r2': float(self.model.cv_cached_results.get('r2', 0)),
+                'mae': float(self.model.cv_cached_results.get('mae', 0))
+            }
+        else:
+            cv_metrics = None
+
+        # Get current iteration number
+        # Use the next iteration number for the model lock so model+acquisition share the same iteration
+        iteration = self.experiment_manager._current_iteration + 1
+
+        # Include scaler information if available in hyperparameters
+        try:
+            if hasattr(self.model, 'input_transform_type'):
+                hyperparameters['input_transform_type'] = self.model.input_transform_type
+            if hasattr(self.model, 'output_transform_type'):
+                hyperparameters['output_transform_type'] = self.model.output_transform_type
+        except Exception:
+            pass
+
+        # Try to extract Matern nu for sklearn models if not already present
+        try:
+            if self.model_backend == 'sklearn' and 'matern_nu' not in hyperparameters:
+                # Try to navigate fitted kernel object for sklearn GaussianProcessRegressor
+                if hasattr(self.model, 'model') and hasattr(self.model.model, 'kernel_'):
+                    kernel_obj = self.model.model.kernel_
+                    base_kernel = getattr(kernel_obj, 'k2', kernel_obj)
+                    if hasattr(base_kernel, 'nu'):
+                        hyperparameters['matern_nu'] = float(base_kernel.nu)
+        except Exception:
+            pass
+
+        entry = self.audit_log.lock_model(
+            backend=self.model_backend,
+            kernel=kernel_name,
+            hyperparameters=hyperparameters,
+            cv_metrics=cv_metrics,
+            iteration=iteration,
+            notes=notes
+        )
+
+        self.metadata.update_modified()
+        logger.info(f"Locked model: {self.model_backend}/{model_info.get('kernel')}, iteration {iteration}")
+        self.events.emit('model_locked', {'entry': entry.to_dict()})
+
+        return entry
+
+    def lock_acquisition(self, strategy: str, parameters: Dict[str, Any],
+                         suggestions: List[Dict[str, Any]], notes: str = "") -> AuditEntry:
+        """
+        Lock in acquisition function decision and suggested experiments.
+
+        Creates an immutable audit log entry capturing the acquisition decision.
+        This should be called when you've reviewed the suggestions and are ready
+        to run the recommended experiments.
+
+        Args:
+            strategy: Acquisition strategy name ('EI', 'PI', 'UCB', etc.)
+            parameters: Acquisition function parameters (xi, kappa, etc.)
+            suggestions: List of suggested experiment dictionaries
+            notes: Optional user notes about this decision
+
+        Returns:
+            Created AuditEntry
+
+        Example:
+            > suggestions = session.suggest_next(strategy='EI', n_suggestions=3)
+            > session.lock_acquisition(
+            ...     strategy='EI',
+            ...     parameters={'xi': 0.01, 'goal': 'maximize'},
+            ...     suggestions=suggestions,
+            ...     notes="Top 3 candidates for next batch"
+            ... )
+        """
+        # Set search space in audit log (once)
+        if self.audit_log.search_space_definition is None:
+            self.audit_log.set_search_space(self.search_space.variables)
+
+        # Increment iteration counter first so this acquisition is logged as the next iteration
+        self.experiment_manager._current_iteration += 1
+        iteration = self.experiment_manager._current_iteration
+
+        entry = self.audit_log.lock_acquisition(
+            strategy=strategy,
+            parameters=parameters,
+            suggestions=suggestions,
+            iteration=iteration,
+            notes=notes
+        )
+
+        self.metadata.update_modified()
+        logger.info(f"Locked acquisition: {strategy}, {len(suggestions)} suggestions")
+        self.events.emit('acquisition_locked', {'entry': entry.to_dict()})
+
+        return entry
+
+    def get_audit_log(self) -> List[Dict[str, Any]]:
+        """
+        Get complete audit log as list of dictionaries.
+
+        Returns:
+            List of audit entry dictionaries
+        """
+        return self.audit_log.to_dict()
+
+    def export_audit_markdown(self) -> str:
+        """
+        Export audit log as markdown for publications.
+
+        Returns:
+            Markdown-formatted audit trail
+        """
+        # Pass session metadata to markdown exporter so user-entered metadata appears
+        try:
+            metadata_dict = self.metadata.to_dict()
+        except Exception:
+            metadata_dict = None
+
+        return self.audit_log.to_markdown(session_metadata=metadata_dict)
+
+    def save_session(self, filepath: str):
+        """
+        Save complete session state to JSON file.
+
+        Saves all session data including:
+        - Session metadata (name, description, tags)
+        - Search space definition
+        - Experimental data
+        - Trained model state (if available)
+        - Complete audit log
+
+        Args:
+            filepath: Path to save session file (.json extension recommended)
+
+        Example:
+            > session.save_session("~/ALchemist_Sessions/catalyst_study_nov2025.json")
+        """
+        filepath = Path(filepath)
+
+        # Update audit log's experimental data snapshot to reflect current state
+        # This ensures the data table in the audit log markdown is always up-to-date
+        current_data = self.experiment_manager.get_data()
+        if current_data is not None and len(current_data) > 0:
+            self.audit_log.experiment_data = current_data.copy()
+
+        # Prepare session data
+        session_data = {
+            'version': '1.0.0',
+            'metadata': self.metadata.to_dict(),
+            'audit_log': self.audit_log.to_dict(),
+            'search_space': {
+                'variables': self.search_space.variables
+            },
+            'experiments': {
+                'data': self.experiment_manager.get_data().to_dict(orient='records'),
+                'n_total': len(self.experiment_manager.df)
+            },
+            'config': self.config
+        }
+
+        # Add model state if available
+        if self.model is not None:
+            model_info = self.get_model_summary()
+
+            # Get kernel name from model_info which properly extracts it
+            kernel_name = model_info.get('kernel', 'unknown')
+
+            # Extract kernel parameters if available
+            kernel_params = {}
+            if self.model_backend == 'sklearn' and hasattr(self.model, 'model'):
+                kernel_obj = self.model.model.kernel
+                # Extract kernel-specific parameters
+                if hasattr(kernel_obj, 'get_params'):
+                    kernel_params = kernel_obj.get_params()
+            elif self.model_backend == 'botorch':
+                # For BoTorch, parameters are in hyperparameters
+                hyperparams = model_info.get('hyperparameters', {})
+                if 'matern_nu' in hyperparams:
+                    kernel_params['nu'] = hyperparams['matern_nu']
+
+            session_data['model_config'] = {
+                'backend': self.model_backend,
+                'kernel': kernel_name,
+                'kernel_params': kernel_params,
+                'hyperparameters': model_info.get('hyperparameters', {}),
+                'metrics': model_info.get('metrics', {})
+            }
+
+        # Create directory if needed
+        filepath.parent.mkdir(parents=True, exist_ok=True)
+
+        # Write JSON
+        with open(filepath, 'w') as f:
+            json.dump(session_data, f, indent=2, default=str)
+
+        self.metadata.update_modified()
+        logger.info(f"Saved session to {filepath}")
+        self.events.emit('session_saved', {'filepath': str(filepath)})
+
+    def export_session_json(self) -> str:
+        """
+        Export current session state as a JSON string (no filesystem side-effects for caller).
+
+        Returns:
+            JSON string of session data
+        """
+        import tempfile
+        from pathlib import Path
+
+        with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as tmp:
+            tmp_path = tmp.name
+        # Use existing save_session logic to write a complete JSON
+        self.save_session(tmp_path)
+
+        try:
+            with open(tmp_path, 'r') as f:
+                content = f.read()
+        finally:
+            Path(tmp_path).unlink(missing_ok=True)
+
+        return content
+
+    @staticmethod
+    def load_session(filepath: str, retrain_on_load: bool = True) -> 'OptimizationSession':
+        """
+        Load session from JSON file.
+
+        Args:
+            filepath: Path to session file
+
+        Returns:
+            OptimizationSession with restored state
+
+        Example:
+            > session = OptimizationSession.load_session("my_session.json")
+        """
+        filepath = Path(filepath)
+
+        with open(filepath, 'r') as f:
+            session_data = json.load(f)
+
+        # Check version compatibility
+        version = session_data.get('version', '1.0.0')
+        if not version.startswith('1.'):
+            logger.warning(f"Session file version {version} may not be fully compatible")
+
+        # Create session
+        session = OptimizationSession()
+
+        # Restore metadata
+        if 'metadata' in session_data:
+            session.metadata = SessionMetadata.from_dict(session_data['metadata'])
+
+        # Restore audit log
+        if 'audit_log' in session_data:
+            session.audit_log.from_dict(session_data['audit_log'])
+
+        # Restore search space
+        if 'search_space' in session_data:
+            for var in session_data['search_space']['variables']:
+                session.search_space.add_variable(
+                    var['name'],
+                    var['type'],
+                    **{k: v for k, v in var.items() if k not in ['name', 'type']}
+                )
+
+        # Restore experimental data
+        if 'experiments' in session_data and session_data['experiments']['data']:
+            df = pd.DataFrame(session_data['experiments']['data'])
+
+            # Metadata columns to exclude from inputs
+            metadata_cols = {'Output', 'Noise', 'Iteration', 'Reason'}
+
+            # Add experiments one by one
+            for _, row in df.iterrows():
+                # Only include actual input variables, not metadata
+                inputs = {col: row[col] for col in df.columns if col not in metadata_cols}
+                output = row.get('Output')
+                noise = row.get('Noise') if pd.notna(row.get('Noise')) else None
+                iteration = row.get('Iteration') if pd.notna(row.get('Iteration')) else None
+                reason = row.get('Reason') if pd.notna(row.get('Reason')) else None
+
+                session.add_experiment(inputs, output, noise=noise, iteration=iteration, reason=reason)
+
+        # Restore config
+        if 'config' in session_data:
+            session.config.update(session_data['config'])
+
+        # Auto-retrain model if configuration exists (optional)
+        if 'model_config' in session_data and retrain_on_load:
+            model_config = session_data['model_config']
+            logger.info(f"Auto-retraining model: {model_config['backend']} with {model_config.get('kernel', 'default')} kernel")
+
+            try:
+                # Trigger model training with saved configuration
+                session.train_model(
+                    backend=model_config['backend'],
+                    kernel=model_config.get('kernel', 'Matern'),
+                    kernel_params=model_config.get('kernel_params', {})
+                )
+                logger.info("Model retrained successfully")
+                session.events.emit('model_retrained', {'backend': model_config['backend']})
+            except Exception as e:
+                logger.warning(f"Failed to retrain model: {e}")
+                session.events.emit('model_retrain_failed', {'error': str(e)})
+
+        logger.info(f"Loaded session from {filepath}")
+        session.events.emit('session_loaded', {'filepath': str(filepath)})
+
+        return session
+
+    def update_metadata(self, name: Optional[str] = None,
+                        description: Optional[str] = None,
+                        tags: Optional[List[str]] = None,
+                        author: Optional[str] = None):
+        """
+        Update session metadata.
+
+        Args:
+            name: New session name (optional)
+            description: New description (optional)
+            tags: New tags (optional)
+
+        Example:
+            > session.update_metadata(
+            ...     name="Catalyst Screening - Final",
+            ...     description="Optimized Pt/Pd ratios",
+            ...     tags=["catalyst", "platinum", "palladium", "final"]
+            ... )
+        """
+        if name is not None:
+            self.metadata.name = name
+        if description is not None:
+            self.metadata.description = description
+        if author is not None:
+            # Backwards compatible: store author if provided
+            setattr(self.metadata, 'author', author)
+        if tags is not None:
+            self.metadata.tags = tags
+
+        self.metadata.update_modified()
+        logger.info("Updated session metadata")
+        self.events.emit('metadata_updated', self.metadata.to_dict())
+
+    # ============================================================
+    # Legacy Configuration
+    # ============================================================
+
+    def set_config(self, **kwargs) -> None:
+        """
+        Update session configuration.
+
+        Args:
+            **kwargs: Configuration parameters to update

         Example:
-
+            > session.set_config(random_state=123, verbose=False)
         """
         self.config.update(kwargs)
-        logger.info(f"Updated
+        logger.info(f"Updated config: {kwargs}")
+
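An end-to-end sketch of the new audit-log and persistence workflow introduced in this file; method names come from this diff, arguments are illustrative:

```python
# Lock the dataset once it is ready for modeling
session.lock_data(notes="Initial screening dataset")

# Train and lock the model configuration
session.train_model(backend='botorch', kernel='Matern')
session.lock_model(notes="Best cross-validation performance")

# Generate suggestions and lock the acquisition decision
suggestions = session.suggest_next(strategy='EI', goal='maximize')
session.lock_acquisition(
    strategy='EI',
    parameters={'goal': 'maximize'},
    suggestions=suggestions.to_dict('records'),
    notes="Next batch",
)

# Export the audit trail and persist/restore the full session state
print(session.export_audit_markdown())
session.save_session("catalyst_study.json")
restored = OptimizationSession.load_session("catalyst_study.json")
```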