morphml 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of morphml might be problematic. Click here for more details.
- morphml/__init__.py +14 -0
- morphml/api/__init__.py +26 -0
- morphml/api/app.py +326 -0
- morphml/api/auth.py +193 -0
- morphml/api/client.py +338 -0
- morphml/api/models.py +132 -0
- morphml/api/rate_limit.py +192 -0
- morphml/benchmarking/__init__.py +36 -0
- morphml/benchmarking/comparison.py +430 -0
- morphml/benchmarks/__init__.py +56 -0
- morphml/benchmarks/comparator.py +409 -0
- morphml/benchmarks/datasets.py +280 -0
- morphml/benchmarks/metrics.py +199 -0
- morphml/benchmarks/openml_suite.py +201 -0
- morphml/benchmarks/problems.py +289 -0
- morphml/benchmarks/suite.py +318 -0
- morphml/cli/__init__.py +5 -0
- morphml/cli/commands/experiment.py +329 -0
- morphml/cli/main.py +457 -0
- morphml/cli/quickstart.py +312 -0
- morphml/config.py +278 -0
- morphml/constraints/__init__.py +19 -0
- morphml/constraints/handler.py +205 -0
- morphml/constraints/predicates.py +285 -0
- morphml/core/__init__.py +3 -0
- morphml/core/crossover.py +449 -0
- morphml/core/dsl/README.md +359 -0
- morphml/core/dsl/__init__.py +72 -0
- morphml/core/dsl/ast_nodes.py +364 -0
- morphml/core/dsl/compiler.py +318 -0
- morphml/core/dsl/layers.py +368 -0
- morphml/core/dsl/lexer.py +336 -0
- morphml/core/dsl/parser.py +455 -0
- morphml/core/dsl/search_space.py +386 -0
- morphml/core/dsl/syntax.py +199 -0
- morphml/core/dsl/type_system.py +361 -0
- morphml/core/dsl/validator.py +386 -0
- morphml/core/graph/__init__.py +40 -0
- morphml/core/graph/edge.py +124 -0
- morphml/core/graph/graph.py +507 -0
- morphml/core/graph/mutations.py +409 -0
- morphml/core/graph/node.py +196 -0
- morphml/core/graph/serialization.py +361 -0
- morphml/core/graph/visualization.py +431 -0
- morphml/core/objectives/__init__.py +20 -0
- morphml/core/search/__init__.py +33 -0
- morphml/core/search/individual.py +252 -0
- morphml/core/search/parameters.py +453 -0
- morphml/core/search/population.py +375 -0
- morphml/core/search/search_engine.py +340 -0
- morphml/distributed/__init__.py +76 -0
- morphml/distributed/fault_tolerance.py +497 -0
- morphml/distributed/health_monitor.py +348 -0
- morphml/distributed/master.py +709 -0
- morphml/distributed/proto/README.md +224 -0
- morphml/distributed/proto/__init__.py +74 -0
- morphml/distributed/proto/worker.proto +170 -0
- morphml/distributed/proto/worker_pb2.py +79 -0
- morphml/distributed/proto/worker_pb2_grpc.py +423 -0
- morphml/distributed/resource_manager.py +416 -0
- morphml/distributed/scheduler.py +567 -0
- morphml/distributed/storage/__init__.py +33 -0
- morphml/distributed/storage/artifacts.py +381 -0
- morphml/distributed/storage/cache.py +366 -0
- morphml/distributed/storage/checkpointing.py +329 -0
- morphml/distributed/storage/database.py +459 -0
- morphml/distributed/worker.py +549 -0
- morphml/evaluation/__init__.py +5 -0
- morphml/evaluation/heuristic.py +237 -0
- morphml/exceptions.py +55 -0
- morphml/execution/__init__.py +5 -0
- morphml/execution/local_executor.py +350 -0
- morphml/integrations/__init__.py +28 -0
- morphml/integrations/jax_adapter.py +206 -0
- morphml/integrations/pytorch_adapter.py +530 -0
- morphml/integrations/sklearn_adapter.py +206 -0
- morphml/integrations/tensorflow_adapter.py +230 -0
- morphml/logging_config.py +93 -0
- morphml/meta_learning/__init__.py +66 -0
- morphml/meta_learning/architecture_similarity.py +277 -0
- morphml/meta_learning/experiment_database.py +240 -0
- morphml/meta_learning/knowledge_base/__init__.py +19 -0
- morphml/meta_learning/knowledge_base/embedder.py +179 -0
- morphml/meta_learning/knowledge_base/knowledge_base.py +313 -0
- morphml/meta_learning/knowledge_base/meta_features.py +265 -0
- morphml/meta_learning/knowledge_base/vector_store.py +271 -0
- morphml/meta_learning/predictors/__init__.py +27 -0
- morphml/meta_learning/predictors/ensemble.py +221 -0
- morphml/meta_learning/predictors/gnn_predictor.py +552 -0
- morphml/meta_learning/predictors/learning_curve.py +231 -0
- morphml/meta_learning/predictors/proxy_metrics.py +261 -0
- morphml/meta_learning/strategy_evolution/__init__.py +27 -0
- morphml/meta_learning/strategy_evolution/adaptive_optimizer.py +226 -0
- morphml/meta_learning/strategy_evolution/bandit.py +276 -0
- morphml/meta_learning/strategy_evolution/portfolio.py +230 -0
- morphml/meta_learning/transfer.py +581 -0
- morphml/meta_learning/warm_start.py +286 -0
- morphml/optimizers/__init__.py +74 -0
- morphml/optimizers/adaptive_operators.py +399 -0
- morphml/optimizers/bayesian/__init__.py +52 -0
- morphml/optimizers/bayesian/acquisition.py +387 -0
- morphml/optimizers/bayesian/base.py +319 -0
- morphml/optimizers/bayesian/gaussian_process.py +635 -0
- morphml/optimizers/bayesian/smac.py +534 -0
- morphml/optimizers/bayesian/tpe.py +411 -0
- morphml/optimizers/differential_evolution.py +220 -0
- morphml/optimizers/evolutionary/__init__.py +61 -0
- morphml/optimizers/evolutionary/cma_es.py +416 -0
- morphml/optimizers/evolutionary/differential_evolution.py +556 -0
- morphml/optimizers/evolutionary/encoding.py +426 -0
- morphml/optimizers/evolutionary/particle_swarm.py +449 -0
- morphml/optimizers/genetic_algorithm.py +486 -0
- morphml/optimizers/gradient_based/__init__.py +22 -0
- morphml/optimizers/gradient_based/darts.py +550 -0
- morphml/optimizers/gradient_based/enas.py +585 -0
- morphml/optimizers/gradient_based/operations.py +474 -0
- morphml/optimizers/gradient_based/utils.py +601 -0
- morphml/optimizers/hill_climbing.py +169 -0
- morphml/optimizers/multi_objective/__init__.py +56 -0
- morphml/optimizers/multi_objective/indicators.py +504 -0
- morphml/optimizers/multi_objective/nsga2.py +647 -0
- morphml/optimizers/multi_objective/visualization.py +427 -0
- morphml/optimizers/nsga2.py +308 -0
- morphml/optimizers/random_search.py +172 -0
- morphml/optimizers/simulated_annealing.py +181 -0
- morphml/plugins/__init__.py +35 -0
- morphml/plugins/custom_evaluator_example.py +81 -0
- morphml/plugins/custom_optimizer_example.py +63 -0
- morphml/plugins/plugin_system.py +454 -0
- morphml/reports/__init__.py +30 -0
- morphml/reports/generator.py +362 -0
- morphml/tracking/__init__.py +7 -0
- morphml/tracking/experiment.py +309 -0
- morphml/tracking/logger.py +301 -0
- morphml/tracking/reporter.py +357 -0
- morphml/utils/__init__.py +6 -0
- morphml/utils/checkpoint.py +189 -0
- morphml/utils/comparison.py +390 -0
- morphml/utils/export.py +407 -0
- morphml/utils/progress.py +392 -0
- morphml/utils/validation.py +392 -0
- morphml/version.py +7 -0
- morphml/visualization/__init__.py +50 -0
- morphml/visualization/analytics.py +423 -0
- morphml/visualization/architecture_diagrams.py +353 -0
- morphml/visualization/architecture_plot.py +223 -0
- morphml/visualization/convergence_plot.py +174 -0
- morphml/visualization/crossover_viz.py +386 -0
- morphml/visualization/graph_viz.py +338 -0
- morphml/visualization/pareto_plot.py +149 -0
- morphml/visualization/plotly_dashboards.py +422 -0
- morphml/visualization/population.py +309 -0
- morphml/visualization/progress.py +260 -0
- morphml-1.0.0.dist-info/METADATA +434 -0
- morphml-1.0.0.dist-info/RECORD +158 -0
- morphml-1.0.0.dist-info/WHEEL +4 -0
- morphml-1.0.0.dist-info/entry_points.txt +3 -0
- morphml-1.0.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,635 @@
|
|
|
1
|
+
"""Gaussian Process-based Bayesian optimization for neural architecture search.
|
|
2
|
+
|
|
3
|
+
This module implements Bayesian optimization using Gaussian Process (GP) as the
|
|
4
|
+
surrogate model. GP provides a probabilistic model of the fitness function,
|
|
5
|
+
enabling intelligent exploration-exploitation trade-offs through acquisition functions.
|
|
6
|
+
|
|
7
|
+
Key Features:
|
|
8
|
+
- Multiple kernel options (Matern, RBF, etc.)
|
|
9
|
+
- Multiple acquisition functions (EI, UCB, PI)
|
|
10
|
+
- Efficient architecture encoding for GP modeling
|
|
11
|
+
- Multi-restart optimization for acquisition maximization
|
|
12
|
+
- Automatic hyperparameter tuning for GP
|
|
13
|
+
|
|
14
|
+
Author: Eshan Roy <eshanized@proton.me>
|
|
15
|
+
Organization: TONMOY INFRASTRUCTURE & VISION
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from typing import Any, Dict, List, Optional, Tuple, Union
|
|
19
|
+
|
|
20
|
+
import numpy as np
|
|
21
|
+
from sklearn.gaussian_process import GaussianProcessRegressor
|
|
22
|
+
from sklearn.gaussian_process.kernels import (
|
|
23
|
+
RBF,
|
|
24
|
+
ConstantKernel,
|
|
25
|
+
Kernel,
|
|
26
|
+
Matern,
|
|
27
|
+
WhiteKernel,
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
from morphml.core.dsl import SearchSpace
|
|
31
|
+
from morphml.core.graph import ModelGraph
|
|
32
|
+
from morphml.core.search import Individual
|
|
33
|
+
from morphml.logging_config import get_logger
|
|
34
|
+
from morphml.optimizers.bayesian.acquisition import (
|
|
35
|
+
AcquisitionOptimizer,
|
|
36
|
+
expected_improvement,
|
|
37
|
+
probability_of_improvement,
|
|
38
|
+
upper_confidence_bound,
|
|
39
|
+
)
|
|
40
|
+
from morphml.optimizers.bayesian.base import BaseBayesianOptimizer
|
|
41
|
+
|
|
42
|
+
logger = get_logger(__name__)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class GaussianProcessOptimizer(BaseBayesianOptimizer):
    """
    Gaussian Process-based Bayesian optimization.

    Uses a Gaussian Process as a surrogate model to approximate the
    fitness function. The GP provides both mean predictions (exploitation)
    and uncertainty estimates (exploration), enabling intelligent sampling
    through acquisition functions.

    Algorithm:
        1. Initialize with random samples
        2. Fit GP on observed (x, y) pairs
        3. Optimize acquisition function to find next x
        4. Evaluate fitness at x
        5. Update GP and repeat

    Configuration:
        acquisition: 'ei', 'ucb', 'pi' (default: 'ei')
        kernel: 'matern', 'matern52', 'matern32', 'rbf' (default: 'matern')
        n_initial_points: Random samples before GP
            (read from the base class; default not visible in this module)
        xi: Exploration parameter for EI/PI (default: 0.01)
        kappa: Exploration parameter for UCB (default: 2.576)
        acq_optimizer: Acquisition optimization method (default: 'lbfgs')
        acq_n_restarts: Restarts for acquisition optimization (default: 10)
        normalize_y: Normalize fitness values (default: True)
        n_restarts: GP hyperparameter optimization restarts (default: 5)
        alpha: Noise level passed to the GP regressor (default: 1e-6)

    Example:
        >>> from morphml.optimizers.bayesian import GaussianProcessOptimizer
        >>> from morphml.core.dsl import create_cnn_space
        >>>
        >>> space = create_cnn_space(num_classes=10)
        >>> optimizer = GaussianProcessOptimizer(
        ...     search_space=space,
        ...     config={
        ...         'acquisition': 'ei',
        ...         'kernel': 'matern',
        ...         'n_initial_points': 10
        ...     }
        ... )
        >>>
        >>> def evaluate(graph):
        ...     return train_and_evaluate(graph)
        >>>
        >>> best = optimizer.optimize(evaluate, max_evaluations=100)
        >>> print(f"Best fitness: {best.fitness:.4f}")
    """

    # Acquisition function names this class knows how to evaluate.
    _ACQUISITION_TYPES = ("ei", "ucb", "pi")

    def __init__(self, search_space: SearchSpace, config: Optional[Dict[str, Any]] = None):
        """
        Initialize Gaussian Process optimizer.

        Args:
            search_space: SearchSpace defining architecture options
            config: Configuration dictionary with optional keys:
                - acquisition: Acquisition function type
                - kernel: GP kernel type
                - n_initial_points: Initial random samples
                - xi: EI/PI exploration parameter
                - kappa: UCB exploration parameter
                - acq_optimizer: Acquisition optimization method
                - acq_n_restarts: Acquisition optimization restarts
                - normalize_y: Whether to normalize targets
                - n_restarts: GP hyperparameter optimization restarts
                - alpha: Noise level passed to the GP regressor

        Raises:
            ValueError: If the kernel or acquisition type is unknown
        """
        # NOTE(review): self.config, self.random_state, self.n_initial_points,
        # self.history and self.generation are presumably set up by
        # BaseBayesianOptimizer -- confirm against the base class.
        super().__init__(search_space, config or {})

        # Acquisition function configuration
        self.acquisition_type = self.config.get("acquisition", "ei")
        self.xi = self.config.get("xi", 0.01)
        self.kappa = self.config.get("kappa", 2.576)

        # Fail fast on a misspelled acquisition name. Without this check the
        # error only surfaces inside the acquisition optimizer, where a broad
        # exception handler would turn the whole run into random search.
        if self.acquisition_type not in self._ACQUISITION_TYPES:
            raise ValueError(
                f"Unknown acquisition: {self.acquisition_type}. "
                f"Choose from: 'ei', 'ucb', 'pi'"
            )

        # GP configuration
        self.kernel_type = self.config.get("kernel", "matern")
        self.normalize_y = self.config.get("normalize_y", True)
        self.n_restarts_optimizer = self.config.get("n_restarts", 5)
        self.alpha = self.config.get("alpha", 1e-6)  # Noise level

        # Acquisition optimizer configuration
        self.acq_optimizer_method = self.config.get("acq_optimizer", "lbfgs")
        self.acq_n_restarts = self.config.get("acq_n_restarts", 10)

        # Initialize kernel (raises ValueError for an unknown kernel type)
        self.kernel = self._create_kernel()

        # Initialize GP
        self.gp = GaussianProcessRegressor(
            kernel=self.kernel,
            alpha=self.alpha,
            normalize_y=self.normalize_y,
            n_restarts_optimizer=self.n_restarts_optimizer,
            random_state=self.random_state,
        )

        # Initialize acquisition optimizer
        self.acq_optimizer = AcquisitionOptimizer(
            method=self.acq_optimizer_method,
            n_restarts=self.acq_n_restarts,
            random_state=self.random_state,
        )

        # Observation storage: encodings and their fitness values, index-aligned
        self.X_observed: List[np.ndarray] = []
        self.y_observed: List[float] = []

        # Track whether GP is fitted on the current observations
        self._gp_fitted = False

        logger.info(
            f"Initialized GaussianProcessOptimizer with "
            f"kernel={self.kernel_type}, acquisition={self.acquisition_type}"
        )

    def _create_kernel(self) -> Kernel:
        """
        Create GP kernel based on configuration.

        'matern' and 'matern52' both build a Matern kernel with nu=2.5,
        'matern32' uses nu=1.5 and 'rbf' builds an RBF kernel. Every variant
        is scaled by a ConstantKernel and summed with a WhiteKernel.

        Returns:
            Configured kernel for Gaussian Process

        Raises:
            ValueError: If kernel type is unknown
        """
        if self.kernel_type in ("matern", "matern52"):
            # Matern kernel with nu=5/2
            base = Matern(length_scale=1.0, length_scale_bounds=(1e-2, 1e2), nu=2.5)
        elif self.kernel_type == "matern32":
            # Matern kernel with nu=3/2
            base = Matern(length_scale=1.0, length_scale_bounds=(1e-2, 1e2), nu=1.5)
        elif self.kernel_type == "rbf":
            # Radial Basis Function (squared exponential)
            base = RBF(length_scale=1.0, length_scale_bounds=(1e-2, 1e2))
        else:
            raise ValueError(
                f"Unknown kernel type: {self.kernel_type}. "
                f"Choose from: 'matern', 'matern52', 'matern32', 'rbf'"
            )

        kernel = ConstantKernel(1.0, (1e-3, 1e3)) * base

        # Add white noise kernel for numerical stability
        return kernel + WhiteKernel(noise_level=1e-5, noise_level_bounds=(1e-10, 1e-1))

    def ask(self) -> List[ModelGraph]:
        """
        Generate next candidate architecture using acquisition function.

        During initial phase (< n_initial_points observations), or whenever
        nothing has been observed yet, samples randomly from the search space.
        After that, fits the GP and maximizes the acquisition function.

        Returns:
            List containing single ModelGraph candidate
        """
        n_seen = len(self.y_observed)

        # Random exploration during initialization. The `n_seen == 0` guard
        # covers n_initial_points <= 0: with no data there is nothing to fit
        # and no best fitness to compare against.
        if n_seen < self.n_initial_points or n_seen == 0:
            candidate = self.search_space.sample()
            logger.debug(f"Random sampling ({len(self.y_observed)}/{self.n_initial_points})")
            return [candidate]

        # Fit GP on all observations
        self._fit_gp()

        # Optimize acquisition function
        x_next = self._optimize_acquisition()

        # Decode to architecture
        candidate = self._decode_architecture(x_next)

        logger.debug(
            f"Selected candidate via {self.acquisition_type} acquisition "
            f"(iteration {len(self.y_observed)})"
        )

        return [candidate]

    def tell(self, results: List[Tuple[ModelGraph, float]]) -> None:
        """
        Update GP with new evaluation results.

        Each result is encoded, stored as an observation and appended to the
        history. The GP is flagged for refitting and the generation counter
        is advanced once per call.

        Args:
            results: List of (graph, fitness) tuples from evaluation
        """
        for graph, fitness in results:
            # Encode architecture
            x = self._encode_architecture(graph)

            # Store observation
            self.X_observed.append(x)
            self.y_observed.append(fitness)

            # Update history
            self.history.append(
                {"generation": self.generation, "genome": graph, "fitness": fitness, "encoding": x}
            )

            logger.debug(f"Added observation: fitness={fitness:.4f}")

        # Mark GP as needing refit
        self._gp_fitted = False

        self.generation += 1

    def _fit_gp(self) -> None:
        """
        Fit Gaussian Process on observed data.

        Does nothing when the GP is already fitted on the current data, and
        only logs a warning when there are no observations.

        Raises:
            Exception: Any error raised by the GP fit is logged and re-raised
        """
        if not self.X_observed:
            logger.warning("No observations to fit GP")
            return

        if self._gp_fitted:
            return  # Already fitted with current data

        X = np.array(self.X_observed)
        y = np.array(self.y_observed)

        logger.debug(f"Fitting GP on {len(X)} observations")

        try:
            self.gp.fit(X, y)
            self._gp_fitted = True

            # Log learned hyperparameters
            logger.debug(f"GP kernel: {self.gp.kernel_}")
            logger.debug(f"GP log-likelihood: {self.gp.log_marginal_likelihood():.2f}")

        except Exception as e:
            logger.error(f"GP fitting failed: {e}")
            raise

    def _compute_acquisition(
        self, mu: np.ndarray, sigma: np.ndarray, f_best: float
    ) -> np.ndarray:
        """
        Evaluate the configured acquisition function on GP predictions.

        Args:
            mu: Predicted means from the GP
            sigma: Predicted standard deviations from the GP
            f_best: Best fitness observed so far (unused for 'ucb')

        Returns:
            Acquisition values as returned by the imported acquisition helper

        Raises:
            ValueError: If the acquisition type is unknown
        """
        if self.acquisition_type == "ei":
            return expected_improvement(mu, sigma, f_best, self.xi)
        if self.acquisition_type == "ucb":
            return upper_confidence_bound(mu, sigma, self.kappa)
        if self.acquisition_type == "pi":
            return probability_of_improvement(mu, sigma, f_best, self.xi)
        raise ValueError(f"Unknown acquisition: {self.acquisition_type}")

    def _optimize_acquisition(self) -> np.ndarray:
        """
        Find architecture encoding that maximizes acquisition function.

        Delegates the search to the acquisition optimizer. If that search
        fails, a uniformly random point within the encoding bounds is
        returned instead.

        Returns:
            Architecture encoding selected for evaluation

        Raises:
            ValueError: If the acquisition type is unknown or there are no
                observations to take the best fitness from
        """
        # Checked outside the try block so a configuration error is reported
        # rather than absorbed by the random-sample fallback below.
        if self.acquisition_type not in self._ACQUISITION_TYPES:
            raise ValueError(f"Unknown acquisition: {self.acquisition_type}")

        # Get current best fitness
        f_best = max(self.y_observed)

        def acquisition_fn(x: np.ndarray) -> np.ndarray:
            """Evaluate the acquisition at one encoding or a batch of encodings."""
            # The GP expects a 2D input; promote a single encoding to one row.
            if x.ndim == 1:
                x = x.reshape(1, -1)

            mu, sigma = self.gp.predict(x, return_std=True)
            return self._compute_acquisition(mu, sigma, f_best)

        # Get bounds for optimization
        bounds = self._get_encoding_bounds()

        try:
            x_next = self.acq_optimizer.optimize(
                acquisition_fn=acquisition_fn, bounds=bounds, n_candidates=1
            )

            # Extract single candidate
            if x_next.ndim == 2:
                x_next = x_next[0]

            # Log acquisition value
            acq_value = acquisition_fn(x_next.reshape(1, -1))[0]
            logger.debug(f"Acquisition optimum: {acq_value:.6f}")

            return x_next

        except Exception as e:
            logger.warning(f"Acquisition optimization failed: {e}. Using random sample.")
            # Fallback to random sample
            return np.array([np.random.uniform(low, high) for low, high in bounds])

    def _sample_candidates(self, n_samples: int) -> List[ModelGraph]:
        """
        Draw random architectures from the search space.

        Args:
            n_samples: Number of architectures to sample (must be >= 1)

        Returns:
            List of sampled ModelGraph instances

        Raises:
            ValueError: If n_samples is smaller than 1
        """
        if n_samples < 1:
            raise ValueError(f"n_samples must be at least 1, got {n_samples}")
        return [self.search_space.sample() for _ in range(n_samples)]

    def predict(
        self, graphs: List[ModelGraph], return_std: bool = False
    ) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]:
        """
        Predict fitness for given architectures using GP.

        Fits the GP first if it is not fitted on the current observations.

        Args:
            graphs: List of ModelGraph instances
            return_std: If True, return (mean, std), else just mean

        Returns:
            Predicted means, or a (means, stds) tuple when return_std is True

        Example:
            >>> graphs = [space.sample() for _ in range(5)]
            >>> predictions = optimizer.predict(graphs, return_std=True)
            >>> means, stds = predictions
        """
        # Ensure GP is fitted
        if not self._gp_fitted:
            self._fit_gp()

        # Encode architectures
        X = np.array([self._encode_architecture(g) for g in graphs])

        if return_std:
            mu, sigma = self.gp.predict(X, return_std=True)
            return mu, sigma

        return self.gp.predict(X, return_std=False)

    def get_best_predicted(self, n_samples: int = 100) -> ModelGraph:
        """
        Sample architectures and return the one with highest predicted fitness.

        Useful for suggesting good architectures without evaluation.

        Args:
            n_samples: Number of random samples to evaluate

        Returns:
            Architecture with highest predicted mean fitness

        Raises:
            ValueError: If n_samples is smaller than 1
        """
        candidates = self._sample_candidates(n_samples)
        predictions = self.predict(candidates, return_std=False)

        best_idx = int(np.argmax(predictions))
        return candidates[best_idx]

    def get_uncertainty_map(
        self, n_samples: int = 100
    ) -> Tuple[List[ModelGraph], np.ndarray, np.ndarray]:
        """
        Sample architectures and get prediction uncertainty.

        Useful for understanding which regions of search space are uncertain.

        Args:
            n_samples: Number of random samples

        Returns:
            (graphs, means, stds) tuple

        Raises:
            ValueError: If n_samples is smaller than 1
        """
        candidates = self._sample_candidates(n_samples)
        means, stds = self.predict(candidates, return_std=True)
        return candidates, means, stds

    def get_gp_statistics(self) -> Dict[str, Any]:
        """
        Get statistics about the fitted GP.

        Returns:
            ``{"fitted": False}`` when the GP is not fitted; otherwise a
            dictionary with the observation count, the fitted kernel as a
            string, the log marginal likelihood and the best / mean / std of
            the observed fitness values
        """
        if not self._gp_fitted:
            return {"fitted": False}

        has_obs = bool(self.y_observed)
        return {
            "fitted": True,
            "n_observations": len(self.X_observed),
            "kernel": str(self.gp.kernel_),
            "log_marginal_likelihood": self.gp.log_marginal_likelihood(),
            "best_observed": max(self.y_observed) if has_obs else None,
            "mean_observed": np.mean(self.y_observed) if has_obs else None,
            "std_observed": np.std(self.y_observed) if has_obs else None,
        }

    def plot_convergence(self, save_path: Optional[str] = None) -> None:
        """
        Plot optimization convergence.

        Shows the best fitness so far and the raw observations per iteration.
        Logs a warning and returns if matplotlib is missing or there are no
        observations.

        Args:
            save_path: Optional path to save plot; the plot is shown
                interactively when omitted
        """
        try:
            import matplotlib.pyplot as plt
        except ImportError:
            logger.warning("matplotlib not installed, cannot plot")
            return

        if not self.y_observed:
            logger.warning("No observations to plot")
            return

        # Compute best-so-far (running maximum of the observed fitness)
        best_so_far = []
        current_best = -np.inf
        for y in self.y_observed:
            if y > current_best:
                current_best = y
            best_so_far.append(current_best)

        fig = plt.figure(figsize=(10, 6))
        try:
            plt.plot(best_so_far, "b-", linewidth=2, label="Best fitness")
            plt.plot(self.y_observed, "ko", alpha=0.3, markersize=4, label="Observations")
            plt.xlabel("Iteration", fontsize=12)
            plt.ylabel("Fitness", fontsize=12)
            plt.title("Bayesian Optimization Convergence", fontsize=14)
            plt.legend()
            plt.grid(True, alpha=0.3)

            if save_path:
                plt.savefig(save_path, dpi=300, bbox_inches="tight")
                logger.info(f"Convergence plot saved to {save_path}")
            else:
                plt.show()
        finally:
            # Release the figure even if saving or showing fails.
            plt.close(fig)

    def plot_acquisition_landscape(
        self, n_samples: int = 1000, save_path: Optional[str] = None
    ) -> None:
        """
        Visualize acquisition function landscape (2D projection).

        Samples random architectures, projects their encodings to 2D with PCA
        and draws three scatter plots: GP mean, GP uncertainty and acquisition
        value. Logs a warning and returns if matplotlib is missing or the GP
        is not fitted.

        Args:
            n_samples: Number of points to sample
            save_path: Optional path to save plot; the plot is shown
                interactively when omitted

        Raises:
            ValueError: If n_samples is smaller than 1 or the acquisition
                type is unknown
        """
        try:
            import matplotlib.pyplot as plt
        except ImportError:
            logger.warning("matplotlib not installed, cannot plot")
            return

        if not self._gp_fitted:
            logger.warning("GP not fitted, cannot plot acquisition")
            return

        # Sample random architectures
        candidates = self._sample_candidates(n_samples)
        X = np.array([self._encode_architecture(g) for g in candidates])

        # Get GP predictions
        mu, sigma = self.gp.predict(X, return_std=True)

        # Compute acquisition values
        f_best = max(self.y_observed)
        acq = self._compute_acquisition(mu, sigma, f_best)

        # Project to 2D using PCA for visualization
        from sklearn.decomposition import PCA

        pca = PCA(n_components=2)
        X_2d = pca.fit_transform(X)

        panels = (
            (mu, "viridis", "GP Mean Predictions"),
            (sigma, "plasma", "GP Uncertainty (σ)"),
            (acq, "coolwarm", f"Acquisition ({self.acquisition_type.upper()})"),
        )

        fig, axes = plt.subplots(1, 3, figsize=(18, 5))
        try:
            for ax, (values, cmap, title) in zip(axes, panels):
                scatter = ax.scatter(X_2d[:, 0], X_2d[:, 1], c=values, cmap=cmap, s=20)
                ax.set_title(title)
                ax.set_xlabel("PC1")
                ax.set_ylabel("PC2")
                plt.colorbar(scatter, ax=ax)

            plt.tight_layout()

            if save_path:
                plt.savefig(save_path, dpi=300, bbox_inches="tight")
                logger.info(f"Acquisition landscape saved to {save_path}")
            else:
                plt.show()
        finally:
            # Release the figure even if saving or showing fails.
            plt.close(fig)

    def __repr__(self) -> str:
        """Return a short summary with kernel, acquisition and observation count."""
        return (
            f"GaussianProcessOptimizer("
            f"kernel={self.kernel_type}, "
            f"acquisition={self.acquisition_type}, "
            f"n_obs={len(self.y_observed)})"
        )
|
|
580
|
+
|
|
581
|
+
|
|
582
|
+
# Convenience function for quick GP optimization
|
|
583
|
+
def optimize_with_gp(
    search_space: SearchSpace,
    evaluator: Any,
    n_iterations: int = 50,
    n_initial: int = 10,
    acquisition: str = "ei",
    kernel: str = "matern",
    verbose: bool = True,
) -> Individual:
    """
    Run a Gaussian Process optimization in a single call.

    Builds a GaussianProcessOptimizer from the given settings and runs its
    ``optimize`` method with ``n_iterations`` as the evaluation budget.

    Args:
        search_space: SearchSpace to optimize over
        evaluator: Fitness evaluation function handed to ``optimize``
        n_iterations: Evaluation budget; also stored under the
            'max_iterations' config key
        n_initial: Value for the 'n_initial_points' config key
        acquisition: Value for the 'acquisition' config key
        kernel: Value for the 'kernel' config key
        verbose: When True, a progress line is printed from the callback

    Returns:
        The result of ``optimizer.optimize`` (the best Individual found)

    Example:
        >>> from morphml.core.dsl import create_cnn_space
        >>> space = create_cnn_space(num_classes=10)
        >>> best = optimize_with_gp(
        ...     search_space=space,
        ...     evaluator=my_evaluator,
        ...     n_iterations=50,
        ...     acquisition='ei'
        ... )
    """
    settings = {
        "n_initial_points": n_initial,
        "acquisition": acquisition,
        "kernel": kernel,
        "max_iterations": n_iterations,
    }
    gp_optimizer = GaussianProcessOptimizer(search_space=search_space, config=settings)

    def report_progress(iteration: int, best: Individual, history: List) -> None:
        # Only ever registered when verbose is set, so it prints unconditionally.
        print(f"Iteration {iteration}: Best fitness = {best.fitness:.4f}")

    progress_hook = report_progress if verbose else None

    return gp_optimizer.optimize(
        evaluator=evaluator,
        max_evaluations=n_iterations,
        callback=progress_hook,
    )
|