morphml 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of morphml has been flagged as potentially problematic; details are available on the registry page.
- morphml/__init__.py +14 -0
- morphml/api/__init__.py +26 -0
- morphml/api/app.py +326 -0
- morphml/api/auth.py +193 -0
- morphml/api/client.py +338 -0
- morphml/api/models.py +132 -0
- morphml/api/rate_limit.py +192 -0
- morphml/benchmarking/__init__.py +36 -0
- morphml/benchmarking/comparison.py +430 -0
- morphml/benchmarks/__init__.py +56 -0
- morphml/benchmarks/comparator.py +409 -0
- morphml/benchmarks/datasets.py +280 -0
- morphml/benchmarks/metrics.py +199 -0
- morphml/benchmarks/openml_suite.py +201 -0
- morphml/benchmarks/problems.py +289 -0
- morphml/benchmarks/suite.py +318 -0
- morphml/cli/__init__.py +5 -0
- morphml/cli/commands/experiment.py +329 -0
- morphml/cli/main.py +457 -0
- morphml/cli/quickstart.py +312 -0
- morphml/config.py +278 -0
- morphml/constraints/__init__.py +19 -0
- morphml/constraints/handler.py +205 -0
- morphml/constraints/predicates.py +285 -0
- morphml/core/__init__.py +3 -0
- morphml/core/crossover.py +449 -0
- morphml/core/dsl/README.md +359 -0
- morphml/core/dsl/__init__.py +72 -0
- morphml/core/dsl/ast_nodes.py +364 -0
- morphml/core/dsl/compiler.py +318 -0
- morphml/core/dsl/layers.py +368 -0
- morphml/core/dsl/lexer.py +336 -0
- morphml/core/dsl/parser.py +455 -0
- morphml/core/dsl/search_space.py +386 -0
- morphml/core/dsl/syntax.py +199 -0
- morphml/core/dsl/type_system.py +361 -0
- morphml/core/dsl/validator.py +386 -0
- morphml/core/graph/__init__.py +40 -0
- morphml/core/graph/edge.py +124 -0
- morphml/core/graph/graph.py +507 -0
- morphml/core/graph/mutations.py +409 -0
- morphml/core/graph/node.py +196 -0
- morphml/core/graph/serialization.py +361 -0
- morphml/core/graph/visualization.py +431 -0
- morphml/core/objectives/__init__.py +20 -0
- morphml/core/search/__init__.py +33 -0
- morphml/core/search/individual.py +252 -0
- morphml/core/search/parameters.py +453 -0
- morphml/core/search/population.py +375 -0
- morphml/core/search/search_engine.py +340 -0
- morphml/distributed/__init__.py +76 -0
- morphml/distributed/fault_tolerance.py +497 -0
- morphml/distributed/health_monitor.py +348 -0
- morphml/distributed/master.py +709 -0
- morphml/distributed/proto/README.md +224 -0
- morphml/distributed/proto/__init__.py +74 -0
- morphml/distributed/proto/worker.proto +170 -0
- morphml/distributed/proto/worker_pb2.py +79 -0
- morphml/distributed/proto/worker_pb2_grpc.py +423 -0
- morphml/distributed/resource_manager.py +416 -0
- morphml/distributed/scheduler.py +567 -0
- morphml/distributed/storage/__init__.py +33 -0
- morphml/distributed/storage/artifacts.py +381 -0
- morphml/distributed/storage/cache.py +366 -0
- morphml/distributed/storage/checkpointing.py +329 -0
- morphml/distributed/storage/database.py +459 -0
- morphml/distributed/worker.py +549 -0
- morphml/evaluation/__init__.py +5 -0
- morphml/evaluation/heuristic.py +237 -0
- morphml/exceptions.py +55 -0
- morphml/execution/__init__.py +5 -0
- morphml/execution/local_executor.py +350 -0
- morphml/integrations/__init__.py +28 -0
- morphml/integrations/jax_adapter.py +206 -0
- morphml/integrations/pytorch_adapter.py +530 -0
- morphml/integrations/sklearn_adapter.py +206 -0
- morphml/integrations/tensorflow_adapter.py +230 -0
- morphml/logging_config.py +93 -0
- morphml/meta_learning/__init__.py +66 -0
- morphml/meta_learning/architecture_similarity.py +277 -0
- morphml/meta_learning/experiment_database.py +240 -0
- morphml/meta_learning/knowledge_base/__init__.py +19 -0
- morphml/meta_learning/knowledge_base/embedder.py +179 -0
- morphml/meta_learning/knowledge_base/knowledge_base.py +313 -0
- morphml/meta_learning/knowledge_base/meta_features.py +265 -0
- morphml/meta_learning/knowledge_base/vector_store.py +271 -0
- morphml/meta_learning/predictors/__init__.py +27 -0
- morphml/meta_learning/predictors/ensemble.py +221 -0
- morphml/meta_learning/predictors/gnn_predictor.py +552 -0
- morphml/meta_learning/predictors/learning_curve.py +231 -0
- morphml/meta_learning/predictors/proxy_metrics.py +261 -0
- morphml/meta_learning/strategy_evolution/__init__.py +27 -0
- morphml/meta_learning/strategy_evolution/adaptive_optimizer.py +226 -0
- morphml/meta_learning/strategy_evolution/bandit.py +276 -0
- morphml/meta_learning/strategy_evolution/portfolio.py +230 -0
- morphml/meta_learning/transfer.py +581 -0
- morphml/meta_learning/warm_start.py +286 -0
- morphml/optimizers/__init__.py +74 -0
- morphml/optimizers/adaptive_operators.py +399 -0
- morphml/optimizers/bayesian/__init__.py +52 -0
- morphml/optimizers/bayesian/acquisition.py +387 -0
- morphml/optimizers/bayesian/base.py +319 -0
- morphml/optimizers/bayesian/gaussian_process.py +635 -0
- morphml/optimizers/bayesian/smac.py +534 -0
- morphml/optimizers/bayesian/tpe.py +411 -0
- morphml/optimizers/differential_evolution.py +220 -0
- morphml/optimizers/evolutionary/__init__.py +61 -0
- morphml/optimizers/evolutionary/cma_es.py +416 -0
- morphml/optimizers/evolutionary/differential_evolution.py +556 -0
- morphml/optimizers/evolutionary/encoding.py +426 -0
- morphml/optimizers/evolutionary/particle_swarm.py +449 -0
- morphml/optimizers/genetic_algorithm.py +486 -0
- morphml/optimizers/gradient_based/__init__.py +22 -0
- morphml/optimizers/gradient_based/darts.py +550 -0
- morphml/optimizers/gradient_based/enas.py +585 -0
- morphml/optimizers/gradient_based/operations.py +474 -0
- morphml/optimizers/gradient_based/utils.py +601 -0
- morphml/optimizers/hill_climbing.py +169 -0
- morphml/optimizers/multi_objective/__init__.py +56 -0
- morphml/optimizers/multi_objective/indicators.py +504 -0
- morphml/optimizers/multi_objective/nsga2.py +647 -0
- morphml/optimizers/multi_objective/visualization.py +427 -0
- morphml/optimizers/nsga2.py +308 -0
- morphml/optimizers/random_search.py +172 -0
- morphml/optimizers/simulated_annealing.py +181 -0
- morphml/plugins/__init__.py +35 -0
- morphml/plugins/custom_evaluator_example.py +81 -0
- morphml/plugins/custom_optimizer_example.py +63 -0
- morphml/plugins/plugin_system.py +454 -0
- morphml/reports/__init__.py +30 -0
- morphml/reports/generator.py +362 -0
- morphml/tracking/__init__.py +7 -0
- morphml/tracking/experiment.py +309 -0
- morphml/tracking/logger.py +301 -0
- morphml/tracking/reporter.py +357 -0
- morphml/utils/__init__.py +6 -0
- morphml/utils/checkpoint.py +189 -0
- morphml/utils/comparison.py +390 -0
- morphml/utils/export.py +407 -0
- morphml/utils/progress.py +392 -0
- morphml/utils/validation.py +392 -0
- morphml/version.py +7 -0
- morphml/visualization/__init__.py +50 -0
- morphml/visualization/analytics.py +423 -0
- morphml/visualization/architecture_diagrams.py +353 -0
- morphml/visualization/architecture_plot.py +223 -0
- morphml/visualization/convergence_plot.py +174 -0
- morphml/visualization/crossover_viz.py +386 -0
- morphml/visualization/graph_viz.py +338 -0
- morphml/visualization/pareto_plot.py +149 -0
- morphml/visualization/plotly_dashboards.py +422 -0
- morphml/visualization/population.py +309 -0
- morphml/visualization/progress.py +260 -0
- morphml-1.0.0.dist-info/METADATA +434 -0
- morphml-1.0.0.dist-info/RECORD +158 -0
- morphml-1.0.0.dist-info/WHEEL +4 -0
- morphml-1.0.0.dist-info/entry_points.txt +3 -0
- morphml-1.0.0.dist-info/licenses/LICENSE +21 -0
morphml/optimizers/bayesian/acquisition.py
@@ -0,0 +1,387 @@
"""Acquisition functions for Bayesian optimization.

Acquisition functions balance exploration (uncertain regions) and exploitation
(promising regions) by quantifying the value of sampling at a given point.

Common acquisition functions:
- Expected Improvement (EI): Expected gain over current best
- Upper Confidence Bound (UCB): Optimistic estimate
- Probability of Improvement (PI): Probability of beating current best

Author: Eshan Roy <eshanized@proton.me>
Organization: TONMOY INFRASTRUCTURE & VISION
"""

from typing import Callable, List, Optional, Tuple

import numpy as np
from scipy.optimize import differential_evolution, minimize
from scipy.stats import norm

from morphml.logging_config import get_logger

logger = get_logger(__name__)

def expected_improvement(
    mu: np.ndarray, sigma: np.ndarray, f_best: float, xi: float = 0.01
) -> np.ndarray:
    """
    Expected Improvement acquisition function.

    EI balances exploration and exploitation by computing the expected
    amount by which a point improves over the current best.

    Formula:
        EI(x) = E[max(f(x) - f*, 0)]
              = (μ - f* - ξ) * Φ(Z) + σ * φ(Z)
        where Z = (μ - f* - ξ) / σ

    Args:
        mu: Predicted mean(s)
        sigma: Predicted standard deviation(s)
        f_best: Current best fitness value
        xi: Exploration parameter (higher = more exploration)

    Returns:
        Expected improvement value(s)

    Example:
        >>> mu = np.array([0.5, 0.7, 0.3])
        >>> sigma = np.array([0.1, 0.2, 0.05])
        >>> f_best = 0.6
        >>> ei = expected_improvement(mu, sigma, f_best, xi=0.01)
    """
    with np.errstate(divide="warn", invalid="warn"):
        # Compute improvement
        imp = mu - f_best - xi

        # Compute Z-score
        Z = imp / sigma

        # Expected improvement
        ei = imp * norm.cdf(Z) + sigma * norm.pdf(Z)

        # Handle zero sigma (no uncertainty)
        ei[sigma == 0.0] = 0.0

    return ei

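To make the role of ξ concrete, here is a minimal sketch (toy posterior values, assuming the module path matches the file layout above) that scores three candidates at increasing exploration levels:

import numpy as np

from morphml.optimizers.bayesian.acquisition import expected_improvement

# Hypothetical posterior predictions at three candidate points.
mu = np.array([0.55, 0.62, 0.48])
sigma = np.array([0.05, 0.15, 0.30])
f_best = 0.60

for xi in (0.0, 0.01, 0.1):
    ei = expected_improvement(mu, sigma, f_best, xi=xi)
    print(f"xi={xi}: EI={np.round(ei, 4)}, argmax={ei.argmax()}")
# Larger xi discounts the mean improvement, so the high-sigma third
# candidate overtakes the high-mean second one at xi=0.1.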
def upper_confidence_bound(mu: np.ndarray, sigma: np.ndarray, kappa: float = 2.576) -> np.ndarray:
    """
    Upper Confidence Bound acquisition function.

    UCB provides an optimistic estimate by adding a multiple of the
    uncertainty to the predicted mean. The kappa parameter controls
    the exploration-exploitation trade-off.

    Formula:
        UCB(x) = μ(x) + κ * σ(x)

    Common kappa values:
    - 1.0: Balanced
    - 1.96: 95% confidence (moderate exploration)
    - 2.576: 99% confidence (high exploration)

    Args:
        mu: Predicted mean(s)
        sigma: Predicted standard deviation(s)
        kappa: Exploration parameter (higher = more exploration)

    Returns:
        UCB value(s)

    Example:
        >>> mu = np.array([0.5, 0.7])
        >>> sigma = np.array([0.1, 0.2])
        >>> ucb = upper_confidence_bound(mu, sigma, kappa=2.0)
    """
    return mu + kappa * sigma

def lower_confidence_bound(mu: np.ndarray, sigma: np.ndarray, kappa: float = 2.576) -> np.ndarray:
    """
    Lower Confidence Bound acquisition function (for minimization).

    LCB is the counterpart of UCB for minimization problems.
    It balances exploitation (low mean) with exploration (high uncertainty).

    Args:
        mu: Predicted means from the surrogate model
        sigma: Predicted standard deviations
        kappa: Exploration-exploitation trade-off parameter.
            Higher values favor exploration.
            Default 2.576 corresponds to a 99% confidence interval.

    Returns:
        LCB values for each point

    Example:
        >>> mu = np.array([0.5, 0.7])
        >>> sigma = np.array([0.1, 0.2])
        >>> lcb = lower_confidence_bound(mu, sigma, kappa=2.0)
    """
    return mu - kappa * sigma

def probability_of_improvement(
    mu: np.ndarray, sigma: np.ndarray, f_best: float, xi: float = 0.01
) -> np.ndarray:
    """
    Probability of Improvement acquisition function.

    PI computes the probability that a point will improve over the
    current best. More conservative than EI, since it ignores the
    magnitude of the improvement.

    Formula:
        PI(x) = P(f(x) > f* + ξ)
              = Φ((μ - f* - ξ) / σ)

    Args:
        mu: Predicted mean(s)
        sigma: Predicted standard deviation(s)
        f_best: Current best fitness value
        xi: Exploration parameter

    Returns:
        Probability of improvement value(s)

    Example:
        >>> mu = np.array([0.5, 0.7])
        >>> sigma = np.array([0.1, 0.2])
        >>> f_best = 0.6
        >>> pi = probability_of_improvement(mu, sigma, f_best)
    """
    with np.errstate(divide="warn", invalid="warn"):
        Z = (mu - f_best - xi) / sigma
        pi = norm.cdf(Z)

        # Handle zero sigma
        pi[sigma == 0.0] = 0.0

    return pi

def thompson_sampling(
    mu: np.ndarray, sigma: np.ndarray, random_state: Optional[int] = None
) -> np.ndarray:
    """
    Thompson Sampling for acquisition.

    Sample from the posterior distribution and select the point
    with the highest sample. Naturally balances exploration/exploitation.

    Args:
        mu: Predicted mean(s)
        sigma: Predicted standard deviation(s)
        random_state: Random seed for reproducibility

    Returns:
        Sampled values from posterior
    """
    if random_state is not None:
        np.random.seed(random_state)

    samples = np.random.normal(mu, sigma)
    return samples

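With all four functions defined, a short side-by-side sketch (toy numbers; import path assumed from the file layout above) shows how they rank the same posterior differently:

import numpy as np

from morphml.optimizers.bayesian.acquisition import (
    expected_improvement,
    probability_of_improvement,
    thompson_sampling,
    upper_confidence_bound,
)

mu = np.array([0.50, 0.70, 0.30])     # posterior means (toy values)
sigma = np.array([0.10, 0.20, 0.05])  # posterior standard deviations
f_best = 0.60

print("EI :", expected_improvement(mu, sigma, f_best))
print("PI :", probability_of_improvement(mu, sigma, f_best))
print("UCB:", upper_confidence_bound(mu, sigma, kappa=1.96))
print("TS :", thompson_sampling(mu, sigma, random_state=0))
# EI and PI both score points against f_best; UCB ignores f_best and
# rewards mean plus uncertainty; TS draws one posterior sample per point.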
class AcquisitionOptimizer:
    """
    Optimizer for acquisition functions.

    Finds the point that maximizes the acquisition function value,
    which determines where to sample next.

    Attributes:
        method: Optimization method ('lbfgs', 'de', 'random')
        n_restarts: Number of random restarts for local optimization
        n_samples: Number of random samples for 'random' method

    Example:
        >>> def my_acquisition(x):
        ...     mu, sigma = gp.predict(x)
        ...     return expected_improvement(mu, sigma, f_best=0.8)
        >>> optimizer = AcquisitionOptimizer(method='lbfgs', n_restarts=10)
        >>> x_next = optimizer.optimize(my_acquisition, bounds)
    """

    def __init__(
        self,
        method: str = "lbfgs",
        n_restarts: int = 10,
        n_samples: int = 1000,
        random_state: Optional[int] = None,
    ):
        """
        Initialize acquisition optimizer.

        Args:
            method: Optimization method ('lbfgs', 'de', 'random')
            n_restarts: Number of random restarts for 'lbfgs'
            n_samples: Number of samples for 'random' method
            random_state: Random seed for reproducibility
        """
        self.method = method
        self.n_restarts = n_restarts
        self.n_samples = n_samples
        self.random_state = random_state

        if random_state is not None:
            np.random.seed(random_state)

    def optimize(
        self,
        acquisition_fn: Callable[[np.ndarray], float],
        bounds: List[Tuple[float, float]],
        n_candidates: int = 1,
    ) -> np.ndarray:
        """
        Find point(s) that maximize the acquisition function.

        Args:
            acquisition_fn: Function to maximize (takes array, returns scalar)
            bounds: List of (min, max) tuples for each dimension
            n_candidates: Number of candidates to return

        Returns:
            Best point(s) as numpy array of shape (n_candidates, n_dims)

        Raises:
            ValueError: If method is unknown
        """
        if self.method == "lbfgs":
            return self._optimize_lbfgs(acquisition_fn, bounds, n_candidates)
        elif self.method == "de":
            return self._optimize_differential_evolution(acquisition_fn, bounds)
        elif self.method == "random":
            return self._optimize_random_search(acquisition_fn, bounds, n_candidates)
        else:
            raise ValueError(f"Unknown optimization method: {self.method}")

    def _optimize_lbfgs(
        self, acquisition_fn: Callable, bounds: List[Tuple[float, float]], n_candidates: int
    ) -> np.ndarray:
        """
        Multi-start L-BFGS-B optimization.

        Performs multiple local optimizations from random starting points
        and returns the best result.
        """
        best_x = None
        best_value = -np.inf

        for _ in range(self.n_restarts):
            # Random starting point
            x0 = np.array([np.random.uniform(low, high) for low, high in bounds])

            # Minimize negative (to maximize)
            try:
                result = minimize(
                    lambda x: -acquisition_fn(x.reshape(1, -1))[0],
                    x0=x0,
                    bounds=bounds,
                    method="L-BFGS-B",
                    options={"maxiter": 100},
                )

                value = -result.fun
                if value > best_value:
                    best_value = value
                    best_x = result.x

            except Exception as e:
                logger.warning(f"L-BFGS-B optimization failed: {e}")
                continue

        if best_x is None:
            # Fallback to random sample
            best_x = np.array([np.random.uniform(low, high) for low, high in bounds])

        return best_x.reshape(1, -1) if n_candidates == 1 else best_x

    def _optimize_differential_evolution(
        self, acquisition_fn: Callable, bounds: List[Tuple[float, float]]
    ) -> np.ndarray:
        """
        Global optimization using Differential Evolution.

        More robust than L-BFGS-B but slower. Good for multimodal acquisitions.
        """
        try:
            result = differential_evolution(
                lambda x: -acquisition_fn(x.reshape(1, -1))[0],
                bounds=bounds,
                maxiter=100,
                seed=self.random_state,
                workers=1,
                polish=True,
            )
            return result.x.reshape(1, -1)

        except Exception as e:
            logger.warning(f"Differential evolution failed: {e}")
            # Fallback to random
            return np.array([[np.random.uniform(low, high) for low, high in bounds]])

    def _optimize_random_search(
        self, acquisition_fn: Callable, bounds: List[Tuple[float, float]], n_candidates: int
    ) -> np.ndarray:
        """
        Random search: sample many points and pick the best.

        Simple but surprisingly effective baseline.
        """
        # Generate random candidates
        n_dims = len(bounds)
        candidates = np.zeros((self.n_samples, n_dims))

        for i, (low, high) in enumerate(bounds):
            candidates[:, i] = np.random.uniform(low, high, self.n_samples)

        # Evaluate all candidates
        values = np.array([acquisition_fn(x.reshape(1, -1))[0] for x in candidates])

        # Return top n_candidates
        top_indices = np.argsort(values)[-n_candidates:][::-1]

        return candidates[top_indices]

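A minimal usage sketch of AcquisitionOptimizer on a synthetic acquisition with a known maximum; toy_acquisition and its target point are invented for illustration:

import numpy as np

from morphml.optimizers.bayesian.acquisition import AcquisitionOptimizer

# Toy acquisition with a known maximum at (0.3, -0.5). It takes a
# (1, n_dims) array and returns a length-1 array, matching the calling
# convention used inside the optimizer.
def toy_acquisition(x: np.ndarray) -> np.ndarray:
    target = np.array([0.3, -0.5])
    return -np.sum((x - target) ** 2, axis=1)

bounds = [(-1.0, 1.0), (-1.0, 1.0)]

for method in ("lbfgs", "de", "random"):
    opt = AcquisitionOptimizer(method=method, n_restarts=5, n_samples=2000, random_state=0)
    x_next = opt.optimize(toy_acquisition, bounds)
    print(method, "->", np.round(x_next, 3))
# All three methods should land near [0.3, -0.5]; 'random' is the
# noisiest, 'de' the most robust on multimodal surfaces.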
def get_acquisition_function(name: str, **kwargs) -> Callable:
    """
    Factory function for acquisition functions.

    Args:
        name: Acquisition function name ('ei', 'ucb', 'pi', 'ts')
        **kwargs: Additional parameters for the acquisition function

    Returns:
        Acquisition function

    Example:
        >>> acq_fn = get_acquisition_function('ei', f_best=0.8, xi=0.01)
        >>> value = acq_fn(np.array([0.9]), np.array([0.1]))
    """
    if name.lower() == "ei":
        f_best = kwargs.get("f_best", 0.0)
        xi = kwargs.get("xi", 0.01)
        return lambda mu, sigma: expected_improvement(mu, sigma, f_best, xi)

    elif name.lower() == "ucb":
        kappa = kwargs.get("kappa", 2.576)
        return lambda mu, sigma: upper_confidence_bound(mu, sigma, kappa)

    elif name.lower() == "pi":
        f_best = kwargs.get("f_best", 0.0)
        xi = kwargs.get("xi", 0.01)
        return lambda mu, sigma: probability_of_improvement(mu, sigma, f_best, xi)

    elif name.lower() == "ts":
        random_state = kwargs.get("random_state", None)
        return lambda mu, sigma: thompson_sampling(mu, sigma, random_state)

    else:
        raise ValueError(
            f"Unknown acquisition function: {name}. Choose from: 'ei', 'ucb', 'pi', 'ts'"
        )
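The factory returns a two-argument closure over mu and sigma; a quick sketch with toy values:

import numpy as np

from morphml.optimizers.bayesian.acquisition import get_acquisition_function

acq = get_acquisition_function("ucb", kappa=1.96)

mu = np.array([0.4, 0.9])
sigma = np.array([0.3, 0.05])
print(acq(mu, sigma))  # [0.988, 0.998]: the uncertain point nearly catches up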
morphml/optimizers/bayesian/base.py
@@ -0,0 +1,319 @@
"""Base class for Bayesian optimization algorithms.

This module provides the foundation for sample-efficient Bayesian optimization
methods that use surrogate models to guide the search process.

Author: Eshan Roy <eshanized@proton.me>
Organization: TONMOY INFRASTRUCTURE & VISION
"""

from abc import ABC, abstractmethod
from typing import Any, Dict, List, Optional, Tuple

import numpy as np

from morphml.core.dsl import SearchSpace
from morphml.core.graph import ModelGraph
from morphml.core.search import Individual
from morphml.logging_config import get_logger

logger = get_logger(__name__)

class BaseBayesianOptimizer(ABC):
    """
    Base class for Bayesian optimization algorithms.

    Bayesian optimization uses a surrogate model to approximate the
    expensive-to-evaluate fitness function, enabling intelligent
    exploration-exploitation trade-offs.

    Key components:
    1. **Surrogate Model**: Approximates f(x) (e.g., GP, RF, TPE)
    2. **Acquisition Function**: Decides where to sample next
    3. **Architecture Encoding**: Maps graphs to continuous/discrete vectors

    Attributes:
        search_space: SearchSpace defining architecture options
        config: Algorithm configuration
        generation: Current generation/iteration
        history: List of all evaluated architectures
        best_individual: Best architecture found so far

    Example:
        >>> from morphml.optimizers.bayesian import GaussianProcessOptimizer
        >>> optimizer = GaussianProcessOptimizer(
        ...     search_space=space,
        ...     config={'acquisition': 'ei', 'n_initial_points': 10}
        ... )
        >>> best = optimizer.optimize(evaluator)
    """

    def __init__(self, search_space: SearchSpace, config: Optional[Dict[str, Any]] = None):
        """
        Initialize Bayesian optimizer.

        Args:
            search_space: SearchSpace to sample architectures from
            config: Algorithm configuration dictionary
        """
        self.search_space = search_space
        self.config = config or {}

        self.generation = 0
        self.history: List[Dict[str, Any]] = []
        self.best_individual: Optional[Individual] = None

        # Configuration parameters
        self.n_initial_points = self.config.get("n_initial_points", 10)
        self.max_iterations = self.config.get("max_iterations", 100)
        self.random_state = self.config.get("random_state", None)

        if self.random_state is not None:
            np.random.seed(self.random_state)

        logger.info(
            f"Initialized {self.__class__.__name__} with "
            f"{self.n_initial_points} initial points"
        )

    def optimize(
        self, evaluator: Any, max_evaluations: Optional[int] = None, callback: Optional[Any] = None
    ) -> Individual:
        """
        Run the Bayesian optimization loop.

        Args:
            evaluator: Function that evaluates ModelGraph fitness
            max_evaluations: Maximum number of evaluations (overrides config)
            callback: Optional callback function called each iteration

        Returns:
            Best Individual found

        Example:
            >>> def my_evaluator(graph):
            ...     return train_and_evaluate(graph)
            >>> best = optimizer.optimize(my_evaluator, max_evaluations=50)
        """
        max_evals = max_evaluations or self.max_iterations

        logger.info(f"Starting Bayesian optimization for {max_evals} evaluations")

        for iteration in range(max_evals):
            # Ask: Get next candidate(s) to evaluate
            candidates = self.ask()

            # Evaluate candidates
            results = []
            for graph in candidates:
                fitness = evaluator(graph)
                results.append((graph, fitness))

                # Track best
                individual = Individual(graph)
                individual.fitness = fitness

                if self.best_individual is None or fitness > self.best_individual.fitness:
                    self.best_individual = individual
                    logger.info(f"Iteration {iteration}: New best fitness = {fitness:.4f}")

            # Tell: Update surrogate model with results
            self.tell(results)

            # Callback
            if callback is not None:
                callback(iteration, self.best_individual, self.history)

            self.generation += 1

        logger.info(f"Optimization complete. Best fitness: {self.best_individual.fitness:.4f}")

        return self.best_individual

    @abstractmethod
    def ask(self) -> List[ModelGraph]:
        """
        Generate next candidate architecture(s) to evaluate.

        Uses the surrogate model and acquisition function to select
        promising architectures.

        Returns:
            List of ModelGraph candidates
        """
        pass

    @abstractmethod
    def tell(self, results: List[Tuple[ModelGraph, float]]) -> None:
        """
        Update surrogate model with evaluation results.

        Args:
            results: List of (graph, fitness) tuples
        """
        pass

    def get_best(self) -> Optional[Individual]:
        """
        Get the best individual found so far.

        Returns:
            Best Individual or None if no evaluations yet
        """
        return self.best_individual

    def get_history(self) -> List[Dict[str, Any]]:
        """
        Get optimization history.

        Returns:
            List of dictionaries with generation, graph, fitness
        """
        return self.history

    def reset(self) -> None:
        """Reset optimizer to initial state."""
        self.generation = 0
        self.history = []
        self.best_individual = None
        logger.info(f"{self.__class__.__name__} reset to initial state")

    def _encode_architecture(self, graph: ModelGraph) -> np.ndarray:
        """
        Encode a ModelGraph as a fixed-length numerical vector.

        This is a critical method that maps complex graph structures
        to continuous/discrete vectors suitable for surrogate models.

        Encoding strategies:
        1. **Positional Encoding**: Represent nodes by position in topological order
        2. **Operation One-Hot**: Encode operation types
        3. **Hyperparameters**: Include numerical parameters
        4. **Connectivity**: Encode edge structure

        Args:
            graph: ModelGraph to encode

        Returns:
            Fixed-length numpy array
        """
        # Get topological ordering
        try:
            topo_order = list(graph.topological_sort())
        except Exception:
            # Handle invalid graphs
            topo_order = list(graph.nodes.values())

        # Define operation vocabulary
        operation_types = [
            "input",
            "output",
            "conv2d",
            "dense",
            "relu",
            "sigmoid",
            "tanh",
            "maxpool",
            "avgpool",
            "batchnorm",
            "dropout",
            "flatten",
            "add",
            "concat",
        ]

        # Fixed encoding length (max depth)
        max_depth = 20
        encoding_per_node = 3  # operation_id, param1, param2

        encoding = []

        for i in range(max_depth):
            if i < len(topo_order):
                node = topo_order[i]

                # Encode operation type
                if node.operation in operation_types:
                    op_id = operation_types.index(node.operation)
                else:
                    op_id = 0  # Unknown operation
                encoding.append(float(op_id))

                # Encode key hyperparameters
                if node.operation == "conv2d":
                    filters = node.params.get("filters", 32)
                    kernel_size = node.params.get("kernel_size", 3)
                    encoding.extend([float(filters), float(kernel_size)])

                elif node.operation == "dense":
                    units = node.params.get("units", 128)
                    encoding.extend([float(units), 0.0])

                elif node.operation == "dropout":
                    rate = node.params.get("rate", 0.5)
                    encoding.extend([float(rate * 100), 0.0])

                elif node.operation in ["maxpool", "avgpool"]:
                    pool_size = node.params.get("pool_size", 2)
                    encoding.extend([float(pool_size), 0.0])

                else:
                    encoding.extend([0.0, 0.0])
            else:
                # Padding for shorter architectures
                encoding.extend([0.0] * encoding_per_node)

        return np.array(encoding, dtype=np.float64)
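The encoding layout is easiest to see on a toy example. The sketch below mirrors the loop in _encode_architecture on a stand-in node list (the tuples are hypothetical stand-ins for real ModelGraph nodes):

import numpy as np

# Stand-ins for ModelGraph nodes: (operation, params), already in
# topological order. A real graph would come from search_space.sample().
nodes = [
    ("input", {}),
    ("conv2d", {"filters": 64, "kernel_size": 3}),
    ("dense", {"units": 10}),
]

operation_types = [
    "input", "output", "conv2d", "dense", "relu", "sigmoid", "tanh",
    "maxpool", "avgpool", "batchnorm", "dropout", "flatten", "add", "concat",
]

encoding = []
for op, params in nodes:
    encoding.append(float(operation_types.index(op)))
    if op == "conv2d":
        encoding.extend([float(params["filters"]), float(params["kernel_size"])])
    elif op == "dense":
        encoding.extend([float(params["units"]), 0.0])
    else:
        encoding.extend([0.0, 0.0])
# Pad to max_depth=20 nodes * 3 slots each, as in _encode_architecture.
encoding.extend([0.0] * (20 * 3 - len(encoding)))
print(np.array(encoding)[:9])  # [0. 0. 0. 2. 64. 3. 3. 10. 0.]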

    def _decode_architecture(self, x: np.ndarray) -> ModelGraph:
        """
        Decode a numerical vector back to a ModelGraph.

        This is challenging because the mapping is many-to-one
        (many vectors may decode to similar graphs).

        Strategy:
        1. Sample a random architecture from the search space
        2. Use the vector to guide mutations toward the desired structure

        Args:
            x: Numerical encoding

        Returns:
            Decoded ModelGraph
        """
        # Simplified decoding: sample from search space
        # In practice, this would use more sophisticated reconstruction
        graph = self.search_space.sample()

        # TODO: Could add logic to mutate graph toward target encoding
        # For now, return sampled graph (acquisition still guides search)

        return graph

    def _get_encoding_bounds(self) -> List[Tuple[float, float]]:
        """
        Get bounds for architecture encoding dimensions.

        Returns:
            List of (min, max) tuples for each encoding dimension
        """
        operation_types_count = 14  # Number of supported operations
        max_filters = 512
        max_kernel_size = 7

        max_depth = 20

        bounds = []
        for _ in range(max_depth):
            bounds.append((0, operation_types_count))  # Operation ID
            bounds.append((0, max_filters))  # Param 1 (filters/units)
            bounds.append((0, max_kernel_size))  # Param 2 (kernel/pool size)

        return bounds


class BayesianOptimizationError(Exception):
    """Exception raised for errors in Bayesian optimization."""

    pass
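The ask/tell split keeps evaluation outside the optimizer loop. A minimal concrete subclass, sketched here with no real surrogate (the class name and the random strategy are illustrative only), shows the contract that GaussianProcessOptimizer and the other subclasses must satisfy:

from typing import List, Tuple

from morphml.core.graph import ModelGraph
from morphml.optimizers.bayesian.base import BaseBayesianOptimizer


class RandomSearchBayesianStub(BaseBayesianOptimizer):
    """Degenerate subclass: no surrogate; ask() just samples the space."""

    def ask(self) -> List[ModelGraph]:
        # A real optimizer would maximize an acquisition function here.
        return [self.search_space.sample()]

    def tell(self, results: List[Tuple[ModelGraph, float]]) -> None:
        # A real optimizer would refit its surrogate model here; we only
        # record history in the format get_history() documents.
        for graph, fitness in results:
            self.history.append(
                {"generation": self.generation, "graph": graph, "fitness": fitness}
            )


# Usage (assuming a SearchSpace `space` and an `evaluator` callable exist):
# opt = RandomSearchBayesianStub(space, config={"max_iterations": 20})
# best = opt.optimize(evaluator)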