morphml 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of morphml might be problematic. Click here for more details.
- morphml/__init__.py +14 -0
- morphml/api/__init__.py +26 -0
- morphml/api/app.py +326 -0
- morphml/api/auth.py +193 -0
- morphml/api/client.py +338 -0
- morphml/api/models.py +132 -0
- morphml/api/rate_limit.py +192 -0
- morphml/benchmarking/__init__.py +36 -0
- morphml/benchmarking/comparison.py +430 -0
- morphml/benchmarks/__init__.py +56 -0
- morphml/benchmarks/comparator.py +409 -0
- morphml/benchmarks/datasets.py +280 -0
- morphml/benchmarks/metrics.py +199 -0
- morphml/benchmarks/openml_suite.py +201 -0
- morphml/benchmarks/problems.py +289 -0
- morphml/benchmarks/suite.py +318 -0
- morphml/cli/__init__.py +5 -0
- morphml/cli/commands/experiment.py +329 -0
- morphml/cli/main.py +457 -0
- morphml/cli/quickstart.py +312 -0
- morphml/config.py +278 -0
- morphml/constraints/__init__.py +19 -0
- morphml/constraints/handler.py +205 -0
- morphml/constraints/predicates.py +285 -0
- morphml/core/__init__.py +3 -0
- morphml/core/crossover.py +449 -0
- morphml/core/dsl/README.md +359 -0
- morphml/core/dsl/__init__.py +72 -0
- morphml/core/dsl/ast_nodes.py +364 -0
- morphml/core/dsl/compiler.py +318 -0
- morphml/core/dsl/layers.py +368 -0
- morphml/core/dsl/lexer.py +336 -0
- morphml/core/dsl/parser.py +455 -0
- morphml/core/dsl/search_space.py +386 -0
- morphml/core/dsl/syntax.py +199 -0
- morphml/core/dsl/type_system.py +361 -0
- morphml/core/dsl/validator.py +386 -0
- morphml/core/graph/__init__.py +40 -0
- morphml/core/graph/edge.py +124 -0
- morphml/core/graph/graph.py +507 -0
- morphml/core/graph/mutations.py +409 -0
- morphml/core/graph/node.py +196 -0
- morphml/core/graph/serialization.py +361 -0
- morphml/core/graph/visualization.py +431 -0
- morphml/core/objectives/__init__.py +20 -0
- morphml/core/search/__init__.py +33 -0
- morphml/core/search/individual.py +252 -0
- morphml/core/search/parameters.py +453 -0
- morphml/core/search/population.py +375 -0
- morphml/core/search/search_engine.py +340 -0
- morphml/distributed/__init__.py +76 -0
- morphml/distributed/fault_tolerance.py +497 -0
- morphml/distributed/health_monitor.py +348 -0
- morphml/distributed/master.py +709 -0
- morphml/distributed/proto/README.md +224 -0
- morphml/distributed/proto/__init__.py +74 -0
- morphml/distributed/proto/worker.proto +170 -0
- morphml/distributed/proto/worker_pb2.py +79 -0
- morphml/distributed/proto/worker_pb2_grpc.py +423 -0
- morphml/distributed/resource_manager.py +416 -0
- morphml/distributed/scheduler.py +567 -0
- morphml/distributed/storage/__init__.py +33 -0
- morphml/distributed/storage/artifacts.py +381 -0
- morphml/distributed/storage/cache.py +366 -0
- morphml/distributed/storage/checkpointing.py +329 -0
- morphml/distributed/storage/database.py +459 -0
- morphml/distributed/worker.py +549 -0
- morphml/evaluation/__init__.py +5 -0
- morphml/evaluation/heuristic.py +237 -0
- morphml/exceptions.py +55 -0
- morphml/execution/__init__.py +5 -0
- morphml/execution/local_executor.py +350 -0
- morphml/integrations/__init__.py +28 -0
- morphml/integrations/jax_adapter.py +206 -0
- morphml/integrations/pytorch_adapter.py +530 -0
- morphml/integrations/sklearn_adapter.py +206 -0
- morphml/integrations/tensorflow_adapter.py +230 -0
- morphml/logging_config.py +93 -0
- morphml/meta_learning/__init__.py +66 -0
- morphml/meta_learning/architecture_similarity.py +277 -0
- morphml/meta_learning/experiment_database.py +240 -0
- morphml/meta_learning/knowledge_base/__init__.py +19 -0
- morphml/meta_learning/knowledge_base/embedder.py +179 -0
- morphml/meta_learning/knowledge_base/knowledge_base.py +313 -0
- morphml/meta_learning/knowledge_base/meta_features.py +265 -0
- morphml/meta_learning/knowledge_base/vector_store.py +271 -0
- morphml/meta_learning/predictors/__init__.py +27 -0
- morphml/meta_learning/predictors/ensemble.py +221 -0
- morphml/meta_learning/predictors/gnn_predictor.py +552 -0
- morphml/meta_learning/predictors/learning_curve.py +231 -0
- morphml/meta_learning/predictors/proxy_metrics.py +261 -0
- morphml/meta_learning/strategy_evolution/__init__.py +27 -0
- morphml/meta_learning/strategy_evolution/adaptive_optimizer.py +226 -0
- morphml/meta_learning/strategy_evolution/bandit.py +276 -0
- morphml/meta_learning/strategy_evolution/portfolio.py +230 -0
- morphml/meta_learning/transfer.py +581 -0
- morphml/meta_learning/warm_start.py +286 -0
- morphml/optimizers/__init__.py +74 -0
- morphml/optimizers/adaptive_operators.py +399 -0
- morphml/optimizers/bayesian/__init__.py +52 -0
- morphml/optimizers/bayesian/acquisition.py +387 -0
- morphml/optimizers/bayesian/base.py +319 -0
- morphml/optimizers/bayesian/gaussian_process.py +635 -0
- morphml/optimizers/bayesian/smac.py +534 -0
- morphml/optimizers/bayesian/tpe.py +411 -0
- morphml/optimizers/differential_evolution.py +220 -0
- morphml/optimizers/evolutionary/__init__.py +61 -0
- morphml/optimizers/evolutionary/cma_es.py +416 -0
- morphml/optimizers/evolutionary/differential_evolution.py +556 -0
- morphml/optimizers/evolutionary/encoding.py +426 -0
- morphml/optimizers/evolutionary/particle_swarm.py +449 -0
- morphml/optimizers/genetic_algorithm.py +486 -0
- morphml/optimizers/gradient_based/__init__.py +22 -0
- morphml/optimizers/gradient_based/darts.py +550 -0
- morphml/optimizers/gradient_based/enas.py +585 -0
- morphml/optimizers/gradient_based/operations.py +474 -0
- morphml/optimizers/gradient_based/utils.py +601 -0
- morphml/optimizers/hill_climbing.py +169 -0
- morphml/optimizers/multi_objective/__init__.py +56 -0
- morphml/optimizers/multi_objective/indicators.py +504 -0
- morphml/optimizers/multi_objective/nsga2.py +647 -0
- morphml/optimizers/multi_objective/visualization.py +427 -0
- morphml/optimizers/nsga2.py +308 -0
- morphml/optimizers/random_search.py +172 -0
- morphml/optimizers/simulated_annealing.py +181 -0
- morphml/plugins/__init__.py +35 -0
- morphml/plugins/custom_evaluator_example.py +81 -0
- morphml/plugins/custom_optimizer_example.py +63 -0
- morphml/plugins/plugin_system.py +454 -0
- morphml/reports/__init__.py +30 -0
- morphml/reports/generator.py +362 -0
- morphml/tracking/__init__.py +7 -0
- morphml/tracking/experiment.py +309 -0
- morphml/tracking/logger.py +301 -0
- morphml/tracking/reporter.py +357 -0
- morphml/utils/__init__.py +6 -0
- morphml/utils/checkpoint.py +189 -0
- morphml/utils/comparison.py +390 -0
- morphml/utils/export.py +407 -0
- morphml/utils/progress.py +392 -0
- morphml/utils/validation.py +392 -0
- morphml/version.py +7 -0
- morphml/visualization/__init__.py +50 -0
- morphml/visualization/analytics.py +423 -0
- morphml/visualization/architecture_diagrams.py +353 -0
- morphml/visualization/architecture_plot.py +223 -0
- morphml/visualization/convergence_plot.py +174 -0
- morphml/visualization/crossover_viz.py +386 -0
- morphml/visualization/graph_viz.py +338 -0
- morphml/visualization/pareto_plot.py +149 -0
- morphml/visualization/plotly_dashboards.py +422 -0
- morphml/visualization/population.py +309 -0
- morphml/visualization/progress.py +260 -0
- morphml-1.0.0.dist-info/METADATA +434 -0
- morphml-1.0.0.dist-info/RECORD +158 -0
- morphml-1.0.0.dist-info/WHEEL +4 -0
- morphml-1.0.0.dist-info/entry_points.txt +3 -0
- morphml-1.0.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,534 @@
|
|
|
1
|
+
"""SMAC (Sequential Model-based Algorithm Configuration) optimizer.
|
|
2
|
+
|
|
3
|
+
SMAC uses Random Forest instead of Gaussian Process as the surrogate model,
|
|
4
|
+
making it more scalable and robust for neural architecture search with mixed
|
|
5
|
+
continuous/categorical spaces.
|
|
6
|
+
|
|
7
|
+
Key advantages:
|
|
8
|
+
- Scales to high dimensions better than GP
|
|
9
|
+
- Handles categorical variables natively
|
|
10
|
+
- More robust to noisy evaluations
|
|
11
|
+
- Efficient with limited data
|
|
12
|
+
|
|
13
|
+
Reference:
|
|
14
|
+
Hutter, F., et al. "Sequential Model-Based Optimization for General
|
|
15
|
+
Algorithm Configuration." LION 2011.
|
|
16
|
+
|
|
17
|
+
Author: Eshan Roy <eshanized@proton.me>
|
|
18
|
+
Organization: TONMOY INFRASTRUCTURE & VISION
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from typing import Any, Dict, List, Optional, Tuple, Union

import numpy as np
from sklearn.ensemble import RandomForestRegressor

from morphml.core.dsl import SearchSpace
from morphml.core.graph import ModelGraph
from morphml.core.search import Individual
from morphml.logging_config import get_logger
from morphml.optimizers.bayesian.acquisition import (
    AcquisitionOptimizer,
    expected_improvement,
)
from morphml.optimizers.bayesian.base import BaseBayesianOptimizer
|
|
35
|
+
|
|
36
|
+
logger = get_logger(__name__)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class SMACOptimizer(BaseBayesianOptimizer):
|
|
40
|
+
"""
|
|
41
|
+
Sequential Model-based Algorithm Configuration optimizer.
|
|
42
|
+
|
|
43
|
+
SMAC uses Random Forest as a surrogate model instead of Gaussian Process.
|
|
44
|
+
The Random Forest provides:
|
|
45
|
+
- Predictions via ensemble averaging
|
|
46
|
+
- Uncertainty via variance across trees
|
|
47
|
+
- Scalability to high dimensions
|
|
48
|
+
- Natural handling of mixed spaces
|
|
49
|
+
|
|
50
|
+
Algorithm:
|
|
51
|
+
1. Initialize with random samples
|
|
52
|
+
2. Fit Random Forest on observed (x, y) pairs
|
|
53
|
+
3. Optimize acquisition function (typically EI)
|
|
54
|
+
4. Evaluate selected architecture
|
|
55
|
+
5. Update forest and repeat
|
|
56
|
+
|
|
57
|
+
Configuration:
|
|
58
|
+
n_initial_points: Random samples before RF (default: 15)
|
|
59
|
+
n_estimators: Number of trees in forest (default: 50)
|
|
60
|
+
max_depth: Maximum tree depth (default: 10)
|
|
61
|
+
min_samples_split: Min samples to split node (default: 2)
|
|
62
|
+
acquisition: Acquisition function (default: 'ei'); Expected Improvement is the only acquisition this class evaluates
|
|
63
|
+
xi: EI exploration parameter (default: 0.01)
|
|
64
|
+
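acq_optimizer: Acquisition optimization method (default: 'random')
min_samples_leaf: Min samples required at a leaf node (default: 1)
acq_n_samples: n_samples value passed to the acquisition optimizer (default: 1000)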
|
|
65
|
+
|
|
66
|
+
Example:
|
|
67
|
+
>>> from morphml.optimizers.bayesian import SMACOptimizer
|
|
68
|
+
>>> optimizer = SMACOptimizer(
|
|
69
|
+
... search_space=space,
|
|
70
|
+
... config={
|
|
71
|
+
... 'n_estimators': 50,
|
|
72
|
+
... 'max_depth': 10,
|
|
73
|
+
... 'acquisition': 'ei'
|
|
74
|
+
... }
|
|
75
|
+
... )
|
|
76
|
+
>>> best = optimizer.optimize(evaluator, max_evaluations=100)
|
|
77
|
+
"""
|
|
78
|
+
|
|
79
|
+
def __init__(self, search_space: SearchSpace, config: Optional[Dict[str, Any]] = None):
    """
    Initialize SMAC optimizer.

    Args:
        search_space: SearchSpace defining architecture options
        config: Configuration dictionary with optional keys:
            - n_initial_points: Initial random samples (default: 15)
            - n_estimators: Number of RF trees (default: 50)
            - max_depth: Maximum tree depth (default: 10)
            - min_samples_split: Min samples for split (default: 2)
            - min_samples_leaf: Min samples at a leaf (default: 1)
            - n_jobs: Parallel jobs used by the forest; -1 uses all
              CPU cores (default: -1)
            - acquisition: Acquisition function type (default: 'ei')
            - xi: EI exploration parameter (default: 0.01)
            - acq_optimizer: Acquisition optimization method
              (default: 'random')
            - acq_n_samples: n_samples passed to the acquisition
              optimizer (default: 1000)
    """
    super().__init__(search_space, config or {})
    # NOTE(review): self.config and self.random_state are read below without
    # being assigned here -- presumably set by BaseBayesianOptimizer; confirm.

    # Random Forest configuration
    self.n_estimators = self.config.get("n_estimators", 50)
    self.max_depth = self.config.get("max_depth", 10)
    self.min_samples_split = self.config.get("min_samples_split", 2)
    self.min_samples_leaf = self.config.get("min_samples_leaf", 1)
    # Previously hard-coded to -1; configurable so callers that already run
    # evaluations in parallel can avoid oversubscribing the machine.
    self.n_jobs = self.config.get("n_jobs", -1)

    # Acquisition configuration
    self.acquisition_type = self.config.get("acquisition", "ei")
    self.xi = self.config.get("xi", 0.01)

    # Number of purely random candidates proposed before the forest is used
    self.n_initial_points = self.config.get("n_initial_points", 15)

    # Acquisition optimizer settings
    self.acq_optimizer_method = self.config.get("acq_optimizer", "random")
    self.acq_n_samples = self.config.get("acq_n_samples", 1000)

    # Surrogate model
    self.rf = RandomForestRegressor(
        n_estimators=self.n_estimators,
        max_depth=self.max_depth,
        min_samples_split=self.min_samples_split,
        min_samples_leaf=self.min_samples_leaf,
        random_state=self.random_state,
        n_jobs=self.n_jobs,
    )

    # Searches the encoded space for the acquisition maximum
    self.acq_optimizer = AcquisitionOptimizer(
        method=self.acq_optimizer_method,
        n_samples=self.acq_n_samples,
        random_state=self.random_state,
    )

    # Observation storage: encodings and their fitness values, index-aligned
    self.X_observed: List[np.ndarray] = []
    self.y_observed: List[float] = []

    # False whenever observations were added since the last successful fit
    self._rf_fitted = False

    logger.info(
        f"Initialized SMACOptimizer with "
        f"n_estimators={self.n_estimators}, max_depth={self.max_depth}"
    )
|
|
141
|
+
|
|
142
|
+
def ask(self) -> List[ModelGraph]:
    """
    Propose the next architecture to evaluate.

    While fewer than ``n_initial_points`` observations have been recorded,
    the candidate is drawn directly from the search space. Afterwards the
    Random Forest is (re)fitted, the acquisition function is optimized and
    the resulting encoding is decoded into an architecture.

    Returns:
        List containing single ModelGraph candidate
    """
    warming_up = len(self.y_observed) < self.n_initial_points

    if warming_up:
        # Not enough data for the surrogate yet: explore at random
        sampled = self.search_space.sample()
        logger.debug(f"Random sampling ({len(self.y_observed)}/{self.n_initial_points})")
        return [sampled]

    # Model-guided proposal: refresh surrogate, maximize acquisition, decode
    self._fit_rf()
    best_encoding = self._optimize_acquisition()
    proposal = self._decode_architecture(best_encoding)

    logger.debug(
        f"Selected candidate via {self.acquisition_type} acquisition "
        f"(iteration {len(self.y_observed)})"
    )

    return [proposal]
|
|
170
|
+
|
|
171
|
+
def tell(self, results: List[Tuple[ModelGraph, float]]) -> None:
    """
    Record evaluation results so the next fit can use them.

    Each graph is encoded and stored together with its fitness, a history
    entry is appended, and the forest is flagged as stale. The generation
    counter advances once per call, regardless of how many results arrive.

    Args:
        results: List of (graph, fitness) tuples
    """
    for graph, fitness in results:
        encoding = self._encode_architecture(graph)

        # Keep encodings and fitness values index-aligned
        self.X_observed.append(encoding)
        self.y_observed.append(fitness)

        entry = {
            "generation": self.generation,
            "genome": graph,
            "fitness": fitness,
            "encoding": encoding,
        }
        self.history.append(entry)

        logger.debug(f"Added observation: fitness={fitness:.4f}")

        # New data invalidates the current surrogate fit
        self._rf_fitted = False

    self.generation += 1
|
|
197
|
+
|
|
198
|
+
def _fit_rf(self) -> None:
    """
    Train the Random Forest surrogate on every stored observation.

    Does nothing when there are no observations (a warning is logged) or
    when the forest is already fitted on the current data. Any exception
    raised while fitting is logged and re-raised.
    """
    if len(self.X_observed) == 0:
        logger.warning("No observations to fit Random Forest")
        return

    if self._rf_fitted:
        return  # Surrogate is already up to date

    X = np.array(self.X_observed)
    y = np.array(self.y_observed)

    logger.debug(f"Fitting Random Forest on {len(X)} observations")

    try:
        self.rf.fit(X, y)
        self._rf_fitted = True

        # Forest size summary, only when the fitted estimators are exposed
        if hasattr(self.rf, "estimators_"):
            node_counts = [member.tree_.node_count for member in self.rf.estimators_]
            avg_nodes = np.mean(node_counts)
            logger.debug(
                f"RF fitted: {len(self.rf.estimators_)} trees, "
                f"avg {avg_nodes:.0f} nodes per tree"
            )

    except Exception as e:
        logger.error(f"Random Forest fitting failed: {e}")
        raise
|
|
231
|
+
|
|
232
|
+
def _optimize_acquisition(self) -> np.ndarray:
    """
    Search the encoded space for the point with the highest acquisition value.

    The acquisition is Expected Improvement over the best fitness observed
    so far, computed from the forest's per-tree mean and spread. If the
    acquisition optimizer (or the follow-up logging evaluation) raises, a
    warning is logged and a uniformly random point within the encoding
    bounds is returned instead.

    Returns:
        Optimal architecture encoding
    """
    # NOTE(review): self.acquisition_type is not consulted here; EI is always used.
    incumbent = max(self.y_observed)

    def _expected_improvement_at(x: np.ndarray) -> np.ndarray:
        """
        Expected Improvement at one or more encoded points.

        Args:
            x: Architecture encoding(s); a 1-D input is treated as one point

        Returns:
            Acquisition values, one per point
        """
        points = x.reshape(1, -1) if x.ndim == 1 else x
        mu, sigma = self._predict_with_uncertainty(points)
        return expected_improvement(mu, sigma, incumbent, self.xi)

    bounds = self._get_encoding_bounds()

    try:
        x_next = self.acq_optimizer.optimize(
            acquisition_fn=_expected_improvement_at, bounds=bounds, n_candidates=1
        )

        # The optimizer may hand back a batch of one; unwrap it
        if x_next.ndim == 2:
            x_next = x_next[0]

        acq_value = _expected_improvement_at(x_next.reshape(1, -1))[0]
        logger.debug(f"Acquisition optimum: {acq_value:.6f}")

        return x_next

    except Exception as e:
        logger.warning(f"Acquisition optimization failed: {e}. Using random sample.")
        # NOTE(review): this draws from the global NumPy RNG, not self.random_state.
        return np.array([np.random.uniform(lower, upper) for lower, upper in bounds])
|
|
291
|
+
|
|
292
|
+
def _predict_with_uncertainty(self, X: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """
    Predict mean and uncertainty from the individual trees of the forest.

    Every tree in ``self.rf.estimators_`` predicts ``X``; the mean over
    trees is the prediction and the standard deviation over trees, plus a
    1e-6 offset so it is never exactly zero, is the uncertainty.

    Args:
        X: Input points, shape (n_samples, n_features)

    Returns:
        (means, stds) tuple of shape (n_samples,)
    """
    # Rows are trees, columns are input points
    per_tree = np.array([member.predict(X) for member in self.rf.estimators_])

    mean = per_tree.mean(axis=0)
    spread = per_tree.std(axis=0) + 1e-6

    return mean, spread
|
|
318
|
+
|
|
319
|
+
def predict(
    self, graphs: List[ModelGraph], return_std: bool = False
) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]:
    """
    Predict fitness for given architectures using Random Forest.

    The forest is fitted first if it is stale. An empty ``graphs`` list
    yields empty result array(s) instead of handing a malformed (1-D)
    array to the model.

    Args:
        graphs: List of ModelGraph instances
        return_std: If True, return (mean, std), else just mean

    Returns:
        Array of predicted fitness values, or a ``(means, stds)`` tuple of
        arrays when ``return_std`` is True

    Example:
        >>> graphs = [space.sample() for _ in range(5)]
        >>> predictions = optimizer.predict(graphs, return_std=True)
        >>> means, stds = predictions
    """
    # Ensure RF is fitted
    if not self._rf_fitted:
        self._fit_rf()

    # Nothing to predict: np.array([]) would be 1-D and rejected by the forest
    if len(graphs) == 0:
        empty = np.empty(0, dtype=float)
        return (empty, empty.copy()) if return_std else empty

    # Encode architectures into one row per graph
    X = np.array([self._encode_architecture(g) for g in graphs])

    if return_std:
        # Per-tree statistics give both the mean and the spread
        return self._predict_with_uncertainty(X)

    return self.rf.predict(X)
|
|
349
|
+
|
|
350
|
+
def get_feature_importances(self) -> np.ndarray:
    """
    Return the Random Forest's feature importances.

    When the forest is not currently marked as fitted, a warning is logged
    and an empty array is returned instead.

    Returns:
        Feature importance array
    """
    if self._rf_fitted:
        return self.rf.feature_importances_

    logger.warning("RF not fitted, no feature importances available")
    return np.array([])
|
|
364
|
+
|
|
365
|
+
def get_rf_statistics(self) -> Dict[str, Any]:
    """
    Summarize the fitted Random Forest and the observations behind it.

    Returns:
        ``{"fitted": False}`` when the forest is not marked as fitted;
        otherwise a dictionary with observation count, tree count, tree
        depth/node statistics and best/mean/std of the observed fitness
        (None for those three when there are no observations)
    """
    if not self._rf_fitted:
        return {"fitted": False}

    # Per-tree structure, read from each estimator's tree_ attribute
    depths = []
    node_counts = []
    for member in self.rf.estimators_:
        depths.append(member.tree_.max_depth)
    for member in self.rf.estimators_:
        node_counts.append(member.tree_.node_count)

    summary: Dict[str, Any] = {"fitted": True}
    summary["n_observations"] = len(self.X_observed)
    summary["n_estimators"] = len(self.rf.estimators_)
    summary["avg_tree_depth"] = np.mean(depths)
    summary["max_tree_depth"] = max(depths)
    summary["avg_tree_nodes"] = np.mean(node_counts)

    # Fitness statistics fall back to None when nothing has been observed
    if self.y_observed:
        summary["best_observed"] = max(self.y_observed)
    else:
        summary["best_observed"] = None
    if self.y_observed:
        summary["mean_observed"] = np.mean(self.y_observed)
    else:
        summary["mean_observed"] = None
    if self.y_observed:
        summary["std_observed"] = np.std(self.y_observed)
    else:
        summary["std_observed"] = None

    return summary
|
|
390
|
+
|
|
391
|
+
def plot_convergence(self, save_path: Optional[str] = None) -> None:
    """
    Plot optimization convergence.

    Draws the running best fitness as a line and every individual
    observation as a faint marker. The figure is always closed afterwards,
    including when saving or showing raises.

    Args:
        save_path: Optional path to save plot; when omitted the plot is
            shown interactively instead
    """
    try:
        import matplotlib.pyplot as plt
    except ImportError:
        logger.warning("matplotlib not installed, cannot plot")
        return

    if not self.y_observed:
        logger.warning("No observations to plot")
        return

    # Running maximum of the observed fitness values
    best_so_far = []
    current_best = -np.inf
    for y in self.y_observed:
        if y > current_best:
            current_best = y
        best_so_far.append(current_best)

    fig = plt.figure(figsize=(10, 6))
    try:
        plt.plot(best_so_far, "b-", linewidth=2, label="Best fitness")
        plt.plot(self.y_observed, "ro", alpha=0.3, markersize=4, label="Observations")
        plt.xlabel("Iteration", fontsize=12)
        plt.ylabel("Fitness", fontsize=12)
        plt.title("SMAC Optimization Convergence", fontsize=14)
        plt.legend()
        plt.grid(True, alpha=0.3)

        if save_path:
            plt.savefig(save_path, dpi=300, bbox_inches="tight")
            logger.info(f"Convergence plot saved to {save_path}")
        else:
            plt.show()
    finally:
        # Release the figure even if drawing, saving or showing failed
        plt.close(fig)
|
|
433
|
+
|
|
434
|
+
def plot_feature_importance(self, top_k: int = 20, save_path: Optional[str] = None) -> None:
    """
    Plot feature importances from Random Forest.

    Shows a horizontal bar chart of the ``top_k`` most important encoding
    features (fewer if the encoding has fewer features). The figure is
    always closed afterwards, including when saving or showing raises.

    Args:
        top_k: Number of top features to show; must be at least 1
        save_path: Optional path to save plot; when omitted the plot is
            shown interactively instead
    """
    # A slice of [-0:] would select every feature and a negative top_k would
    # silently drop some, so reject those values up front.
    if top_k < 1:
        logger.warning(f"top_k must be >= 1 (got {top_k}), nothing to plot")
        return

    try:
        import matplotlib.pyplot as plt
    except ImportError:
        logger.warning("matplotlib not installed, cannot plot")
        return

    importances = self.get_feature_importances()

    if len(importances) == 0:
        logger.warning("No feature importances available")
        return

    # Indices of the top_k largest importances, most important first
    indices = np.argsort(importances)[-top_k:][::-1]
    values = importances[indices]
    n_shown = len(indices)

    fig = plt.figure(figsize=(10, 6))
    try:
        plt.barh(range(n_shown), values)
        plt.yticks(range(n_shown), [f"Feature {i}" for i in indices])
        plt.xlabel("Importance", fontsize=12)
        # Report the number of bars actually drawn, which may be below top_k
        plt.title(f"Top {n_shown} Feature Importances (Random Forest)", fontsize=14)
        plt.grid(True, alpha=0.3, axis="x")

        if save_path:
            plt.savefig(save_path, dpi=300, bbox_inches="tight")
            logger.info(f"Feature importance plot saved to {save_path}")
        else:
            plt.show()
    finally:
        # Release the figure even if drawing, saving or showing failed
        plt.close(fig)
|
|
473
|
+
|
|
474
|
+
def __repr__(self) -> str:
    """Return a short summary: forest size, depth limit and observation count."""
    fields = (
        f"n_estimators={self.n_estimators}",
        f"max_depth={self.max_depth}",
        f"n_obs={len(self.y_observed)}",
    )
    return "SMACOptimizer(" + ", ".join(fields) + ")"
|
|
482
|
+
|
|
483
|
+
|
|
484
|
+
# Convenience function for quick SMAC optimization
|
|
485
|
+
def optimize_with_smac(
    search_space: SearchSpace,
    evaluator: Any,
    n_iterations: int = 100,
    n_initial: int = 15,
    n_estimators: int = 50,
    verbose: bool = True,
) -> Individual:
    """
    Run a SMAC search with a minimal set of knobs.

    Builds a SMACOptimizer from the given settings and runs its
    ``optimize`` method. With ``verbose`` enabled, a progress line is
    printed on every iteration that is a multiple of 10.

    Args:
        search_space: SearchSpace to optimize over
        evaluator: Fitness evaluation function
        n_iterations: Total number of evaluations
        n_initial: Random samples before SMAC
        n_estimators: Number of trees in Random Forest
        verbose: Print progress

    Returns:
        Best Individual found

    Example:
        >>> from morphml.core.dsl import create_cnn_space
        >>> space = create_cnn_space(num_classes=10)
        >>> best = optimize_with_smac(
        ...     search_space=space,
        ...     evaluator=my_evaluator,
        ...     n_iterations=100,
        ...     n_estimators=50
        ... )
    """
    smac_config = {
        "n_initial_points": n_initial,
        "n_estimators": n_estimators,
        "max_iterations": n_iterations,
    }
    optimizer = SMACOptimizer(search_space=search_space, config=smac_config)

    progress_hook = None
    if verbose:
        # Only installed when verbose, so no further verbosity check is needed
        def progress_hook(iteration: int, best: Individual, history: List) -> None:
            if iteration % 10 == 0:
                print(f"Iteration {iteration}: Best fitness = {best.fitness:.4f}")

    return optimizer.optimize(
        evaluator=evaluator, max_evaluations=n_iterations, callback=progress_hook
    )
|