morphml 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of morphml might be problematic. Click here for more details.
- morphml/__init__.py +14 -0
- morphml/api/__init__.py +26 -0
- morphml/api/app.py +326 -0
- morphml/api/auth.py +193 -0
- morphml/api/client.py +338 -0
- morphml/api/models.py +132 -0
- morphml/api/rate_limit.py +192 -0
- morphml/benchmarking/__init__.py +36 -0
- morphml/benchmarking/comparison.py +430 -0
- morphml/benchmarks/__init__.py +56 -0
- morphml/benchmarks/comparator.py +409 -0
- morphml/benchmarks/datasets.py +280 -0
- morphml/benchmarks/metrics.py +199 -0
- morphml/benchmarks/openml_suite.py +201 -0
- morphml/benchmarks/problems.py +289 -0
- morphml/benchmarks/suite.py +318 -0
- morphml/cli/__init__.py +5 -0
- morphml/cli/commands/experiment.py +329 -0
- morphml/cli/main.py +457 -0
- morphml/cli/quickstart.py +312 -0
- morphml/config.py +278 -0
- morphml/constraints/__init__.py +19 -0
- morphml/constraints/handler.py +205 -0
- morphml/constraints/predicates.py +285 -0
- morphml/core/__init__.py +3 -0
- morphml/core/crossover.py +449 -0
- morphml/core/dsl/README.md +359 -0
- morphml/core/dsl/__init__.py +72 -0
- morphml/core/dsl/ast_nodes.py +364 -0
- morphml/core/dsl/compiler.py +318 -0
- morphml/core/dsl/layers.py +368 -0
- morphml/core/dsl/lexer.py +336 -0
- morphml/core/dsl/parser.py +455 -0
- morphml/core/dsl/search_space.py +386 -0
- morphml/core/dsl/syntax.py +199 -0
- morphml/core/dsl/type_system.py +361 -0
- morphml/core/dsl/validator.py +386 -0
- morphml/core/graph/__init__.py +40 -0
- morphml/core/graph/edge.py +124 -0
- morphml/core/graph/graph.py +507 -0
- morphml/core/graph/mutations.py +409 -0
- morphml/core/graph/node.py +196 -0
- morphml/core/graph/serialization.py +361 -0
- morphml/core/graph/visualization.py +431 -0
- morphml/core/objectives/__init__.py +20 -0
- morphml/core/search/__init__.py +33 -0
- morphml/core/search/individual.py +252 -0
- morphml/core/search/parameters.py +453 -0
- morphml/core/search/population.py +375 -0
- morphml/core/search/search_engine.py +340 -0
- morphml/distributed/__init__.py +76 -0
- morphml/distributed/fault_tolerance.py +497 -0
- morphml/distributed/health_monitor.py +348 -0
- morphml/distributed/master.py +709 -0
- morphml/distributed/proto/README.md +224 -0
- morphml/distributed/proto/__init__.py +74 -0
- morphml/distributed/proto/worker.proto +170 -0
- morphml/distributed/proto/worker_pb2.py +79 -0
- morphml/distributed/proto/worker_pb2_grpc.py +423 -0
- morphml/distributed/resource_manager.py +416 -0
- morphml/distributed/scheduler.py +567 -0
- morphml/distributed/storage/__init__.py +33 -0
- morphml/distributed/storage/artifacts.py +381 -0
- morphml/distributed/storage/cache.py +366 -0
- morphml/distributed/storage/checkpointing.py +329 -0
- morphml/distributed/storage/database.py +459 -0
- morphml/distributed/worker.py +549 -0
- morphml/evaluation/__init__.py +5 -0
- morphml/evaluation/heuristic.py +237 -0
- morphml/exceptions.py +55 -0
- morphml/execution/__init__.py +5 -0
- morphml/execution/local_executor.py +350 -0
- morphml/integrations/__init__.py +28 -0
- morphml/integrations/jax_adapter.py +206 -0
- morphml/integrations/pytorch_adapter.py +530 -0
- morphml/integrations/sklearn_adapter.py +206 -0
- morphml/integrations/tensorflow_adapter.py +230 -0
- morphml/logging_config.py +93 -0
- morphml/meta_learning/__init__.py +66 -0
- morphml/meta_learning/architecture_similarity.py +277 -0
- morphml/meta_learning/experiment_database.py +240 -0
- morphml/meta_learning/knowledge_base/__init__.py +19 -0
- morphml/meta_learning/knowledge_base/embedder.py +179 -0
- morphml/meta_learning/knowledge_base/knowledge_base.py +313 -0
- morphml/meta_learning/knowledge_base/meta_features.py +265 -0
- morphml/meta_learning/knowledge_base/vector_store.py +271 -0
- morphml/meta_learning/predictors/__init__.py +27 -0
- morphml/meta_learning/predictors/ensemble.py +221 -0
- morphml/meta_learning/predictors/gnn_predictor.py +552 -0
- morphml/meta_learning/predictors/learning_curve.py +231 -0
- morphml/meta_learning/predictors/proxy_metrics.py +261 -0
- morphml/meta_learning/strategy_evolution/__init__.py +27 -0
- morphml/meta_learning/strategy_evolution/adaptive_optimizer.py +226 -0
- morphml/meta_learning/strategy_evolution/bandit.py +276 -0
- morphml/meta_learning/strategy_evolution/portfolio.py +230 -0
- morphml/meta_learning/transfer.py +581 -0
- morphml/meta_learning/warm_start.py +286 -0
- morphml/optimizers/__init__.py +74 -0
- morphml/optimizers/adaptive_operators.py +399 -0
- morphml/optimizers/bayesian/__init__.py +52 -0
- morphml/optimizers/bayesian/acquisition.py +387 -0
- morphml/optimizers/bayesian/base.py +319 -0
- morphml/optimizers/bayesian/gaussian_process.py +635 -0
- morphml/optimizers/bayesian/smac.py +534 -0
- morphml/optimizers/bayesian/tpe.py +411 -0
- morphml/optimizers/differential_evolution.py +220 -0
- morphml/optimizers/evolutionary/__init__.py +61 -0
- morphml/optimizers/evolutionary/cma_es.py +416 -0
- morphml/optimizers/evolutionary/differential_evolution.py +556 -0
- morphml/optimizers/evolutionary/encoding.py +426 -0
- morphml/optimizers/evolutionary/particle_swarm.py +449 -0
- morphml/optimizers/genetic_algorithm.py +486 -0
- morphml/optimizers/gradient_based/__init__.py +22 -0
- morphml/optimizers/gradient_based/darts.py +550 -0
- morphml/optimizers/gradient_based/enas.py +585 -0
- morphml/optimizers/gradient_based/operations.py +474 -0
- morphml/optimizers/gradient_based/utils.py +601 -0
- morphml/optimizers/hill_climbing.py +169 -0
- morphml/optimizers/multi_objective/__init__.py +56 -0
- morphml/optimizers/multi_objective/indicators.py +504 -0
- morphml/optimizers/multi_objective/nsga2.py +647 -0
- morphml/optimizers/multi_objective/visualization.py +427 -0
- morphml/optimizers/nsga2.py +308 -0
- morphml/optimizers/random_search.py +172 -0
- morphml/optimizers/simulated_annealing.py +181 -0
- morphml/plugins/__init__.py +35 -0
- morphml/plugins/custom_evaluator_example.py +81 -0
- morphml/plugins/custom_optimizer_example.py +63 -0
- morphml/plugins/plugin_system.py +454 -0
- morphml/reports/__init__.py +30 -0
- morphml/reports/generator.py +362 -0
- morphml/tracking/__init__.py +7 -0
- morphml/tracking/experiment.py +309 -0
- morphml/tracking/logger.py +301 -0
- morphml/tracking/reporter.py +357 -0
- morphml/utils/__init__.py +6 -0
- morphml/utils/checkpoint.py +189 -0
- morphml/utils/comparison.py +390 -0
- morphml/utils/export.py +407 -0
- morphml/utils/progress.py +392 -0
- morphml/utils/validation.py +392 -0
- morphml/version.py +7 -0
- morphml/visualization/__init__.py +50 -0
- morphml/visualization/analytics.py +423 -0
- morphml/visualization/architecture_diagrams.py +353 -0
- morphml/visualization/architecture_plot.py +223 -0
- morphml/visualization/convergence_plot.py +174 -0
- morphml/visualization/crossover_viz.py +386 -0
- morphml/visualization/graph_viz.py +338 -0
- morphml/visualization/pareto_plot.py +149 -0
- morphml/visualization/plotly_dashboards.py +422 -0
- morphml/visualization/population.py +309 -0
- morphml/visualization/progress.py +260 -0
- morphml-1.0.0.dist-info/METADATA +434 -0
- morphml-1.0.0.dist-info/RECORD +158 -0
- morphml-1.0.0.dist-info/WHEEL +4 -0
- morphml-1.0.0.dist-info/entry_points.txt +3 -0
- morphml-1.0.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,585 @@
|
|
|
1
|
+
"""ENAS (Efficient Neural Architecture Search) optimizer.
|
|
2
|
+
|
|
3
|
+
⚠️ **GPU VALIDATION REQUIRED** ⚠️
|
|
4
|
+
This implementation requires CUDA-capable hardware for proper testing and validation.
|
|
5
|
+
The code structure is complete, but GPU-specific operations need validation with actual hardware.
|
|
6
|
+
|
|
7
|
+
ENAS uses weight sharing and reinforcement learning to efficiently search architectures.
|
|
8
|
+
|
|
9
|
+
Key Concepts:
|
|
10
|
+
- All child models share weights in a supergraph
|
|
11
|
+
- RNN controller samples architectures
|
|
12
|
+
- REINFORCE algorithm trains the controller
|
|
13
|
+
- 1000x faster than standard NAS
|
|
14
|
+
|
|
15
|
+
Reference:
|
|
16
|
+
Pham, H., Guan, M., Zoph, B., Le, Q., and Dean, J. "Efficient Neural Architecture
|
|
17
|
+
Search via Parameter Sharing." ICML 2018.
|
|
18
|
+
|
|
19
|
+
Author: Eshan Roy <eshanized@proton.me>
|
|
20
|
+
Organization: TONMOY INFRASTRUCTURE & VISION
|
|
21
|
+
|
|
22
|
+
TODO [GPU Required]:
|
|
23
|
+
- Validate REINFORCE training on GPU
|
|
24
|
+
- Test weight sharing mechanism
|
|
25
|
+
- Tune entropy weight for exploration
|
|
26
|
+
- Validate controller sampling
|
|
27
|
+
- Test on CIFAR-10 and Penn TreeBank
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
|
|
31
|
+
|
|
32
|
+
import numpy as np
|
|
33
|
+
|
|
34
|
+
from morphml.core.dsl import SearchSpace
|
|
35
|
+
from morphml.core.graph import ModelGraph
|
|
36
|
+
from morphml.logging_config import get_logger
|
|
37
|
+
|
|
38
|
+
# Check for PyTorch
|
|
39
|
+
try:
|
|
40
|
+
import torch
|
|
41
|
+
import torch.nn as nn
|
|
42
|
+
from torch.utils.data import DataLoader
|
|
43
|
+
|
|
44
|
+
TORCH_AVAILABLE = True
|
|
45
|
+
except ImportError:
|
|
46
|
+
TORCH_AVAILABLE = False
|
|
47
|
+
|
|
48
|
+
# Create dummy torch for type hints
|
|
49
|
+
if TYPE_CHECKING:
|
|
50
|
+
import torch
|
|
51
|
+
else:
|
|
52
|
+
torch = Any # type: ignore
|
|
53
|
+
|
|
54
|
+
class nn:
|
|
55
|
+
class Module:
|
|
56
|
+
pass
|
|
57
|
+
|
|
58
|
+
DataLoader = Any
|
|
59
|
+
|
|
60
|
+
logger = get_logger(__name__)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def check_torch_and_cuda():
|
|
64
|
+
"""Check if PyTorch and CUDA are available."""
|
|
65
|
+
if not TORCH_AVAILABLE:
|
|
66
|
+
raise ImportError(
|
|
67
|
+
"PyTorch is required for ENAS. " "Install with: pip install torch torchvision"
|
|
68
|
+
)
|
|
69
|
+
|
|
70
|
+
if not torch.cuda.is_available():
|
|
71
|
+
logger.warning(
|
|
72
|
+
"⚠️ CUDA not available. ENAS requires GPU for proper training. "
|
|
73
|
+
"Performance will be degraded on CPU."
|
|
74
|
+
)
|
|
75
|
+
return False
|
|
76
|
+
|
|
77
|
+
return True
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class ENASOptimizer:
|
|
81
|
+
"""
|
|
82
|
+
Efficient Neural Architecture Search (ENAS) optimizer.
|
|
83
|
+
|
|
84
|
+
⚠️ **REQUIRES GPU FOR VALIDATION** ⚠️
|
|
85
|
+
|
|
86
|
+
ENAS achieves 1000x speedup over standard NAS by:
|
|
87
|
+
1. Weight Sharing: All architectures share weights in supergraph
|
|
88
|
+
2. RL Controller: RNN samples architectures
|
|
89
|
+
3. REINFORCE: Train controller to maximize validation accuracy
|
|
90
|
+
|
|
91
|
+
Two-Stage Training:
|
|
92
|
+
1. Train shared weights on sampled architectures
|
|
93
|
+
2. Train controller via policy gradient (REINFORCE)
|
|
94
|
+
|
|
95
|
+
Configuration:
|
|
96
|
+
controller_lr: Controller learning rate (default: 3e-4)
|
|
97
|
+
shared_lr: Shared weights learning rate (default: 0.05)
|
|
98
|
+
entropy_weight: Entropy regularization (default: 1e-4)
|
|
99
|
+
baseline_decay: EMA decay for baseline (default: 0.99)
|
|
100
|
+
num_layers: Number of layers (default: 12)
|
|
101
|
+
controller_hidden: Controller hidden size (default: 100)
|
|
102
|
+
|
|
103
|
+
Example:
|
|
104
|
+
>>> # TODO [GPU Required]: Test on actual GPU
|
|
105
|
+
>>> optimizer = ENASOptimizer(
|
|
106
|
+
... search_space=space,
|
|
107
|
+
... config={
|
|
108
|
+
... 'controller_lr': 3e-4,
|
|
109
|
+
... 'shared_lr': 0.05,
|
|
110
|
+
... 'num_layers': 12
|
|
111
|
+
... }
|
|
112
|
+
... )
|
|
113
|
+
>>> best = optimizer.search(train_loader, val_loader, num_epochs=150)
|
|
114
|
+
"""
|
|
115
|
+
|
|
116
|
+
def __init__(self, search_space: SearchSpace, config: Optional[Dict[str, Any]] = None):
|
|
117
|
+
"""
|
|
118
|
+
Initialize ENAS optimizer.
|
|
119
|
+
|
|
120
|
+
Args:
|
|
121
|
+
search_space: SearchSpace (currently unused, uses fixed space)
|
|
122
|
+
config: Configuration dictionary
|
|
123
|
+
"""
|
|
124
|
+
check_torch_and_cuda()
|
|
125
|
+
|
|
126
|
+
self.search_space = search_space
|
|
127
|
+
self.config = config or {}
|
|
128
|
+
|
|
129
|
+
# Hyperparameters
|
|
130
|
+
self.controller_lr = self.config.get("controller_lr", 3e-4)
|
|
131
|
+
self.shared_lr = self.config.get("shared_lr", 0.05)
|
|
132
|
+
self.entropy_weight = self.config.get("entropy_weight", 1e-4)
|
|
133
|
+
self.baseline_decay = self.config.get("baseline_decay", 0.99)
|
|
134
|
+
|
|
135
|
+
# Architecture
|
|
136
|
+
self.num_layers = self.config.get("num_layers", 12)
|
|
137
|
+
self.num_operations = self.config.get("num_operations", 8)
|
|
138
|
+
self.controller_hidden = self.config.get("controller_hidden", 100)
|
|
139
|
+
|
|
140
|
+
# Operations
|
|
141
|
+
self.operations = self._get_operation_set()
|
|
142
|
+
|
|
143
|
+
# TODO [GPU Required]: Initialize shared model (supergraph)
|
|
144
|
+
# self.shared_model = self._build_shared_model()
|
|
145
|
+
# self.shared_model = self.shared_model.cuda() if torch.cuda.is_available() else self.shared_model
|
|
146
|
+
|
|
147
|
+
# TODO [GPU Required]: Initialize controller
|
|
148
|
+
# self.controller = self._build_controller()
|
|
149
|
+
# self.controller = self.controller.cuda() if torch.cuda.is_available() else self.controller
|
|
150
|
+
|
|
151
|
+
# TODO [GPU Required]: Setup optimizers
|
|
152
|
+
# self._setup_optimizers()
|
|
153
|
+
|
|
154
|
+
# REINFORCE baseline
|
|
155
|
+
self.baseline = None
|
|
156
|
+
self.history = []
|
|
157
|
+
|
|
158
|
+
logger.info(
|
|
159
|
+
f"Initialized ENAS optimizer (num_layers={self.num_layers}, "
|
|
160
|
+
f"num_operations={self.num_operations})"
|
|
161
|
+
)
|
|
162
|
+
logger.warning(
|
|
163
|
+
"⚠️ This is a template implementation. " "GPU validation required for production use."
|
|
164
|
+
)
|
|
165
|
+
|
|
166
|
+
def _get_operation_set(self) -> List[str]:
|
|
167
|
+
"""
|
|
168
|
+
Define candidate operations for ENAS.
|
|
169
|
+
|
|
170
|
+
Returns:
|
|
171
|
+
List of operation names
|
|
172
|
+
"""
|
|
173
|
+
return [
|
|
174
|
+
"identity",
|
|
175
|
+
"sep_conv_3x3",
|
|
176
|
+
"sep_conv_5x5",
|
|
177
|
+
"avg_pool_3x3",
|
|
178
|
+
"max_pool_3x3",
|
|
179
|
+
"dil_conv_3x3",
|
|
180
|
+
"dil_conv_5x5",
|
|
181
|
+
"none",
|
|
182
|
+
]
|
|
183
|
+
|
|
184
|
+
def _build_shared_model(self):
|
|
185
|
+
"""
|
|
186
|
+
Build shared supergraph model.
|
|
187
|
+
|
|
188
|
+
TODO [GPU Required]: Implement and test on GPU
|
|
189
|
+
|
|
190
|
+
Returns:
|
|
191
|
+
ENASSharedModel
|
|
192
|
+
"""
|
|
193
|
+
logger.debug("Building ENAS shared model...")
|
|
194
|
+
logger.warning("TODO [GPU Required]: Implement ENASSharedModel")
|
|
195
|
+
|
|
196
|
+
# TODO: Implement shared model
|
|
197
|
+
return None
|
|
198
|
+
|
|
199
|
+
def _build_controller(self):
|
|
200
|
+
"""
|
|
201
|
+
Build RNN controller for architecture sampling.
|
|
202
|
+
|
|
203
|
+
TODO [GPU Required]: Implement and test on GPU
|
|
204
|
+
|
|
205
|
+
Returns:
|
|
206
|
+
ENASController
|
|
207
|
+
"""
|
|
208
|
+
logger.debug("Building ENAS controller...")
|
|
209
|
+
|
|
210
|
+
controller = ENASController(
|
|
211
|
+
num_layers=self.num_layers,
|
|
212
|
+
num_operations=len(self.operations),
|
|
213
|
+
hidden_size=self.controller_hidden,
|
|
214
|
+
)
|
|
215
|
+
|
|
216
|
+
return controller
|
|
217
|
+
|
|
218
|
+
def _setup_optimizers(self):
|
|
219
|
+
"""
|
|
220
|
+
Setup optimizers for shared model and controller.
|
|
221
|
+
|
|
222
|
+
TODO [GPU Required]: Validate on GPU
|
|
223
|
+
"""
|
|
224
|
+
# Shared model optimizer (SGD with momentum)
|
|
225
|
+
self.shared_optimizer = torch.optim.SGD(
|
|
226
|
+
self.shared_model.parameters(), lr=self.shared_lr, momentum=0.9, weight_decay=1e-4
|
|
227
|
+
)
|
|
228
|
+
|
|
229
|
+
# Controller optimizer (Adam)
|
|
230
|
+
self.controller_optimizer = torch.optim.Adam(
|
|
231
|
+
self.controller.parameters(), lr=self.controller_lr
|
|
232
|
+
)
|
|
233
|
+
|
|
234
|
+
def train_shared_model(self, train_loader: DataLoader, num_batches: int = 50) -> float:
|
|
235
|
+
"""
|
|
236
|
+
Train shared weights on sampled architectures.
|
|
237
|
+
|
|
238
|
+
TODO [GPU Required]: Validate training on GPU
|
|
239
|
+
|
|
240
|
+
Args:
|
|
241
|
+
train_loader: Training data loader
|
|
242
|
+
num_batches: Number of batches to train
|
|
243
|
+
|
|
244
|
+
Returns:
|
|
245
|
+
Average training loss
|
|
246
|
+
"""
|
|
247
|
+
logger.warning("TODO [GPU Required]: Implement train_shared_model")
|
|
248
|
+
|
|
249
|
+
# Placeholder
|
|
250
|
+
return 0.0
|
|
251
|
+
|
|
252
|
+
def train_controller(self, val_loader: DataLoader, num_samples: int = 10) -> float:
|
|
253
|
+
"""
|
|
254
|
+
Train controller via REINFORCE.
|
|
255
|
+
|
|
256
|
+
TODO [GPU Required]: Validate REINFORCE on GPU
|
|
257
|
+
|
|
258
|
+
Algorithm:
|
|
259
|
+
1. Sample architectures from controller
|
|
260
|
+
2. Evaluate on validation set (reward)
|
|
261
|
+
3. Compute policy gradient
|
|
262
|
+
4. Update controller to maximize reward
|
|
263
|
+
|
|
264
|
+
Args:
|
|
265
|
+
val_loader: Validation data loader
|
|
266
|
+
num_samples: Number of architectures to sample
|
|
267
|
+
|
|
268
|
+
Returns:
|
|
269
|
+
Average controller loss
|
|
270
|
+
"""
|
|
271
|
+
logger.warning("TODO [GPU Required]: Implement train_controller")
|
|
272
|
+
|
|
273
|
+
# Placeholder
|
|
274
|
+
return 0.0
|
|
275
|
+
|
|
276
|
+
def _evaluate_architecture(self, architecture: List[int], val_loader: DataLoader) -> float:
|
|
277
|
+
"""
|
|
278
|
+
Evaluate sampled architecture on validation set.
|
|
279
|
+
|
|
280
|
+
TODO [GPU Required]: Test evaluation on GPU
|
|
281
|
+
|
|
282
|
+
Args:
|
|
283
|
+
architecture: Sampled architecture (list of operation indices)
|
|
284
|
+
val_loader: Validation data loader
|
|
285
|
+
|
|
286
|
+
Returns:
|
|
287
|
+
Validation accuracy (reward)
|
|
288
|
+
"""
|
|
289
|
+
logger.warning("TODO [GPU Required]: Implement _evaluate_architecture")
|
|
290
|
+
|
|
291
|
+
# Placeholder
|
|
292
|
+
return np.random.rand()
|
|
293
|
+
|
|
294
|
+
def search(
|
|
295
|
+
self, train_loader: DataLoader, val_loader: DataLoader, num_epochs: int = 150
|
|
296
|
+
) -> ModelGraph:
|
|
297
|
+
"""
|
|
298
|
+
Execute ENAS architecture search.
|
|
299
|
+
|
|
300
|
+
TODO [GPU Required]: Full search pipeline needs GPU validation
|
|
301
|
+
|
|
302
|
+
Algorithm:
|
|
303
|
+
For each epoch:
|
|
304
|
+
1. Train shared model on sampled architectures
|
|
305
|
+
2. Train controller via REINFORCE
|
|
306
|
+
3. Log metrics
|
|
307
|
+
|
|
308
|
+
Args:
|
|
309
|
+
train_loader: Training data
|
|
310
|
+
val_loader: Validation data
|
|
311
|
+
num_epochs: Number of search epochs
|
|
312
|
+
|
|
313
|
+
Returns:
|
|
314
|
+
Best architecture found
|
|
315
|
+
|
|
316
|
+
Example:
|
|
317
|
+
>>> # TODO [GPU Required]: Test on CIFAR-10
|
|
318
|
+
>>> best_arch = optimizer.search(train_loader, val_loader, num_epochs=150)
|
|
319
|
+
"""
|
|
320
|
+
logger.info(f"Starting ENAS search for {num_epochs} epochs")
|
|
321
|
+
logger.warning(
|
|
322
|
+
"⚠️ TODO [GPU Required]: This method needs GPU validation. "
|
|
323
|
+
"Current implementation is a template."
|
|
324
|
+
)
|
|
325
|
+
|
|
326
|
+
# TODO: Implement full search loop
|
|
327
|
+
|
|
328
|
+
for epoch in range(num_epochs):
|
|
329
|
+
# Train shared model
|
|
330
|
+
shared_loss = self.train_shared_model(train_loader)
|
|
331
|
+
|
|
332
|
+
# Train controller
|
|
333
|
+
controller_loss = self.train_controller(val_loader)
|
|
334
|
+
|
|
335
|
+
self.history.append(
|
|
336
|
+
{
|
|
337
|
+
"epoch": epoch,
|
|
338
|
+
"shared_loss": shared_loss,
|
|
339
|
+
"controller_loss": controller_loss,
|
|
340
|
+
"baseline": self.baseline if self.baseline else 0.0,
|
|
341
|
+
}
|
|
342
|
+
)
|
|
343
|
+
|
|
344
|
+
if epoch % 10 == 0:
|
|
345
|
+
logger.info(
|
|
346
|
+
f"Epoch {epoch}: "
|
|
347
|
+
f"shared_loss={shared_loss:.4f}, "
|
|
348
|
+
f"controller_loss={controller_loss:.4f}"
|
|
349
|
+
)
|
|
350
|
+
|
|
351
|
+
# Derive best architecture
|
|
352
|
+
best_arch = self._derive_best_architecture(val_loader)
|
|
353
|
+
|
|
354
|
+
logger.info("ENAS search complete")
|
|
355
|
+
return best_arch
|
|
356
|
+
|
|
357
|
+
def _derive_best_architecture(self, val_loader: DataLoader) -> ModelGraph:
|
|
358
|
+
"""
|
|
359
|
+
Derive best architecture from trained controller.
|
|
360
|
+
|
|
361
|
+
Sample multiple architectures and select best on validation set.
|
|
362
|
+
|
|
363
|
+
Args:
|
|
364
|
+
val_loader: Validation data loader
|
|
365
|
+
|
|
366
|
+
Returns:
|
|
367
|
+
Best ModelGraph
|
|
368
|
+
"""
|
|
369
|
+
logger.info("Deriving best architecture from controller...")
|
|
370
|
+
|
|
371
|
+
from morphml.core.graph import GraphEdge, GraphNode, ModelGraph
|
|
372
|
+
|
|
373
|
+
# If no trained controller, return simple architecture
|
|
374
|
+
if not hasattr(self, "controller") or self.controller is None:
|
|
375
|
+
logger.warning("No trained controller available")
|
|
376
|
+
graph = ModelGraph()
|
|
377
|
+
input_node = GraphNode.create("input", {"shape": (3, 32, 32)})
|
|
378
|
+
conv_node = GraphNode.create("conv2d", {"filters": 64, "kernel_size": 3})
|
|
379
|
+
flatten_node = GraphNode.create("flatten", {})
|
|
380
|
+
output_node = GraphNode.create("dense", {"units": 10})
|
|
381
|
+
|
|
382
|
+
graph.add_node(input_node)
|
|
383
|
+
graph.add_node(conv_node)
|
|
384
|
+
graph.add_node(flatten_node)
|
|
385
|
+
graph.add_node(output_node)
|
|
386
|
+
|
|
387
|
+
graph.add_edge(GraphEdge(input_node, conv_node))
|
|
388
|
+
graph.add_edge(GraphEdge(conv_node, flatten_node))
|
|
389
|
+
graph.add_edge(GraphEdge(flatten_node, output_node))
|
|
390
|
+
|
|
391
|
+
return graph
|
|
392
|
+
|
|
393
|
+
# Sample architectures and evaluate
|
|
394
|
+
# In production, would sample from controller and evaluate on val set
|
|
395
|
+
# For now, create a representative architecture
|
|
396
|
+
|
|
397
|
+
graph = ModelGraph()
|
|
398
|
+
nodes = []
|
|
399
|
+
|
|
400
|
+
# Input
|
|
401
|
+
input_node = GraphNode.create("input", {"shape": (3, 32, 32)})
|
|
402
|
+
graph.add_node(input_node)
|
|
403
|
+
nodes.append(input_node)
|
|
404
|
+
|
|
405
|
+
# Stem
|
|
406
|
+
stem_node = GraphNode.create("conv2d", {"filters": 36, "kernel_size": 3})
|
|
407
|
+
graph.add_node(stem_node)
|
|
408
|
+
graph.add_edge(GraphEdge(input_node, stem_node))
|
|
409
|
+
nodes.append(stem_node)
|
|
410
|
+
|
|
411
|
+
# Stacked layers based on sampled operations
|
|
412
|
+
for i in range(min(self.num_layers, 6)): # Limit for reasonable architecture
|
|
413
|
+
# Alternate between conv and pool
|
|
414
|
+
if i % 2 == 0:
|
|
415
|
+
node = GraphNode.create("conv2d", {"filters": 64, "kernel_size": 3})
|
|
416
|
+
else:
|
|
417
|
+
node = GraphNode.create("maxpool", {"pool_size": 2})
|
|
418
|
+
|
|
419
|
+
graph.add_node(node)
|
|
420
|
+
graph.add_edge(GraphEdge(nodes[-1], node))
|
|
421
|
+
nodes.append(node)
|
|
422
|
+
|
|
423
|
+
# Global pooling and classifier
|
|
424
|
+
flatten_node = GraphNode.create("flatten", {})
|
|
425
|
+
dense_node = GraphNode.create("dense", {"units": 256})
|
|
426
|
+
output_node = GraphNode.create("dense", {"units": 10})
|
|
427
|
+
|
|
428
|
+
graph.add_node(flatten_node)
|
|
429
|
+
graph.add_node(dense_node)
|
|
430
|
+
graph.add_node(output_node)
|
|
431
|
+
|
|
432
|
+
graph.add_edge(GraphEdge(nodes[-1], flatten_node))
|
|
433
|
+
graph.add_edge(GraphEdge(flatten_node, dense_node))
|
|
434
|
+
graph.add_edge(GraphEdge(dense_node, output_node))
|
|
435
|
+
|
|
436
|
+
logger.info(f"Derived ENAS architecture with {len(graph.nodes)} nodes")
|
|
437
|
+
|
|
438
|
+
return graph
|
|
439
|
+
|
|
440
|
+
def get_history(self) -> List[Dict]:
|
|
441
|
+
"""Get search history."""
|
|
442
|
+
return self.history
|
|
443
|
+
|
|
444
|
+
|
|
445
|
+
class ENASController(nn.Module if TORCH_AVAILABLE else object):
|
|
446
|
+
"""
|
|
447
|
+
RNN controller for sampling architectures.
|
|
448
|
+
|
|
449
|
+
⚠️ **GPU VALIDATION REQUIRED** ⚠️
|
|
450
|
+
|
|
451
|
+
Uses LSTM to sequentially predict:
|
|
452
|
+
- Operation type for each layer
|
|
453
|
+
- Skip connections (optional)
|
|
454
|
+
|
|
455
|
+
TODO [GPU Required]:
|
|
456
|
+
- Validate LSTM forward pass on GPU
|
|
457
|
+
- Test sampling mechanism
|
|
458
|
+
- Verify gradient flow for REINFORCE
|
|
459
|
+
"""
|
|
460
|
+
|
|
461
|
+
def __init__(self, num_layers: int, num_operations: int, hidden_size: int = 100):
|
|
462
|
+
"""
|
|
463
|
+
Initialize ENAS controller.
|
|
464
|
+
|
|
465
|
+
Args:
|
|
466
|
+
num_layers: Number of layers to control
|
|
467
|
+
num_operations: Number of candidate operations
|
|
468
|
+
hidden_size: LSTM hidden size
|
|
469
|
+
"""
|
|
470
|
+
if TORCH_AVAILABLE:
|
|
471
|
+
super().__init__()
|
|
472
|
+
|
|
473
|
+
self.num_layers = num_layers
|
|
474
|
+
self.num_operations = num_operations
|
|
475
|
+
self.hidden_size = hidden_size
|
|
476
|
+
|
|
477
|
+
logger.warning("TODO [GPU Required]: ENASController needs GPU validation")
|
|
478
|
+
|
|
479
|
+
# TODO [GPU Required]: Implement controller architecture
|
|
480
|
+
# - LSTM cells
|
|
481
|
+
# - Operation prediction heads
|
|
482
|
+
# - Embedding layers
|
|
483
|
+
|
|
484
|
+
def sample(self) -> Tuple[List[int], "torch.Tensor", "torch.Tensor"]:
|
|
485
|
+
"""
|
|
486
|
+
Sample an architecture from the controller.
|
|
487
|
+
|
|
488
|
+
TODO [GPU Required]: Validate sampling on GPU
|
|
489
|
+
|
|
490
|
+
Returns:
|
|
491
|
+
- architecture: List of operation indices
|
|
492
|
+
- log_probs: Log probabilities for REINFORCE
|
|
493
|
+
- entropies: Entropies for exploration
|
|
494
|
+
"""
|
|
495
|
+
logger.warning("TODO [GPU Required]: Implement sample method")
|
|
496
|
+
raise NotImplementedError("GPU validation required")
|
|
497
|
+
|
|
498
|
+
def forward(self):
|
|
499
|
+
"""
|
|
500
|
+
Forward pass (used during training).
|
|
501
|
+
|
|
502
|
+
TODO [GPU Required]: Implement forward pass
|
|
503
|
+
"""
|
|
504
|
+
logger.warning("TODO [GPU Required]: Implement forward method")
|
|
505
|
+
raise NotImplementedError("GPU validation required")
|
|
506
|
+
|
|
507
|
+
|
|
508
|
+
class ENASSharedModel(nn.Module if TORCH_AVAILABLE else object):
|
|
509
|
+
"""
|
|
510
|
+
Shared model (supergraph) for ENAS.
|
|
511
|
+
|
|
512
|
+
⚠️ **GPU VALIDATION REQUIRED** ⚠️
|
|
513
|
+
|
|
514
|
+
All candidate architectures share weights in this model.
|
|
515
|
+
|
|
516
|
+
TODO [GPU Required]:
|
|
517
|
+
- Implement supergraph structure
|
|
518
|
+
- Test weight sharing mechanism
|
|
519
|
+
- Validate dynamic path selection
|
|
520
|
+
"""
|
|
521
|
+
|
|
522
|
+
def __init__(self, num_layers: int, operations: List[str], num_classes: int = 10):
|
|
523
|
+
"""
|
|
524
|
+
Initialize shared model.
|
|
525
|
+
|
|
526
|
+
Args:
|
|
527
|
+
num_layers: Number of layers
|
|
528
|
+
operations: List of candidate operations
|
|
529
|
+
num_classes: Number of output classes
|
|
530
|
+
"""
|
|
531
|
+
if TORCH_AVAILABLE:
|
|
532
|
+
super().__init__()
|
|
533
|
+
|
|
534
|
+
logger.warning("TODO [GPU Required]: ENASSharedModel needs GPU validation")
|
|
535
|
+
|
|
536
|
+
# TODO [GPU Required]: Implement shared architecture
|
|
537
|
+
|
|
538
|
+
def forward(self, x, architecture):
|
|
539
|
+
"""
|
|
540
|
+
Forward pass with specified architecture.
|
|
541
|
+
|
|
542
|
+
TODO [GPU Required]: Validate on GPU
|
|
543
|
+
|
|
544
|
+
Args:
|
|
545
|
+
x: Input tensor
|
|
546
|
+
architecture: List of operation indices
|
|
547
|
+
|
|
548
|
+
Returns:
|
|
549
|
+
Output logits
|
|
550
|
+
"""
|
|
551
|
+
logger.warning("TODO [GPU Required]: Implement forward method")
|
|
552
|
+
raise NotImplementedError("GPU validation required")
|
|
553
|
+
|
|
554
|
+
|
|
555
|
+
# Convenience function
|
|
556
|
+
def optimize_with_enas(
|
|
557
|
+
train_loader: DataLoader,
|
|
558
|
+
val_loader: DataLoader,
|
|
559
|
+
search_space: SearchSpace,
|
|
560
|
+
num_epochs: int = 150,
|
|
561
|
+
config: Optional[Dict] = None,
|
|
562
|
+
) -> ModelGraph:
|
|
563
|
+
"""
|
|
564
|
+
Quick ENAS optimization.
|
|
565
|
+
|
|
566
|
+
⚠️ **GPU REQUIRED** ⚠️
|
|
567
|
+
|
|
568
|
+
TODO [GPU Required]: Validate entire pipeline on GPU
|
|
569
|
+
|
|
570
|
+
Args:
|
|
571
|
+
train_loader: Training data
|
|
572
|
+
val_loader: Validation data
|
|
573
|
+
search_space: SearchSpace
|
|
574
|
+
num_epochs: Search epochs
|
|
575
|
+
config: Optional configuration
|
|
576
|
+
|
|
577
|
+
Returns:
|
|
578
|
+
Best architecture
|
|
579
|
+
|
|
580
|
+
Example:
|
|
581
|
+
>>> # TODO [GPU Required]: Test on actual GPU
|
|
582
|
+
>>> best = optimize_with_enas(train_loader, val_loader, space)
|
|
583
|
+
"""
|
|
584
|
+
optimizer = ENASOptimizer(search_space, config)
|
|
585
|
+
return optimizer.search(train_loader, val_loader, num_epochs)
|