morphml-1.0.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of morphml might be problematic.
- morphml/__init__.py +14 -0
- morphml/api/__init__.py +26 -0
- morphml/api/app.py +326 -0
- morphml/api/auth.py +193 -0
- morphml/api/client.py +338 -0
- morphml/api/models.py +132 -0
- morphml/api/rate_limit.py +192 -0
- morphml/benchmarking/__init__.py +36 -0
- morphml/benchmarking/comparison.py +430 -0
- morphml/benchmarks/__init__.py +56 -0
- morphml/benchmarks/comparator.py +409 -0
- morphml/benchmarks/datasets.py +280 -0
- morphml/benchmarks/metrics.py +199 -0
- morphml/benchmarks/openml_suite.py +201 -0
- morphml/benchmarks/problems.py +289 -0
- morphml/benchmarks/suite.py +318 -0
- morphml/cli/__init__.py +5 -0
- morphml/cli/commands/experiment.py +329 -0
- morphml/cli/main.py +457 -0
- morphml/cli/quickstart.py +312 -0
- morphml/config.py +278 -0
- morphml/constraints/__init__.py +19 -0
- morphml/constraints/handler.py +205 -0
- morphml/constraints/predicates.py +285 -0
- morphml/core/__init__.py +3 -0
- morphml/core/crossover.py +449 -0
- morphml/core/dsl/README.md +359 -0
- morphml/core/dsl/__init__.py +72 -0
- morphml/core/dsl/ast_nodes.py +364 -0
- morphml/core/dsl/compiler.py +318 -0
- morphml/core/dsl/layers.py +368 -0
- morphml/core/dsl/lexer.py +336 -0
- morphml/core/dsl/parser.py +455 -0
- morphml/core/dsl/search_space.py +386 -0
- morphml/core/dsl/syntax.py +199 -0
- morphml/core/dsl/type_system.py +361 -0
- morphml/core/dsl/validator.py +386 -0
- morphml/core/graph/__init__.py +40 -0
- morphml/core/graph/edge.py +124 -0
- morphml/core/graph/graph.py +507 -0
- morphml/core/graph/mutations.py +409 -0
- morphml/core/graph/node.py +196 -0
- morphml/core/graph/serialization.py +361 -0
- morphml/core/graph/visualization.py +431 -0
- morphml/core/objectives/__init__.py +20 -0
- morphml/core/search/__init__.py +33 -0
- morphml/core/search/individual.py +252 -0
- morphml/core/search/parameters.py +453 -0
- morphml/core/search/population.py +375 -0
- morphml/core/search/search_engine.py +340 -0
- morphml/distributed/__init__.py +76 -0
- morphml/distributed/fault_tolerance.py +497 -0
- morphml/distributed/health_monitor.py +348 -0
- morphml/distributed/master.py +709 -0
- morphml/distributed/proto/README.md +224 -0
- morphml/distributed/proto/__init__.py +74 -0
- morphml/distributed/proto/worker.proto +170 -0
- morphml/distributed/proto/worker_pb2.py +79 -0
- morphml/distributed/proto/worker_pb2_grpc.py +423 -0
- morphml/distributed/resource_manager.py +416 -0
- morphml/distributed/scheduler.py +567 -0
- morphml/distributed/storage/__init__.py +33 -0
- morphml/distributed/storage/artifacts.py +381 -0
- morphml/distributed/storage/cache.py +366 -0
- morphml/distributed/storage/checkpointing.py +329 -0
- morphml/distributed/storage/database.py +459 -0
- morphml/distributed/worker.py +549 -0
- morphml/evaluation/__init__.py +5 -0
- morphml/evaluation/heuristic.py +237 -0
- morphml/exceptions.py +55 -0
- morphml/execution/__init__.py +5 -0
- morphml/execution/local_executor.py +350 -0
- morphml/integrations/__init__.py +28 -0
- morphml/integrations/jax_adapter.py +206 -0
- morphml/integrations/pytorch_adapter.py +530 -0
- morphml/integrations/sklearn_adapter.py +206 -0
- morphml/integrations/tensorflow_adapter.py +230 -0
- morphml/logging_config.py +93 -0
- morphml/meta_learning/__init__.py +66 -0
- morphml/meta_learning/architecture_similarity.py +277 -0
- morphml/meta_learning/experiment_database.py +240 -0
- morphml/meta_learning/knowledge_base/__init__.py +19 -0
- morphml/meta_learning/knowledge_base/embedder.py +179 -0
- morphml/meta_learning/knowledge_base/knowledge_base.py +313 -0
- morphml/meta_learning/knowledge_base/meta_features.py +265 -0
- morphml/meta_learning/knowledge_base/vector_store.py +271 -0
- morphml/meta_learning/predictors/__init__.py +27 -0
- morphml/meta_learning/predictors/ensemble.py +221 -0
- morphml/meta_learning/predictors/gnn_predictor.py +552 -0
- morphml/meta_learning/predictors/learning_curve.py +231 -0
- morphml/meta_learning/predictors/proxy_metrics.py +261 -0
- morphml/meta_learning/strategy_evolution/__init__.py +27 -0
- morphml/meta_learning/strategy_evolution/adaptive_optimizer.py +226 -0
- morphml/meta_learning/strategy_evolution/bandit.py +276 -0
- morphml/meta_learning/strategy_evolution/portfolio.py +230 -0
- morphml/meta_learning/transfer.py +581 -0
- morphml/meta_learning/warm_start.py +286 -0
- morphml/optimizers/__init__.py +74 -0
- morphml/optimizers/adaptive_operators.py +399 -0
- morphml/optimizers/bayesian/__init__.py +52 -0
- morphml/optimizers/bayesian/acquisition.py +387 -0
- morphml/optimizers/bayesian/base.py +319 -0
- morphml/optimizers/bayesian/gaussian_process.py +635 -0
- morphml/optimizers/bayesian/smac.py +534 -0
- morphml/optimizers/bayesian/tpe.py +411 -0
- morphml/optimizers/differential_evolution.py +220 -0
- morphml/optimizers/evolutionary/__init__.py +61 -0
- morphml/optimizers/evolutionary/cma_es.py +416 -0
- morphml/optimizers/evolutionary/differential_evolution.py +556 -0
- morphml/optimizers/evolutionary/encoding.py +426 -0
- morphml/optimizers/evolutionary/particle_swarm.py +449 -0
- morphml/optimizers/genetic_algorithm.py +486 -0
- morphml/optimizers/gradient_based/__init__.py +22 -0
- morphml/optimizers/gradient_based/darts.py +550 -0
- morphml/optimizers/gradient_based/enas.py +585 -0
- morphml/optimizers/gradient_based/operations.py +474 -0
- morphml/optimizers/gradient_based/utils.py +601 -0
- morphml/optimizers/hill_climbing.py +169 -0
- morphml/optimizers/multi_objective/__init__.py +56 -0
- morphml/optimizers/multi_objective/indicators.py +504 -0
- morphml/optimizers/multi_objective/nsga2.py +647 -0
- morphml/optimizers/multi_objective/visualization.py +427 -0
- morphml/optimizers/nsga2.py +308 -0
- morphml/optimizers/random_search.py +172 -0
- morphml/optimizers/simulated_annealing.py +181 -0
- morphml/plugins/__init__.py +35 -0
- morphml/plugins/custom_evaluator_example.py +81 -0
- morphml/plugins/custom_optimizer_example.py +63 -0
- morphml/plugins/plugin_system.py +454 -0
- morphml/reports/__init__.py +30 -0
- morphml/reports/generator.py +362 -0
- morphml/tracking/__init__.py +7 -0
- morphml/tracking/experiment.py +309 -0
- morphml/tracking/logger.py +301 -0
- morphml/tracking/reporter.py +357 -0
- morphml/utils/__init__.py +6 -0
- morphml/utils/checkpoint.py +189 -0
- morphml/utils/comparison.py +390 -0
- morphml/utils/export.py +407 -0
- morphml/utils/progress.py +392 -0
- morphml/utils/validation.py +392 -0
- morphml/version.py +7 -0
- morphml/visualization/__init__.py +50 -0
- morphml/visualization/analytics.py +423 -0
- morphml/visualization/architecture_diagrams.py +353 -0
- morphml/visualization/architecture_plot.py +223 -0
- morphml/visualization/convergence_plot.py +174 -0
- morphml/visualization/crossover_viz.py +386 -0
- morphml/visualization/graph_viz.py +338 -0
- morphml/visualization/pareto_plot.py +149 -0
- morphml/visualization/plotly_dashboards.py +422 -0
- morphml/visualization/population.py +309 -0
- morphml/visualization/progress.py +260 -0
- morphml-1.0.0.dist-info/METADATA +434 -0
- morphml-1.0.0.dist-info/RECORD +158 -0
- morphml-1.0.0.dist-info/WHEEL +4 -0
- morphml-1.0.0.dist-info/entry_points.txt +3 -0
- morphml-1.0.0.dist-info/licenses/LICENSE +21 -0
morphml/meta_learning/predictors/learning_curve.py

@@ -0,0 +1,231 @@

"""Learning curve extrapolation for early stopping.

Predicts final performance from early training epochs.

Author: Eshan Roy <eshanized@proton.me>
Organization: TONMOY INFRASTRUCTURE & VISION
"""

from typing import List, Optional, Tuple

import numpy as np

from morphml.logging_config import get_logger

logger = get_logger(__name__)

try:
    from scipy.optimize import curve_fit

    SCIPY_AVAILABLE = True
except ImportError:
    SCIPY_AVAILABLE = False


class LearningCurvePredictor:
    """
    Predict final accuracy from early training epochs.

    Uses curve fitting to extrapolate learning curves:
    - Power law: acc(t) = a - b * t^(-c)
    - Exponential: acc(t) = a * (1 - exp(-b * t))

    Example:
        >>> predictor = LearningCurvePredictor()
        >>>
        >>> # Observe first 10 epochs
        >>> epochs = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
        >>> accuracies = [0.3, 0.45, 0.55, 0.62, 0.67, 0.71, 0.74, 0.76, 0.78, 0.79]
        >>>
        >>> # Predict final accuracy at epoch 200
        >>> final_acc = predictor.predict_final_accuracy(accuracies, epochs, final_epoch=200)
        >>> print(f"Predicted final: {final_acc:.3f}")
    """

    def __init__(self, curve_type: str = "power_law"):
        """
        Initialize predictor.

        Args:
            curve_type: Type of curve to fit ('power_law' or 'exponential')
        """
        self.curve_type = curve_type
        logger.info(f"Initialized LearningCurvePredictor (type={curve_type})")

    @staticmethod
    def power_law(t: np.ndarray, a: float, b: float, c: float) -> np.ndarray:
        """
        Power law curve: acc(t) = a - b * t^(-c)

        Args:
            t: Time steps (epochs)
            a: Asymptotic accuracy
            b: Scale parameter
            c: Decay rate

        Returns:
            Predicted accuracies
        """
        return a - b * np.power(t, -c)

    @staticmethod
    def exponential(t: np.ndarray, a: float, b: float) -> np.ndarray:
        """
        Exponential curve: acc(t) = a * (1 - exp(-b * t))

        Args:
            t: Time steps (epochs)
            a: Asymptotic accuracy
            b: Convergence rate

        Returns:
            Predicted accuracies
        """
        return a * (1 - np.exp(-b * t))

    def predict_final_accuracy(
        self,
        observed_accuracies: List[float],
        observed_epochs: Optional[List[int]] = None,
        final_epoch: int = 200,
    ) -> float:
        """
        Extrapolate learning curve to predict final accuracy.

        Args:
            observed_accuracies: Observed accuracies
            observed_epochs: Corresponding epochs (default: [1, 2, 3, ...])
            final_epoch: Epoch to predict

        Returns:
            Predicted final accuracy
        """
        if not SCIPY_AVAILABLE:
            logger.warning("scipy not available, returning last observed accuracy")
            return observed_accuracies[-1] if observed_accuracies else 0.5

        # Default epochs
        if observed_epochs is None:
            observed_epochs = list(range(1, len(observed_accuracies) + 1))

        if len(observed_accuracies) < 3:
            logger.warning("Too few observations for curve fitting")
            return observed_accuracies[-1] if observed_accuracies else 0.5

        # Convert to arrays
        t = np.array(observed_epochs, dtype=float)
        acc = np.array(observed_accuracies, dtype=float)

        try:
            if self.curve_type == "power_law":
                # Fit power law
                params, _ = curve_fit(
                    self.power_law,
                    t,
                    acc,
                    p0=[0.9, 0.1, 0.5],
                    bounds=([0, 0, 0], [1.0, 1.0, 5.0]),
                    maxfev=1000,
                )

                # Predict
                prediction = self.power_law(final_epoch, *params)

            elif self.curve_type == "exponential":
                # Fit exponential
                params, _ = curve_fit(
                    self.exponential,
                    t,
                    acc,
                    p0=[0.9, 0.01],
                    bounds=([0, 0], [1.0, 1.0]),
                    maxfev=1000,
                )

                # Predict
                prediction = self.exponential(final_epoch, *params)

            else:
                raise ValueError(f"Unknown curve type: {self.curve_type}")

            # Clip to valid range
            prediction = np.clip(prediction, 0.0, 1.0)

            logger.debug(
                f"Extrapolated from {len(acc)} epochs to epoch {final_epoch}: {prediction:.4f}"
            )

            return float(prediction)

        except Exception as e:
            logger.warning(f"Curve fitting failed: {e}, using last observed")
            return observed_accuracies[-1]
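For reviewers who want to sanity-check the extrapolation without installing the package, here is a minimal standalone sketch of the same power-law fit, using only scipy and numpy. The sample values come from the class docstring; the initial guess and bounds mirror predict_final_accuracy().

# Standalone sketch of the power-law extrapolation above (illustrative only).
import numpy as np
from scipy.optimize import curve_fit

def power_law(t, a, b, c):
    return a - b * np.power(t, -c)

epochs = np.arange(1, 11, dtype=float)
accs = np.array([0.3, 0.45, 0.55, 0.62, 0.67, 0.71, 0.74, 0.76, 0.78, 0.79])

# Same p0 and bounds as predict_final_accuracy()
params, _ = curve_fit(power_law, epochs, accs,
                      p0=[0.9, 0.1, 0.5], bounds=([0, 0, 0], [1.0, 1.0, 5.0]),
                      maxfev=1000)
print(f"predicted accuracy at epoch 200: {power_law(200.0, *params):.3f}")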
    def should_early_stop(
        self,
        observed_accuracies: List[float],
        observed_epochs: Optional[List[int]] = None,
        threshold: float = 0.8,
        confidence: float = 0.95,
    ) -> bool:
        """
        Decide whether to stop training early.

        Args:
            observed_accuracies: Observed accuracies
            observed_epochs: Corresponding epochs
            threshold: Minimum required final accuracy
            confidence: Confidence level for prediction (currently unused; a
                fixed 5% margin stands in for an uncertainty estimate)

        Returns:
            True if should stop early (predicted to not reach threshold)
        """
        # Predict final accuracy
        predicted_final = self.predict_final_accuracy(observed_accuracies, observed_epochs)

        # Conservative: subtract a fixed margin for prediction uncertainty.
        # NOTE: `confidence` is accepted for API compatibility but not used here.
        margin = 0.05  # 5% margin
        predicted_with_margin = predicted_final - margin

        # Stop if predicted final is below threshold
        should_stop = predicted_with_margin < threshold

        if should_stop:
            logger.info(
                f"Early stopping recommended: predicted final {predicted_final:.3f} "
                f"(with margin: {predicted_with_margin:.3f}) < threshold {threshold:.3f}"
            )

        return should_stop
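A hedged usage sketch for the early-stop decision (assumes morphml 1.0.0 is installed and that the class lives at the module path shown in the file listing; accuracy values are illustrative): a search loop can abandon a candidate once the extrapolated curve falls short of the target.

# Illustrative only: abandon a plateauing candidate early.
from morphml.meta_learning.predictors.learning_curve import LearningCurvePredictor

predictor = LearningCurvePredictor(curve_type="power_law")
history = [0.30, 0.45, 0.52, 0.55, 0.57, 0.58]  # plateauing well below 0.9

if predictor.should_early_stop(history, threshold=0.9):
    print("candidate unlikely to reach 0.90 -- stop training")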
    def fit_curve(
        self,
        observed_accuracies: List[float],
        observed_epochs: Optional[List[int]] = None,
    ) -> Tuple[np.ndarray, np.ndarray]:
        """
        Fit curve to observed data and return full trajectory.

        Args:
            observed_accuracies: Observed accuracies
            observed_epochs: Corresponding epochs

        Returns:
            (fitted_epochs, fitted_accuracies) for plotting
        """
        if observed_epochs is None:
            observed_epochs = list(range(1, len(observed_accuracies) + 1))

        # Generate dense time steps for smooth curve
        max_epoch = max(observed_epochs)
        fitted_epochs = np.linspace(1, max_epoch * 2, 100)

        # Predict at each point
        fitted_accuracies = []
        for epoch in fitted_epochs:
            pred = self.predict_final_accuracy(
                observed_accuracies, observed_epochs, final_epoch=int(epoch)
            )
            fitted_accuracies.append(pred)

        return fitted_epochs, np.array(fitted_accuracies)
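One note on cost: fit_curve() calls predict_final_accuracy() for each of the 100 grid points, re-running curve_fit every time. Reusing power_law, epochs, accs, and params from the standalone sketch above, a single fit produces the same trajectory (illustrative only, not the package's code):

# Fit once, then evaluate the dense grid directly.
grid = np.linspace(1, epochs.max() * 2, 100)
trajectory = np.clip(power_law(grid, *params), 0.0, 1.0)
print(trajectory[:5])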
morphml/meta_learning/predictors/proxy_metrics.py

@@ -0,0 +1,261 @@

"""Proxy metric-based performance prediction.

Fast prediction using cheap architectural features.

Author: Eshan Roy <eshanized@proton.me>
Organization: TONMOY INFRASTRUCTURE & VISION
"""

from typing import Dict, List, Tuple

import numpy as np

from morphml.core.graph import ModelGraph
from morphml.logging_config import get_logger

logger = get_logger(__name__)

try:
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.preprocessing import StandardScaler

    SKLEARN_AVAILABLE = True
except ImportError:
    SKLEARN_AVAILABLE = False


class ProxyMetricPredictor:
    """
    Predict performance using cheap proxy metrics.

    Uses architectural features that can be computed instantly:
    - Number of parameters
    - Network depth
    - Network width
    - Operation diversity
    - Skip connections

    Args:
        use_scaler: Whether to normalize features

    Example:
        >>> predictor = ProxyMetricPredictor()
        >>>
        >>> # Train on historical data
        >>> training_data = [(graph1, 0.92), (graph2, 0.87), ...]
        >>> predictor.train(training_data)
        >>>
        >>> # Predict new architecture
        >>> predicted_acc = predictor.predict(new_graph)
    """

    def __init__(self, use_scaler: bool = True):
        """Initialize predictor."""
        self.model = None
        # Guard on SKLEARN_AVAILABLE: StandardScaler is undefined when the
        # sklearn import above failed.
        self.scaler = StandardScaler() if (use_scaler and SKLEARN_AVAILABLE) else None
        self.feature_names = []
        self.is_trained = False

        logger.info("Initialized ProxyMetricPredictor")

    def extract_features(self, graph: ModelGraph) -> Dict[str, float]:
        """
        Extract proxy features from architecture.

        Args:
            graph: Architecture graph

        Returns:
            Dictionary of proxy metrics
        """
        features = {}

        # Basic metrics
        features["num_layers"] = len(graph.layers)
        features["num_parameters"] = graph.count_parameters()

        # Operation counts
        op_counts = {}
        for layer in graph.layers:
            op_type = layer.layer_type
            op_counts[op_type] = op_counts.get(op_type, 0) + 1

        features["num_conv"] = op_counts.get("conv2d", 0)
        features["num_dense"] = op_counts.get("dense", 0)
        features["num_pool"] = op_counts.get("maxpool2d", 0) + op_counts.get("avgpool2d", 0)
        features["num_norm"] = op_counts.get("batchnorm", 0)
        features["num_activation"] = op_counts.get("relu", 0) + op_counts.get("tanh", 0)
        features["num_dropout"] = op_counts.get("dropout", 0)

        # Diversity
        features["operation_diversity"] = len(op_counts)

        # Network shape metrics
        layer_widths = []
        for layer in graph.layers:
            if layer.layer_type == "conv2d":
                filters = layer.config.get("filters", 64)
                layer_widths.append(filters)
            elif layer.layer_type == "dense":
                units = layer.config.get("units", 128)
                layer_widths.append(units)

        if layer_widths:
            features["avg_width"] = np.mean(layer_widths)
            features["max_width"] = np.max(layer_widths)
            features["min_width"] = np.min(layer_widths)
        else:
            features["avg_width"] = 0
            features["max_width"] = 0
            features["min_width"] = 0

        # Depth-to-width ratio
        if features["avg_width"] > 0:
            features["depth_to_width_ratio"] = features["num_layers"] / features["avg_width"]
        else:
            features["depth_to_width_ratio"] = 0

        # Parameter efficiency
        if features["num_layers"] > 0:
            features["params_per_layer"] = features["num_parameters"] / features["num_layers"]
        else:
            features["params_per_layer"] = 0

        return features
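Note that skip connections appear in the class docstring's feature list but are not among the extracted features. Also, extract_features only touches a small surface of ModelGraph: graph.layers, graph.count_parameters(), and each layer's layer_type and config attributes. A duck-typed stub is therefore enough to exercise the predictor offline; a hedged sketch (FakeGraph is hypothetical, not part of morphml):

# Hypothetical stand-in for ModelGraph, for offline testing only.
from types import SimpleNamespace

class FakeGraph:
    def __init__(self, layers, n_params):
        self.layers = layers
        self._n = n_params

    def count_parameters(self):
        return self._n

g = FakeGraph(
    layers=[
        SimpleNamespace(layer_type="conv2d", config={"filters": 32}),
        SimpleNamespace(layer_type="relu", config={}),
        SimpleNamespace(layer_type="maxpool2d", config={}),
        SimpleNamespace(layer_type="dense", config={"units": 10}),
    ],
    n_params=25_000,
)
# ProxyMetricPredictor().extract_features(g) -> num_layers=4, num_conv=1, ...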
    def train(
        self, training_data: List[Tuple[ModelGraph, float]], validation_split: float = 0.2
    ) -> Dict[str, float]:
        """
        Train predictor on historical data.

        Args:
            training_data: List of (architecture, accuracy) pairs
            validation_split: Fraction for validation

        Returns:
            Training metrics
        """
        if not SKLEARN_AVAILABLE:
            logger.warning("scikit-learn not available, using dummy predictor")
            self.is_trained = True
            return {"error": "sklearn_not_available"}

        logger.info(f"Training ProxyMetricPredictor on {len(training_data)} samples")

        # Extract features
        X = []
        y = []

        for graph, accuracy in training_data:
            features = self.extract_features(graph)

            # Store feature names from first sample
            if not self.feature_names:
                self.feature_names = sorted(features.keys())

            # Convert to vector
            feature_vec = [features[name] for name in self.feature_names]
            X.append(feature_vec)
            y.append(accuracy)

        X = np.array(X)
        y = np.array(y)

        # Split data (ordered split: shuffle training_data first if it is sorted)
        n_train = int(len(X) * (1 - validation_split))
        X_train, X_val = X[:n_train], X[n_train:]
        y_train, y_val = y[:n_train], y[n_train:]

        # Scale features
        if self.scaler:
            X_train = self.scaler.fit_transform(X_train)
            X_val = self.scaler.transform(X_val)

        # Train model
        self.model = RandomForestRegressor(
            n_estimators=100, max_depth=10, min_samples_split=5, random_state=42
        )

        self.model.fit(X_train, y_train)
        self.is_trained = True

        # Evaluate
        train_score = self.model.score(X_train, y_train)
        val_score = self.model.score(X_val, y_val) if len(X_val) > 0 else 0.0

        # Feature importance
        importances = self.model.feature_importances_
        top_features = sorted(
            zip(self.feature_names, importances), key=lambda x: x[1], reverse=True
        )[:5]

        logger.info(f"Training complete: R²={train_score:.3f}, Val R²={val_score:.3f}")
        logger.info(f"Top features: {top_features}")

        return {
            "train_score": train_score,
            "val_score": val_score,
            "top_features": dict(top_features),
        }
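Because train() splits X in the order the pairs arrive, a sorted history (for example, best-first results from a search) would push all strong architectures into one side of the split and bias the validation score. A hedged sketch of a shuffled split, on synthetic data for self-containment:

# Illustrative shuffled split on synthetic data (13 proxy features per architecture).
import numpy as np

rng = np.random.default_rng(42)
X = rng.normal(size=(50, 13))
y = rng.uniform(0.5, 0.95, size=50)

idx = rng.permutation(len(X))
n_train = int(len(X) * 0.8)  # validation_split = 0.2
X_train, X_val = X[idx[:n_train]], X[idx[n_train:]]
y_train, y_val = y[idx[:n_train]], y[idx[n_train:]]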
    def predict(self, graph: ModelGraph) -> float:
        """
        Predict accuracy for architecture.

        Args:
            graph: Architecture to evaluate

        Returns:
            Predicted accuracy (0-1)
        """
        # self.model stays None when sklearn is missing, even though train()
        # marks the dummy predictor as trained, so check both.
        if not self.is_trained or self.model is None:
            logger.warning("Predictor not trained, returning 0.5")
            return 0.5

        # Extract features
        features = self.extract_features(graph)

        # Convert to vector
        feature_vec = np.array([[features[name] for name in self.feature_names]])

        # Scale
        if self.scaler:
            feature_vec = self.scaler.transform(feature_vec)

        # Predict
        prediction = self.model.predict(feature_vec)[0]

        # Clip to valid range
        return float(np.clip(prediction, 0.0, 1.0))

    def batch_predict(self, graphs: List[ModelGraph]) -> np.ndarray:
        """
        Predict for multiple architectures efficiently.

        Args:
            graphs: List of architectures

        Returns:
            Array of predictions
        """
        if not self.is_trained or self.model is None:
            return np.full(len(graphs), 0.5)

        # Extract all features
        X = []
        for graph in graphs:
            features = self.extract_features(graph)
            feature_vec = [features[name] for name in self.feature_names]
            X.append(feature_vec)

        X = np.array(X)

        # Scale
        if self.scaler:
            X = self.scaler.transform(X)

        # Predict
        predictions = self.model.predict(X)

        return np.clip(predictions, 0.0, 1.0)
morphml/meta_learning/strategy_evolution/__init__.py

@@ -0,0 +1,27 @@

"""Strategy evolution and adaptive optimization.

Learns which optimization strategies work best through:
- Multi-armed bandits
- Adaptive strategy selection
- Portfolio optimization
- Hyperparameter tuning

Author: Eshan Roy <eshanized@proton.me>
Organization: TONMOY INFRASTRUCTURE & VISION
"""

from morphml.meta_learning.strategy_evolution.adaptive_optimizer import AdaptiveOptimizer
from morphml.meta_learning.strategy_evolution.bandit import (
    StrategySelector,
    ThompsonSamplingSelector,
    UCBSelector,
)
from morphml.meta_learning.strategy_evolution.portfolio import PortfolioOptimizer

__all__ = [
    "StrategySelector",
    "UCBSelector",
    "ThompsonSamplingSelector",
    "AdaptiveOptimizer",
    "PortfolioOptimizer",
]
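bandit.py itself is not included in this diff, so the API behind UCBSelector and ThompsonSamplingSelector is opaque here. For orientation, a generic, self-contained UCB1 selector of the kind such a module typically wraps (illustrative sketch; not morphml's implementation):

# Generic UCB1 sketch -- not morphml's UCBSelector.
import math

class UCB1:
    def __init__(self, n_arms: int):
        self.counts = [0] * n_arms    # pulls per strategy
        self.values = [0.0] * n_arms  # running mean reward per strategy

    def select(self) -> int:
        # Play every arm once before applying the UCB rule
        for arm, count in enumerate(self.counts):
            if count == 0:
                return arm
        total = sum(self.counts)
        return max(
            range(len(self.counts)),
            key=lambda a: self.values[a] + math.sqrt(2 * math.log(total) / self.counts[a]),
        )

    def update(self, arm: int, reward: float) -> None:
        self.counts[arm] += 1
        self.values[arm] += (reward - self.values[arm]) / self.counts[arm]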