morphml 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- morphml/__init__.py +14 -0
- morphml/api/__init__.py +26 -0
- morphml/api/app.py +326 -0
- morphml/api/auth.py +193 -0
- morphml/api/client.py +338 -0
- morphml/api/models.py +132 -0
- morphml/api/rate_limit.py +192 -0
- morphml/benchmarking/__init__.py +36 -0
- morphml/benchmarking/comparison.py +430 -0
- morphml/benchmarks/__init__.py +56 -0
- morphml/benchmarks/comparator.py +409 -0
- morphml/benchmarks/datasets.py +280 -0
- morphml/benchmarks/metrics.py +199 -0
- morphml/benchmarks/openml_suite.py +201 -0
- morphml/benchmarks/problems.py +289 -0
- morphml/benchmarks/suite.py +318 -0
- morphml/cli/__init__.py +5 -0
- morphml/cli/commands/experiment.py +329 -0
- morphml/cli/main.py +457 -0
- morphml/cli/quickstart.py +312 -0
- morphml/config.py +278 -0
- morphml/constraints/__init__.py +19 -0
- morphml/constraints/handler.py +205 -0
- morphml/constraints/predicates.py +285 -0
- morphml/core/__init__.py +3 -0
- morphml/core/crossover.py +449 -0
- morphml/core/dsl/README.md +359 -0
- morphml/core/dsl/__init__.py +72 -0
- morphml/core/dsl/ast_nodes.py +364 -0
- morphml/core/dsl/compiler.py +318 -0
- morphml/core/dsl/layers.py +368 -0
- morphml/core/dsl/lexer.py +336 -0
- morphml/core/dsl/parser.py +455 -0
- morphml/core/dsl/search_space.py +386 -0
- morphml/core/dsl/syntax.py +199 -0
- morphml/core/dsl/type_system.py +361 -0
- morphml/core/dsl/validator.py +386 -0
- morphml/core/graph/__init__.py +40 -0
- morphml/core/graph/edge.py +124 -0
- morphml/core/graph/graph.py +507 -0
- morphml/core/graph/mutations.py +409 -0
- morphml/core/graph/node.py +196 -0
- morphml/core/graph/serialization.py +361 -0
- morphml/core/graph/visualization.py +431 -0
- morphml/core/objectives/__init__.py +20 -0
- morphml/core/search/__init__.py +33 -0
- morphml/core/search/individual.py +252 -0
- morphml/core/search/parameters.py +453 -0
- morphml/core/search/population.py +375 -0
- morphml/core/search/search_engine.py +340 -0
- morphml/distributed/__init__.py +76 -0
- morphml/distributed/fault_tolerance.py +497 -0
- morphml/distributed/health_monitor.py +348 -0
- morphml/distributed/master.py +709 -0
- morphml/distributed/proto/README.md +224 -0
- morphml/distributed/proto/__init__.py +74 -0
- morphml/distributed/proto/worker.proto +170 -0
- morphml/distributed/proto/worker_pb2.py +79 -0
- morphml/distributed/proto/worker_pb2_grpc.py +423 -0
- morphml/distributed/resource_manager.py +416 -0
- morphml/distributed/scheduler.py +567 -0
- morphml/distributed/storage/__init__.py +33 -0
- morphml/distributed/storage/artifacts.py +381 -0
- morphml/distributed/storage/cache.py +366 -0
- morphml/distributed/storage/checkpointing.py +329 -0
- morphml/distributed/storage/database.py +459 -0
- morphml/distributed/worker.py +549 -0
- morphml/evaluation/__init__.py +5 -0
- morphml/evaluation/heuristic.py +237 -0
- morphml/exceptions.py +55 -0
- morphml/execution/__init__.py +5 -0
- morphml/execution/local_executor.py +350 -0
- morphml/integrations/__init__.py +28 -0
- morphml/integrations/jax_adapter.py +206 -0
- morphml/integrations/pytorch_adapter.py +530 -0
- morphml/integrations/sklearn_adapter.py +206 -0
- morphml/integrations/tensorflow_adapter.py +230 -0
- morphml/logging_config.py +93 -0
- morphml/meta_learning/__init__.py +66 -0
- morphml/meta_learning/architecture_similarity.py +277 -0
- morphml/meta_learning/experiment_database.py +240 -0
- morphml/meta_learning/knowledge_base/__init__.py +19 -0
- morphml/meta_learning/knowledge_base/embedder.py +179 -0
- morphml/meta_learning/knowledge_base/knowledge_base.py +313 -0
- morphml/meta_learning/knowledge_base/meta_features.py +265 -0
- morphml/meta_learning/knowledge_base/vector_store.py +271 -0
- morphml/meta_learning/predictors/__init__.py +27 -0
- morphml/meta_learning/predictors/ensemble.py +221 -0
- morphml/meta_learning/predictors/gnn_predictor.py +552 -0
- morphml/meta_learning/predictors/learning_curve.py +231 -0
- morphml/meta_learning/predictors/proxy_metrics.py +261 -0
- morphml/meta_learning/strategy_evolution/__init__.py +27 -0
- morphml/meta_learning/strategy_evolution/adaptive_optimizer.py +226 -0
- morphml/meta_learning/strategy_evolution/bandit.py +276 -0
- morphml/meta_learning/strategy_evolution/portfolio.py +230 -0
- morphml/meta_learning/transfer.py +581 -0
- morphml/meta_learning/warm_start.py +286 -0
- morphml/optimizers/__init__.py +74 -0
- morphml/optimizers/adaptive_operators.py +399 -0
- morphml/optimizers/bayesian/__init__.py +52 -0
- morphml/optimizers/bayesian/acquisition.py +387 -0
- morphml/optimizers/bayesian/base.py +319 -0
- morphml/optimizers/bayesian/gaussian_process.py +635 -0
- morphml/optimizers/bayesian/smac.py +534 -0
- morphml/optimizers/bayesian/tpe.py +411 -0
- morphml/optimizers/differential_evolution.py +220 -0
- morphml/optimizers/evolutionary/__init__.py +61 -0
- morphml/optimizers/evolutionary/cma_es.py +416 -0
- morphml/optimizers/evolutionary/differential_evolution.py +556 -0
- morphml/optimizers/evolutionary/encoding.py +426 -0
- morphml/optimizers/evolutionary/particle_swarm.py +449 -0
- morphml/optimizers/genetic_algorithm.py +486 -0
- morphml/optimizers/gradient_based/__init__.py +22 -0
- morphml/optimizers/gradient_based/darts.py +550 -0
- morphml/optimizers/gradient_based/enas.py +585 -0
- morphml/optimizers/gradient_based/operations.py +474 -0
- morphml/optimizers/gradient_based/utils.py +601 -0
- morphml/optimizers/hill_climbing.py +169 -0
- morphml/optimizers/multi_objective/__init__.py +56 -0
- morphml/optimizers/multi_objective/indicators.py +504 -0
- morphml/optimizers/multi_objective/nsga2.py +647 -0
- morphml/optimizers/multi_objective/visualization.py +427 -0
- morphml/optimizers/nsga2.py +308 -0
- morphml/optimizers/random_search.py +172 -0
- morphml/optimizers/simulated_annealing.py +181 -0
- morphml/plugins/__init__.py +35 -0
- morphml/plugins/custom_evaluator_example.py +81 -0
- morphml/plugins/custom_optimizer_example.py +63 -0
- morphml/plugins/plugin_system.py +454 -0
- morphml/reports/__init__.py +30 -0
- morphml/reports/generator.py +362 -0
- morphml/tracking/__init__.py +7 -0
- morphml/tracking/experiment.py +309 -0
- morphml/tracking/logger.py +301 -0
- morphml/tracking/reporter.py +357 -0
- morphml/utils/__init__.py +6 -0
- morphml/utils/checkpoint.py +189 -0
- morphml/utils/comparison.py +390 -0
- morphml/utils/export.py +407 -0
- morphml/utils/progress.py +392 -0
- morphml/utils/validation.py +392 -0
- morphml/version.py +7 -0
- morphml/visualization/__init__.py +50 -0
- morphml/visualization/analytics.py +423 -0
- morphml/visualization/architecture_diagrams.py +353 -0
- morphml/visualization/architecture_plot.py +223 -0
- morphml/visualization/convergence_plot.py +174 -0
- morphml/visualization/crossover_viz.py +386 -0
- morphml/visualization/graph_viz.py +338 -0
- morphml/visualization/pareto_plot.py +149 -0
- morphml/visualization/plotly_dashboards.py +422 -0
- morphml/visualization/population.py +309 -0
- morphml/visualization/progress.py +260 -0
- morphml-1.0.0.dist-info/METADATA +434 -0
- morphml-1.0.0.dist-info/RECORD +158 -0
- morphml-1.0.0.dist-info/WHEEL +4 -0
- morphml-1.0.0.dist-info/entry_points.txt +3 -0
- morphml-1.0.0.dist-info/licenses/LICENSE +21 -0
morphml/meta_learning/knowledge_base/meta_features.py
@@ -0,0 +1,265 @@
"""Meta-feature extraction for tasks and architectures.

Author: Eshan Roy <eshanized@proton.me>
Organization: TONMOY INFRASTRUCTURE & VISION
"""

from collections import Counter
from typing import Dict

import numpy as np

from morphml.core.graph import ModelGraph
from morphml.logging_config import get_logger
from morphml.meta_learning.experiment_database import TaskMetadata

logger = get_logger(__name__)


class MetaFeatureExtractor:
    """
    Extract meta-features from tasks and architectures.

    Task meta-features:
    - Dataset statistics
    - Class distribution
    - Input dimensionality

    Architecture meta-features:
    - Structural properties
    - Operation statistics
    - Connectivity patterns
    - Parameter counts

    Example:
        >>> extractor = MetaFeatureExtractor()
        >>> task_features = extractor.extract_task_features(task_metadata)
        >>> arch_features = extractor.extract_architecture_features(graph)
    """

    def __init__(self):
        """Initialize extractor."""
        logger.info("Initialized MetaFeatureExtractor")

    def extract_task_features(self, task: TaskMetadata, normalize: bool = True) -> Dict[str, float]:
        """
        Extract meta-features from task.

        Args:
            task: Task metadata
            normalize: Whether to normalize features

        Returns:
            Dictionary of meta-features
        """
        features = {}

        # Dataset size features
        features["num_samples"] = float(task.num_samples)
        features["num_classes"] = float(task.num_classes)

        # Input dimensionality
        if isinstance(task.input_size, (tuple, list)):
            features["input_channels"] = float(task.input_size[0])
            features["input_height"] = float(task.input_size[1])
            features["input_width"] = float(
                task.input_size[2] if len(task.input_size) > 2 else task.input_size[1]
            )
            features["input_dim"] = float(np.prod(task.input_size))
        else:
            features["input_dim"] = float(task.input_size)

        # Problem type encoding
        problem_types = ["classification", "detection", "segmentation", "regression"]
        for pt in problem_types:
            features[f"problem_{pt}"] = 1.0 if task.problem_type == pt else 0.0

        # Derived features
        if task.num_samples > 0:
            features["samples_per_class"] = task.num_samples / max(task.num_classes, 1)

        # Normalize if requested
        if normalize:
            features["num_samples"] /= 1000000.0  # Scale to millions
            features["num_classes"] /= 1000.0  # Scale to thousands
            features["input_dim"] /= 10000.0  # Scale

        return features

    def extract_architecture_features(
        self, graph: ModelGraph, normalize: bool = True
    ) -> Dict[str, float]:
        """
        Extract meta-features from architecture.

        Args:
            graph: Architecture graph
            normalize: Whether to normalize features

        Returns:
            Dictionary of meta-features
        """
        features = {}

        # Basic structure
        features["num_layers"] = float(len(graph.layers))
        features["num_parameters"] = float(graph.count_parameters())

        # Operation type counts
        op_counts = Counter(layer.layer_type for layer in graph.layers)

        op_types = [
            "conv2d",
            "maxpool2d",
            "avgpool2d",
            "dense",
            "relu",
            "tanh",
            "sigmoid",
            "batchnorm",
            "dropout",
            "flatten",
            "input",
            "output",
        ]

        for op_type in op_types:
            features[f"num_{op_type}"] = float(op_counts.get(op_type, 0))

        # Operation diversity
        features["num_unique_ops"] = float(len(op_counts))
        features["operation_diversity"] = features["num_unique_ops"] / max(
            features["num_layers"], 1
        )

        # Layer configuration statistics
        conv_filters = []
        conv_kernels = []
        dense_units = []
        dropout_rates = []

        for layer in graph.layers:
            if layer.layer_type == "conv2d":
                conv_filters.append(layer.config.get("filters", 64))
                conv_kernels.append(layer.config.get("kernel_size", 3))
            elif layer.layer_type == "dense":
                dense_units.append(layer.config.get("units", 128))
            elif layer.layer_type == "dropout":
                dropout_rates.append(layer.config.get("rate", 0.5))

        # Convolutional layer stats
        if conv_filters:
            features["conv_avg_filters"] = float(np.mean(conv_filters))
            features["conv_max_filters"] = float(np.max(conv_filters))
            features["conv_min_filters"] = float(np.min(conv_filters))
            features["conv_std_filters"] = float(np.std(conv_filters))
        else:
            features["conv_avg_filters"] = 0.0
            features["conv_max_filters"] = 0.0
            features["conv_min_filters"] = 0.0
            features["conv_std_filters"] = 0.0

        if conv_kernels:
            features["conv_avg_kernel"] = float(np.mean(conv_kernels))
        else:
            features["conv_avg_kernel"] = 0.0

        # Dense layer stats
        if dense_units:
            features["dense_avg_units"] = float(np.mean(dense_units))
            features["dense_max_units"] = float(np.max(dense_units))
        else:
            features["dense_avg_units"] = 0.0
            features["dense_max_units"] = 0.0

        # Dropout stats
        if dropout_rates:
            features["avg_dropout_rate"] = float(np.mean(dropout_rates))
        else:
            features["avg_dropout_rate"] = 0.0

        # Depth and width ratios
        if features["num_layers"] > 0:
            features["params_per_layer"] = features["num_parameters"] / features["num_layers"]
        else:
            features["params_per_layer"] = 0.0

        # Normalize if requested
        if normalize:
            features["num_layers"] /= 100.0
            features["num_parameters"] /= 10000000.0  # Scale to 10M
            features["conv_avg_filters"] /= 1024.0
            features["conv_max_filters"] /= 2048.0
            features["dense_avg_units"] /= 4096.0
            features["dense_max_units"] /= 8192.0

        return features

    def extract_combined_features(self, task: TaskMetadata, graph: ModelGraph) -> Dict[str, float]:
        """
        Extract combined task and architecture features.

        Args:
            task: Task metadata
            graph: Architecture graph

        Returns:
            Combined feature dictionary
        """
        task_features = self.extract_task_features(task)
        arch_features = self.extract_architecture_features(graph)

        # Combine
        combined = {**task_features, **arch_features}

        # Add interaction features
        if "num_classes" in task_features and "num_layers" in arch_features:
            combined["layers_per_class"] = arch_features["num_layers"] / max(
                task_features["num_classes"], 1
            )

        return combined

    def feature_vector(self, features: Dict[str, float]) -> np.ndarray:
        """
        Convert feature dict to numpy array.

        Args:
            features: Feature dictionary

        Returns:
            Feature vector
        """
        # Sort keys for consistency
        keys = sorted(features.keys())
        values = [features[k] for k in keys]

        return np.array(values, dtype=np.float32)

    def compute_feature_similarity(
        self, features1: Dict[str, float], features2: Dict[str, float]
    ) -> float:
        """
        Compute cosine similarity between feature vectors.

        Args:
            features1: First feature dict
            features2: Second feature dict

        Returns:
            Similarity score (0-1)
        """
        # Get common keys
        common_keys = sorted(set(features1.keys()) & set(features2.keys()))

        if not common_keys:
            return 0.0

        # Create vectors
        vec1 = np.array([features1[k] for k in common_keys])
        vec2 = np.array([features2[k] for k in common_keys])

        # Cosine similarity
        similarity = np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2) + 1e-8)

        return max(0.0, min(1.0, similarity))
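
A minimal usage sketch (not part of the package) showing the task-similarity path end to end. TaskMetadata's constructor signature is an assumption here; the extractor itself only reads the num_samples, num_classes, input_size, and problem_type fields. Note that compute_feature_similarity clamps cosine similarity into [0, 1], so dissimilar tasks score near 0.

# Hypothetical usage -- TaskMetadata keyword arguments are assumed.
from morphml.meta_learning.experiment_database import TaskMetadata
from morphml.meta_learning.knowledge_base.meta_features import MetaFeatureExtractor

extractor = MetaFeatureExtractor()

small_task = TaskMetadata(
    num_samples=50_000, num_classes=10,
    input_size=(3, 32, 32), problem_type="classification",
)
large_task = TaskMetadata(
    num_samples=1_281_167, num_classes=1_000,
    input_size=(3, 224, 224), problem_type="classification",
)

f1 = extractor.extract_task_features(small_task)
f2 = extractor.extract_task_features(large_task)
print(extractor.compute_feature_similarity(f1, f2))  # float in [0, 1]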
morphml/meta_learning/knowledge_base/vector_store.py
@@ -0,0 +1,271 @@
"""Vector storage for fast similarity search.

Lightweight implementation without external dependencies.
Can be upgraded to FAISS when available.

Author: Eshan Roy <eshanized@proton.me>
Organization: TONMOY INFRASTRUCTURE & VISION
"""

import json
import pickle
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Tuple

import numpy as np

from morphml.logging_config import get_logger

logger = get_logger(__name__)

# Try to import FAISS (optional)
try:
    import faiss

    FAISS_AVAILABLE = True
except ImportError:
    FAISS_AVAILABLE = False


class VectorStore:
    """
    Vector storage with similarity search.

    Uses FAISS if available, otherwise falls back to NumPy.

    Args:
        embedding_dim: Dimension of embeddings
        use_faiss: Whether to use FAISS (if available)
        persist_path: Path to save/load store

    Example:
        >>> store = VectorStore(embedding_dim=128)
        >>> store.add(embedding, metadata={'id': 'arch1'}, data='...')
        >>> results = store.search(query_embedding, top_k=10)
    """

    def __init__(
        self,
        embedding_dim: int = 128,
        use_faiss: bool = True,
        persist_path: Optional[str] = None,
    ):
        """Initialize vector store."""
        self.embedding_dim = embedding_dim
        self.use_faiss = use_faiss and FAISS_AVAILABLE
        self.persist_path = persist_path

        # Storage
        self.embeddings: List[np.ndarray] = []
        self.metadatas: List[Dict[str, Any]] = []
        self.data: List[Any] = []

        # FAISS index
        self.index = None
        if self.use_faiss:
            self.index = faiss.IndexFlatL2(embedding_dim)
            logger.info(f"Initialized VectorStore with FAISS (dim={embedding_dim})")
        else:
            logger.info(f"Initialized VectorStore with NumPy (dim={embedding_dim})")

        # Load if path exists
        if persist_path and Path(persist_path).exists():
            self.load(persist_path)

    def add(
        self,
        embedding: np.ndarray,
        metadata: Dict[str, Any],
        data: Any = None,
    ) -> int:
        """
        Add item to store.

        Args:
            embedding: Embedding vector
            metadata: Metadata dict
            data: Associated data (e.g., ModelGraph)

        Returns:
            ID of added item
        """
        # Validate embedding
        if embedding.shape[0] != self.embedding_dim:
            raise ValueError(f"Embedding dimension {embedding.shape[0]} != {self.embedding_dim}")

        # Normalize embedding
        norm = np.linalg.norm(embedding)
        if norm > 0:
            embedding = embedding / norm

        # Add to storage
        idx = len(self.embeddings)
        self.embeddings.append(embedding)
        self.metadatas.append(metadata)
        self.data.append(data)

        # Add to FAISS index
        if self.use_faiss and self.index is not None:
            self.index.add(embedding.reshape(1, -1).astype(np.float32))

        logger.debug(f"Added item {idx} to vector store")

        return idx

    def search(
        self,
        query_embedding: np.ndarray,
        top_k: int = 10,
        filter_fn: Optional[Callable[[Dict[str, Any]], bool]] = None,
    ) -> List[Tuple[int, float, Dict[str, Any], Any]]:
        """
        Search for similar items.

        Args:
            query_embedding: Query vector
            top_k: Number of results
            filter_fn: Optional filter function on metadata

        Returns:
            List of (id, distance, metadata, data) tuples
        """
        if len(self.embeddings) == 0:
            return []

        # Normalize query
        norm = np.linalg.norm(query_embedding)
        if norm > 0:
            query_embedding = query_embedding / norm

        if self.use_faiss and self.index is not None:
            # FAISS search
            distances, indices = self.index.search(
                query_embedding.reshape(1, -1).astype(np.float32),
                min(top_k * 2, len(self.embeddings)),  # Get extra for filtering
            )

            candidates = [
                (int(indices[0, i]), float(distances[0, i]))
                for i in range(len(indices[0]))
                if indices[0, i] >= 0
            ]
        else:
            # NumPy search
            embeddings_array = np.array(self.embeddings)

            # Compute distances (L2)
            distances = np.linalg.norm(embeddings_array - query_embedding, axis=1)

            # Get top candidates (extra for filtering)
            top_indices = np.argsort(distances)[: top_k * 2]
            candidates = [(int(idx), float(distances[idx])) for idx in top_indices]

        # Apply filter and collect results
        results = []
        for idx, distance in candidates:
            metadata = self.metadatas[idx]

            if filter_fn is not None and not filter_fn(metadata):
                continue

            results.append((idx, distance, metadata, self.data[idx]))

            if len(results) >= top_k:
                break

        return results

    def get(self, idx: int) -> Tuple[np.ndarray, Dict[str, Any], Any]:
        """
        Get item by ID.

        Args:
            idx: Item ID

        Returns:
            (embedding, metadata, data) tuple
        """
        if idx < 0 or idx >= len(self.embeddings):
            raise IndexError(f"Invalid index: {idx}")

        return self.embeddings[idx], self.metadatas[idx], self.data[idx]

    def update_metadata(self, idx: int, metadata: Dict[str, Any]) -> None:
        """Update metadata for an item."""
        if idx < 0 or idx >= len(self.embeddings):
            raise IndexError(f"Invalid index: {idx}")

        self.metadatas[idx] = metadata

    def size(self) -> int:
        """Get number of items in store."""
        return len(self.embeddings)

    def save(self, path: str) -> None:
        """
        Save store to disk.

        Args:
            path: Directory path to save to
        """
        path = Path(path)
        path.mkdir(parents=True, exist_ok=True)

        # Save embeddings
        np.save(path / "embeddings.npy", np.array(self.embeddings))

        # Save metadatas
        with open(path / "metadatas.json", "w") as f:
            json.dump(self.metadatas, f)

        # Save data (pickle)
        with open(path / "data.pkl", "wb") as f:
            pickle.dump(self.data, f)

        # Save FAISS index if available
        if self.use_faiss and self.index is not None:
            faiss.write_index(self.index, str(path / "faiss.index"))

        logger.info(f"Saved vector store to {path}")

    def load(self, path: str) -> None:
        """
        Load store from disk.

        Args:
            path: Directory path to load from
        """
        path = Path(path)

        if not path.exists():
            raise FileNotFoundError(f"Store not found: {path}")

        # Load embeddings
        embeddings_array = np.load(path / "embeddings.npy")
        self.embeddings = [embeddings_array[i] for i in range(len(embeddings_array))]

        # Load metadatas
        with open(path / "metadatas.json", "r") as f:
            self.metadatas = json.load(f)

        # Load data
        with open(path / "data.pkl", "rb") as f:
            self.data = pickle.load(f)

        # Load FAISS index, or rebuild it if the store was saved without one
        if self.use_faiss:
            index_file = path / "faiss.index"
            if index_file.exists():
                self.index = faiss.read_index(str(index_file))
            else:
                self.index = faiss.IndexFlatL2(self.embedding_dim)
                if self.embeddings:
                    self.index.add(np.array(self.embeddings, dtype=np.float32))

        logger.info(f"Loaded vector store from {path} ({len(self.embeddings)} items)")

    def clear(self) -> None:
        """Clear all items from store."""
        self.embeddings = []
        self.metadatas = []
        self.data = []

        if self.use_faiss:
            self.index = faiss.IndexFlatL2(self.embedding_dim)

        logger.info("Cleared vector store")
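
A quick round trip using the NumPy fallback (a sketch exercising only the code above; no FAISS required). Because vectors are L2-normalized on insert, L2 distance is monotone in cosine similarity for unit vectors (d^2 = 2 - 2*cos), so rankings agree between the two backends.

import numpy as np
from morphml.meta_learning.knowledge_base.vector_store import VectorStore

store = VectorStore(embedding_dim=4, use_faiss=False)
store.add(np.array([1.0, 0.0, 0.0, 0.0]), metadata={"id": "arch1"})
store.add(np.array([0.0, 1.0, 0.0, 0.0]), metadata={"id": "arch2"})

# Nearest neighbor of a query pointing mostly in arch1's direction
results = store.search(np.array([0.9, 0.1, 0.0, 0.0]), top_k=1)
idx, distance, metadata, data = results[0]
print(metadata["id"])  # -> arch1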
morphml/meta_learning/predictors/__init__.py
@@ -0,0 +1,27 @@
"""Performance predictors for architecture evaluation.

Provides fast performance estimation without full training:
- Proxy metrics (parameters, FLOPs, depth)
- Learning curve extrapolation
- GNN-based prediction (when PyTorch available)
- Ensemble methods

Author: Eshan Roy <eshanized@proton.me>
Organization: TONMOY INFRASTRUCTURE & VISION
"""

from morphml.meta_learning.predictors.ensemble import EnsemblePredictor
from morphml.meta_learning.predictors.learning_curve import LearningCurvePredictor
from morphml.meta_learning.predictors.proxy_metrics import ProxyMetricPredictor

__all__ = [
    "ProxyMetricPredictor",
    "LearningCurvePredictor",
    "EnsemblePredictor",
]

# Optional GNN predictor (requires PyTorch); exported only if the import succeeds
try:
    from morphml.meta_learning.predictors.gnn_predictor import GNNPredictor

    __all__.append("GNNPredictor")
except ImportError:
    pass
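
Since GNNPredictor is exported only when its optional import succeeds, downstream code can feature-detect it at runtime; a minimal sketch:

from morphml.meta_learning import predictors

if "GNNPredictor" in predictors.__all__:
    Predictor = predictors.GNNPredictor  # available when PyTorch is installed
else:
    Predictor = predictors.ProxyMetricPredictor  # dependency-free fallback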