morphml-1.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of morphml has been flagged as possibly problematic.

Files changed (158)
  1. morphml/__init__.py +14 -0
  2. morphml/api/__init__.py +26 -0
  3. morphml/api/app.py +326 -0
  4. morphml/api/auth.py +193 -0
  5. morphml/api/client.py +338 -0
  6. morphml/api/models.py +132 -0
  7. morphml/api/rate_limit.py +192 -0
  8. morphml/benchmarking/__init__.py +36 -0
  9. morphml/benchmarking/comparison.py +430 -0
  10. morphml/benchmarks/__init__.py +56 -0
  11. morphml/benchmarks/comparator.py +409 -0
  12. morphml/benchmarks/datasets.py +280 -0
  13. morphml/benchmarks/metrics.py +199 -0
  14. morphml/benchmarks/openml_suite.py +201 -0
  15. morphml/benchmarks/problems.py +289 -0
  16. morphml/benchmarks/suite.py +318 -0
  17. morphml/cli/__init__.py +5 -0
  18. morphml/cli/commands/experiment.py +329 -0
  19. morphml/cli/main.py +457 -0
  20. morphml/cli/quickstart.py +312 -0
  21. morphml/config.py +278 -0
  22. morphml/constraints/__init__.py +19 -0
  23. morphml/constraints/handler.py +205 -0
  24. morphml/constraints/predicates.py +285 -0
  25. morphml/core/__init__.py +3 -0
  26. morphml/core/crossover.py +449 -0
  27. morphml/core/dsl/README.md +359 -0
  28. morphml/core/dsl/__init__.py +72 -0
  29. morphml/core/dsl/ast_nodes.py +364 -0
  30. morphml/core/dsl/compiler.py +318 -0
  31. morphml/core/dsl/layers.py +368 -0
  32. morphml/core/dsl/lexer.py +336 -0
  33. morphml/core/dsl/parser.py +455 -0
  34. morphml/core/dsl/search_space.py +386 -0
  35. morphml/core/dsl/syntax.py +199 -0
  36. morphml/core/dsl/type_system.py +361 -0
  37. morphml/core/dsl/validator.py +386 -0
  38. morphml/core/graph/__init__.py +40 -0
  39. morphml/core/graph/edge.py +124 -0
  40. morphml/core/graph/graph.py +507 -0
  41. morphml/core/graph/mutations.py +409 -0
  42. morphml/core/graph/node.py +196 -0
  43. morphml/core/graph/serialization.py +361 -0
  44. morphml/core/graph/visualization.py +431 -0
  45. morphml/core/objectives/__init__.py +20 -0
  46. morphml/core/search/__init__.py +33 -0
  47. morphml/core/search/individual.py +252 -0
  48. morphml/core/search/parameters.py +453 -0
  49. morphml/core/search/population.py +375 -0
  50. morphml/core/search/search_engine.py +340 -0
  51. morphml/distributed/__init__.py +76 -0
  52. morphml/distributed/fault_tolerance.py +497 -0
  53. morphml/distributed/health_monitor.py +348 -0
  54. morphml/distributed/master.py +709 -0
  55. morphml/distributed/proto/README.md +224 -0
  56. morphml/distributed/proto/__init__.py +74 -0
  57. morphml/distributed/proto/worker.proto +170 -0
  58. morphml/distributed/proto/worker_pb2.py +79 -0
  59. morphml/distributed/proto/worker_pb2_grpc.py +423 -0
  60. morphml/distributed/resource_manager.py +416 -0
  61. morphml/distributed/scheduler.py +567 -0
  62. morphml/distributed/storage/__init__.py +33 -0
  63. morphml/distributed/storage/artifacts.py +381 -0
  64. morphml/distributed/storage/cache.py +366 -0
  65. morphml/distributed/storage/checkpointing.py +329 -0
  66. morphml/distributed/storage/database.py +459 -0
  67. morphml/distributed/worker.py +549 -0
  68. morphml/evaluation/__init__.py +5 -0
  69. morphml/evaluation/heuristic.py +237 -0
  70. morphml/exceptions.py +55 -0
  71. morphml/execution/__init__.py +5 -0
  72. morphml/execution/local_executor.py +350 -0
  73. morphml/integrations/__init__.py +28 -0
  74. morphml/integrations/jax_adapter.py +206 -0
  75. morphml/integrations/pytorch_adapter.py +530 -0
  76. morphml/integrations/sklearn_adapter.py +206 -0
  77. morphml/integrations/tensorflow_adapter.py +230 -0
  78. morphml/logging_config.py +93 -0
  79. morphml/meta_learning/__init__.py +66 -0
  80. morphml/meta_learning/architecture_similarity.py +277 -0
  81. morphml/meta_learning/experiment_database.py +240 -0
  82. morphml/meta_learning/knowledge_base/__init__.py +19 -0
  83. morphml/meta_learning/knowledge_base/embedder.py +179 -0
  84. morphml/meta_learning/knowledge_base/knowledge_base.py +313 -0
  85. morphml/meta_learning/knowledge_base/meta_features.py +265 -0
  86. morphml/meta_learning/knowledge_base/vector_store.py +271 -0
  87. morphml/meta_learning/predictors/__init__.py +27 -0
  88. morphml/meta_learning/predictors/ensemble.py +221 -0
  89. morphml/meta_learning/predictors/gnn_predictor.py +552 -0
  90. morphml/meta_learning/predictors/learning_curve.py +231 -0
  91. morphml/meta_learning/predictors/proxy_metrics.py +261 -0
  92. morphml/meta_learning/strategy_evolution/__init__.py +27 -0
  93. morphml/meta_learning/strategy_evolution/adaptive_optimizer.py +226 -0
  94. morphml/meta_learning/strategy_evolution/bandit.py +276 -0
  95. morphml/meta_learning/strategy_evolution/portfolio.py +230 -0
  96. morphml/meta_learning/transfer.py +581 -0
  97. morphml/meta_learning/warm_start.py +286 -0
  98. morphml/optimizers/__init__.py +74 -0
  99. morphml/optimizers/adaptive_operators.py +399 -0
  100. morphml/optimizers/bayesian/__init__.py +52 -0
  101. morphml/optimizers/bayesian/acquisition.py +387 -0
  102. morphml/optimizers/bayesian/base.py +319 -0
  103. morphml/optimizers/bayesian/gaussian_process.py +635 -0
  104. morphml/optimizers/bayesian/smac.py +534 -0
  105. morphml/optimizers/bayesian/tpe.py +411 -0
  106. morphml/optimizers/differential_evolution.py +220 -0
  107. morphml/optimizers/evolutionary/__init__.py +61 -0
  108. morphml/optimizers/evolutionary/cma_es.py +416 -0
  109. morphml/optimizers/evolutionary/differential_evolution.py +556 -0
  110. morphml/optimizers/evolutionary/encoding.py +426 -0
  111. morphml/optimizers/evolutionary/particle_swarm.py +449 -0
  112. morphml/optimizers/genetic_algorithm.py +486 -0
  113. morphml/optimizers/gradient_based/__init__.py +22 -0
  114. morphml/optimizers/gradient_based/darts.py +550 -0
  115. morphml/optimizers/gradient_based/enas.py +585 -0
  116. morphml/optimizers/gradient_based/operations.py +474 -0
  117. morphml/optimizers/gradient_based/utils.py +601 -0
  118. morphml/optimizers/hill_climbing.py +169 -0
  119. morphml/optimizers/multi_objective/__init__.py +56 -0
  120. morphml/optimizers/multi_objective/indicators.py +504 -0
  121. morphml/optimizers/multi_objective/nsga2.py +647 -0
  122. morphml/optimizers/multi_objective/visualization.py +427 -0
  123. morphml/optimizers/nsga2.py +308 -0
  124. morphml/optimizers/random_search.py +172 -0
  125. morphml/optimizers/simulated_annealing.py +181 -0
  126. morphml/plugins/__init__.py +35 -0
  127. morphml/plugins/custom_evaluator_example.py +81 -0
  128. morphml/plugins/custom_optimizer_example.py +63 -0
  129. morphml/plugins/plugin_system.py +454 -0
  130. morphml/reports/__init__.py +30 -0
  131. morphml/reports/generator.py +362 -0
  132. morphml/tracking/__init__.py +7 -0
  133. morphml/tracking/experiment.py +309 -0
  134. morphml/tracking/logger.py +301 -0
  135. morphml/tracking/reporter.py +357 -0
  136. morphml/utils/__init__.py +6 -0
  137. morphml/utils/checkpoint.py +189 -0
  138. morphml/utils/comparison.py +390 -0
  139. morphml/utils/export.py +407 -0
  140. morphml/utils/progress.py +392 -0
  141. morphml/utils/validation.py +392 -0
  142. morphml/version.py +7 -0
  143. morphml/visualization/__init__.py +50 -0
  144. morphml/visualization/analytics.py +423 -0
  145. morphml/visualization/architecture_diagrams.py +353 -0
  146. morphml/visualization/architecture_plot.py +223 -0
  147. morphml/visualization/convergence_plot.py +174 -0
  148. morphml/visualization/crossover_viz.py +386 -0
  149. morphml/visualization/graph_viz.py +338 -0
  150. morphml/visualization/pareto_plot.py +149 -0
  151. morphml/visualization/plotly_dashboards.py +422 -0
  152. morphml/visualization/population.py +309 -0
  153. morphml/visualization/progress.py +260 -0
  154. morphml-1.0.0.dist-info/METADATA +434 -0
  155. morphml-1.0.0.dist-info/RECORD +158 -0
  156. morphml-1.0.0.dist-info/WHEEL +4 -0
  157. morphml-1.0.0.dist-info/entry_points.txt +3 -0
  158. morphml-1.0.0.dist-info/licenses/LICENSE +21 -0
morphml/distributed/resource_manager.py
@@ -0,0 +1,416 @@
+ """Resource management for distributed workers.
+
+ Tracks and manages computational resources across worker nodes:
+ - GPU availability and memory
+ - CPU utilization
+ - Memory usage
+ - Task placement based on requirements
+
+ Author: Eshan Roy <eshanized@proton.me>
+ Organization: TONMOY INFRASTRUCTURE & VISION
+ """
+
+ from dataclasses import dataclass, field
+ from typing import Any, Dict, List, Optional
+
+ from morphml.logging_config import get_logger
+
+ logger = get_logger(__name__)
+
+
+ @dataclass
+ class WorkerResources:
+     """
+     Worker computational resources.
+
+     Tracks available and total resources for a worker node.
+
+     Attributes:
+         worker_id: Unique worker identifier
+         total_gpus: Total number of GPUs
+         available_gpus: Number of available GPUs
+         gpu_memory_total: Total GPU memory per GPU (GB)
+         gpu_memory_available: Available GPU memory (GB)
+         cpu_percent: CPU utilization percentage
+         memory_percent: RAM utilization percentage
+         network_bandwidth: Network bandwidth (Mbps)
+     """
+
+     worker_id: str
+     total_gpus: int = 0
+     available_gpus: int = 0
+     gpu_memory_total: float = 0.0  # GB
+     gpu_memory_available: float = 0.0  # GB
+     cpu_percent: float = 0.0
+     memory_percent: float = 0.0
+     network_bandwidth: float = 1000.0  # Mbps
+     metadata: Dict[str, Any] = field(default_factory=dict)
+
+     @property
+     def gpu_utilization(self) -> float:
+         """Calculate GPU utilization percentage."""
+         if self.total_gpus == 0:
+             return 0.0
+         return (1.0 - self.available_gpus / self.total_gpus) * 100
+
+     @property
+     def gpu_memory_utilization(self) -> float:
+         """Calculate GPU memory utilization percentage."""
+         if self.gpu_memory_total == 0:
+             return 0.0
+         return (1.0 - self.gpu_memory_available / self.gpu_memory_total) * 100
+
+     def can_run_task(self, requirements: "TaskRequirements") -> bool:
+         """
+         Check if worker can run task with given requirements.
+
+         Args:
+             requirements: Task resource requirements
+
+         Returns:
+             True if worker has sufficient resources
+         """
+         return (
+             self.available_gpus >= requirements.min_gpus
+             and self.gpu_memory_available >= requirements.min_gpu_memory
+             and self.memory_percent < 90.0  # Don't overload memory
+         )
+
+     def allocate(self, requirements: "TaskRequirements") -> bool:
+         """
+         Allocate resources for task.
+
+         Args:
+             requirements: Task requirements
+
+         Returns:
+             True if allocation successful
+         """
+         if not self.can_run_task(requirements):
+             return False
+
+         self.available_gpus -= requirements.min_gpus
+         self.gpu_memory_available -= requirements.min_gpu_memory
+
+         return True
+
+     def release(self, requirements: "TaskRequirements") -> None:
+         """
+         Release allocated resources.
+
+         Args:
+             requirements: Task requirements to release
+         """
+         self.available_gpus = min(self.total_gpus, self.available_gpus + requirements.min_gpus)
+         self.gpu_memory_available = min(
+             self.gpu_memory_total,
+             self.gpu_memory_available + requirements.min_gpu_memory,
+         )
+
+
+ @dataclass
+ class TaskRequirements:
+     """
+     Task resource requirements.
+
+     Specifies minimum resources needed to execute a task.
+
+     Attributes:
+         min_gpus: Minimum number of GPUs
+         min_gpu_memory: Minimum GPU memory per GPU (GB)
+         min_cpu_cores: Minimum CPU cores
+         min_memory: Minimum RAM (GB)
+         estimated_time: Estimated execution time (seconds)
+         priority: Task priority (higher = more important)
+     """
+
+     min_gpus: int = 1
+     min_gpu_memory: float = 2.0  # GB
+     min_cpu_cores: int = 1
+     min_memory: float = 4.0  # GB
+     estimated_time: float = 300.0  # seconds
+     priority: float = 1.0
+
+     def __post_init__(self) -> None:
+         """Validate requirements."""
+         if self.min_gpus < 0:
+             raise ValueError("min_gpus must be >= 0")
+         if self.min_gpu_memory < 0:
+             raise ValueError("min_gpu_memory must be >= 0")
+         if self.estimated_time < 0:
+             raise ValueError("estimated_time must be >= 0")
+
+
+ class ResourceManager:
+     """
+     Manage computational resources across workers.
+
+     Tracks resource availability and helps with intelligent task placement.
+
+     Example:
+         >>> manager = ResourceManager()
+         >>> manager.update_resources('worker-1', {
+         ...     'total_gpus': 4,
+         ...     'available_gpus': 3,
+         ...     'gpu_memory_total': 16.0,
+         ...     'gpu_memory_available': 12.0,
+         ... })
+         >>> requirements = TaskRequirements(min_gpus=1, min_gpu_memory=4.0)
+         >>> worker_id = manager.find_suitable_worker(requirements)
+     """
+
+     def __init__(self) -> None:
+         """Initialize resource manager."""
+         self.resources: Dict[str, WorkerResources] = {}
+         self.allocation_history: List[Dict[str, Any]] = []
+
+     def register_worker(self, worker_id: str, resources: Dict[str, Any]) -> None:
+         """
+         Register worker with initial resources.
+
+         Args:
+             worker_id: Unique worker identifier
+             resources: Initial resource state
+         """
+         self.resources[worker_id] = WorkerResources(worker_id=worker_id, **resources)
+
+         logger.info(
+             f"Registered worker {worker_id}: "
+             f"{resources.get('total_gpus', 0)} GPUs, "
+             f"{resources.get('gpu_memory_total', 0):.1f}GB GPU memory"
+         )
+
+     def update_resources(self, worker_id: str, resources: Dict[str, Any]) -> None:
+         """
+         Update worker resource information.
+
+         Args:
+             worker_id: Worker identifier
+             resources: Updated resource state
+         """
+         if worker_id not in self.resources:
+             self.register_worker(worker_id, resources)
+         else:
+             # Update existing
+             for key, value in resources.items():
+                 if hasattr(self.resources[worker_id], key):
+                     setattr(self.resources[worker_id], key, value)
+
+     def find_suitable_worker(
+         self, requirements: TaskRequirements, strategy: str = "best_fit"
+     ) -> Optional[str]:
+         """
+         Find worker that meets task requirements.
+
+         Args:
+             requirements: Task resource requirements
+             strategy: Placement strategy
+                 - 'first_fit': First worker that fits
+                 - 'best_fit': Worker with least excess capacity
+                 - 'worst_fit': Worker with most excess capacity
+
+         Returns:
+             worker_id or None if no suitable worker
+         """
+         suitable_workers = [
+             (wid, res) for wid, res in self.resources.items() if res.can_run_task(requirements)
+         ]
+
+         if not suitable_workers:
+             logger.debug("No suitable worker found for task requirements")
+             return None
+
+         # Apply placement strategy
+         if strategy == "first_fit":
+             return suitable_workers[0][0]
+
+         elif strategy == "best_fit":
+             # Worker with least excess GPUs
+             best = min(
+                 suitable_workers,
+                 key=lambda x: x[1].available_gpus - requirements.min_gpus,
+             )
+             return best[0]
+
+         elif strategy == "worst_fit":
+             # Worker with most excess GPUs (best for load balancing)
+             worst = max(
+                 suitable_workers,
+                 key=lambda x: x[1].available_gpus,
+             )
+             return worst[0]
+
+         else:
+             raise ValueError(f"Unknown placement strategy: {strategy}")
+
+     def find_all_suitable_workers(self, requirements: TaskRequirements) -> List[str]:
+         """
+         Find all workers that meet requirements.
+
+         Args:
+             requirements: Task requirements
+
+         Returns:
+             List of suitable worker IDs
+         """
+         return [wid for wid, res in self.resources.items() if res.can_run_task(requirements)]
+
+     def allocate_resources(self, worker_id: str, requirements: TaskRequirements) -> bool:
+         """
+         Allocate resources for task on worker.
+
+         Args:
+             worker_id: Worker to allocate on
+             requirements: Resource requirements
+
+         Returns:
+             True if allocation successful
+         """
+         if worker_id not in self.resources:
+             logger.error(f"Unknown worker: {worker_id}")
+             return False
+
+         success = self.resources[worker_id].allocate(requirements)
+
+         if success:
+             self.allocation_history.append(
+                 {
+                     "worker_id": worker_id,
+                     "gpus": requirements.min_gpus,
+                     "memory": requirements.min_gpu_memory,
+                 }
+             )
+
+             logger.debug(
+                 f"Allocated resources on {worker_id}: "
+                 f"{requirements.min_gpus} GPUs, "
+                 f"{requirements.min_gpu_memory:.1f}GB memory"
+             )
+
+         return success
+
+     def release_resources(self, worker_id: str, requirements: TaskRequirements) -> None:
+         """
+         Release allocated resources.
+
+         Args:
+             worker_id: Worker to release resources from
+             requirements: Resource requirements to release
+         """
+         if worker_id in self.resources:
+             self.resources[worker_id].release(requirements)
+
+             logger.debug(
+                 f"Released resources on {worker_id}: "
+                 f"{requirements.min_gpus} GPUs, "
+                 f"{requirements.min_gpu_memory:.1f}GB memory"
+             )
+
+     def get_total_resources(self) -> Dict[str, Any]:
+         """
+         Get total resources across all workers.
+
+         Returns:
+             Dictionary with aggregate resource statistics
+         """
+         total_gpus = sum(r.total_gpus for r in self.resources.values())
+         available_gpus = sum(r.available_gpus for r in self.resources.values())
+         total_gpu_memory = sum(r.gpu_memory_total for r in self.resources.values())
+         available_gpu_memory = sum(r.gpu_memory_available for r in self.resources.values())
+
+         return {
+             "total_workers": len(self.resources),
+             "total_gpus": total_gpus,
+             "available_gpus": available_gpus,
+             "gpu_utilization": (
+                 (1.0 - available_gpus / total_gpus) * 100 if total_gpus > 0 else 0.0
+             ),
+             "total_gpu_memory": total_gpu_memory,
+             "available_gpu_memory": available_gpu_memory,
+             "memory_utilization": (
+                 (1.0 - available_gpu_memory / total_gpu_memory) * 100
+                 if total_gpu_memory > 0
+                 else 0.0
+             ),
+         }
+
+     def get_worker_resources(self, worker_id: str) -> Optional[WorkerResources]:
+         """
+         Get resources for specific worker.
+
+         Args:
+             worker_id: Worker identifier
+
+         Returns:
+             WorkerResources or None if worker not found
+         """
+         return self.resources.get(worker_id)
+
+     def get_statistics(self) -> Dict[str, Any]:
+         """Get resource manager statistics."""
+         stats = self.get_total_resources()
+         stats["allocations"] = len(self.allocation_history)
+         stats["worker_details"] = {
+             wid: {
+                 "total_gpus": res.total_gpus,
+                 "available_gpus": res.available_gpus,
+                 "gpu_utilization": res.gpu_utilization,
+                 "cpu_percent": res.cpu_percent,
+                 "memory_percent": res.memory_percent,
+             }
+             for wid, res in self.resources.items()
+         }
+         return stats
+
+
+ class GPUAffinityManager:
+     """
+     Manage GPU affinity for tasks.
+
+     Ensures tasks are pinned to specific GPUs for better performance.
+     """
+
+     def __init__(self) -> None:
+         """Initialize GPU affinity manager."""
+         self.gpu_assignments: Dict[str, List[int]] = {}
+
+     def assign_gpus(self, worker_id: str, task_id: str, gpu_ids: List[int]) -> None:
+         """
+         Assign specific GPUs to task.
+
+         Args:
+             worker_id: Worker identifier
+             task_id: Task identifier
+             gpu_ids: List of GPU IDs to assign
+         """
+         key = f"{worker_id}:{task_id}"
+         self.gpu_assignments[key] = gpu_ids
+
+         logger.debug(f"Assigned GPUs {gpu_ids} to task {task_id} on {worker_id}")
+
+     def get_assigned_gpus(self, worker_id: str, task_id: str) -> Optional[List[int]]:
+         """
+         Get assigned GPUs for task.
+
+         Args:
+             worker_id: Worker identifier
+             task_id: Task identifier
+
+         Returns:
+             List of GPU IDs or None
+         """
+         key = f"{worker_id}:{task_id}"
+         return self.gpu_assignments.get(key)
+
+     def release_gpus(self, worker_id: str, task_id: str) -> None:
+         """
+         Release GPU assignment.
+
+         Args:
+             worker_id: Worker identifier
+             task_id: Task identifier
+         """
+         key = f"{worker_id}:{task_id}"
+         if key in self.gpu_assignments:
+             del self.gpu_assignments[key]
+             logger.debug(f"Released GPU assignment for task {task_id}")