morphml 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of morphml might be problematic. Click here for more details.

Files changed (158) hide show
  1. morphml/__init__.py +14 -0
  2. morphml/api/__init__.py +26 -0
  3. morphml/api/app.py +326 -0
  4. morphml/api/auth.py +193 -0
  5. morphml/api/client.py +338 -0
  6. morphml/api/models.py +132 -0
  7. morphml/api/rate_limit.py +192 -0
  8. morphml/benchmarking/__init__.py +36 -0
  9. morphml/benchmarking/comparison.py +430 -0
  10. morphml/benchmarks/__init__.py +56 -0
  11. morphml/benchmarks/comparator.py +409 -0
  12. morphml/benchmarks/datasets.py +280 -0
  13. morphml/benchmarks/metrics.py +199 -0
  14. morphml/benchmarks/openml_suite.py +201 -0
  15. morphml/benchmarks/problems.py +289 -0
  16. morphml/benchmarks/suite.py +318 -0
  17. morphml/cli/__init__.py +5 -0
  18. morphml/cli/commands/experiment.py +329 -0
  19. morphml/cli/main.py +457 -0
  20. morphml/cli/quickstart.py +312 -0
  21. morphml/config.py +278 -0
  22. morphml/constraints/__init__.py +19 -0
  23. morphml/constraints/handler.py +205 -0
  24. morphml/constraints/predicates.py +285 -0
  25. morphml/core/__init__.py +3 -0
  26. morphml/core/crossover.py +449 -0
  27. morphml/core/dsl/README.md +359 -0
  28. morphml/core/dsl/__init__.py +72 -0
  29. morphml/core/dsl/ast_nodes.py +364 -0
  30. morphml/core/dsl/compiler.py +318 -0
  31. morphml/core/dsl/layers.py +368 -0
  32. morphml/core/dsl/lexer.py +336 -0
  33. morphml/core/dsl/parser.py +455 -0
  34. morphml/core/dsl/search_space.py +386 -0
  35. morphml/core/dsl/syntax.py +199 -0
  36. morphml/core/dsl/type_system.py +361 -0
  37. morphml/core/dsl/validator.py +386 -0
  38. morphml/core/graph/__init__.py +40 -0
  39. morphml/core/graph/edge.py +124 -0
  40. morphml/core/graph/graph.py +507 -0
  41. morphml/core/graph/mutations.py +409 -0
  42. morphml/core/graph/node.py +196 -0
  43. morphml/core/graph/serialization.py +361 -0
  44. morphml/core/graph/visualization.py +431 -0
  45. morphml/core/objectives/__init__.py +20 -0
  46. morphml/core/search/__init__.py +33 -0
  47. morphml/core/search/individual.py +252 -0
  48. morphml/core/search/parameters.py +453 -0
  49. morphml/core/search/population.py +375 -0
  50. morphml/core/search/search_engine.py +340 -0
  51. morphml/distributed/__init__.py +76 -0
  52. morphml/distributed/fault_tolerance.py +497 -0
  53. morphml/distributed/health_monitor.py +348 -0
  54. morphml/distributed/master.py +709 -0
  55. morphml/distributed/proto/README.md +224 -0
  56. morphml/distributed/proto/__init__.py +74 -0
  57. morphml/distributed/proto/worker.proto +170 -0
  58. morphml/distributed/proto/worker_pb2.py +79 -0
  59. morphml/distributed/proto/worker_pb2_grpc.py +423 -0
  60. morphml/distributed/resource_manager.py +416 -0
  61. morphml/distributed/scheduler.py +567 -0
  62. morphml/distributed/storage/__init__.py +33 -0
  63. morphml/distributed/storage/artifacts.py +381 -0
  64. morphml/distributed/storage/cache.py +366 -0
  65. morphml/distributed/storage/checkpointing.py +329 -0
  66. morphml/distributed/storage/database.py +459 -0
  67. morphml/distributed/worker.py +549 -0
  68. morphml/evaluation/__init__.py +5 -0
  69. morphml/evaluation/heuristic.py +237 -0
  70. morphml/exceptions.py +55 -0
  71. morphml/execution/__init__.py +5 -0
  72. morphml/execution/local_executor.py +350 -0
  73. morphml/integrations/__init__.py +28 -0
  74. morphml/integrations/jax_adapter.py +206 -0
  75. morphml/integrations/pytorch_adapter.py +530 -0
  76. morphml/integrations/sklearn_adapter.py +206 -0
  77. morphml/integrations/tensorflow_adapter.py +230 -0
  78. morphml/logging_config.py +93 -0
  79. morphml/meta_learning/__init__.py +66 -0
  80. morphml/meta_learning/architecture_similarity.py +277 -0
  81. morphml/meta_learning/experiment_database.py +240 -0
  82. morphml/meta_learning/knowledge_base/__init__.py +19 -0
  83. morphml/meta_learning/knowledge_base/embedder.py +179 -0
  84. morphml/meta_learning/knowledge_base/knowledge_base.py +313 -0
  85. morphml/meta_learning/knowledge_base/meta_features.py +265 -0
  86. morphml/meta_learning/knowledge_base/vector_store.py +271 -0
  87. morphml/meta_learning/predictors/__init__.py +27 -0
  88. morphml/meta_learning/predictors/ensemble.py +221 -0
  89. morphml/meta_learning/predictors/gnn_predictor.py +552 -0
  90. morphml/meta_learning/predictors/learning_curve.py +231 -0
  91. morphml/meta_learning/predictors/proxy_metrics.py +261 -0
  92. morphml/meta_learning/strategy_evolution/__init__.py +27 -0
  93. morphml/meta_learning/strategy_evolution/adaptive_optimizer.py +226 -0
  94. morphml/meta_learning/strategy_evolution/bandit.py +276 -0
  95. morphml/meta_learning/strategy_evolution/portfolio.py +230 -0
  96. morphml/meta_learning/transfer.py +581 -0
  97. morphml/meta_learning/warm_start.py +286 -0
  98. morphml/optimizers/__init__.py +74 -0
  99. morphml/optimizers/adaptive_operators.py +399 -0
  100. morphml/optimizers/bayesian/__init__.py +52 -0
  101. morphml/optimizers/bayesian/acquisition.py +387 -0
  102. morphml/optimizers/bayesian/base.py +319 -0
  103. morphml/optimizers/bayesian/gaussian_process.py +635 -0
  104. morphml/optimizers/bayesian/smac.py +534 -0
  105. morphml/optimizers/bayesian/tpe.py +411 -0
  106. morphml/optimizers/differential_evolution.py +220 -0
  107. morphml/optimizers/evolutionary/__init__.py +61 -0
  108. morphml/optimizers/evolutionary/cma_es.py +416 -0
  109. morphml/optimizers/evolutionary/differential_evolution.py +556 -0
  110. morphml/optimizers/evolutionary/encoding.py +426 -0
  111. morphml/optimizers/evolutionary/particle_swarm.py +449 -0
  112. morphml/optimizers/genetic_algorithm.py +486 -0
  113. morphml/optimizers/gradient_based/__init__.py +22 -0
  114. morphml/optimizers/gradient_based/darts.py +550 -0
  115. morphml/optimizers/gradient_based/enas.py +585 -0
  116. morphml/optimizers/gradient_based/operations.py +474 -0
  117. morphml/optimizers/gradient_based/utils.py +601 -0
  118. morphml/optimizers/hill_climbing.py +169 -0
  119. morphml/optimizers/multi_objective/__init__.py +56 -0
  120. morphml/optimizers/multi_objective/indicators.py +504 -0
  121. morphml/optimizers/multi_objective/nsga2.py +647 -0
  122. morphml/optimizers/multi_objective/visualization.py +427 -0
  123. morphml/optimizers/nsga2.py +308 -0
  124. morphml/optimizers/random_search.py +172 -0
  125. morphml/optimizers/simulated_annealing.py +181 -0
  126. morphml/plugins/__init__.py +35 -0
  127. morphml/plugins/custom_evaluator_example.py +81 -0
  128. morphml/plugins/custom_optimizer_example.py +63 -0
  129. morphml/plugins/plugin_system.py +454 -0
  130. morphml/reports/__init__.py +30 -0
  131. morphml/reports/generator.py +362 -0
  132. morphml/tracking/__init__.py +7 -0
  133. morphml/tracking/experiment.py +309 -0
  134. morphml/tracking/logger.py +301 -0
  135. morphml/tracking/reporter.py +357 -0
  136. morphml/utils/__init__.py +6 -0
  137. morphml/utils/checkpoint.py +189 -0
  138. morphml/utils/comparison.py +390 -0
  139. morphml/utils/export.py +407 -0
  140. morphml/utils/progress.py +392 -0
  141. morphml/utils/validation.py +392 -0
  142. morphml/version.py +7 -0
  143. morphml/visualization/__init__.py +50 -0
  144. morphml/visualization/analytics.py +423 -0
  145. morphml/visualization/architecture_diagrams.py +353 -0
  146. morphml/visualization/architecture_plot.py +223 -0
  147. morphml/visualization/convergence_plot.py +174 -0
  148. morphml/visualization/crossover_viz.py +386 -0
  149. morphml/visualization/graph_viz.py +338 -0
  150. morphml/visualization/pareto_plot.py +149 -0
  151. morphml/visualization/plotly_dashboards.py +422 -0
  152. morphml/visualization/population.py +309 -0
  153. morphml/visualization/progress.py +260 -0
  154. morphml-1.0.0.dist-info/METADATA +434 -0
  155. morphml-1.0.0.dist-info/RECORD +158 -0
  156. morphml-1.0.0.dist-info/WHEEL +4 -0
  157. morphml-1.0.0.dist-info/entry_points.txt +3 -0
  158. morphml-1.0.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,348 @@
1
+ """System health monitoring for workers.
2
+
3
+ Tracks CPU, memory, GPU, disk, and network health metrics.
4
+
5
+ Author: Eshan Roy <eshanized@proton.me>
6
+ Organization: TONMOY INFRASTRUCTURE & VISION
7
+ """
8
+
9
import platform
import time
from dataclasses import dataclass, field
from typing import Any, Callable, Dict, List, Optional, Tuple
13
+
14
+ try:
15
+ import psutil
16
+
17
+ PSUTIL_AVAILABLE = True
18
+ except ImportError:
19
+ PSUTIL_AVAILABLE = False
20
+
21
+ try:
22
+ import pynvml
23
+
24
+ PYNVML_AVAILABLE = True
25
+ try:
26
+ pynvml.nvmlInit()
27
+ except Exception:
28
+ PYNVML_AVAILABLE = False
29
+ except ImportError:
30
+ PYNVML_AVAILABLE = False
31
+
32
+ from morphml.logging_config import get_logger
33
+
34
+ logger = get_logger(__name__)
35
+
36
+
37
@dataclass
class HealthMetrics:
    """Point-in-time snapshot of host health.

    Every field has a default, so ``HealthMetrics()`` is a valid "nothing
    measured yet" snapshot that is reported as healthy.
    """

    # Creation time of the snapshot, taken from time.time().
    timestamp: float = field(default_factory=time.time)

    # Host CPU and memory readings.
    cpu_percent: float = 0.0
    memory_percent: float = 0.0
    memory_available_gb: float = 0.0

    # Disk readings.
    disk_percent: float = 0.0
    disk_free_gb: float = 0.0

    # One dictionary per GPU; a fresh list is created for each instance.
    gpu_stats: List[Dict[str, Any]] = field(default_factory=list)

    # None means "not measured".
    network_latency_ms: Optional[float] = None

    # Overall verdict plus the human-readable reasons behind a failure.
    is_healthy: bool = True
    issues: List[str] = field(default_factory=list)
51
+
52
+
53
class HealthMonitor:
    """
    Monitor system health metrics for workers.

    Tracks:
    - CPU utilization
    - Memory usage
    - Disk space (filesystem containing ``/``)
    - GPU utilization, memory and temperature (only when pynvml is usable)

    Network latency is not measured by this class;
    ``HealthMetrics.network_latency_ms`` keeps its default value.

    Args:
        thresholds: Health thresholds dictionary. A reading is reported as
            an issue only when it is strictly greater than its threshold.
            - cpu_critical: CPU % to mark unhealthy (default: 95)
            - memory_critical: Memory % to mark unhealthy (default: 95)
            - disk_critical: Disk % to mark unhealthy (default: 95)
            - gpu_temp_critical: GPU temperature °C (default: 85)
            - gpu_memory_critical: GPU memory % (default: 95)

    Example:
        >>> monitor = HealthMonitor()
        >>> metrics = monitor.get_health_metrics()
        >>> if not metrics.is_healthy:
        ...     print(f"Health issues: {metrics.issues}")
    """

    def __init__(self, thresholds: Optional[Dict[str, float]] = None):
        """Store the thresholds and detect how many GPUs NVML can see."""
        if not PSUTIL_AVAILABLE:
            logger.warning("psutil not available, health monitoring limited")

        thresholds = thresholds or {}
        self.cpu_critical = thresholds.get("cpu_critical", 95.0)
        self.memory_critical = thresholds.get("memory_critical", 95.0)
        self.disk_critical = thresholds.get("disk_critical", 95.0)
        self.gpu_temp_critical = thresholds.get("gpu_temp_critical", 85.0)
        self.gpu_memory_critical = thresholds.get("gpu_memory_critical", 95.0)

        # Initialize GPU monitoring; a count of 0 disables every GPU query.
        self.gpu_count = 0
        if PYNVML_AVAILABLE:
            try:
                self.gpu_count = pynvml.nvmlDeviceGetCount()
                logger.info(f"Detected {self.gpu_count} GPUs")
            except Exception as e:
                logger.warning(f"Failed to detect GPUs: {e}")

        logger.info("Initialized HealthMonitor")

    def get_health_metrics(self) -> HealthMetrics:
        """
        Get current system health metrics.

        Host metrics (CPU, memory, disk) need psutil and GPU metrics need
        pynvml. Each source is collected independently, so a missing psutil
        no longer hides GPU problems. Readings that cannot be collected keep
        their ``HealthMetrics`` defaults.

        Returns:
            HealthMetrics object with ``is_healthy`` and ``issues`` filled in
        """
        metrics = HealthMetrics()

        if PSUTIL_AVAILABLE:
            self._collect_host_metrics(metrics)

        if PYNVML_AVAILABLE and self.gpu_count > 0:
            metrics.gpu_stats = self._get_gpu_stats()

        metrics.is_healthy, metrics.issues = self._check_health(metrics)

        return metrics

    def _collect_host_metrics(self, metrics: HealthMetrics) -> None:
        """Fill CPU, memory and disk readings in place using psutil.

        Each reading is best-effort: a failure is logged and the affected
        fields keep their defaults.
        """
        # CPU (blocks for the 0.1 s sampling interval)
        try:
            metrics.cpu_percent = psutil.cpu_percent(interval=0.1)
        except Exception as e:
            logger.warning(f"Failed to get CPU metrics: {e}")

        # Memory
        try:
            mem = psutil.virtual_memory()
            metrics.memory_percent = mem.percent
            metrics.memory_available_gb = mem.available / (1024**3)
        except Exception as e:
            logger.warning(f"Failed to get memory metrics: {e}")

        # Disk
        try:
            disk = psutil.disk_usage("/")
            metrics.disk_percent = disk.percent
            metrics.disk_free_gb = disk.free / (1024**3)
        except Exception as e:
            logger.warning(f"Failed to get disk metrics: {e}")

    @staticmethod
    def _device_name(handle: Any) -> str:
        """Return the GPU name for *handle*, decoding a bytes result as UTF-8."""
        name = pynvml.nvmlDeviceGetName(handle)
        if isinstance(name, bytes):
            name = name.decode("utf-8")
        return name

    def _get_gpu_stats(self) -> List[Dict[str, Any]]:
        """Get GPU statistics using pynvml.

        Returns:
            One dictionary per GPU that could be queried, with the keys
            ``id``, ``name``, ``load``, ``memory_used_mb``,
            ``memory_total_mb``, ``memory_percent`` and ``temperature``.
            GPUs whose query fails are logged and left out.
        """
        gpu_stats = []

        for i in range(self.gpu_count):
            try:
                handle = pynvml.nvmlDeviceGetHandleByIndex(i)

                util = pynvml.nvmlDeviceGetUtilizationRates(handle)
                mem_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
                temp = pynvml.nvmlDeviceGetTemperature(handle, pynvml.NVML_TEMPERATURE_GPU)
                name = self._device_name(handle)

                # Guard against a zero total so the GPU entry is not dropped
                # through a ZeroDivisionError.
                total = mem_info.total
                memory_percent = (mem_info.used / total) * 100 if total else 0.0

                gpu_stats.append(
                    {
                        "id": i,
                        "name": name,
                        "load": util.gpu,
                        "memory_used_mb": mem_info.used / (1024**2),
                        "memory_total_mb": total / (1024**2),
                        "memory_percent": memory_percent,
                        "temperature": temp,
                    }
                )

            except Exception as e:
                logger.warning(f"Failed to get stats for GPU {i}: {e}")

        return gpu_stats

    def _check_health(self, metrics: HealthMetrics) -> Tuple[bool, List[str]]:
        """
        Check if system is healthy based on metrics.

        Args:
            metrics: Snapshot to evaluate against the configured thresholds

        Returns:
            (is_healthy, list of issues); healthy means the list is empty
        """
        issues = []

        # Check CPU
        if metrics.cpu_percent > self.cpu_critical:
            issues.append(f"CPU overload: {metrics.cpu_percent:.1f}%")

        # Check memory
        if metrics.memory_percent > self.memory_critical:
            issues.append(f"Memory critical: {metrics.memory_percent:.1f}%")

        # Check disk
        if metrics.disk_percent > self.disk_critical:
            issues.append(
                f"Disk critical: {metrics.disk_percent:.1f}% "
                f"({metrics.disk_free_gb:.1f}GB free)"
            )

        # Check GPUs
        for gpu in metrics.gpu_stats:
            gpu_id = gpu["id"]

            # Temperature
            if gpu["temperature"] > self.gpu_temp_critical:
                issues.append(f"GPU {gpu_id} overheating: {gpu['temperature']}°C")

            # Memory
            if gpu["memory_percent"] > self.gpu_memory_critical:
                issues.append(f"GPU {gpu_id} memory critical: {gpu['memory_percent']:.1f}%")

        return not issues, issues

    def get_system_info(self) -> Dict[str, Any]:
        """
        Get static system information.

        Returns:
            System info dictionary. Platform fields are always present;
            CPU, memory and disk totals are added when psutil is available;
            ``gpus`` is always present and is empty when no GPU can be queried.
        """
        info = {
            "platform": platform.system(),
            "platform_release": platform.release(),
            "platform_version": platform.version(),
            "architecture": platform.machine(),
            "processor": platform.processor(),
            "python_version": platform.python_version(),
        }

        if PSUTIL_AVAILABLE:
            try:
                info["cpu_count_logical"] = psutil.cpu_count(logical=True)
                info["cpu_count_physical"] = psutil.cpu_count(logical=False)
                info["memory_total_gb"] = psutil.virtual_memory().total / (1024**3)
                info["disk_total_gb"] = psutil.disk_usage("/").total / (1024**3)
            except Exception as e:
                logger.warning(f"Failed to get system info: {e}")

        gpu_info = []
        if PYNVML_AVAILABLE and self.gpu_count > 0:
            for i in range(self.gpu_count):
                try:
                    handle = pynvml.nvmlDeviceGetHandleByIndex(i)
                    name = self._device_name(handle)
                    mem_info = pynvml.nvmlDeviceGetMemoryInfo(handle)

                    gpu_info.append(
                        {
                            "id": i,
                            "name": name,
                            "memory_total_gb": mem_info.total / (1024**3),
                        }
                    )
                except Exception as e:
                    logger.warning(f"Failed to get info for GPU {i}: {e}")

        info["gpus"] = gpu_info

        return info

    def monitor_continuously(
        self,
        interval: float = 60.0,
        callback: Optional[Callable[[HealthMetrics], Any]] = None,
    ) -> None:
        """
        Continuously monitor health (blocking).

        Loops until interrupted with ``KeyboardInterrupt``. An exception
        raised by ``callback`` is not caught here and ends the loop.

        Args:
            interval: Monitoring interval in seconds
            callback: Optional callback function(metrics), called once per
                iteration; its return value is ignored
        """
        logger.info(f"Starting continuous monitoring (interval={interval}s)")

        try:
            while True:
                metrics = self.get_health_metrics()

                if not metrics.is_healthy:
                    logger.warning(f"Health issues detected: {metrics.issues}")

                if callback:
                    callback(metrics)

                time.sleep(interval)

        except KeyboardInterrupt:
            logger.info("Stopping continuous monitoring")

    def cleanup(self) -> None:
        """Cleanup resources.

        Shuts NVML down and stops this monitor from issuing further GPU
        queries. NVML is initialised once at module import, so other
        monitors in the same process may lose GPU access as well.
        """
        if not PYNVML_AVAILABLE:
            return

        try:
            pynvml.nvmlShutdown()
        except Exception as e:
            # Best-effort: a failed shutdown must not break the caller.
            logger.debug(f"NVML shutdown failed: {e}")
        finally:
            # Without this, every later call would retry each GPU and log
            # one warning per device.
            self.gpu_count = 0
311
+
312
+
313
def get_system_health() -> Dict[str, Any]:
    """
    Collect one health snapshot and return it as a plain dictionary.

    A default-configured ``HealthMonitor`` is created for the call.

    Returns:
        Dictionary holding the snapshot's timestamp, CPU, memory and disk
        readings, the per-GPU stats (under ``gpus``), the overall
        ``is_healthy`` flag and the list of ``issues``
    """
    snapshot = HealthMonitor().get_health_metrics()

    # (output key, HealthMetrics attribute); only the GPU entry is renamed.
    exported = (
        ("timestamp", "timestamp"),
        ("cpu_percent", "cpu_percent"),
        ("memory_percent", "memory_percent"),
        ("memory_available_gb", "memory_available_gb"),
        ("disk_percent", "disk_percent"),
        ("disk_free_gb", "disk_free_gb"),
        ("gpus", "gpu_stats"),
        ("is_healthy", "is_healthy"),
        ("issues", "issues"),
    )

    return {key: getattr(snapshot, attr) for key, attr in exported}
334
+
335
+
336
def is_system_healthy(thresholds: Optional[Dict[str, float]] = None) -> bool:
    """
    Report whether the system currently passes every health check.

    Args:
        thresholds: Optional custom thresholds, handed unchanged to
            ``HealthMonitor``

    Returns:
        The ``is_healthy`` flag of a freshly collected metrics snapshot
    """
    return HealthMonitor(thresholds).get_health_metrics().is_healthy