morphml 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of morphml might be problematic. Click here for more details.

Files changed (158) hide show
  1. morphml/__init__.py +14 -0
  2. morphml/api/__init__.py +26 -0
  3. morphml/api/app.py +326 -0
  4. morphml/api/auth.py +193 -0
  5. morphml/api/client.py +338 -0
  6. morphml/api/models.py +132 -0
  7. morphml/api/rate_limit.py +192 -0
  8. morphml/benchmarking/__init__.py +36 -0
  9. morphml/benchmarking/comparison.py +430 -0
  10. morphml/benchmarks/__init__.py +56 -0
  11. morphml/benchmarks/comparator.py +409 -0
  12. morphml/benchmarks/datasets.py +280 -0
  13. morphml/benchmarks/metrics.py +199 -0
  14. morphml/benchmarks/openml_suite.py +201 -0
  15. morphml/benchmarks/problems.py +289 -0
  16. morphml/benchmarks/suite.py +318 -0
  17. morphml/cli/__init__.py +5 -0
  18. morphml/cli/commands/experiment.py +329 -0
  19. morphml/cli/main.py +457 -0
  20. morphml/cli/quickstart.py +312 -0
  21. morphml/config.py +278 -0
  22. morphml/constraints/__init__.py +19 -0
  23. morphml/constraints/handler.py +205 -0
  24. morphml/constraints/predicates.py +285 -0
  25. morphml/core/__init__.py +3 -0
  26. morphml/core/crossover.py +449 -0
  27. morphml/core/dsl/README.md +359 -0
  28. morphml/core/dsl/__init__.py +72 -0
  29. morphml/core/dsl/ast_nodes.py +364 -0
  30. morphml/core/dsl/compiler.py +318 -0
  31. morphml/core/dsl/layers.py +368 -0
  32. morphml/core/dsl/lexer.py +336 -0
  33. morphml/core/dsl/parser.py +455 -0
  34. morphml/core/dsl/search_space.py +386 -0
  35. morphml/core/dsl/syntax.py +199 -0
  36. morphml/core/dsl/type_system.py +361 -0
  37. morphml/core/dsl/validator.py +386 -0
  38. morphml/core/graph/__init__.py +40 -0
  39. morphml/core/graph/edge.py +124 -0
  40. morphml/core/graph/graph.py +507 -0
  41. morphml/core/graph/mutations.py +409 -0
  42. morphml/core/graph/node.py +196 -0
  43. morphml/core/graph/serialization.py +361 -0
  44. morphml/core/graph/visualization.py +431 -0
  45. morphml/core/objectives/__init__.py +20 -0
  46. morphml/core/search/__init__.py +33 -0
  47. morphml/core/search/individual.py +252 -0
  48. morphml/core/search/parameters.py +453 -0
  49. morphml/core/search/population.py +375 -0
  50. morphml/core/search/search_engine.py +340 -0
  51. morphml/distributed/__init__.py +76 -0
  52. morphml/distributed/fault_tolerance.py +497 -0
  53. morphml/distributed/health_monitor.py +348 -0
  54. morphml/distributed/master.py +709 -0
  55. morphml/distributed/proto/README.md +224 -0
  56. morphml/distributed/proto/__init__.py +74 -0
  57. morphml/distributed/proto/worker.proto +170 -0
  58. morphml/distributed/proto/worker_pb2.py +79 -0
  59. morphml/distributed/proto/worker_pb2_grpc.py +423 -0
  60. morphml/distributed/resource_manager.py +416 -0
  61. morphml/distributed/scheduler.py +567 -0
  62. morphml/distributed/storage/__init__.py +33 -0
  63. morphml/distributed/storage/artifacts.py +381 -0
  64. morphml/distributed/storage/cache.py +366 -0
  65. morphml/distributed/storage/checkpointing.py +329 -0
  66. morphml/distributed/storage/database.py +459 -0
  67. morphml/distributed/worker.py +549 -0
  68. morphml/evaluation/__init__.py +5 -0
  69. morphml/evaluation/heuristic.py +237 -0
  70. morphml/exceptions.py +55 -0
  71. morphml/execution/__init__.py +5 -0
  72. morphml/execution/local_executor.py +350 -0
  73. morphml/integrations/__init__.py +28 -0
  74. morphml/integrations/jax_adapter.py +206 -0
  75. morphml/integrations/pytorch_adapter.py +530 -0
  76. morphml/integrations/sklearn_adapter.py +206 -0
  77. morphml/integrations/tensorflow_adapter.py +230 -0
  78. morphml/logging_config.py +93 -0
  79. morphml/meta_learning/__init__.py +66 -0
  80. morphml/meta_learning/architecture_similarity.py +277 -0
  81. morphml/meta_learning/experiment_database.py +240 -0
  82. morphml/meta_learning/knowledge_base/__init__.py +19 -0
  83. morphml/meta_learning/knowledge_base/embedder.py +179 -0
  84. morphml/meta_learning/knowledge_base/knowledge_base.py +313 -0
  85. morphml/meta_learning/knowledge_base/meta_features.py +265 -0
  86. morphml/meta_learning/knowledge_base/vector_store.py +271 -0
  87. morphml/meta_learning/predictors/__init__.py +27 -0
  88. morphml/meta_learning/predictors/ensemble.py +221 -0
  89. morphml/meta_learning/predictors/gnn_predictor.py +552 -0
  90. morphml/meta_learning/predictors/learning_curve.py +231 -0
  91. morphml/meta_learning/predictors/proxy_metrics.py +261 -0
  92. morphml/meta_learning/strategy_evolution/__init__.py +27 -0
  93. morphml/meta_learning/strategy_evolution/adaptive_optimizer.py +226 -0
  94. morphml/meta_learning/strategy_evolution/bandit.py +276 -0
  95. morphml/meta_learning/strategy_evolution/portfolio.py +230 -0
  96. morphml/meta_learning/transfer.py +581 -0
  97. morphml/meta_learning/warm_start.py +286 -0
  98. morphml/optimizers/__init__.py +74 -0
  99. morphml/optimizers/adaptive_operators.py +399 -0
  100. morphml/optimizers/bayesian/__init__.py +52 -0
  101. morphml/optimizers/bayesian/acquisition.py +387 -0
  102. morphml/optimizers/bayesian/base.py +319 -0
  103. morphml/optimizers/bayesian/gaussian_process.py +635 -0
  104. morphml/optimizers/bayesian/smac.py +534 -0
  105. morphml/optimizers/bayesian/tpe.py +411 -0
  106. morphml/optimizers/differential_evolution.py +220 -0
  107. morphml/optimizers/evolutionary/__init__.py +61 -0
  108. morphml/optimizers/evolutionary/cma_es.py +416 -0
  109. morphml/optimizers/evolutionary/differential_evolution.py +556 -0
  110. morphml/optimizers/evolutionary/encoding.py +426 -0
  111. morphml/optimizers/evolutionary/particle_swarm.py +449 -0
  112. morphml/optimizers/genetic_algorithm.py +486 -0
  113. morphml/optimizers/gradient_based/__init__.py +22 -0
  114. morphml/optimizers/gradient_based/darts.py +550 -0
  115. morphml/optimizers/gradient_based/enas.py +585 -0
  116. morphml/optimizers/gradient_based/operations.py +474 -0
  117. morphml/optimizers/gradient_based/utils.py +601 -0
  118. morphml/optimizers/hill_climbing.py +169 -0
  119. morphml/optimizers/multi_objective/__init__.py +56 -0
  120. morphml/optimizers/multi_objective/indicators.py +504 -0
  121. morphml/optimizers/multi_objective/nsga2.py +647 -0
  122. morphml/optimizers/multi_objective/visualization.py +427 -0
  123. morphml/optimizers/nsga2.py +308 -0
  124. morphml/optimizers/random_search.py +172 -0
  125. morphml/optimizers/simulated_annealing.py +181 -0
  126. morphml/plugins/__init__.py +35 -0
  127. morphml/plugins/custom_evaluator_example.py +81 -0
  128. morphml/plugins/custom_optimizer_example.py +63 -0
  129. morphml/plugins/plugin_system.py +454 -0
  130. morphml/reports/__init__.py +30 -0
  131. morphml/reports/generator.py +362 -0
  132. morphml/tracking/__init__.py +7 -0
  133. morphml/tracking/experiment.py +309 -0
  134. morphml/tracking/logger.py +301 -0
  135. morphml/tracking/reporter.py +357 -0
  136. morphml/utils/__init__.py +6 -0
  137. morphml/utils/checkpoint.py +189 -0
  138. morphml/utils/comparison.py +390 -0
  139. morphml/utils/export.py +407 -0
  140. morphml/utils/progress.py +392 -0
  141. morphml/utils/validation.py +392 -0
  142. morphml/version.py +7 -0
  143. morphml/visualization/__init__.py +50 -0
  144. morphml/visualization/analytics.py +423 -0
  145. morphml/visualization/architecture_diagrams.py +353 -0
  146. morphml/visualization/architecture_plot.py +223 -0
  147. morphml/visualization/convergence_plot.py +174 -0
  148. morphml/visualization/crossover_viz.py +386 -0
  149. morphml/visualization/graph_viz.py +338 -0
  150. morphml/visualization/pareto_plot.py +149 -0
  151. morphml/visualization/plotly_dashboards.py +422 -0
  152. morphml/visualization/population.py +309 -0
  153. morphml/visualization/progress.py +260 -0
  154. morphml-1.0.0.dist-info/METADATA +434 -0
  155. morphml-1.0.0.dist-info/RECORD +158 -0
  156. morphml-1.0.0.dist-info/WHEEL +4 -0
  157. morphml-1.0.0.dist-info/entry_points.txt +3 -0
  158. morphml-1.0.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,549 @@
1
+ """Worker node for distributed architecture evaluation.
2
+
3
+ Workers execute architecture evaluation tasks assigned by the master node.
4
+
5
+ Author: Eshan Roy <eshanized@proton.me>
6
+ Organization: TONMOY INFRASTRUCTURE & VISION
7
+ """
8
+
9
+ import socket
10
+ import threading
11
+ import time
12
+ import uuid
13
+ from concurrent import futures
14
+ from typing import Any, Dict, Optional
15
+
16
try:
    import grpc

    from morphml.distributed.proto import worker_pb2, worker_pb2_grpc

    GRPC_AVAILABLE = True
except ImportError:
    GRPC_AVAILABLE = False

    # Create stub modules when grpc is not available
    class _StubModule:
        """Placeholder bound to the gRPC module names when the import above fails.

        Accessing any regular attribute raises ``ImportError`` carrying an
        install hint, so the failure surfaces at the point of use rather than
        at import time of this module.

        Dunder lookups raise ``AttributeError`` instead: ``hasattr()``,
        ``copy`` and ``inspect`` probe objects for optional special attributes
        and only treat ``AttributeError`` as "not present".
        """

        def __getattr__(self, name):
            # __getattr__ only runs for names that normal lookup did not find.
            if name.startswith("__") and name.endswith("__"):
                raise AttributeError(name)
            raise ImportError(
                "grpc is not installed. Install with: pip install grpcio grpcio-tools"
            )

    worker_pb2 = _StubModule()
    worker_pb2_grpc = _StubModule()
    grpc = _StubModule()
35
+
36
+ from morphml.core.graph import ModelGraph
37
+ from morphml.exceptions import DistributedError
38
+ from morphml.logging_config import get_logger
39
+
40
+ logger = get_logger(__name__)
41
+
42
+
43
class WorkerNode:
    """
    Worker node for distributed architecture evaluation.

    Responsibilities:
    1. Register with master node
    2. Receive evaluation tasks
    3. Train and evaluate architectures
    4. Send results back to master
    5. Send periodic heartbeat
    6. Handle graceful shutdown

    Args:
        config: Worker configuration
            - worker_id: Unique worker ID (generated if not provided)
            - master_host: Master node hostname/IP (required)
            - master_port: Master node port (default: 50051)
            - port: Worker port (default: 50052)
            - num_gpus: Number of GPUs available (default: 1)
            - gpu_ids: Specific GPU IDs to use (default: range(num_gpus))
            - heartbeat_interval: Heartbeat interval (seconds, default: 10)
            - evaluator: Custom evaluation function

    Example:
        >>> worker = WorkerNode({
        ...     'master_host': 'localhost',
        ...     'master_port': 50051,
        ...     'port': 50052,
        ...     'num_gpus': 1,
        ...     'evaluator': my_eval_function
        ... })
        >>> worker.start()
        >>> # Worker runs until stopped
        >>> worker.stop()
    """

    def __init__(self, config: Dict[str, Any]):
        """Initialize worker node.

        Raises:
            DistributedError: If gRPC could not be imported.
            KeyError: If ``master_host`` is missing from ``config``.
        """
        if not GRPC_AVAILABLE:
            raise DistributedError(
                "gRPC not available. Install with: pip install grpcio grpcio-tools"
            )

        self.config = config

        # Worker identification
        self.worker_id = config.get("worker_id", str(uuid.uuid4()))
        self.master_host = config["master_host"]
        self.master_port = config.get("master_port", 50051)
        self.port = config.get("port", 50052)

        # GPU configuration
        self.num_gpus = config.get("num_gpus", 1)
        self.gpu_ids = config.get("gpu_ids", list(range(self.num_gpus)))

        # Evaluation configuration
        self.evaluator = config.get("evaluator")
        self.heartbeat_interval = config.get("heartbeat_interval", 10)

        # State
        self.running = False
        self.current_task_id: Optional[str] = None
        self.tasks_completed = 0
        self.tasks_failed = 0
        self.start_time = time.time()

        # Set by stop() so the heartbeat thread wakes up immediately instead
        # of sleeping out the rest of its interval.
        self._stop_event = threading.Event()

        # gRPC server
        self.server: Optional[grpc.Server] = None

        logger.info(
            f"Initialized WorkerNode (id={self.worker_id[:12]}) " f"with {self.num_gpus} GPU(s)"
        )

    def _master_address(self) -> str:
        """Return the ``host:port`` target of the master node."""
        return f"{self.master_host}:{self.master_port}"

    @staticmethod
    def _numeric_metrics(metrics: Dict[str, Any]) -> Dict[str, float]:
        """Return only the numeric entries of ``metrics``, converted to float.

        ``evaluate_architecture`` adds a string ``worker_id`` to its result,
        which does not fit the ``Dict[str, float]`` contract of the metrics
        sent to the master.
        NOTE(review): assumes the proto ``metrics`` field is a string->double
        map, as the annotations suggest - confirm against worker.proto.
        """
        return {
            key: float(value)
            for key, value in metrics.items()
            if isinstance(value, (int, float))
        }

    def start(self) -> None:
        """Start worker node.

        Brings up the gRPC server, registers with the master and starts the
        heartbeat thread.

        Raises:
            DistributedError: If the worker port cannot be bound or the
                registration with the master fails.
        """
        logger.info(f"Starting worker {self.worker_id[:12]} on port {self.port}")

        # Start gRPC server
        server = grpc.server(
            futures.ThreadPoolExecutor(max_workers=5),
            options=[
                ("grpc.max_send_message_length", 100 * 1024 * 1024),
                ("grpc.max_receive_message_length", 100 * 1024 * 1024),
            ],
        )

        # Add servicer
        worker_pb2_grpc.add_WorkerServiceServicer_to_server(WorkerServicer(self), server)

        # Older grpcio versions signal a bind failure by returning port 0.
        bound_port = server.add_insecure_port(f"[::]:{self.port}")
        if bound_port == 0:
            raise DistributedError(f"Failed to bind worker port {self.port}")
        server.start()
        self.server = server

        # Register only once the server is listening, so the master never
        # dispatches work to a worker that cannot receive it yet.
        try:
            self._register_with_master()
        except Exception:
            server.stop(grace=0)
            self.server = None
            raise

        self.running = True
        self.start_time = time.time()
        self._stop_event.clear()

        # Start heartbeat
        self._start_heartbeat()

        logger.info(f"Worker {self.worker_id[:12]} started successfully")

    def stop(self) -> None:
        """Stop worker gracefully."""
        logger.info(f"Stopping worker {self.worker_id[:12]}")
        self.running = False
        self._stop_event.set()

        if self.server:
            self.server.stop(grace=5)

        logger.info(f"Worker {self.worker_id[:12]} stopped")

    def wait_for_shutdown(self) -> None:
        """Block until worker is stopped."""
        if self.server:
            self.server.wait_for_termination()

    def evaluate_architecture(
        self,
        architecture: ModelGraph,
        config: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, float]:
        """
        Evaluate architecture.

        Args:
            architecture: ModelGraph to evaluate
            config: Evaluation configuration (epochs, batch_size, etc.);
                only passed to the default evaluation, not to a custom
                evaluator

        Returns:
            Dictionary with metrics (accuracy, params, latency, etc.) plus
            the ``worker_id``, ``evaluation_time`` and ``gpu_id`` metadata

        Raises:
            Exception: Whatever the evaluator raises; it is logged and
                re-raised unchanged.

        Example:
            >>> result = worker.evaluate_architecture(graph)
            >>> print(result['val_accuracy'])
        """
        logger.info(f"Evaluating architecture on worker {self.worker_id[:12]}")

        start_time = time.time()

        try:
            # Use custom evaluator if provided
            if self.evaluator:
                result = self.evaluator(architecture)

                if isinstance(result, dict):
                    # Copy so the metadata below is not written into a dict
                    # the evaluator may still hold a reference to.
                    result = dict(result)
                else:
                    # Ensure result is a dictionary
                    result = {"fitness": float(result)}

            else:
                # Default evaluation (heuristic)
                result = self._default_evaluation(architecture, config or {})

            # Add metadata
            result["worker_id"] = self.worker_id
            result["evaluation_time"] = time.time() - start_time
            result["gpu_id"] = self.gpu_ids[0] if self.gpu_ids else -1

            logger.debug(
                f"Evaluation complete in {result['evaluation_time']:.2f}s: "
                f"fitness={result.get('fitness', 'N/A')}"
            )

            return result

        except Exception as e:
            logger.error(f"Evaluation failed: {e}")
            raise

    def _default_evaluation(
        self, architecture: ModelGraph, config: Dict[str, Any]
    ) -> Dict[str, float]:
        """
        Default heuristic evaluation when no evaluator provided.

        This is a fast proxy evaluation based on architecture properties.
        For actual training, provide a custom evaluator. ``config`` is
        accepted for interface symmetry but is not used here.
        """
        from morphml.evaluation import HeuristicEvaluator

        fitness = HeuristicEvaluator()(architecture)

        # Estimate other metrics
        params = architecture.estimate_parameters()
        depth = len(list(architecture.topological_sort()))

        return {
            "fitness": fitness,
            "val_accuracy": fitness,
            "params": params,
            "depth": depth,
        }

    def _register_with_master(self) -> None:
        """Register with master node, retrying on RPC errors.

        Raises:
            DistributedError: If the master rejects the registration, or if
                every attempt fails with an RPC error.
        """
        logger.info(f"Registering with master at {self.master_host}:{self.master_port}")

        max_retries = 10
        retry_delay = 2

        request = worker_pb2.RegisterRequest(
            worker_id=self.worker_id,
            host=socket.gethostname(),
            port=self.port,
            num_gpus=self.num_gpus,
            gpu_ids=self.gpu_ids,
        )

        for attempt in range(1, max_retries + 1):
            try:
                # The context manager closes the channel after each attempt.
                with grpc.insecure_channel(self._master_address()) as channel:
                    stub = worker_pb2_grpc.MasterServiceStub(channel)
                    response = stub.RegisterWorker(request, timeout=10)
            except grpc.RpcError as e:
                if attempt == max_retries:
                    raise DistributedError(
                        f"Failed to register with master after {max_retries} attempts"
                    ) from e
                logger.warning(
                    f"Registration attempt {attempt}/{max_retries} failed: {e}. "
                    f"Retrying in {retry_delay}s..."
                )
                time.sleep(retry_delay)
                continue

            if response.success:
                logger.info(f"Successfully registered with master (id={response.master_id})")
                return

            # An explicit rejection is not retried.
            raise DistributedError(f"Registration failed: {response.message}")

    def _start_heartbeat(self) -> None:
        """Start periodic heartbeat to master in a daemon thread."""

        def heartbeat_loop() -> None:
            while self.running:
                try:
                    # Create metrics
                    metrics = worker_pb2.WorkerMetrics(
                        cpu_usage=self._get_cpu_usage(),
                        memory_usage=self._get_memory_usage(),
                        gpu_usage=self._get_gpu_usage(),
                        gpu_memory=self._get_gpu_memory(),
                        tasks_completed=self.tasks_completed,
                        tasks_failed=self.tasks_failed,
                    )

                    request = worker_pb2.HeartbeatRequest(
                        worker_id=self.worker_id,
                        status="busy" if self.current_task_id else "idle",
                        current_task_id=self.current_task_id or "",
                        metrics=metrics,
                    )

                    # Close the channel after every beat instead of leaking it.
                    with grpc.insecure_channel(self._master_address()) as channel:
                        stub = worker_pb2_grpc.MasterServiceStub(channel)
                        response = stub.Heartbeat(request, timeout=5)

                    # Check if master wants us to shutdown
                    if not response.should_continue:
                        logger.info("Master requested shutdown")
                        # Stop the server as well, otherwise
                        # wait_for_shutdown() would never return.
                        self.stop()
                        break

                except grpc.RpcError as e:
                    logger.error(f"Heartbeat failed: {e}")

                except Exception as e:
                    logger.error(f"Heartbeat error: {e}")

                # Returns early as soon as stop() sets the event.
                self._stop_event.wait(self.heartbeat_interval)

        thread = threading.Thread(target=heartbeat_loop, daemon=True, name="HeartbeatThread")
        thread.start()
        logger.debug("Heartbeat thread started")

    def _submit_result(
        self,
        task_id: str,
        success: bool,
        metrics: Dict[str, float],
        error: str = "",
        duration: float = 0.0,
    ) -> None:
        """Submit task result to master.

        RPC errors are logged and swallowed; submission is best-effort.
        """
        try:
            request = worker_pb2.ResultRequest(
                task_id=task_id,
                worker_id=self.worker_id,
                success=success,
                metrics=self._numeric_metrics(metrics),
                error=error,
                duration=duration,
            )

            with grpc.insecure_channel(self._master_address()) as channel:
                stub = worker_pb2_grpc.MasterServiceStub(channel)
                response = stub.SubmitResult(request, timeout=10)

            if not response.acknowledged:
                logger.warning(f"Master did not acknowledge result for task {task_id}")

        except grpc.RpcError as e:
            logger.error(f"Failed to submit result for task {task_id}: {e}")

    def _get_cpu_usage(self) -> float:
        """Get CPU usage percentage (0.0 if it cannot be determined)."""
        try:
            import psutil

            return psutil.cpu_percent(interval=0.1)
        except Exception:
            return 0.0

    def _get_memory_usage(self) -> float:
        """Get memory usage percentage (0.0 if it cannot be determined)."""
        try:
            import psutil

            return psutil.virtual_memory().percent
        except Exception:
            return 0.0

    def _nvml_device(self) -> Any:
        """Return ``(pynvml, handle)`` for the first configured GPU.

        Raises whatever pynvml raises (or ``ImportError`` / ``IndexError``);
        the callers treat any failure as "no reading available".
        """
        import pynvml

        pynvml.nvmlInit()
        return pynvml, pynvml.nvmlDeviceGetHandleByIndex(self.gpu_ids[0])

    def _get_gpu_usage(self) -> float:
        """Get GPU usage percentage (0.0 if it cannot be determined)."""
        if not self.gpu_ids:
            return 0.0
        try:
            pynvml, handle = self._nvml_device()
            return float(pynvml.nvmlDeviceGetUtilizationRates(handle).gpu)
        except Exception:
            return 0.0

    def _get_gpu_memory(self) -> float:
        """Get GPU memory usage percentage (0.0 if it cannot be determined)."""
        if not self.gpu_ids:
            return 0.0
        try:
            pynvml, handle = self._nvml_device()
            mem_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
            return (mem_info.used / mem_info.total) * 100
        except Exception:
            return 0.0

    def get_status(self) -> Dict[str, Any]:
        """Get worker status as a plain dictionary."""
        return {
            "worker_id": self.worker_id,
            "status": "busy" if self.current_task_id else "idle",
            "current_task": self.current_task_id,
            "tasks_completed": self.tasks_completed,
            "tasks_failed": self.tasks_failed,
            "uptime_seconds": time.time() - self.start_time,
            "cpu_usage": self._get_cpu_usage(),
            "memory_usage": self._get_memory_usage(),
            "gpu_usage": self._get_gpu_usage(),
            "gpu_memory": self._get_gpu_memory(),
        }
414
+
415
+
416
if GRPC_AVAILABLE:

    class WorkerServicer(worker_pb2_grpc.WorkerServiceServicer):
        """gRPC servicer for worker node.

        Translates incoming RPCs into calls on the wrapped ``WorkerNode``.
        """

        def __init__(self, worker: WorkerNode):
            """Initialize servicer with the worker it serves."""
            self.worker = worker

        def Evaluate(
            self, request: worker_pb2.EvaluateRequest, context: grpc.ServicerContext
        ) -> worker_pb2.EvaluateResponse:
            """Handle evaluation task.

            Deserializes the architecture, evaluates it, updates the worker's
            counters, submits the outcome to the master and returns the same
            outcome to the caller. Evaluation errors are reported in the
            response (``success=False``) rather than raised.
            """
            task_id = request.task_id

            logger.info(f"Received evaluation task: {task_id}")

            self.worker.current_task_id = task_id
            start_time = time.time()

            success = True
            error_msg = ""
            metrics = {}

            try:
                # Deserialize architecture
                architecture = ModelGraph.from_json(request.architecture)

                # Evaluate
                result = self.worker.evaluate_architecture(architecture)

                # The result carries a string ``worker_id``; only numeric
                # entries are forwarded as metrics.
                # NOTE(review): assumes the proto ``metrics`` field is a
                # string->double map - confirm against worker.proto.
                metrics = {
                    key: float(value)
                    for key, value in result.items()
                    if isinstance(value, (int, float))
                }

            except Exception as e:
                success = False
                error_msg = str(e)
                metrics = {}

                logger.error(f"Evaluation failed for task {task_id}: {error_msg}")

            finally:
                # Always mark the worker idle again, whatever happened.
                self.worker.current_task_id = None

            duration = time.time() - start_time

            # Update stats: each task is counted exactly once.
            if success:
                self.worker.tasks_completed += 1
            else:
                self.worker.tasks_failed += 1

            # Submit outcome to master. A reporting problem must not turn a
            # finished evaluation into a failed (and double-counted) task.
            try:
                self.worker._submit_result(
                    task_id=task_id,
                    success=success,
                    metrics=metrics,
                    error=error_msg,
                    duration=duration,
                )
            except Exception as e:
                logger.error(f"Failed to submit result for task {task_id}: {e}")

            # Return response
            return worker_pb2.EvaluateResponse(
                task_id=task_id,
                success=success,
                metrics=metrics,
                error=error_msg,
                duration=duration,
            )

        def GetStatus(
            self, request: worker_pb2.StatusRequest, context: grpc.ServicerContext
        ) -> worker_pb2.StatusResponse:
            """Handle status request by wrapping ``WorkerNode.get_status()``."""
            status = self.worker.get_status()

            metrics = worker_pb2.WorkerMetrics(
                cpu_usage=status["cpu_usage"],
                memory_usage=status["memory_usage"],
                gpu_usage=status["gpu_usage"],
                gpu_memory=status["gpu_memory"],
                tasks_completed=status["tasks_completed"],
                tasks_failed=status["tasks_failed"],
            )

            return worker_pb2.StatusResponse(
                status=status["status"],
                current_task_id=status["current_task"] or "",
                metrics=metrics,
                uptime_seconds=int(status["uptime_seconds"]),
            )

        def Shutdown(
            self, request: worker_pb2.ShutdownRequest, context: grpc.ServicerContext
        ) -> worker_pb2.ShutdownResponse:
            """Handle shutdown request.

            For a graceful shutdown this blocks, polling once per second,
            until the worker reports no current task; then the worker is
            stopped.
            """
            logger.info(
                f"Shutdown requested (graceful={request.graceful}) for worker {request.worker_id}"
            )

            if request.graceful:
                # Wait for current task to finish
                while self.worker.current_task_id:
                    time.sleep(1)

            # Stop worker
            self.worker.stop()

            return worker_pb2.ShutdownResponse(acknowledged=True)

        def CancelTask(
            self, request: worker_pb2.CancelRequest, context: grpc.ServicerContext
        ) -> worker_pb2.CancelResponse:
            """Handle task cancellation.

            Only clears the worker's current task ID; a running evaluation is
            not interrupted.
            """
            task_id = request.task_id

            if self.worker.current_task_id != task_id:
                return worker_pb2.CancelResponse(
                    success=False, message=f"Task {task_id} not running on this worker"
                )

            logger.warning(f"Cancelling task {task_id}")
            # Note: Actual cancellation would require more complex logic
            # For now, just clear the task ID
            self.worker.current_task_id = None

            return worker_pb2.CancelResponse(success=True, message="Task cancelled")
@@ -0,0 +1,5 @@
1
+ """Evaluation utilities for architecture fitness."""
2
+
3
+ from morphml.evaluation.heuristic import HeuristicEvaluator
4
+
5
+ __all__ = ["HeuristicEvaluator"]