morphml 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of morphml might be problematic. Click here for more details.
- morphml/__init__.py +14 -0
- morphml/api/__init__.py +26 -0
- morphml/api/app.py +326 -0
- morphml/api/auth.py +193 -0
- morphml/api/client.py +338 -0
- morphml/api/models.py +132 -0
- morphml/api/rate_limit.py +192 -0
- morphml/benchmarking/__init__.py +36 -0
- morphml/benchmarking/comparison.py +430 -0
- morphml/benchmarks/__init__.py +56 -0
- morphml/benchmarks/comparator.py +409 -0
- morphml/benchmarks/datasets.py +280 -0
- morphml/benchmarks/metrics.py +199 -0
- morphml/benchmarks/openml_suite.py +201 -0
- morphml/benchmarks/problems.py +289 -0
- morphml/benchmarks/suite.py +318 -0
- morphml/cli/__init__.py +5 -0
- morphml/cli/commands/experiment.py +329 -0
- morphml/cli/main.py +457 -0
- morphml/cli/quickstart.py +312 -0
- morphml/config.py +278 -0
- morphml/constraints/__init__.py +19 -0
- morphml/constraints/handler.py +205 -0
- morphml/constraints/predicates.py +285 -0
- morphml/core/__init__.py +3 -0
- morphml/core/crossover.py +449 -0
- morphml/core/dsl/README.md +359 -0
- morphml/core/dsl/__init__.py +72 -0
- morphml/core/dsl/ast_nodes.py +364 -0
- morphml/core/dsl/compiler.py +318 -0
- morphml/core/dsl/layers.py +368 -0
- morphml/core/dsl/lexer.py +336 -0
- morphml/core/dsl/parser.py +455 -0
- morphml/core/dsl/search_space.py +386 -0
- morphml/core/dsl/syntax.py +199 -0
- morphml/core/dsl/type_system.py +361 -0
- morphml/core/dsl/validator.py +386 -0
- morphml/core/graph/__init__.py +40 -0
- morphml/core/graph/edge.py +124 -0
- morphml/core/graph/graph.py +507 -0
- morphml/core/graph/mutations.py +409 -0
- morphml/core/graph/node.py +196 -0
- morphml/core/graph/serialization.py +361 -0
- morphml/core/graph/visualization.py +431 -0
- morphml/core/objectives/__init__.py +20 -0
- morphml/core/search/__init__.py +33 -0
- morphml/core/search/individual.py +252 -0
- morphml/core/search/parameters.py +453 -0
- morphml/core/search/population.py +375 -0
- morphml/core/search/search_engine.py +340 -0
- morphml/distributed/__init__.py +76 -0
- morphml/distributed/fault_tolerance.py +497 -0
- morphml/distributed/health_monitor.py +348 -0
- morphml/distributed/master.py +709 -0
- morphml/distributed/proto/README.md +224 -0
- morphml/distributed/proto/__init__.py +74 -0
- morphml/distributed/proto/worker.proto +170 -0
- morphml/distributed/proto/worker_pb2.py +79 -0
- morphml/distributed/proto/worker_pb2_grpc.py +423 -0
- morphml/distributed/resource_manager.py +416 -0
- morphml/distributed/scheduler.py +567 -0
- morphml/distributed/storage/__init__.py +33 -0
- morphml/distributed/storage/artifacts.py +381 -0
- morphml/distributed/storage/cache.py +366 -0
- morphml/distributed/storage/checkpointing.py +329 -0
- morphml/distributed/storage/database.py +459 -0
- morphml/distributed/worker.py +549 -0
- morphml/evaluation/__init__.py +5 -0
- morphml/evaluation/heuristic.py +237 -0
- morphml/exceptions.py +55 -0
- morphml/execution/__init__.py +5 -0
- morphml/execution/local_executor.py +350 -0
- morphml/integrations/__init__.py +28 -0
- morphml/integrations/jax_adapter.py +206 -0
- morphml/integrations/pytorch_adapter.py +530 -0
- morphml/integrations/sklearn_adapter.py +206 -0
- morphml/integrations/tensorflow_adapter.py +230 -0
- morphml/logging_config.py +93 -0
- morphml/meta_learning/__init__.py +66 -0
- morphml/meta_learning/architecture_similarity.py +277 -0
- morphml/meta_learning/experiment_database.py +240 -0
- morphml/meta_learning/knowledge_base/__init__.py +19 -0
- morphml/meta_learning/knowledge_base/embedder.py +179 -0
- morphml/meta_learning/knowledge_base/knowledge_base.py +313 -0
- morphml/meta_learning/knowledge_base/meta_features.py +265 -0
- morphml/meta_learning/knowledge_base/vector_store.py +271 -0
- morphml/meta_learning/predictors/__init__.py +27 -0
- morphml/meta_learning/predictors/ensemble.py +221 -0
- morphml/meta_learning/predictors/gnn_predictor.py +552 -0
- morphml/meta_learning/predictors/learning_curve.py +231 -0
- morphml/meta_learning/predictors/proxy_metrics.py +261 -0
- morphml/meta_learning/strategy_evolution/__init__.py +27 -0
- morphml/meta_learning/strategy_evolution/adaptive_optimizer.py +226 -0
- morphml/meta_learning/strategy_evolution/bandit.py +276 -0
- morphml/meta_learning/strategy_evolution/portfolio.py +230 -0
- morphml/meta_learning/transfer.py +581 -0
- morphml/meta_learning/warm_start.py +286 -0
- morphml/optimizers/__init__.py +74 -0
- morphml/optimizers/adaptive_operators.py +399 -0
- morphml/optimizers/bayesian/__init__.py +52 -0
- morphml/optimizers/bayesian/acquisition.py +387 -0
- morphml/optimizers/bayesian/base.py +319 -0
- morphml/optimizers/bayesian/gaussian_process.py +635 -0
- morphml/optimizers/bayesian/smac.py +534 -0
- morphml/optimizers/bayesian/tpe.py +411 -0
- morphml/optimizers/differential_evolution.py +220 -0
- morphml/optimizers/evolutionary/__init__.py +61 -0
- morphml/optimizers/evolutionary/cma_es.py +416 -0
- morphml/optimizers/evolutionary/differential_evolution.py +556 -0
- morphml/optimizers/evolutionary/encoding.py +426 -0
- morphml/optimizers/evolutionary/particle_swarm.py +449 -0
- morphml/optimizers/genetic_algorithm.py +486 -0
- morphml/optimizers/gradient_based/__init__.py +22 -0
- morphml/optimizers/gradient_based/darts.py +550 -0
- morphml/optimizers/gradient_based/enas.py +585 -0
- morphml/optimizers/gradient_based/operations.py +474 -0
- morphml/optimizers/gradient_based/utils.py +601 -0
- morphml/optimizers/hill_climbing.py +169 -0
- morphml/optimizers/multi_objective/__init__.py +56 -0
- morphml/optimizers/multi_objective/indicators.py +504 -0
- morphml/optimizers/multi_objective/nsga2.py +647 -0
- morphml/optimizers/multi_objective/visualization.py +427 -0
- morphml/optimizers/nsga2.py +308 -0
- morphml/optimizers/random_search.py +172 -0
- morphml/optimizers/simulated_annealing.py +181 -0
- morphml/plugins/__init__.py +35 -0
- morphml/plugins/custom_evaluator_example.py +81 -0
- morphml/plugins/custom_optimizer_example.py +63 -0
- morphml/plugins/plugin_system.py +454 -0
- morphml/reports/__init__.py +30 -0
- morphml/reports/generator.py +362 -0
- morphml/tracking/__init__.py +7 -0
- morphml/tracking/experiment.py +309 -0
- morphml/tracking/logger.py +301 -0
- morphml/tracking/reporter.py +357 -0
- morphml/utils/__init__.py +6 -0
- morphml/utils/checkpoint.py +189 -0
- morphml/utils/comparison.py +390 -0
- morphml/utils/export.py +407 -0
- morphml/utils/progress.py +392 -0
- morphml/utils/validation.py +392 -0
- morphml/version.py +7 -0
- morphml/visualization/__init__.py +50 -0
- morphml/visualization/analytics.py +423 -0
- morphml/visualization/architecture_diagrams.py +353 -0
- morphml/visualization/architecture_plot.py +223 -0
- morphml/visualization/convergence_plot.py +174 -0
- morphml/visualization/crossover_viz.py +386 -0
- morphml/visualization/graph_viz.py +338 -0
- morphml/visualization/pareto_plot.py +149 -0
- morphml/visualization/plotly_dashboards.py +422 -0
- morphml/visualization/population.py +309 -0
- morphml/visualization/progress.py +260 -0
- morphml-1.0.0.dist-info/METADATA +434 -0
- morphml-1.0.0.dist-info/RECORD +158 -0
- morphml-1.0.0.dist-info/WHEEL +4 -0
- morphml-1.0.0.dist-info/entry_points.txt +3 -0
- morphml-1.0.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
# Protocol Buffer Stubs for gRPC Communication
|
|
2
|
+
|
|
3
|
+
This directory contains Protocol Buffer definitions and generated Python stubs for distributed communication between Master and Worker nodes.
|
|
4
|
+
|
|
5
|
+
## Files
|
|
6
|
+
|
|
7
|
+
- **`worker.proto`** - Protocol Buffer definition (source)
|
|
8
|
+
- **`worker_pb2.py`** - Generated message classes (auto-generated)
|
|
9
|
+
- **`worker_pb2_grpc.py`** - Generated gRPC service stubs (auto-generated)
|
|
10
|
+
- **`__init__.py`** - Package initialization with exports
|
|
11
|
+
|
|
12
|
+
## Services Defined
|
|
13
|
+
|
|
14
|
+
### MasterService (called by workers)
|
|
15
|
+
- `RegisterWorker()` - Register a new worker with the master
|
|
16
|
+
- `Heartbeat()` - Send periodic heartbeat to indicate worker is alive
|
|
17
|
+
- `SubmitResult()` - Submit evaluation results back to master
|
|
18
|
+
- `RequestTask()` - Request tasks from master (pull model)
|
|
19
|
+
|
|
20
|
+
### WorkerService (called by master)
|
|
21
|
+
- `Evaluate()` - Evaluate an architecture (push model)
|
|
22
|
+
- `GetStatus()` - Get current worker status
|
|
23
|
+
- `Shutdown()` - Gracefully shutdown worker
|
|
24
|
+
- `CancelTask()` - Cancel a running task
|
|
25
|
+
|
|
26
|
+
## Message Types
|
|
27
|
+
|
|
28
|
+
### Registration
|
|
29
|
+
- `RegisterRequest` / `RegisterResponse`
|
|
30
|
+
|
|
31
|
+
### Heartbeat
|
|
32
|
+
- `HeartbeatRequest` / `HeartbeatResponse`
|
|
33
|
+
- `WorkerMetrics` - CPU, memory, GPU usage stats
|
|
34
|
+
|
|
35
|
+
### Task Evaluation
|
|
36
|
+
- `EvaluateRequest` / `EvaluateResponse`
|
|
37
|
+
- `EvaluationConfig` - Training configuration
|
|
38
|
+
- `TaskRequest` / `TaskResponse`
|
|
39
|
+
|
|
40
|
+
### Results
|
|
41
|
+
- `ResultRequest` / `ResultResponse`
|
|
42
|
+
|
|
43
|
+
### Control
|
|
44
|
+
- `StatusRequest` / `StatusResponse`
|
|
45
|
+
- `ShutdownRequest` / `ShutdownResponse`
|
|
46
|
+
- `CancelRequest` / `CancelResponse`
|
|
47
|
+
|
|
48
|
+
## Regenerating Stubs
|
|
49
|
+
|
|
50
|
+
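If you modify `worker.proto`, regenerate the Python stubs. Run the commands below from the root of a source checkout; the `scripts/` directory is not shipped in the installed wheel: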
|
|
51
|
+
|
|
52
|
+
### Option 1: Using the provided script (Recommended)
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
python scripts/compile_protos.py
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
### Option 2: Manual compilation
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
python -m grpc_tools.protoc \
|
|
62
|
+
-I morphml/distributed/proto \
|
|
63
|
+
--python_out=morphml/distributed/proto \
|
|
64
|
+
--grpc_python_out=morphml/distributed/proto \
|
|
65
|
+
--pyi_out=morphml/distributed/proto \
|
|
66
|
+
morphml/distributed/proto/worker.proto
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
**Note:** Requires `grpcio-tools` to be installed:
|
|
70
|
+
```bash
|
|
71
|
+
pip install grpcio-tools
|
|
72
|
+
# or with poetry
|
|
73
|
+
poetry install --extras distributed
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
## Usage
|
|
77
|
+
|
|
78
|
+
### Importing in Python
|
|
79
|
+
|
|
80
|
+
```python
|
|
81
|
+
from morphml.distributed.proto import (
|
|
82
|
+
worker_pb2,
|
|
83
|
+
worker_pb2_grpc,
|
|
84
|
+
RegisterRequest,
|
|
85
|
+
EvaluateRequest,
|
|
86
|
+
# ... other message types
|
|
87
|
+
)
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
### Master Node Example
|
|
91
|
+
|
|
92
|
+
```python
|
|
93
|
+
from concurrent import futures

import grpc
|
|
94
|
+
from morphml.distributed.proto import worker_pb2_grpc, RegisterResponse
|
|
95
|
+
|
|
96
|
+
class MyMasterServicer(worker_pb2_grpc.MasterServiceServicer):
|
|
97
|
+
def RegisterWorker(self, request, context):
|
|
98
|
+
print(f"Worker {request.worker_id} registered from {request.host}:{request.port}")
|
|
99
|
+
return RegisterResponse(
|
|
100
|
+
success=True,
|
|
101
|
+
message="Registration successful",
|
|
102
|
+
master_id="master-001"
|
|
103
|
+
)
|
|
104
|
+
|
|
105
|
+
# Create server
|
|
106
|
+
server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
|
|
107
|
+
worker_pb2_grpc.add_MasterServiceServicer_to_server(MyMasterServicer(), server)
|
|
108
|
+
server.add_insecure_port('[::]:50051')
|
|
109
|
+
server.start()
server.wait_for_termination()  # block here so the process keeps serving requests
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
### Worker Node Example
|
|
113
|
+
|
|
114
|
+
```python
|
|
115
|
+
import grpc
|
|
116
|
+
from morphml.distributed.proto import worker_pb2, worker_pb2_grpc
|
|
117
|
+
|
|
118
|
+
# Connect to master
|
|
119
|
+
channel = grpc.insecure_channel('localhost:50051')
|
|
120
|
+
stub = worker_pb2_grpc.MasterServiceStub(channel)
|
|
121
|
+
|
|
122
|
+
# Register with master
|
|
123
|
+
request = worker_pb2.RegisterRequest(
|
|
124
|
+
worker_id='worker-1',
|
|
125
|
+
host='localhost',
|
|
126
|
+
port=50052,
|
|
127
|
+
num_gpus=2,
|
|
128
|
+
gpu_ids=[0, 1]
|
|
129
|
+
)
|
|
130
|
+
response = stub.RegisterWorker(request)
|
|
131
|
+
print(f"Registration: {response.success} - {response.message}")
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
## Dependencies
|
|
135
|
+
|
|
136
|
+
Required packages:
|
|
137
|
+
- `grpcio>=1.54.0` - gRPC runtime
|
|
138
|
+
- `protobuf>=4.23.0` - Protocol Buffer runtime
|
|
139
|
+
- `grpcio-tools>=1.54.0` - For compilation (development only)
|
|
140
|
+
|
|
141
|
+
Install with:
|
|
142
|
+
```bash
|
|
143
|
+
pip install grpcio protobuf grpcio-tools
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
Or using poetry:
|
|
147
|
+
```bash
|
|
148
|
+
poetry install --extras distributed
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
## Communication Flow
|
|
152
|
+
|
|
153
|
+
```
|
|
154
|
+
Worker Master
|
|
155
|
+
| |
|
|
156
|
+
|-- RegisterWorker() ---------->|
|
|
157
|
+
|<-- RegisterResponse ----------|
|
|
158
|
+
| |
|
|
159
|
+
|-- Heartbeat() (every 10s) -->|
|
|
160
|
+
|<-- HeartbeatResponse ---------|
|
|
161
|
+
| |
|
|
162
|
+
|<-- Evaluate(task) ------------| (push model)
|
|
163
|
+
|-- SubmitResult(metrics) ----->|
|
|
164
|
+
| |
|
|
165
|
+
|-- RequestTask() ------------->| (pull model)
|
|
166
|
+
|<-- TaskResponse(tasks) -------|
|
|
167
|
+
| |
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
## Protocol Details
|
|
171
|
+
|
|
172
|
+
- **Serialization:** Protocol Buffers v3 (binary)
|
|
173
|
+
- **Transport:** gRPC over HTTP/2
|
|
174
|
+
- **Default Ports:**
|
|
175
|
+
- Master: 50051
|
|
176
|
+
- Worker: 50052+
|
|
177
|
+
- **Timeout:** Configurable per-request
|
|
178
|
+
- **Compression:** Optional (gzip)
|
|
179
|
+
|
|
180
|
+
## Troubleshooting
|
|
181
|
+
|
|
182
|
+
### Import Errors
|
|
183
|
+
|
|
184
|
+
If you see:
|
|
185
|
+
```
|
|
186
|
+
ImportError: cannot import name 'worker_pb2' from 'morphml.distributed.proto'
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
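**Solution:** Generate the protobuf files. Also make sure `grpcio` and `protobuf` are installed: the package silently skips its re-exports when they are missing, so names such as `RegisterRequest` then fail to import in the same way.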
|
|
190
|
+
```bash
|
|
191
|
+
python scripts/compile_protos.py
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
### gRPC Connection Issues
|
|
195
|
+
|
|
196
|
+
If workers can't connect to master:
|
|
197
|
+
1. Check firewall settings
|
|
198
|
+
2. Verify master is running and listening
|
|
199
|
+
3. Check network connectivity: `telnet master-host 50051`
|
|
200
|
+
4. Review master logs for errors
|
|
201
|
+
|
|
202
|
+
### Version Incompatibility
|
|
203
|
+
|
|
204
|
+
Ensure compatible versions:
|
|
205
|
+
```bash
|
|
206
|
+
pip list | grep grpc
|
|
207
|
+
pip list | grep protobuf
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
Recommended versions:
|
|
211
|
+
- grpcio: ≥1.54.0, <2.0.0
|
|
212
|
+
- protobuf: ≥4.23.0, <5.0.0
|
|
213
|
+
|
|
214
|
+
## Learn More
|
|
215
|
+
|
|
216
|
+
- [gRPC Python Documentation](https://grpc.io/docs/languages/python/)
|
|
217
|
+
- [Protocol Buffers Guide](https://protobuf.dev/)
|
|
218
|
+
- [MorphML Distributed Module](/morphml/distributed/)
|
|
219
|
+
|
|
220
|
+
---
|
|
221
|
+
|
|
222
|
+
**Authors:** Vedanth ([@vedanthq](https://github.com/vedanthq)) & Eshan Roy ([@eshanized](https://github.com/eshanized))
|
|
223
|
+
**Organization:** TONMOY INFRASTRUCTURE & VISION
|
|
224
|
+
**Auto-generated:** Do not edit `*_pb2.py` or `*_pb2_grpc.py` files manually; regenerate them from `worker.proto` instead
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
"""Generated Protocol Buffer stubs for distributed communication.
|
|
2
|
+
|
|
3
|
+
Auto-generated files. To regenerate, run:
|
|
4
|
+
python scripts/compile_protos.py
|
|
5
|
+
|
|
6
|
+
Author: Eshan Roy <eshanized@proton.me>
|
|
7
|
+
Organization: TONMOY INFRASTRUCTURE & VISION
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
try:
|
|
11
|
+
from morphml.distributed.proto.worker_pb2 import (
|
|
12
|
+
CancelRequest,
|
|
13
|
+
CancelResponse,
|
|
14
|
+
EvaluateRequest,
|
|
15
|
+
EvaluateResponse,
|
|
16
|
+
EvaluationConfig,
|
|
17
|
+
HeartbeatRequest,
|
|
18
|
+
HeartbeatResponse,
|
|
19
|
+
RegisterRequest,
|
|
20
|
+
RegisterResponse,
|
|
21
|
+
ResultRequest,
|
|
22
|
+
ResultResponse,
|
|
23
|
+
ShutdownRequest,
|
|
24
|
+
ShutdownResponse,
|
|
25
|
+
StatusRequest,
|
|
26
|
+
StatusResponse,
|
|
27
|
+
TaskRequest,
|
|
28
|
+
TaskResponse,
|
|
29
|
+
WorkerMetrics,
|
|
30
|
+
)
|
|
31
|
+
from morphml.distributed.proto.worker_pb2_grpc import (
|
|
32
|
+
MasterService,
|
|
33
|
+
MasterServiceServicer,
|
|
34
|
+
MasterServiceStub,
|
|
35
|
+
WorkerService,
|
|
36
|
+
WorkerServiceServicer,
|
|
37
|
+
WorkerServiceStub,
|
|
38
|
+
add_MasterServiceServicer_to_server,
|
|
39
|
+
add_WorkerServiceServicer_to_server,
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
__all__ = [
|
|
43
|
+
# Messages
|
|
44
|
+
"RegisterRequest",
|
|
45
|
+
"RegisterResponse",
|
|
46
|
+
"HeartbeatRequest",
|
|
47
|
+
"HeartbeatResponse",
|
|
48
|
+
"WorkerMetrics",
|
|
49
|
+
"EvaluateRequest",
|
|
50
|
+
"EvaluateResponse",
|
|
51
|
+
"EvaluationConfig",
|
|
52
|
+
"TaskRequest",
|
|
53
|
+
"TaskResponse",
|
|
54
|
+
"ResultRequest",
|
|
55
|
+
"ResultResponse",
|
|
56
|
+
"StatusRequest",
|
|
57
|
+
"StatusResponse",
|
|
58
|
+
"ShutdownRequest",
|
|
59
|
+
"ShutdownResponse",
|
|
60
|
+
"CancelRequest",
|
|
61
|
+
"CancelResponse",
|
|
62
|
+
# Services
|
|
63
|
+
"MasterServiceStub",
|
|
64
|
+
"MasterServiceServicer",
|
|
65
|
+
"MasterService",
|
|
66
|
+
"add_MasterServiceServicer_to_server",
|
|
67
|
+
"WorkerServiceStub",
|
|
68
|
+
"WorkerServiceServicer",
|
|
69
|
+
"WorkerService",
|
|
70
|
+
"add_WorkerServiceServicer_to_server",
|
|
71
|
+
]
|
|
72
|
+
except ImportError:
|
|
73
|
+
# Protobuf files not generated or grpc not installed
|
|
74
|
+
pass
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
syntax = "proto3";
|
|
2
|
+
|
|
3
|
+
package morphml.distributed;
|
|
4
|
+
|
|
5
|
+
// Master service - called by workers
|
|
6
|
+
service MasterService {
|
|
7
|
+
// Register a new worker with the master
|
|
8
|
+
rpc RegisterWorker(RegisterRequest) returns (RegisterResponse);
|
|
9
|
+
|
|
10
|
+
// Send periodic heartbeat
|
|
11
|
+
rpc Heartbeat(HeartbeatRequest) returns (HeartbeatResponse);
|
|
12
|
+
|
|
13
|
+
// Submit evaluation result
|
|
14
|
+
rpc SubmitResult(ResultRequest) returns (ResultResponse);
|
|
15
|
+
|
|
16
|
+
// Request a task (pull model)
|
|
17
|
+
rpc RequestTask(TaskRequest) returns (TaskResponse);
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
// Worker service - called by master
|
|
21
|
+
service WorkerService {
|
|
22
|
+
// Evaluate an architecture (push model)
|
|
23
|
+
rpc Evaluate(EvaluateRequest) returns (EvaluateResponse);
|
|
24
|
+
|
|
25
|
+
// Get worker status
|
|
26
|
+
rpc GetStatus(StatusRequest) returns (StatusResponse);
|
|
27
|
+
|
|
28
|
+
// Shutdown worker gracefully
|
|
29
|
+
rpc Shutdown(ShutdownRequest) returns (ShutdownResponse);
|
|
30
|
+
|
|
31
|
+
// Cancel running task
|
|
32
|
+
rpc CancelTask(CancelRequest) returns (CancelResponse);
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
// ====================
|
|
36
|
+
// Registration
|
|
37
|
+
// ====================
|
|
38
|
+
|
|
39
|
+
message RegisterRequest {
|
|
40
|
+
string worker_id = 1;
|
|
41
|
+
string host = 2;
|
|
42
|
+
int32 port = 3;
|
|
43
|
+
int32 num_gpus = 4;
|
|
44
|
+
repeated int32 gpu_ids = 5;
|
|
45
|
+
map<string, string> metadata = 6;
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
message RegisterResponse {
|
|
49
|
+
bool success = 1;
|
|
50
|
+
string message = 2;
|
|
51
|
+
string master_id = 3;
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
// ====================
|
|
55
|
+
// Heartbeat
|
|
56
|
+
// ====================
|
|
57
|
+
|
|
58
|
+
message HeartbeatRequest {
|
|
59
|
+
string worker_id = 1;
|
|
60
|
+
string status = 2; // 'idle', 'busy', 'error'
|
|
61
|
+
string current_task_id = 3;
|
|
62
|
+
WorkerMetrics metrics = 4;
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
message HeartbeatResponse {
|
|
66
|
+
bool acknowledged = 1;
|
|
67
|
+
bool should_continue = 2; // false = shutdown
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
message WorkerMetrics {
|
|
71
|
+
double cpu_usage = 1;
|
|
72
|
+
double memory_usage = 2;
|
|
73
|
+
double gpu_usage = 3;
|
|
74
|
+
double gpu_memory = 4;
|
|
75
|
+
int32 tasks_completed = 5;
|
|
76
|
+
int32 tasks_failed = 6;
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
// ====================
|
|
80
|
+
// Task Evaluation
|
|
81
|
+
// ====================
|
|
82
|
+
|
|
83
|
+
message EvaluateRequest {
|
|
84
|
+
string task_id = 1;
|
|
85
|
+
string architecture = 2; // JSON serialized ModelGraph
|
|
86
|
+
EvaluationConfig config = 3;
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
message EvaluateResponse {
|
|
90
|
+
string task_id = 1;
|
|
91
|
+
bool success = 2;
|
|
92
|
+
map<string, double> metrics = 3;
|
|
93
|
+
string error = 4;
|
|
94
|
+
double duration = 5;
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
message EvaluationConfig {
|
|
98
|
+
int32 num_epochs = 1;
|
|
99
|
+
int32 batch_size = 2;
|
|
100
|
+
double learning_rate = 3;
|
|
101
|
+
string dataset = 4;
|
|
102
|
+
int32 gpu_id = 5;
|
|
103
|
+
map<string, string> extra = 6;
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
// ====================
|
|
107
|
+
// Task Request (Pull Model)
|
|
108
|
+
// ====================
|
|
109
|
+
|
|
110
|
+
message TaskRequest {
|
|
111
|
+
string worker_id = 1;
|
|
112
|
+
int32 max_tasks = 2; // Number of tasks to request
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
message TaskResponse {
|
|
116
|
+
bool has_task = 1;
|
|
117
|
+
repeated EvaluateRequest tasks = 2;
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
// ====================
|
|
121
|
+
// Result Submission
|
|
122
|
+
// ====================
|
|
123
|
+
|
|
124
|
+
message ResultRequest {
|
|
125
|
+
string task_id = 1;
|
|
126
|
+
string worker_id = 2;
|
|
127
|
+
bool success = 3;
|
|
128
|
+
map<string, double> metrics = 4;
|
|
129
|
+
string error = 5;
|
|
130
|
+
double duration = 6;
|
|
131
|
+
string architecture = 7; // Optional: return modified architecture
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
message ResultResponse {
|
|
135
|
+
bool acknowledged = 1;
|
|
136
|
+
string message = 2;
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
// ====================
|
|
140
|
+
// Status & Control
|
|
141
|
+
// ====================
|
|
142
|
+
|
|
143
|
+
message StatusRequest {
|
|
144
|
+
string worker_id = 1;
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
message StatusResponse {
|
|
148
|
+
string status = 1; // 'idle', 'busy', 'error', 'dead'
|
|
149
|
+
string current_task_id = 2;
|
|
150
|
+
WorkerMetrics metrics = 3;
|
|
151
|
+
int64 uptime_seconds = 4;
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
message ShutdownRequest {
|
|
155
|
+
string worker_id = 1;
|
|
156
|
+
bool graceful = 2; // Wait for current task to finish
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
message ShutdownResponse {
|
|
160
|
+
bool acknowledged = 1;
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
message CancelRequest {
|
|
164
|
+
string task_id = 1;
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
message CancelResponse {
|
|
168
|
+
bool success = 1;
|
|
169
|
+
string message = 2;
|
|
170
|
+
}
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
# Generated by the protocol buffer compiler. DO NOT EDIT!
|
|
3
|
+
# source: worker.proto
|
|
4
|
+
"""Generated protocol buffer code."""
|
|
5
|
+
from google.protobuf import descriptor as _descriptor
|
|
6
|
+
from google.protobuf import descriptor_pool as _descriptor_pool
|
|
7
|
+
from google.protobuf import symbol_database as _symbol_database
|
|
8
|
+
from google.protobuf.internal import builder as _builder
|
|
9
|
+
|
|
10
|
+
# @@protoc_insertion_point(imports)
|
|
11
|
+
|
|
12
|
+
_sym_db = _symbol_database.Default()
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
|
|
16
|
+
b'\n\x0cworker.proto\x12\x15morphml.distributed"\x87\x01\n\x0fRegisterRequest\x12\x11\n\tworker_id\x18\x01 \x01(\t\x12\x0c\n\x04host\x18\x02 \x01(\t\x12\x0c\n\x04port\x18\x03 \x01(\x05\x12\x10\n\x08num_gpus\x18\x04 \x01(\x05\x12\x0f\n\x07gpu_ids\x18\x05 \x03(\x05\x12"\n\x08metadata\x18\x06 \x03(\x0b\x32\x10.RegisterRequest.MetadataEntry\x1a/\n\rMetadataEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01"I\n\x10RegisterResponse\x12\x0f\n\x07success\x18\x01 \x01(\x08\x12\x0f\n\x07message\x18\x02 \x01(\t\x12\x13\n\x0bmaster_id\x18\x03 \x01(\t"q\n\x10HeartbeatRequest\x12\x11\n\tworker_id\x18\x01 \x01(\t\x12\x0e\n\x06status\x18\x02 \x01(\t\x12\x17\n\x0fcurrent_task_id\x18\x03 \x01(\t\x12!\n\x07metrics\x18\x04 \x01(\x0b\x32\x10.WorkerMetrics">\n\x11HeartbeatResponse\x12\x14\n\x0cacknowledged\x18\x01 \x01(\x08\x12\x13\n\x0bshould_continue\x18\x02 \x01(\x08"\xae\x01\n\rWorkerMetrics\x12\x11\n\tcpu_usage\x18\x01 \x01(\x01\x12\x14\n\x0cmemory_usage\x18\x02 \x01(\x01\x12\x11\n\tgpu_usage\x18\x03 \x01(\x01\x12\x12\n\ngpu_memory\x18\x04 \x01(\x01\x12\x17\n\x0ftasks_completed\x18\x05 \x01(\x05\x12\x14\n\x0ctasks_failed\x18\x06 \x01(\x05"^\n\x0fEvaluateRequest\x12\x0f\n\x07task_id\x18\x01 \x01(\t\x12\x14\n\x0carchitecture\x18\x02 \x01(\t\x12$\n\x06\x63onfig\x18\x03 \x01(\x0b\x32\x14.EvaluationConfig"\x8b\x01\n\x10\x45valuateResponse\x12\x0f\n\x07task_id\x18\x01 \x01(\x08\x12\x0f\n\x07success\x18\x02 \x01(\x08\x12+\n\x07metrics\x18\x03 \x03(\x0b\x32\x1a.EvaluateResponse.MetricsEntry\x12\r\n\x05\x65rror\x18\x04 \x01(\t\x12\x10\n\x08\x64uration\x18\x05 \x01(\x01\x1a\x31\n\x0cMetricsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x01:\x02\x38\x01"\xae\x01\n\x10\x45valuationConfig\x12\x12\n\nnum_epochs\x18\x01 \x01(\x05\x12\x12\n\nbatch_size\x18\x02 \x01(\x05\x12\x15\n\rlearning_rate\x18\x03 \x01(\x01\x12\x0f\n\x07\x64\x61taset\x18\x04 \x01(\t\x12\x0e\n\x06gpu_id\x18\x05 \x01(\x05\x12*\n\x05\x65xtra\x18\x06 
\x03(\x0b\x32\x1b.EvaluationConfig.ExtraEntry\x1a,\n\nExtraEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01"5\n\x0bTaskRequest\x12\x11\n\tworker_id\x18\x01 \x01(\t\x12\x13\n\x0bmax_tasks\x18\x02 \x01(\x05"G\n\x0cTaskResponse\x12\x10\n\x08has_task\x18\x01 \x01(\x08\x12%\n\x05tasks\x18\x02 \x03(\x0b\x32\x16.EvaluateRequest"\xb1\x01\n\rResultRequest\x12\x0f\n\x07task_id\x18\x01 \x01(\t\x12\x11\n\tworker_id\x18\x02 \x01(\t\x12\x0f\n\x07success\x18\x03 \x01(\x08\x12*\n\x07metrics\x18\x04 \x03(\x0b\x32\x19.ResultRequest.MetricsEntry\x12\r\n\x05\x65rror\x18\x05 \x01(\t\x12\x10\n\x08\x64uration\x18\x06 \x01(\x01\x12\x14\n\x0carchitecture\x18\x07 \x01(\t\x1a/\n\x0cMetricsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x01:\x02\x38\x01"9\n\x0eResultResponse\x12\x14\n\x0cacknowledged\x18\x01 \x01(\x08\x12\x0f\n\x07message\x18\x02 \x01(\t"$\n\rStatusRequest\x12\x11\n\tworker_id\x18\x01 \x01(\t"l\n\x0eStatusResponse\x12\x0e\n\x06status\x18\x01 \x01(\t\x12\x17\n\x0fcurrent_task_id\x18\x02 \x01(\t\x12!\n\x07metrics\x18\x03 \x01(\x0b\x32\x10.WorkerMetrics\x12\x0e\n\x06uptime\x18\x04 \x01(\x03"?\n\x0fShutdownRequest\x12\x11\n\tworker_id\x18\x01 \x01(\t\x12\x10\n\x08graceful\x18\x02 \x01(\x08")\n\x10ShutdownResponse\x12\x14\n\x0cacknowledged\x18\x01 \x01(\x08"!\n\rCancelRequest\x12\x0f\n\x07task_id\x18\x01 \x01(\t"3\n\x0e\x43\x61ncelResponse\x12\x0f\n\x07success\x18\x01 \x01(\x08\x12\x0f\n\x07message\x18\x02 
\x01(\t2\xf1\x01\n\rMasterService\x12H\n\x0eRegisterWorker\x12\x1a.RegisterRequest\x1a\x1a.RegisterResponse\x12\x38\n\tHeartbeat\x12\x1a.HeartbeatRequest\x1a\x1a.HeartbeatResponse\x12;\n\x0cSubmitResult\x12\x17.ResultRequest\x1a\x18.ResultResponse\x12\x1f\n\x0bRequestTask\x12\x15.TaskRequest\x1a\x16.TaskResponse2\xf1\x01\n\rWorkerService\x12;\n\x08\x45valuate\x12\x1a.EvaluateRequest\x1a\x1b.EvaluateResponse\x12\x35\n\tGetStatus\x12\x17.StatusRequest\x1a\x18.StatusResponse\x12\x38\n\x08Shutdown\x12\x19.ShutdownRequest\x1a\x1a.ShutdownResponse\x12\x32\n\nCancelTask\x12\x17.CancelRequest\x1a\x18.CancelResponseb\x06proto3'
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals())
|
|
20
|
+
_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, "worker_pb2", globals())
|
|
21
|
+
if _descriptor._USE_C_DESCRIPTORS is False:
|
|
22
|
+
DESCRIPTOR._options = None
|
|
23
|
+
_REGISTERREQUEST_METADATAENTRY._options = None
|
|
24
|
+
_REGISTERREQUEST_METADATAENTRY._serialized_options = b"8\001"
|
|
25
|
+
_EVALUATERESPONSE_METRICSENTRY._options = None
|
|
26
|
+
_EVALUATERESPONSE_METRICSENTRY._serialized_options = b"8\001"
|
|
27
|
+
_EVALUATIONCONFIG_EXTRAENTRY._options = None
|
|
28
|
+
_EVALUATIONCONFIG_EXTRAENTRY._serialized_options = b"8\001"
|
|
29
|
+
_RESULTREQUEST_METRICSENTRY._options = None
|
|
30
|
+
_RESULTREQUEST_METRICSENTRY._serialized_options = b"8\001"
|
|
31
|
+
_REGISTERREQUEST._serialized_start = 40
|
|
32
|
+
_REGISTERREQUEST._serialized_end = 175
|
|
33
|
+
_REGISTERREQUEST_METADATAENTRY._serialized_start = 128
|
|
34
|
+
_REGISTERREQUEST_METADATAENTRY._serialized_end = 175
|
|
35
|
+
_REGISTERRESPONSE._serialized_start = 177
|
|
36
|
+
_REGISTERRESPONSE._serialized_end = 250
|
|
37
|
+
_HEARTBEATREQUEST._serialized_start = 252
|
|
38
|
+
_HEARTBEATREQUEST._serialized_end = 365
|
|
39
|
+
_HEARTBEATRESPONSE._serialized_start = 367
|
|
40
|
+
_HEARTBEATRESPONSE._serialized_end = 429
|
|
41
|
+
_WORKERMETRICS._serialized_start = 432
|
|
42
|
+
_WORKERMETRICS._serialized_end = 606
|
|
43
|
+
_EVALUATEREQUEST._serialized_start = 608
|
|
44
|
+
_EVALUATEREQUEST._serialized_end = 702
|
|
45
|
+
_EVALUATERESPONSE._serialized_start = 705
|
|
46
|
+
_EVALUATERESPONSE._serialized_end = 844
|
|
47
|
+
_EVALUATERESPONSE_METRICSENTRY._serialized_start = 795
|
|
48
|
+
_EVALUATERESPONSE_METRICSENTRY._serialized_end = 844
|
|
49
|
+
_EVALUATIONCONFIG._serialized_start = 847
|
|
50
|
+
_EVALUATIONCONFIG._serialized_end = 1021
|
|
51
|
+
_EVALUATIONCONFIG_EXTRAENTRY._serialized_start = 977
|
|
52
|
+
_EVALUATIONCONFIG_EXTRAENTRY._serialized_end = 1021
|
|
53
|
+
_TASKREQUEST._serialized_start = 1023
|
|
54
|
+
_TASKREQUEST._serialized_end = 1076
|
|
55
|
+
_TASKRESPONSE._serialized_start = 1078
|
|
56
|
+
_TASKRESPONSE._serialized_end = 1149
|
|
57
|
+
_RESULTREQUEST._serialized_start = 1152
|
|
58
|
+
_RESULTREQUEST._serialized_end = 1329
|
|
59
|
+
_RESULTREQUEST_METRICSENTRY._serialized_start = 795
|
|
60
|
+
_RESULTREQUEST_METRICSENTRY._serialized_end = 844
|
|
61
|
+
_RESULTRESPONSE._serialized_start = 1331
|
|
62
|
+
_RESULTRESPONSE._serialized_end = 1388
|
|
63
|
+
_STATUSREQUEST._serialized_start = 1390
|
|
64
|
+
_STATUSREQUEST._serialized_end = 1426
|
|
65
|
+
_STATUSRESPONSE._serialized_start = 1428
|
|
66
|
+
_STATUSRESPONSE._serialized_end = 1536
|
|
67
|
+
_SHUTDOWNREQUEST._serialized_start = 1538
|
|
68
|
+
_SHUTDOWNREQUEST._serialized_end = 1601
|
|
69
|
+
_SHUTDOWNRESPONSE._serialized_start = 1603
|
|
70
|
+
_SHUTDOWNRESPONSE._serialized_end = 1644
|
|
71
|
+
_CANCELREQUEST._serialized_start = 1646
|
|
72
|
+
_CANCELREQUEST._serialized_end = 1679
|
|
73
|
+
_CANCELRESPONSE._serialized_start = 1681
|
|
74
|
+
_CANCELRESPONSE._serialized_end = 1732
|
|
75
|
+
_MASTERSERVICE._serialized_start = 1735
|
|
76
|
+
_MASTERSERVICE._serialized_end = 1976
|
|
77
|
+
_WORKERSERVICE._serialized_start = 1979
|
|
78
|
+
_WORKERSERVICE._serialized_end = 2220
|
|
79
|
+
# @@protoc_insertion_point(module_scope)
|