flwr-nightly 1.10.0.dev20240619__py3-none-any.whl → 1.10.0.dev20240707__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of flwr-nightly might be problematic. Click here for more details.
- flwr/cli/app.py +3 -0
- flwr/cli/build.py +5 -9
- flwr/cli/new/new.py +104 -28
- flwr/cli/new/templates/app/README.flowertune.md.tpl +56 -0
- flwr/cli/new/templates/app/code/flwr_tune/__init__.py +15 -0
- flwr/cli/new/templates/app/code/flwr_tune/app.py.tpl +86 -0
- flwr/cli/new/templates/app/code/flwr_tune/client.py.tpl +124 -0
- flwr/cli/new/templates/app/code/flwr_tune/config.yaml.tpl +34 -0
- flwr/cli/new/templates/app/code/flwr_tune/dataset.py.tpl +57 -0
- flwr/cli/new/templates/app/code/flwr_tune/models.py.tpl +59 -0
- flwr/cli/new/templates/app/code/flwr_tune/server.py.tpl +48 -0
- flwr/cli/new/templates/app/code/flwr_tune/static_config.yaml.tpl +11 -0
- flwr/cli/new/templates/app/pyproject.flowertune.toml.tpl +42 -0
- flwr/cli/run/run.py +21 -5
- flwr/client/__init__.py +2 -0
- flwr/client/app.py +15 -10
- flwr/client/client_app.py +30 -5
- flwr/client/dpfedavg_numpy_client.py +1 -1
- flwr/client/grpc_rere_client/__init__.py +1 -1
- flwr/client/grpc_rere_client/connection.py +1 -1
- flwr/client/message_handler/__init__.py +1 -1
- flwr/client/message_handler/message_handler.py +4 -5
- flwr/client/mod/__init__.py +1 -1
- flwr/client/mod/secure_aggregation/__init__.py +1 -1
- flwr/client/mod/utils.py +1 -1
- flwr/client/node_state.py +6 -3
- flwr/client/node_state_tests.py +1 -1
- flwr/client/rest_client/__init__.py +1 -1
- flwr/client/rest_client/connection.py +1 -1
- flwr/client/supernode/app.py +12 -4
- flwr/client/typing.py +2 -1
- flwr/common/address.py +1 -1
- flwr/common/config.py +8 -6
- flwr/common/constant.py +4 -1
- flwr/common/context.py +11 -1
- flwr/common/date.py +1 -1
- flwr/common/dp.py +1 -1
- flwr/common/grpc.py +1 -1
- flwr/common/logger.py +13 -0
- flwr/common/message.py +0 -17
- flwr/common/secure_aggregation/__init__.py +1 -1
- flwr/common/secure_aggregation/crypto/__init__.py +1 -1
- flwr/common/secure_aggregation/crypto/shamir.py +1 -1
- flwr/common/secure_aggregation/crypto/symmetric_encryption.py +1 -1
- flwr/common/secure_aggregation/ndarrays_arithmetic.py +1 -1
- flwr/common/secure_aggregation/quantization.py +1 -1
- flwr/common/secure_aggregation/secaggplus_constants.py +1 -1
- flwr/common/secure_aggregation/secaggplus_utils.py +1 -1
- flwr/common/version.py +14 -0
- flwr/server/compat/app.py +1 -1
- flwr/server/compat/app_utils.py +1 -1
- flwr/server/compat/driver_client_proxy.py +1 -1
- flwr/server/driver/driver.py +6 -0
- flwr/server/driver/grpc_driver.py +85 -63
- flwr/server/driver/inmemory_driver.py +28 -26
- flwr/server/run_serverapp.py +61 -18
- flwr/server/strategy/bulyan.py +1 -1
- flwr/server/strategy/dpfedavg_adaptive.py +1 -1
- flwr/server/strategy/dpfedavg_fixed.py +1 -1
- flwr/server/strategy/fedadagrad.py +1 -1
- flwr/server/strategy/fedadam.py +1 -1
- flwr/server/strategy/fedavg_android.py +1 -1
- flwr/server/strategy/fedavgm.py +1 -1
- flwr/server/strategy/fedmedian.py +1 -1
- flwr/server/strategy/fedopt.py +1 -1
- flwr/server/strategy/fedprox.py +1 -1
- flwr/server/strategy/fedxgb_bagging.py +1 -1
- flwr/server/strategy/fedxgb_cyclic.py +1 -1
- flwr/server/strategy/fedxgb_nn_avg.py +1 -1
- flwr/server/strategy/fedyogi.py +1 -1
- flwr/server/strategy/krum.py +1 -1
- flwr/server/strategy/qfedavg.py +1 -1
- flwr/server/superlink/driver/__init__.py +1 -1
- flwr/server/superlink/driver/driver_grpc.py +1 -1
- flwr/server/superlink/driver/driver_servicer.py +15 -3
- flwr/server/superlink/fleet/__init__.py +1 -1
- flwr/server/superlink/fleet/grpc_bidi/__init__.py +1 -1
- flwr/server/superlink/fleet/grpc_bidi/flower_service_servicer.py +1 -1
- flwr/server/superlink/fleet/grpc_bidi/grpc_bridge.py +1 -1
- flwr/server/superlink/fleet/grpc_bidi/grpc_client_proxy.py +1 -1
- flwr/server/superlink/fleet/grpc_bidi/grpc_server.py +1 -1
- flwr/server/superlink/fleet/grpc_rere/__init__.py +1 -1
- flwr/server/superlink/fleet/grpc_rere/fleet_servicer.py +1 -1
- flwr/server/superlink/fleet/message_handler/__init__.py +1 -1
- flwr/server/superlink/fleet/message_handler/message_handler.py +1 -1
- flwr/server/superlink/fleet/rest_rere/__init__.py +1 -1
- flwr/server/superlink/fleet/rest_rere/rest_api.py +1 -1
- flwr/server/superlink/fleet/vce/backend/raybackend.py +45 -26
- flwr/server/superlink/fleet/vce/vce_api.py +3 -8
- flwr/server/superlink/state/__init__.py +1 -1
- flwr/server/superlink/state/in_memory_state.py +5 -5
- flwr/server/superlink/state/sqlite_state.py +5 -5
- flwr/server/superlink/state/state.py +1 -1
- flwr/server/superlink/state/state_factory.py +11 -2
- flwr/server/superlink/state/utils.py +6 -0
- flwr/server/utils/__init__.py +1 -1
- flwr/server/utils/tensorboard.py +1 -1
- flwr/simulation/__init__.py +1 -1
- flwr/simulation/app.py +52 -37
- flwr/simulation/ray_transport/__init__.py +1 -1
- flwr/simulation/ray_transport/ray_actor.py +0 -6
- flwr/simulation/ray_transport/ray_client_proxy.py +17 -10
- flwr/simulation/run_simulation.py +47 -28
- flwr/superexec/deployment.py +109 -0
- {flwr_nightly-1.10.0.dev20240619.dist-info → flwr_nightly-1.10.0.dev20240707.dist-info}/METADATA +2 -1
- {flwr_nightly-1.10.0.dev20240619.dist-info → flwr_nightly-1.10.0.dev20240707.dist-info}/RECORD +109 -98
- {flwr_nightly-1.10.0.dev20240619.dist-info → flwr_nightly-1.10.0.dev20240707.dist-info}/LICENSE +0 -0
- {flwr_nightly-1.10.0.dev20240619.dist-info → flwr_nightly-1.10.0.dev20240707.dist-info}/WHEEL +0 -0
- {flwr_nightly-1.10.0.dev20240619.dist-info → flwr_nightly-1.10.0.dev20240707.dist-info}/entry_points.txt +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright
|
|
1
|
+
# Copyright 2024 Flower Labs GmbH. All Rights Reserved.
|
|
2
2
|
#
|
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
4
|
# you may not use this file except in compliance with the License.
|
|
@@ -26,7 +26,16 @@ from .state import State
|
|
|
26
26
|
|
|
27
27
|
|
|
28
28
|
class StateFactory:
|
|
29
|
-
"""Factory class that creates State instances.
|
|
29
|
+
"""Factory class that creates State instances.
|
|
30
|
+
|
|
31
|
+
Parameters
|
|
32
|
+
----------
|
|
33
|
+
database : str
|
|
34
|
+
A string representing the path to the database file that will be opened.
|
|
35
|
+
Note that passing ':memory:' will open a connection to a database that is
|
|
36
|
+
in RAM, instead of on disk. For more information on special in-memory
|
|
37
|
+
databases, please refer to https://sqlite.org/inmemorydb.html.
|
|
38
|
+
"""
|
|
30
39
|
|
|
31
40
|
def __init__(self, database: str) -> None:
|
|
32
41
|
self.database = database
|
|
@@ -17,6 +17,7 @@
|
|
|
17
17
|
|
|
18
18
|
import time
|
|
19
19
|
from logging import ERROR
|
|
20
|
+
from os import urandom
|
|
20
21
|
from uuid import uuid4
|
|
21
22
|
|
|
22
23
|
from flwr.common import log
|
|
@@ -31,6 +32,11 @@ NODE_UNAVAILABLE_ERROR_REASON = (
|
|
|
31
32
|
)
|
|
32
33
|
|
|
33
34
|
|
|
35
|
+
def generate_rand_int_from_bytes(num_bytes: int) -> int:
|
|
36
|
+
"""Generate a random `num_bytes` integer."""
|
|
37
|
+
return int.from_bytes(urandom(num_bytes), "little", signed=True)
|
|
38
|
+
|
|
39
|
+
|
|
34
40
|
def make_node_unavailable_taskres(ref_taskins: TaskIns) -> TaskRes:
|
|
35
41
|
"""Generate a TaskRes with a node unavailable error from a TaskIns."""
|
|
36
42
|
current_time = time.time()
|
flwr/server/utils/__init__.py
CHANGED
flwr/server/utils/tensorboard.py
CHANGED
flwr/simulation/__init__.py
CHANGED
flwr/simulation/app.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright
|
|
1
|
+
# Copyright 2021 Flower Labs GmbH. All Rights Reserved.
|
|
2
2
|
#
|
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
4
|
# you may not use this file except in compliance with the License.
|
|
@@ -27,14 +27,16 @@ from typing import Any, Dict, List, Optional, Type, Union
|
|
|
27
27
|
import ray
|
|
28
28
|
from ray.util.scheduling_strategies import NodeAffinitySchedulingStrategy
|
|
29
29
|
|
|
30
|
-
from flwr.client import
|
|
30
|
+
from flwr.client import ClientFnExt
|
|
31
31
|
from flwr.common import EventType, event
|
|
32
|
-
from flwr.common.
|
|
32
|
+
from flwr.common.constant import NODE_ID_NUM_BYTES
|
|
33
|
+
from flwr.common.logger import log, set_logger_propagation, warn_unsupported_feature
|
|
33
34
|
from flwr.server.client_manager import ClientManager
|
|
34
35
|
from flwr.server.history import History
|
|
35
36
|
from flwr.server.server import Server, init_defaults, run_fl
|
|
36
37
|
from flwr.server.server_config import ServerConfig
|
|
37
38
|
from flwr.server.strategy import Strategy
|
|
39
|
+
from flwr.server.superlink.state.utils import generate_rand_int_from_bytes
|
|
38
40
|
from flwr.simulation.ray_transport.ray_actor import (
|
|
39
41
|
ClientAppActor,
|
|
40
42
|
VirtualClientEngineActor,
|
|
@@ -51,7 +53,7 @@ Invalid Arguments in method:
|
|
|
51
53
|
`start_simulation(
|
|
52
54
|
*,
|
|
53
55
|
client_fn: ClientFn,
|
|
54
|
-
num_clients:
|
|
56
|
+
num_clients: int,
|
|
55
57
|
clients_ids: Optional[List[str]] = None,
|
|
56
58
|
client_resources: Optional[Dict[str, float]] = None,
|
|
57
59
|
server: Optional[Server] = None,
|
|
@@ -70,13 +72,29 @@ REASON:
|
|
|
70
72
|
|
|
71
73
|
"""
|
|
72
74
|
|
|
75
|
+
NodeToPartitionMapping = Dict[int, int]
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _create_node_id_to_partition_mapping(
|
|
79
|
+
num_clients: int,
|
|
80
|
+
) -> NodeToPartitionMapping:
|
|
81
|
+
"""Generate a node_id:partition_id mapping."""
|
|
82
|
+
nodes_mapping: NodeToPartitionMapping = {} # {node-id; partition-id}
|
|
83
|
+
for i in range(num_clients):
|
|
84
|
+
while True:
|
|
85
|
+
node_id = generate_rand_int_from_bytes(NODE_ID_NUM_BYTES)
|
|
86
|
+
if node_id not in nodes_mapping:
|
|
87
|
+
break
|
|
88
|
+
nodes_mapping[node_id] = i
|
|
89
|
+
return nodes_mapping
|
|
90
|
+
|
|
73
91
|
|
|
74
92
|
# pylint: disable=too-many-arguments,too-many-statements,too-many-branches
|
|
75
93
|
def start_simulation(
|
|
76
94
|
*,
|
|
77
|
-
client_fn:
|
|
78
|
-
num_clients:
|
|
79
|
-
clients_ids: Optional[List[str]] = None,
|
|
95
|
+
client_fn: ClientFnExt,
|
|
96
|
+
num_clients: int,
|
|
97
|
+
clients_ids: Optional[List[str]] = None, # UNSUPPORTED, WILL BE REMOVED
|
|
80
98
|
client_resources: Optional[Dict[str, float]] = None,
|
|
81
99
|
server: Optional[Server] = None,
|
|
82
100
|
config: Optional[ServerConfig] = None,
|
|
@@ -92,23 +110,24 @@ def start_simulation(
|
|
|
92
110
|
|
|
93
111
|
Parameters
|
|
94
112
|
----------
|
|
95
|
-
client_fn :
|
|
96
|
-
A function creating
|
|
97
|
-
`
|
|
98
|
-
of type Client. Note that the created client
|
|
99
|
-
and will often be destroyed after a single method
|
|
100
|
-
instances are not long-lived, they should not attempt
|
|
101
|
-
method invocations. Any state required by the instance
|
|
102
|
-
hyperparameters, ...) should be (re-)created in either the
|
|
103
|
-
or the call to any of the client methods (e.g., load
|
|
104
|
-
`evaluate` method itself).
|
|
105
|
-
num_clients :
|
|
106
|
-
The total number of clients in this simulation.
|
|
107
|
-
`clients_ids` is not set and vice-versa.
|
|
113
|
+
client_fn : ClientFnExt
|
|
114
|
+
A function creating Client instances. The function must have the signature
|
|
115
|
+
`client_fn(node_id: int, partition_id: Optional[int])`. It should return
|
|
116
|
+
a single client instance of type Client. Note that the created client
|
|
117
|
+
instances are ephemeral and will often be destroyed after a single method
|
|
118
|
+
invocation. Since client instances are not long-lived, they should not attempt
|
|
119
|
+
to carry state over method invocations. Any state required by the instance
|
|
120
|
+
(model, dataset, hyperparameters, ...) should be (re-)created in either the
|
|
121
|
+
call to `client_fn` or the call to any of the client methods (e.g., load
|
|
122
|
+
evaluation data in the `evaluate` method itself).
|
|
123
|
+
num_clients : int
|
|
124
|
+
The total number of clients in this simulation.
|
|
108
125
|
clients_ids : Optional[List[str]]
|
|
126
|
+
UNSUPPORTED, WILL BE REMOVED. USE `num_clients` INSTEAD.
|
|
109
127
|
List `client_id`s for each client. This is only required if
|
|
110
128
|
`num_clients` is not set. Setting both `num_clients` and `clients_ids`
|
|
111
129
|
with `len(clients_ids)` not equal to `num_clients` generates an error.
|
|
130
|
+
Using this argument will raise an error.
|
|
112
131
|
client_resources : Optional[Dict[str, float]] (default: `{"num_cpus": 1, "num_gpus": 0.0}`)
|
|
113
132
|
CPU and GPU resources for a single client. Supported keys
|
|
114
133
|
are `num_cpus` and `num_gpus`. To understand the GPU utilization caused by
|
|
@@ -158,7 +177,6 @@ def start_simulation(
|
|
|
158
177
|
is an advanced feature. For all details, please refer to the Ray documentation:
|
|
159
178
|
https://docs.ray.io/en/latest/ray-core/scheduling/index.html
|
|
160
179
|
|
|
161
|
-
|
|
162
180
|
Returns
|
|
163
181
|
-------
|
|
164
182
|
hist : flwr.server.history.History
|
|
@@ -170,6 +188,14 @@ def start_simulation(
|
|
|
170
188
|
{"num_clients": len(clients_ids) if clients_ids is not None else num_clients},
|
|
171
189
|
)
|
|
172
190
|
|
|
191
|
+
if clients_ids is not None:
|
|
192
|
+
warn_unsupported_feature(
|
|
193
|
+
"Passing `clients_ids` to `start_simulation` is deprecated and not longer "
|
|
194
|
+
"used by `start_simulation`. Use `num_clients` exclusively instead."
|
|
195
|
+
)
|
|
196
|
+
log(ERROR, "`clients_ids` argument used.")
|
|
197
|
+
sys.exit()
|
|
198
|
+
|
|
173
199
|
# Set logger propagation
|
|
174
200
|
loop: Optional[asyncio.AbstractEventLoop] = None
|
|
175
201
|
try:
|
|
@@ -196,20 +222,8 @@ def start_simulation(
|
|
|
196
222
|
initialized_config,
|
|
197
223
|
)
|
|
198
224
|
|
|
199
|
-
#
|
|
200
|
-
|
|
201
|
-
if clients_ids is not None:
|
|
202
|
-
if (num_clients is not None) and (len(clients_ids) != num_clients):
|
|
203
|
-
log(ERROR, INVALID_ARGUMENTS_START_SIMULATION)
|
|
204
|
-
sys.exit()
|
|
205
|
-
else:
|
|
206
|
-
cids = clients_ids
|
|
207
|
-
else:
|
|
208
|
-
if num_clients is None:
|
|
209
|
-
log(ERROR, INVALID_ARGUMENTS_START_SIMULATION)
|
|
210
|
-
sys.exit()
|
|
211
|
-
else:
|
|
212
|
-
cids = [str(x) for x in range(num_clients)]
|
|
225
|
+
# Create node-id to partition-id mapping
|
|
226
|
+
nodes_mapping = _create_node_id_to_partition_mapping(num_clients)
|
|
213
227
|
|
|
214
228
|
# Default arguments for Ray initialization
|
|
215
229
|
if not ray_init_args:
|
|
@@ -308,10 +322,11 @@ def start_simulation(
|
|
|
308
322
|
)
|
|
309
323
|
|
|
310
324
|
# Register one RayClientProxy object for each client with the ClientManager
|
|
311
|
-
for
|
|
325
|
+
for node_id, partition_id in nodes_mapping.items():
|
|
312
326
|
client_proxy = RayActorClientProxy(
|
|
313
327
|
client_fn=client_fn,
|
|
314
|
-
|
|
328
|
+
node_id=node_id,
|
|
329
|
+
partition_id=partition_id,
|
|
315
330
|
actor_pool=pool,
|
|
316
331
|
)
|
|
317
332
|
initialized_server.client_manager().register(client=client_proxy)
|
|
@@ -399,12 +399,6 @@ class VirtualClientEngineActorPool(ActorPool):
|
|
|
399
399
|
return self._fetch_future_result(cid)
|
|
400
400
|
|
|
401
401
|
|
|
402
|
-
def init_ray(*args: Any, **kwargs: Any) -> None:
|
|
403
|
-
"""Intialises Ray if not already initialised."""
|
|
404
|
-
if not ray.is_initialized():
|
|
405
|
-
ray.init(*args, **kwargs)
|
|
406
|
-
|
|
407
|
-
|
|
408
402
|
class BasicActorPool:
|
|
409
403
|
"""A basic actor pool."""
|
|
410
404
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright
|
|
1
|
+
# Copyright 2021 Flower Labs GmbH. All Rights Reserved.
|
|
2
2
|
#
|
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
4
|
# you may not use this file except in compliance with the License.
|
|
@@ -20,7 +20,7 @@ from logging import ERROR
|
|
|
20
20
|
from typing import Optional
|
|
21
21
|
|
|
22
22
|
from flwr import common
|
|
23
|
-
from flwr.client import
|
|
23
|
+
from flwr.client import ClientFnExt
|
|
24
24
|
from flwr.client.client_app import ClientApp
|
|
25
25
|
from flwr.client.node_state import NodeState
|
|
26
26
|
from flwr.common import DEFAULT_TTL, Message, Metadata, RecordSet
|
|
@@ -44,16 +44,22 @@ class RayActorClientProxy(ClientProxy):
|
|
|
44
44
|
"""Flower client proxy which delegates work using Ray."""
|
|
45
45
|
|
|
46
46
|
def __init__(
|
|
47
|
-
self,
|
|
47
|
+
self,
|
|
48
|
+
client_fn: ClientFnExt,
|
|
49
|
+
node_id: int,
|
|
50
|
+
partition_id: int,
|
|
51
|
+
actor_pool: VirtualClientEngineActorPool,
|
|
48
52
|
):
|
|
49
|
-
super().__init__(cid)
|
|
53
|
+
super().__init__(cid=str(node_id))
|
|
54
|
+
self.node_id = node_id
|
|
55
|
+
self.partition_id = partition_id
|
|
50
56
|
|
|
51
57
|
def _load_app() -> ClientApp:
|
|
52
58
|
return ClientApp(client_fn=client_fn)
|
|
53
59
|
|
|
54
60
|
self.app_fn = _load_app
|
|
55
61
|
self.actor_pool = actor_pool
|
|
56
|
-
self.proxy_state = NodeState()
|
|
62
|
+
self.proxy_state = NodeState(partition_id=self.partition_id)
|
|
57
63
|
|
|
58
64
|
def _submit_job(self, message: Message, timeout: Optional[float]) -> Message:
|
|
59
65
|
"""Submit a message to the ActorPool."""
|
|
@@ -67,11 +73,13 @@ class RayActorClientProxy(ClientProxy):
|
|
|
67
73
|
|
|
68
74
|
try:
|
|
69
75
|
self.actor_pool.submit_client_job(
|
|
70
|
-
lambda a, a_fn, mssg,
|
|
71
|
-
|
|
76
|
+
lambda a, a_fn, mssg, partition_id, state: a.run.remote(
|
|
77
|
+
a_fn, mssg, partition_id, state
|
|
78
|
+
),
|
|
79
|
+
(self.app_fn, message, str(self.partition_id), state),
|
|
72
80
|
)
|
|
73
81
|
out_mssg, updated_context = self.actor_pool.get_client_result(
|
|
74
|
-
self.
|
|
82
|
+
str(self.partition_id), timeout
|
|
75
83
|
)
|
|
76
84
|
|
|
77
85
|
# Update state
|
|
@@ -103,11 +111,10 @@ class RayActorClientProxy(ClientProxy):
|
|
|
103
111
|
message_id="",
|
|
104
112
|
group_id=str(group_id) if group_id is not None else "",
|
|
105
113
|
src_node_id=0,
|
|
106
|
-
dst_node_id=
|
|
114
|
+
dst_node_id=self.node_id,
|
|
107
115
|
reply_to_message="",
|
|
108
116
|
ttl=timeout if timeout else DEFAULT_TTL,
|
|
109
117
|
message_type=message_type,
|
|
110
|
-
partition_id=int(self.cid),
|
|
111
118
|
),
|
|
112
119
|
)
|
|
113
120
|
|
|
@@ -22,16 +22,17 @@ import threading
|
|
|
22
22
|
import traceback
|
|
23
23
|
from logging import DEBUG, ERROR, INFO, WARNING
|
|
24
24
|
from time import sleep
|
|
25
|
-
from typing import
|
|
25
|
+
from typing import Optional
|
|
26
26
|
|
|
27
27
|
from flwr.client import ClientApp
|
|
28
28
|
from flwr.common import EventType, event, log
|
|
29
29
|
from flwr.common.logger import set_logger_propagation, update_console_handler
|
|
30
|
-
from flwr.common.typing import
|
|
30
|
+
from flwr.common.typing import Run
|
|
31
31
|
from flwr.server.driver import Driver, InMemoryDriver
|
|
32
32
|
from flwr.server.run_serverapp import run
|
|
33
33
|
from flwr.server.server_app import ServerApp
|
|
34
34
|
from flwr.server.superlink.fleet import vce
|
|
35
|
+
from flwr.server.superlink.fleet.vce.backend.backend import BackendConfig
|
|
35
36
|
from flwr.server.superlink.state import StateFactory
|
|
36
37
|
from flwr.simulation.ray_transport.utils import (
|
|
37
38
|
enable_tf_gpu_growth as enable_gpu_growth,
|
|
@@ -66,7 +67,7 @@ def run_simulation(
|
|
|
66
67
|
client_app: ClientApp,
|
|
67
68
|
num_supernodes: int,
|
|
68
69
|
backend_name: str = "ray",
|
|
69
|
-
backend_config: Optional[
|
|
70
|
+
backend_config: Optional[BackendConfig] = None,
|
|
70
71
|
enable_tf_gpu_growth: bool = False,
|
|
71
72
|
verbose_logging: bool = False,
|
|
72
73
|
) -> None:
|
|
@@ -90,9 +91,12 @@ def run_simulation(
|
|
|
90
91
|
backend_name : str (default: ray)
|
|
91
92
|
A simulation backend that runs `ClientApp`s.
|
|
92
93
|
|
|
93
|
-
backend_config : Optional[
|
|
94
|
-
'A dictionary
|
|
95
|
-
backend.
|
|
94
|
+
backend_config : Optional[BackendConfig]
|
|
95
|
+
'A dictionary to configure a backend. Separate dictionaries to configure
|
|
96
|
+
different elements of backend. Supported top-level keys are `init_args`
|
|
97
|
+
for values parsed to initialisation of backend, `client_resources`
|
|
98
|
+
to define the resources for clients, and `actor` to define the actor
|
|
99
|
+
parameters. Values supported in <value> are those included by
|
|
96
100
|
`flwr.common.typing.ConfigsRecordValues`.
|
|
97
101
|
|
|
98
102
|
enable_tf_gpu_growth : bool (default: False)
|
|
@@ -104,7 +108,7 @@ def run_simulation(
|
|
|
104
108
|
works in the TensorFlow documentation: https://www.tensorflow.org/api/stable.
|
|
105
109
|
|
|
106
110
|
verbose_logging : bool (default: False)
|
|
107
|
-
When
|
|
111
|
+
When disabled, only INFO, WARNING and ERROR log messages will be shown. If
|
|
108
112
|
enabled, DEBUG-level logs will be displayed.
|
|
109
113
|
"""
|
|
110
114
|
_run_simulation(
|
|
@@ -133,7 +137,7 @@ def run_serverapp_th(
|
|
|
133
137
|
def server_th_with_start_checks( # type: ignore
|
|
134
138
|
tf_gpu_growth: bool, stop_event: asyncio.Event, **kwargs
|
|
135
139
|
) -> None:
|
|
136
|
-
"""Run SeverApp, after check if GPU memory
|
|
140
|
+
"""Run ServerApp after checking whether GPU memory growth has to be set.
|
|
137
141
|
|
|
138
142
|
Upon exception, trigger stop event for Simulation Engine.
|
|
139
143
|
"""
|
|
@@ -169,11 +173,14 @@ def run_serverapp_th(
|
|
|
169
173
|
return serverapp_th
|
|
170
174
|
|
|
171
175
|
|
|
172
|
-
def
|
|
173
|
-
"""
|
|
176
|
+
def _override_run_id(state: StateFactory, run_id_to_replace: int, run_id: int) -> None:
|
|
177
|
+
"""Override the run_id of an existing Run."""
|
|
174
178
|
log(DEBUG, "Pre-registering run with id %s", run_id)
|
|
175
|
-
|
|
176
|
-
|
|
179
|
+
# Remove run
|
|
180
|
+
run_info: Run = state.state().run_ids.pop(run_id_to_replace) # type: ignore
|
|
181
|
+
# Update with new run_id and insert back in state
|
|
182
|
+
run_info.run_id = run_id
|
|
183
|
+
state.state().run_ids[run_id] = run_info # type: ignore
|
|
177
184
|
|
|
178
185
|
|
|
179
186
|
# pylint: disable=too-many-locals
|
|
@@ -191,7 +198,7 @@ def _main_loop(
|
|
|
191
198
|
) -> None:
|
|
192
199
|
"""Launch SuperLink with Simulation Engine, then ServerApp on a separate thread.
|
|
193
200
|
|
|
194
|
-
Everything runs on the main thread or a separate one,
|
|
201
|
+
Everything runs on the main thread or a separate one, depending on whether the main
|
|
195
202
|
thread already contains a running Asyncio event loop. This is the case if running
|
|
196
203
|
the Simulation Engine on a Jupyter/Colab notebook.
|
|
197
204
|
"""
|
|
@@ -201,11 +208,15 @@ def _main_loop(
|
|
|
201
208
|
f_stop = asyncio.Event()
|
|
202
209
|
serverapp_th = None
|
|
203
210
|
try:
|
|
204
|
-
#
|
|
205
|
-
|
|
211
|
+
# Create run (with empty fab_id and fab_version)
|
|
212
|
+
run_id_ = state_factory.state().create_run("", "")
|
|
206
213
|
|
|
207
214
|
if run_id:
|
|
208
|
-
|
|
215
|
+
_override_run_id(state_factory, run_id_to_replace=run_id_, run_id=run_id)
|
|
216
|
+
run_id_ = run_id
|
|
217
|
+
|
|
218
|
+
# Initialize Driver
|
|
219
|
+
driver = InMemoryDriver(run_id=run_id_, state_factory=state_factory)
|
|
209
220
|
|
|
210
221
|
# Get and run ServerApp thread
|
|
211
222
|
serverapp_th = run_serverapp_th(
|
|
@@ -252,7 +263,7 @@ def _run_simulation(
|
|
|
252
263
|
client_app: Optional[ClientApp] = None,
|
|
253
264
|
server_app: Optional[ServerApp] = None,
|
|
254
265
|
backend_name: str = "ray",
|
|
255
|
-
backend_config: Optional[
|
|
266
|
+
backend_config: Optional[BackendConfig] = None,
|
|
256
267
|
client_app_attr: Optional[str] = None,
|
|
257
268
|
server_app_attr: Optional[str] = None,
|
|
258
269
|
app_dir: str = "",
|
|
@@ -279,9 +290,12 @@ def _run_simulation(
|
|
|
279
290
|
backend_name : str (default: ray)
|
|
280
291
|
A simulation backend that runs `ClientApp`s.
|
|
281
292
|
|
|
282
|
-
backend_config : Optional[
|
|
283
|
-
'A dictionary
|
|
284
|
-
backend.
|
|
293
|
+
backend_config : Optional[BackendConfig]
|
|
294
|
+
'A dictionary to configure a backend. Separate dictionaries to configure
|
|
295
|
+
different elements of backend. Supported top-level keys are `init_args`
|
|
296
|
+
for values parsed to initialisation of backend, `client_resources`
|
|
297
|
+
to define the resources for clients, and `actor` to define the actor
|
|
298
|
+
parameters. Values supported in <value> are those included by
|
|
285
299
|
`flwr.common.typing.ConfigsRecordValues`.
|
|
286
300
|
|
|
287
301
|
client_app_attr : str
|
|
@@ -303,30 +317,34 @@ def _run_simulation(
|
|
|
303
317
|
A boolean to indicate whether to enable GPU growth on the main thread. This is
|
|
304
318
|
desirable if you make use of a TensorFlow model on your `ServerApp` while
|
|
305
319
|
having your `ClientApp` running on the same GPU. Without enabling this, you
|
|
306
|
-
might encounter an out-of-memory error
|
|
320
|
+
might encounter an out-of-memory error because TensorFlow by default allocates
|
|
307
321
|
all GPU memory. Read more about how `tf.config.experimental.set_memory_growth()`
|
|
308
322
|
works in the TensorFlow documentation: https://www.tensorflow.org/api/stable.
|
|
309
323
|
|
|
310
324
|
verbose_logging : bool (default: False)
|
|
311
|
-
When
|
|
325
|
+
When disabled, only INFO, WARNING and ERROR log messages will be shown. If
|
|
312
326
|
enabled, DEBUG-level logs will be displayed.
|
|
313
327
|
"""
|
|
314
328
|
if backend_config is None:
|
|
315
329
|
backend_config = {}
|
|
316
330
|
|
|
331
|
+
if "init_args" not in backend_config:
|
|
332
|
+
backend_config["init_args"] = {}
|
|
333
|
+
|
|
317
334
|
# Set logging level
|
|
318
335
|
logger = logging.getLogger("flwr")
|
|
319
336
|
if verbose_logging:
|
|
320
337
|
update_console_handler(level=DEBUG, timestamps=True, colored=True)
|
|
321
338
|
else:
|
|
322
|
-
backend_config["
|
|
339
|
+
backend_config["init_args"]["logging_level"] = WARNING
|
|
340
|
+
backend_config["init_args"]["log_to_driver"] = True
|
|
323
341
|
|
|
324
342
|
if enable_tf_gpu_growth:
|
|
325
343
|
# Check that Backend config has also enabled using GPU growth
|
|
326
|
-
use_tf = backend_config.get("tensorflow", False)
|
|
344
|
+
use_tf = backend_config.get("actor", {}).get("tensorflow", False)
|
|
327
345
|
if not use_tf:
|
|
328
346
|
log(WARNING, "Enabling GPU growth for your backend.")
|
|
329
|
-
backend_config["tensorflow"] = True
|
|
347
|
+
backend_config["actor"]["tensorflow"] = True
|
|
330
348
|
|
|
331
349
|
# Convert config to original JSON-stream format
|
|
332
350
|
backend_config_stream = json.dumps(backend_config)
|
|
@@ -345,7 +363,7 @@ def _run_simulation(
|
|
|
345
363
|
server_app_attr,
|
|
346
364
|
)
|
|
347
365
|
# Detect if there is an Asyncio event loop already running.
|
|
348
|
-
# If yes, run everything on a separate thread. In
|
|
366
|
+
# If yes, run everything on a separate thread. In environments
|
|
349
367
|
# like Jupyter/Colab notebooks, there is an event loop present.
|
|
350
368
|
run_in_thread = False
|
|
351
369
|
try:
|
|
@@ -357,7 +375,7 @@ def _run_simulation(
|
|
|
357
375
|
run_in_thread = True
|
|
358
376
|
|
|
359
377
|
except RuntimeError:
|
|
360
|
-
log(DEBUG, "No asyncio event loop
|
|
378
|
+
log(DEBUG, "No asyncio event loop running")
|
|
361
379
|
|
|
362
380
|
finally:
|
|
363
381
|
if run_in_thread:
|
|
@@ -402,7 +420,8 @@ def _parse_args_run_simulation() -> argparse.ArgumentParser:
|
|
|
402
420
|
parser.add_argument(
|
|
403
421
|
"--backend-config",
|
|
404
422
|
type=str,
|
|
405
|
-
default='{"client_resources": {"num_cpus":2, "num_gpus":0.0},
|
|
423
|
+
default='{"client_resources": {"num_cpus":2, "num_gpus":0.0},'
|
|
424
|
+
'"actor": {"tensorflow": 0}}',
|
|
406
425
|
help='A JSON formatted stream, e.g \'{"<keyA>":<value>, "<keyB>":<value>}\' to '
|
|
407
426
|
"configure a backend. Values supported in <value> are those included by "
|
|
408
427
|
"`flwr.common.typing.ConfigsRecordValues`. ",
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
# Copyright 2024 Flower Labs GmbH. All Rights Reserved.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
# ==============================================================================
|
|
15
|
+
"""Deployment engine executor."""
|
|
16
|
+
|
|
17
|
+
import subprocess
|
|
18
|
+
import sys
|
|
19
|
+
from logging import ERROR, INFO
|
|
20
|
+
from typing import Optional
|
|
21
|
+
|
|
22
|
+
from typing_extensions import override
|
|
23
|
+
|
|
24
|
+
from flwr.cli.config_utils import get_fab_metadata
|
|
25
|
+
from flwr.cli.install import install_from_fab
|
|
26
|
+
from flwr.common.grpc import create_channel
|
|
27
|
+
from flwr.common.logger import log
|
|
28
|
+
from flwr.proto.driver_pb2 import CreateRunRequest # pylint: disable=E0611
|
|
29
|
+
from flwr.proto.driver_pb2_grpc import DriverStub
|
|
30
|
+
from flwr.server.driver.grpc_driver import DEFAULT_SERVER_ADDRESS_DRIVER
|
|
31
|
+
|
|
32
|
+
from .executor import Executor, RunTracker
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class DeploymentEngine(Executor):
|
|
36
|
+
"""Deployment engine executor."""
|
|
37
|
+
|
|
38
|
+
def __init__(
|
|
39
|
+
self,
|
|
40
|
+
address: str = DEFAULT_SERVER_ADDRESS_DRIVER,
|
|
41
|
+
root_certificates: Optional[bytes] = None,
|
|
42
|
+
) -> None:
|
|
43
|
+
self.address = address
|
|
44
|
+
self.root_certificates = root_certificates
|
|
45
|
+
self.stub: Optional[DriverStub] = None
|
|
46
|
+
|
|
47
|
+
def _connect(self) -> None:
|
|
48
|
+
if self.stub is None:
|
|
49
|
+
channel = create_channel(
|
|
50
|
+
server_address=self.address,
|
|
51
|
+
insecure=(self.root_certificates is None),
|
|
52
|
+
root_certificates=self.root_certificates,
|
|
53
|
+
)
|
|
54
|
+
self.stub = DriverStub(channel)
|
|
55
|
+
|
|
56
|
+
def _create_run(self, fab_id: str, fab_version: str) -> int:
|
|
57
|
+
if self.stub is None:
|
|
58
|
+
self._connect()
|
|
59
|
+
|
|
60
|
+
assert self.stub is not None
|
|
61
|
+
|
|
62
|
+
req = CreateRunRequest(fab_id=fab_id, fab_version=fab_version)
|
|
63
|
+
res = self.stub.CreateRun(request=req)
|
|
64
|
+
return int(res.run_id)
|
|
65
|
+
|
|
66
|
+
@override
|
|
67
|
+
def start_run(self, fab_file: bytes) -> Optional[RunTracker]:
|
|
68
|
+
"""Start run using the Flower Deployment Engine."""
|
|
69
|
+
try:
|
|
70
|
+
# Install FAB to flwr dir
|
|
71
|
+
fab_version, fab_id = get_fab_metadata(fab_file)
|
|
72
|
+
fab_path = install_from_fab(fab_file, None, True)
|
|
73
|
+
|
|
74
|
+
# Install FAB Python package
|
|
75
|
+
subprocess.check_call(
|
|
76
|
+
[sys.executable, "-m", "pip", "install", str(fab_path)],
|
|
77
|
+
stdout=subprocess.DEVNULL,
|
|
78
|
+
stderr=subprocess.DEVNULL,
|
|
79
|
+
)
|
|
80
|
+
|
|
81
|
+
# Call SuperLink to create run
|
|
82
|
+
run_id: int = self._create_run(fab_id, fab_version)
|
|
83
|
+
log(INFO, "Created run %s", str(run_id))
|
|
84
|
+
|
|
85
|
+
# Start ServerApp
|
|
86
|
+
proc = subprocess.Popen( # pylint: disable=consider-using-with
|
|
87
|
+
[
|
|
88
|
+
"flower-server-app",
|
|
89
|
+
"--run-id",
|
|
90
|
+
str(run_id),
|
|
91
|
+
"--insecure",
|
|
92
|
+
],
|
|
93
|
+
stdout=subprocess.PIPE,
|
|
94
|
+
stderr=subprocess.PIPE,
|
|
95
|
+
text=True,
|
|
96
|
+
)
|
|
97
|
+
log(INFO, "Started run %s", str(run_id))
|
|
98
|
+
|
|
99
|
+
return RunTracker(
|
|
100
|
+
run_id=run_id,
|
|
101
|
+
proc=proc,
|
|
102
|
+
)
|
|
103
|
+
# pylint: disable-next=broad-except
|
|
104
|
+
except Exception as e:
|
|
105
|
+
log(ERROR, "Could not start run: %s", str(e))
|
|
106
|
+
return None
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
executor = DeploymentEngine()
|
{flwr_nightly-1.10.0.dev20240619.dist-info → flwr_nightly-1.10.0.dev20240707.dist-info}/METADATA
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: flwr-nightly
|
|
3
|
-
Version: 1.10.0.
|
|
3
|
+
Version: 1.10.0.dev20240707
|
|
4
4
|
Summary: Flower: A Friendly Federated Learning Framework
|
|
5
5
|
Home-page: https://flower.ai
|
|
6
6
|
License: Apache-2.0
|
|
@@ -204,6 +204,7 @@ Other [examples](https://github.com/adap/flower/tree/main/examples):
|
|
|
204
204
|
- [Flower with KaplanMeierFitter from the lifelines library](https://github.com/adap/flower/tree/main/examples/federated-kaplan-meier-fitter)
|
|
205
205
|
- [Sample Level Privacy with Opacus](https://github.com/adap/flower/tree/main/examples/opacus)
|
|
206
206
|
- [Sample Level Privacy with TensorFlow-Privacy](https://github.com/adap/flower/tree/main/examples/tensorflow-privacy)
|
|
207
|
+
- [Flower with a Tabular Dataset](https://github.com/adap/flower/tree/main/examples/fl-tabular)
|
|
207
208
|
|
|
208
209
|
## Community
|
|
209
210
|
|