flwr-nightly 1.10.0.dev20240612__py3-none-any.whl → 1.10.0.dev20240624__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of flwr-nightly might be problematic. Click here for more details.
- flwr/cli/app.py +3 -0
- flwr/cli/build.py +6 -8
- flwr/cli/config_utils.py +53 -3
- flwr/cli/install.py +35 -20
- flwr/cli/new/new.py +104 -28
- flwr/cli/new/templates/app/README.flowertune.md.tpl +56 -0
- flwr/cli/new/templates/app/code/flwr_tune/__init__.py +15 -0
- flwr/cli/new/templates/app/code/flwr_tune/app.py.tpl +86 -0
- flwr/cli/new/templates/app/code/flwr_tune/client.py.tpl +124 -0
- flwr/cli/new/templates/app/code/flwr_tune/config.yaml.tpl +34 -0
- flwr/cli/new/templates/app/code/flwr_tune/dataset.py.tpl +57 -0
- flwr/cli/new/templates/app/code/flwr_tune/models.py.tpl +59 -0
- flwr/cli/new/templates/app/code/flwr_tune/server.py.tpl +48 -0
- flwr/cli/new/templates/app/code/flwr_tune/static_config.yaml.tpl +11 -0
- flwr/cli/new/templates/app/pyproject.flowertune.toml.tpl +42 -0
- flwr/cli/run/run.py +46 -2
- flwr/client/__init__.py +1 -1
- flwr/client/app.py +22 -10
- flwr/client/client_app.py +1 -1
- flwr/client/dpfedavg_numpy_client.py +1 -1
- flwr/client/grpc_adapter_client/__init__.py +15 -0
- flwr/client/grpc_adapter_client/connection.py +94 -0
- flwr/client/grpc_client/connection.py +5 -1
- flwr/client/grpc_rere_client/__init__.py +1 -1
- flwr/client/grpc_rere_client/connection.py +9 -2
- flwr/client/grpc_rere_client/grpc_adapter.py +133 -0
- flwr/client/message_handler/__init__.py +1 -1
- flwr/client/message_handler/message_handler.py +1 -1
- flwr/client/mod/__init__.py +4 -4
- flwr/client/mod/secure_aggregation/__init__.py +1 -1
- flwr/client/mod/utils.py +1 -1
- flwr/client/rest_client/__init__.py +1 -1
- flwr/client/rest_client/connection.py +10 -2
- flwr/client/supernode/app.py +141 -41
- flwr/common/__init__.py +12 -12
- flwr/common/address.py +1 -1
- flwr/common/config.py +73 -0
- flwr/common/constant.py +16 -1
- flwr/common/date.py +1 -1
- flwr/common/dp.py +1 -1
- flwr/common/grpc.py +1 -1
- flwr/common/object_ref.py +39 -5
- flwr/common/record/__init__.py +1 -1
- flwr/common/secure_aggregation/__init__.py +1 -1
- flwr/common/secure_aggregation/crypto/__init__.py +1 -1
- flwr/common/secure_aggregation/crypto/shamir.py +1 -1
- flwr/common/secure_aggregation/crypto/symmetric_encryption.py +1 -1
- flwr/common/secure_aggregation/ndarrays_arithmetic.py +1 -1
- flwr/common/secure_aggregation/quantization.py +1 -1
- flwr/common/secure_aggregation/secaggplus_constants.py +1 -1
- flwr/common/secure_aggregation/secaggplus_utils.py +1 -1
- flwr/common/telemetry.py +4 -0
- flwr/common/typing.py +9 -0
- flwr/common/version.py +14 -0
- flwr/proto/exec_pb2.py +34 -0
- flwr/proto/exec_pb2.pyi +55 -0
- flwr/proto/exec_pb2_grpc.py +101 -0
- flwr/proto/exec_pb2_grpc.pyi +41 -0
- flwr/proto/fab_pb2.py +30 -0
- flwr/proto/fab_pb2.pyi +56 -0
- flwr/proto/fab_pb2_grpc.py +4 -0
- flwr/proto/fab_pb2_grpc.pyi +4 -0
- flwr/server/__init__.py +2 -2
- flwr/server/app.py +62 -25
- flwr/server/compat/app.py +1 -1
- flwr/server/compat/app_utils.py +1 -1
- flwr/server/compat/driver_client_proxy.py +1 -1
- flwr/server/driver/driver.py +6 -0
- flwr/server/driver/grpc_driver.py +85 -63
- flwr/server/driver/inmemory_driver.py +28 -26
- flwr/server/run_serverapp.py +65 -20
- flwr/server/strategy/__init__.py +2 -2
- flwr/server/strategy/bulyan.py +1 -1
- flwr/server/strategy/dpfedavg_adaptive.py +1 -1
- flwr/server/strategy/dpfedavg_fixed.py +1 -1
- flwr/server/strategy/fedadagrad.py +1 -1
- flwr/server/strategy/fedadam.py +1 -1
- flwr/server/strategy/fedavg_android.py +1 -1
- flwr/server/strategy/fedavgm.py +1 -1
- flwr/server/strategy/fedmedian.py +1 -1
- flwr/server/strategy/fedopt.py +1 -1
- flwr/server/strategy/fedprox.py +1 -1
- flwr/server/strategy/fedxgb_bagging.py +1 -1
- flwr/server/strategy/fedxgb_cyclic.py +1 -1
- flwr/server/strategy/fedxgb_nn_avg.py +1 -1
- flwr/server/strategy/fedyogi.py +1 -1
- flwr/server/strategy/krum.py +1 -1
- flwr/server/strategy/qfedavg.py +1 -1
- flwr/server/superlink/driver/__init__.py +1 -1
- flwr/server/superlink/driver/driver_grpc.py +1 -1
- flwr/server/superlink/driver/driver_servicer.py +15 -3
- flwr/server/superlink/fleet/__init__.py +1 -1
- flwr/server/superlink/fleet/grpc_adapter/__init__.py +15 -0
- flwr/server/superlink/fleet/grpc_adapter/grpc_adapter_servicer.py +131 -0
- flwr/server/superlink/fleet/grpc_bidi/__init__.py +1 -1
- flwr/server/superlink/fleet/grpc_bidi/flower_service_servicer.py +1 -1
- flwr/server/superlink/fleet/grpc_bidi/grpc_bridge.py +1 -1
- flwr/server/superlink/fleet/grpc_bidi/grpc_client_proxy.py +1 -1
- flwr/server/superlink/fleet/grpc_bidi/grpc_server.py +5 -1
- flwr/server/superlink/fleet/grpc_rere/__init__.py +1 -1
- flwr/server/superlink/fleet/grpc_rere/fleet_servicer.py +1 -1
- flwr/server/superlink/fleet/message_handler/__init__.py +1 -1
- flwr/server/superlink/fleet/message_handler/message_handler.py +4 -4
- flwr/server/superlink/fleet/rest_rere/__init__.py +1 -1
- flwr/server/superlink/fleet/rest_rere/rest_api.py +1 -1
- flwr/server/superlink/fleet/vce/backend/raybackend.py +44 -25
- flwr/server/superlink/fleet/vce/vce_api.py +3 -1
- flwr/server/superlink/state/__init__.py +1 -1
- flwr/server/superlink/state/in_memory_state.py +9 -6
- flwr/server/superlink/state/sqlite_state.py +7 -4
- flwr/server/superlink/state/state.py +6 -5
- flwr/server/superlink/state/state_factory.py +11 -2
- flwr/server/utils/__init__.py +1 -1
- flwr/server/utils/tensorboard.py +1 -1
- flwr/simulation/__init__.py +5 -2
- flwr/simulation/app.py +1 -1
- flwr/simulation/ray_transport/__init__.py +1 -1
- flwr/simulation/ray_transport/ray_actor.py +0 -6
- flwr/simulation/ray_transport/ray_client_proxy.py +1 -1
- flwr/simulation/run_simulation.py +63 -22
- flwr/superexec/__init__.py +21 -0
- flwr/superexec/app.py +178 -0
- flwr/superexec/exec_grpc.py +51 -0
- flwr/superexec/exec_servicer.py +65 -0
- flwr/superexec/executor.py +54 -0
- {flwr_nightly-1.10.0.dev20240612.dist-info → flwr_nightly-1.10.0.dev20240624.dist-info}/METADATA +2 -1
- {flwr_nightly-1.10.0.dev20240612.dist-info → flwr_nightly-1.10.0.dev20240624.dist-info}/RECORD +130 -101
- {flwr_nightly-1.10.0.dev20240612.dist-info → flwr_nightly-1.10.0.dev20240624.dist-info}/entry_points.txt +1 -0
- {flwr_nightly-1.10.0.dev20240612.dist-info → flwr_nightly-1.10.0.dev20240624.dist-info}/LICENSE +0 -0
- {flwr_nightly-1.10.0.dev20240612.dist-info → flwr_nightly-1.10.0.dev20240624.dist-info}/WHEEL +0 -0
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
"""Ray backend for the Fleet API using the Simulation Engine."""
|
|
16
16
|
|
|
17
17
|
import pathlib
|
|
18
|
-
from logging import DEBUG, ERROR
|
|
18
|
+
from logging import DEBUG, ERROR
|
|
19
19
|
from typing import Callable, Dict, List, Tuple, Union
|
|
20
20
|
|
|
21
21
|
import ray
|
|
@@ -24,16 +24,15 @@ from flwr.client.client_app import ClientApp
|
|
|
24
24
|
from flwr.common.context import Context
|
|
25
25
|
from flwr.common.logger import log
|
|
26
26
|
from flwr.common.message import Message
|
|
27
|
-
from flwr.
|
|
28
|
-
|
|
29
|
-
ClientAppActor,
|
|
30
|
-
init_ray,
|
|
31
|
-
)
|
|
27
|
+
from flwr.common.typing import ConfigsRecordValues
|
|
28
|
+
from flwr.simulation.ray_transport.ray_actor import BasicActorPool, ClientAppActor
|
|
32
29
|
from flwr.simulation.ray_transport.utils import enable_tf_gpu_growth
|
|
33
30
|
|
|
34
31
|
from .backend import Backend, BackendConfig
|
|
35
32
|
|
|
36
33
|
ClientResourcesDict = Dict[str, Union[int, float]]
|
|
34
|
+
ActorArgsDict = Dict[str, Union[int, float, Callable[[], None]]]
|
|
35
|
+
RunTimeEnvDict = Dict[str, Union[str, List[str]]]
|
|
37
36
|
|
|
38
37
|
|
|
39
38
|
class RayBackend(Backend):
|
|
@@ -51,40 +50,29 @@ class RayBackend(Backend):
|
|
|
51
50
|
if not pathlib.Path(work_dir).exists():
|
|
52
51
|
raise ValueError(f"Specified work_dir {work_dir} does not exist.")
|
|
53
52
|
|
|
54
|
-
#
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
)
|
|
58
|
-
|
|
59
|
-
if backend_config.get("mute_logging", False):
|
|
60
|
-
init_ray(
|
|
61
|
-
logging_level=WARNING, log_to_driver=False, runtime_env=runtime_env
|
|
62
|
-
)
|
|
63
|
-
elif backend_config.get("silent", False):
|
|
64
|
-
init_ray(logging_level=WARNING, log_to_driver=True, runtime_env=runtime_env)
|
|
65
|
-
else:
|
|
66
|
-
init_ray(runtime_env=runtime_env)
|
|
53
|
+
# Initialise ray
|
|
54
|
+
self.init_args_key = "init_args"
|
|
55
|
+
self.init_ray(backend_config, work_dir)
|
|
67
56
|
|
|
68
57
|
# Validate client resources
|
|
69
58
|
self.client_resources_key = "client_resources"
|
|
59
|
+
client_resources = self._validate_client_resources(config=backend_config)
|
|
70
60
|
|
|
71
61
|
# Create actor pool
|
|
72
|
-
|
|
73
|
-
actor_kwargs = {"on_actor_init_fn": enable_tf_gpu_growth} if use_tf else {}
|
|
62
|
+
actor_kwargs = self._validate_actor_arguments(config=backend_config)
|
|
74
63
|
|
|
75
|
-
client_resources = self._validate_client_resources(config=backend_config)
|
|
76
64
|
self.pool = BasicActorPool(
|
|
77
65
|
actor_type=ClientAppActor,
|
|
78
66
|
client_resources=client_resources,
|
|
79
67
|
actor_kwargs=actor_kwargs,
|
|
80
68
|
)
|
|
81
69
|
|
|
82
|
-
def _configure_runtime_env(self, work_dir: str) ->
|
|
70
|
+
def _configure_runtime_env(self, work_dir: str) -> RunTimeEnvDict:
|
|
83
71
|
"""Return list of files/subdirectories to exclude relative to work_dir.
|
|
84
72
|
|
|
85
73
|
Without this, Ray will push everything to the Ray Cluster.
|
|
86
74
|
"""
|
|
87
|
-
runtime_env:
|
|
75
|
+
runtime_env: RunTimeEnvDict = {"working_dir": work_dir}
|
|
88
76
|
|
|
89
77
|
excludes = []
|
|
90
78
|
path = pathlib.Path(work_dir)
|
|
@@ -125,6 +113,37 @@ class RayBackend(Backend):
|
|
|
125
113
|
|
|
126
114
|
return client_resources
|
|
127
115
|
|
|
116
|
+
def _validate_actor_arguments(self, config: BackendConfig) -> ActorArgsDict:
    """Build the keyword arguments passed to each actor in the pool.

    Parameters
    ----------
    config : BackendConfig
        Backend configuration. Only the optional `actor` entry is read.

    Returns
    -------
    ActorArgsDict
        `{"on_actor_init_fn": enable_tf_gpu_growth}` when the `actor` entry
        holds a truthy `tensorflow` value, otherwise an empty dict.
    """
    actor_args_config = config.get("actor", False)
    actor_args: ActorArgsDict = {}
    # Read the flag from the user-supplied `actor` section. Looking it up in
    # the (still empty) `actor_args` result would always yield False, so the
    # TensorFlow GPU-growth hook would never be installed.
    if actor_args_config and actor_args_config.get("tensorflow", False):
        actor_args["on_actor_init_fn"] = enable_tf_gpu_growth
    return actor_args
|
|
124
|
+
|
|
125
|
+
def init_ray(self, backend_config: BackendConfig, work_dir: str) -> None:
    """Initialise Ray unless it is already initialised.

    Ray is started with the entries stored under `self.init_args_key` in
    `backend_config`. When `work_dir` is non-empty, the `runtime_env` built
    by `_configure_runtime_env` is added on top, replacing any `runtime_env`
    entry that came from the config.
    """
    if ray.is_initialized():
        return

    # Only build a runtime environment when a working dir was requested
    runtime_env = None
    if work_dir:
        runtime_env = self._configure_runtime_env(work_dir=work_dir)

    ray_init_args: Dict[
        str,
        Union[ConfigsRecordValues, RunTimeEnvDict],
    ] = {}

    # Start from the user-provided initialisation arguments, if any
    user_init_args = backend_config.get(self.init_args_key)
    if user_init_args:
        ray_init_args.update(user_init_args.items())

    if runtime_env is not None:
        ray_init_args["runtime_env"] = runtime_env

    ray.init(**ray_init_args)
|
|
146
|
+
|
|
128
147
|
@property
|
|
129
148
|
def num_workers(self) -> int:
|
|
130
149
|
"""Return number of actors in pool."""
|
|
@@ -152,7 +171,7 @@ class RayBackend(Backend):
|
|
|
152
171
|
partition_id = message.metadata.partition_id
|
|
153
172
|
|
|
154
173
|
try:
|
|
155
|
-
#
|
|
174
|
+
# Submit a task to the pool
|
|
156
175
|
future = await self.pool.submit(
|
|
157
176
|
lambda a, a_fn, mssg, cid, state: a.run.remote(a_fn, mssg, cid, state),
|
|
158
177
|
(app, message, str(partition_id), context),
|
|
@@ -20,6 +20,7 @@ import sys
|
|
|
20
20
|
import time
|
|
21
21
|
import traceback
|
|
22
22
|
from logging import DEBUG, ERROR, INFO, WARN
|
|
23
|
+
from pathlib import Path
|
|
23
24
|
from typing import Callable, Dict, List, Optional
|
|
24
25
|
|
|
25
26
|
from flwr.client.client_app import ClientApp, ClientAppException, LoadClientAppError
|
|
@@ -274,6 +275,7 @@ def start_vce(
|
|
|
274
275
|
# Use mapping constructed externally. This also means nodes
|
|
275
276
|
# have previously being registered.
|
|
276
277
|
nodes_mapping = existing_nodes_mapping
|
|
278
|
+
app_dir = str(Path(app_dir).absolute())
|
|
277
279
|
|
|
278
280
|
if not state_factory:
|
|
279
281
|
log(INFO, "A StateFactory was not supplied to the SimulationEngine.")
|
|
@@ -323,7 +325,7 @@ def start_vce(
|
|
|
323
325
|
if app_dir is not None:
|
|
324
326
|
sys.path.insert(0, app_dir)
|
|
325
327
|
|
|
326
|
-
app: ClientApp = load_app(client_app_attr, LoadClientAppError)
|
|
328
|
+
app: ClientApp = load_app(client_app_attr, LoadClientAppError, app_dir)
|
|
327
329
|
|
|
328
330
|
if not isinstance(app, ClientApp):
|
|
329
331
|
raise LoadClientAppError(
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright
|
|
1
|
+
# Copyright 2024 Flower Labs GmbH. All Rights Reserved.
|
|
2
2
|
#
|
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
4
|
# you may not use this file except in compliance with the License.
|
|
@@ -23,6 +23,7 @@ from typing import Dict, List, Optional, Set, Tuple
|
|
|
23
23
|
from uuid import UUID, uuid4
|
|
24
24
|
|
|
25
25
|
from flwr.common import log, now
|
|
26
|
+
from flwr.common.typing import Run
|
|
26
27
|
from flwr.proto.task_pb2 import TaskIns, TaskRes # pylint: disable=E0611
|
|
27
28
|
from flwr.server.superlink.state.state import State
|
|
28
29
|
from flwr.server.utils import validate_task_ins_or_res
|
|
@@ -40,7 +41,7 @@ class InMemoryState(State): # pylint: disable=R0902,R0904
|
|
|
40
41
|
self.public_key_to_node_id: Dict[bytes, int] = {}
|
|
41
42
|
|
|
42
43
|
# Map run_id to (fab_id, fab_version)
|
|
43
|
-
self.run_ids: Dict[int,
|
|
44
|
+
self.run_ids: Dict[int, Run] = {}
|
|
44
45
|
self.task_ins_store: Dict[UUID, TaskIns] = {}
|
|
45
46
|
self.task_res_store: Dict[UUID, TaskRes] = {}
|
|
46
47
|
|
|
@@ -281,7 +282,9 @@ class InMemoryState(State): # pylint: disable=R0902,R0904
|
|
|
281
282
|
run_id: int = int.from_bytes(os.urandom(8), "little", signed=True)
|
|
282
283
|
|
|
283
284
|
if run_id not in self.run_ids:
|
|
284
|
-
self.run_ids[run_id] = (
|
|
285
|
+
self.run_ids[run_id] = Run(
|
|
286
|
+
run_id=run_id, fab_id=fab_id, fab_version=fab_version
|
|
287
|
+
)
|
|
285
288
|
return run_id
|
|
286
289
|
log(ERROR, "Unexpected run creation failure.")
|
|
287
290
|
return 0
|
|
@@ -319,13 +322,13 @@ class InMemoryState(State): # pylint: disable=R0902,R0904
|
|
|
319
322
|
"""Retrieve all currently stored `client_public_keys` as a set."""
|
|
320
323
|
return self.client_public_keys
|
|
321
324
|
|
|
322
|
-
def get_run(self, run_id: int) ->
|
|
325
|
+
def get_run(self, run_id: int) -> Optional[Run]:
    """Return the `Run` stored for `run_id`, or `None` if it is unknown.

    The lookup happens while holding `self.lock`. An unknown `run_id` is
    logged at ERROR level before `None` is returned.
    """
    with self.lock:
        if run_id in self.run_ids:
            return self.run_ids[run_id]
        log(ERROR, "`run_id` is invalid")
        return None
|
|
329
332
|
|
|
330
333
|
def acknowledge_ping(self, node_id: int, ping_interval: float) -> bool:
|
|
331
334
|
"""Acknowledge a ping received from a node, serving as a heartbeat."""
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright
|
|
1
|
+
# Copyright 2024 Flower Labs GmbH. All Rights Reserved.
|
|
2
2
|
#
|
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
4
|
# you may not use this file except in compliance with the License.
|
|
@@ -24,6 +24,7 @@ from typing import Any, Dict, List, Optional, Sequence, Set, Tuple, Union, cast
|
|
|
24
24
|
from uuid import UUID, uuid4
|
|
25
25
|
|
|
26
26
|
from flwr.common import log, now
|
|
27
|
+
from flwr.common.typing import Run
|
|
27
28
|
from flwr.proto.node_pb2 import Node # pylint: disable=E0611
|
|
28
29
|
from flwr.proto.recordset_pb2 import RecordSet # pylint: disable=E0611
|
|
29
30
|
from flwr.proto.task_pb2 import Task, TaskIns, TaskRes # pylint: disable=E0611
|
|
@@ -680,15 +681,17 @@ class SqliteState(State): # pylint: disable=R0904
|
|
|
680
681
|
result: Set[bytes] = {row["public_key"] for row in rows}
|
|
681
682
|
return result
|
|
682
683
|
|
|
683
|
-
def get_run(self, run_id: int) ->
|
|
684
|
+
def get_run(self, run_id: int) -> Optional[Run]:
    """Retrieve information about the run with the specified `run_id`.

    Returns
    -------
    Optional[Run]
        A `Run` built from the first row of the `run` table matching
        `run_id`, or `None` (after logging an error) when the lookup fails.
    """
    query = "SELECT * FROM run WHERE run_id = ?;"
    try:
        row = self.query(query, (run_id,))[0]
        return Run(
            run_id=run_id, fab_id=row["fab_id"], fab_version=row["fab_version"]
        )
    except (sqlite3.IntegrityError, IndexError):
        # NOTE(review): a SELECT with no match presumably yields an empty
        # result, so the `[0]` above raises IndexError rather than
        # IntegrityError. Both are treated as "unknown run" so callers get
        # the documented `None` instead of an unhandled exception.
        log(ERROR, "`run_id` does not exist.")
        return None
|
|
692
695
|
|
|
693
696
|
def acknowledge_ping(self, node_id: int, ping_interval: float) -> bool:
|
|
694
697
|
"""Acknowledge a ping received from a node, serving as a heartbeat."""
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright
|
|
1
|
+
# Copyright 2024 Flower Labs GmbH. All Rights Reserved.
|
|
2
2
|
#
|
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
4
|
# you may not use this file except in compliance with the License.
|
|
@@ -16,9 +16,10 @@
|
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
import abc
|
|
19
|
-
from typing import List, Optional, Set
|
|
19
|
+
from typing import List, Optional, Set
|
|
20
20
|
from uuid import UUID
|
|
21
21
|
|
|
22
|
+
from flwr.common.typing import Run
|
|
22
23
|
from flwr.proto.task_pb2 import TaskIns, TaskRes # pylint: disable=E0611
|
|
23
24
|
|
|
24
25
|
|
|
@@ -160,7 +161,7 @@ class State(abc.ABC): # pylint: disable=R0904
|
|
|
160
161
|
"""Create a new run for the specified `fab_id` and `fab_version`."""
|
|
161
162
|
|
|
162
163
|
@abc.abstractmethod
|
|
163
|
-
def get_run(self, run_id: int) ->
|
|
164
|
+
def get_run(self, run_id: int) -> Optional[Run]:
|
|
164
165
|
"""Retrieve information about the run with the specified `run_id`.
|
|
165
166
|
|
|
166
167
|
Parameters
|
|
@@ -170,8 +171,8 @@ class State(abc.ABC): # pylint: disable=R0904
|
|
|
170
171
|
|
|
171
172
|
Returns
|
|
172
173
|
-------
|
|
173
|
-
|
|
174
|
-
A
|
|
174
|
+
Optional[Run]
|
|
175
|
+
A dataclass instance containing three elements if `run_id` is valid:
|
|
175
176
|
- `run_id`: The identifier of the run, same as the specified `run_id`.
|
|
176
177
|
- `fab_id`: The identifier of the FAB used in the specified run.
|
|
177
178
|
- `fab_version`: The version of the FAB used in the specified run.
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright
|
|
1
|
+
# Copyright 2024 Flower Labs GmbH. All Rights Reserved.
|
|
2
2
|
#
|
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
4
|
# you may not use this file except in compliance with the License.
|
|
@@ -26,7 +26,16 @@ from .state import State
|
|
|
26
26
|
|
|
27
27
|
|
|
28
28
|
class StateFactory:
|
|
29
|
-
"""Factory class that creates State instances.
|
|
29
|
+
"""Factory class that creates State instances.
|
|
30
|
+
|
|
31
|
+
Parameters
|
|
32
|
+
----------
|
|
33
|
+
database : str
|
|
34
|
+
A string representing the path to the database file that will be opened.
|
|
35
|
+
Note that passing ':memory:' will open a connection to a database that is
|
|
36
|
+
in RAM, instead of on disk. For more information on special in-memory
|
|
37
|
+
databases, please refer to https://sqlite.org/inmemorydb.html.
|
|
38
|
+
"""
|
|
30
39
|
|
|
31
40
|
def __init__(self, database: str) -> None:
|
|
32
41
|
self.database = database
|
flwr/server/utils/__init__.py
CHANGED
flwr/server/utils/tensorboard.py
CHANGED
flwr/simulation/__init__.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright
|
|
1
|
+
# Copyright 2021 Flower Labs GmbH. All Rights Reserved.
|
|
2
2
|
#
|
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
4
|
# you may not use this file except in compliance with the License.
|
|
@@ -36,4 +36,7 @@ To install the necessary dependencies, install `flwr` with the `simulation` extr
|
|
|
36
36
|
raise ImportError(RAY_IMPORT_ERROR)
|
|
37
37
|
|
|
38
38
|
|
|
39
|
-
__all__ = [
|
|
39
|
+
__all__ = [
|
|
40
|
+
"run_simulation",
|
|
41
|
+
"start_simulation",
|
|
42
|
+
]
|
flwr/simulation/app.py
CHANGED
|
@@ -399,12 +399,6 @@ class VirtualClientEngineActorPool(ActorPool):
|
|
|
399
399
|
return self._fetch_future_result(cid)
|
|
400
400
|
|
|
401
401
|
|
|
402
|
-
def init_ray(*args: Any, **kwargs: Any) -> None:
|
|
403
|
-
"""Intialises Ray if not already initialised."""
|
|
404
|
-
if not ray.is_initialized():
|
|
405
|
-
ray.init(*args, **kwargs)
|
|
406
|
-
|
|
407
|
-
|
|
408
402
|
class BasicActorPool:
|
|
409
403
|
"""A basic actor pool."""
|
|
410
404
|
|
|
@@ -22,16 +22,17 @@ import threading
|
|
|
22
22
|
import traceback
|
|
23
23
|
from logging import DEBUG, ERROR, INFO, WARNING
|
|
24
24
|
from time import sleep
|
|
25
|
-
from typing import
|
|
25
|
+
from typing import Optional
|
|
26
26
|
|
|
27
27
|
from flwr.client import ClientApp
|
|
28
28
|
from flwr.common import EventType, event, log
|
|
29
29
|
from flwr.common.logger import set_logger_propagation, update_console_handler
|
|
30
|
-
from flwr.common.typing import
|
|
30
|
+
from flwr.common.typing import Run
|
|
31
31
|
from flwr.server.driver import Driver, InMemoryDriver
|
|
32
32
|
from flwr.server.run_serverapp import run
|
|
33
33
|
from flwr.server.server_app import ServerApp
|
|
34
34
|
from flwr.server.superlink.fleet import vce
|
|
35
|
+
from flwr.server.superlink.fleet.vce.backend.backend import BackendConfig
|
|
35
36
|
from flwr.server.superlink.state import StateFactory
|
|
36
37
|
from flwr.simulation.ray_transport.utils import (
|
|
37
38
|
enable_tf_gpu_growth as enable_gpu_growth,
|
|
@@ -53,6 +54,7 @@ def run_simulation_from_cli() -> None:
|
|
|
53
54
|
backend_name=args.backend,
|
|
54
55
|
backend_config=backend_config_dict,
|
|
55
56
|
app_dir=args.app_dir,
|
|
57
|
+
run_id=args.run_id,
|
|
56
58
|
enable_tf_gpu_growth=args.enable_tf_gpu_growth,
|
|
57
59
|
verbose_logging=args.verbose,
|
|
58
60
|
)
|
|
@@ -65,7 +67,7 @@ def run_simulation(
|
|
|
65
67
|
client_app: ClientApp,
|
|
66
68
|
num_supernodes: int,
|
|
67
69
|
backend_name: str = "ray",
|
|
68
|
-
backend_config: Optional[
|
|
70
|
+
backend_config: Optional[BackendConfig] = None,
|
|
69
71
|
enable_tf_gpu_growth: bool = False,
|
|
70
72
|
verbose_logging: bool = False,
|
|
71
73
|
) -> None:
|
|
@@ -89,9 +91,12 @@ def run_simulation(
|
|
|
89
91
|
backend_name : str (default: ray)
|
|
90
92
|
A simulation backend that runs `ClientApp`s.
|
|
91
93
|
|
|
92
|
-
backend_config : Optional[
|
|
93
|
-
'A dictionary
|
|
94
|
-
backend.
|
|
94
|
+
backend_config : Optional[BackendConfig]
|
|
95
|
+
A dictionary to configure a backend. Separate dictionaries to configure
|
|
96
|
+
different elements of backend. Supported top-level keys are `init_args`
|
|
97
|
+
for values parsed to initialisation of backend, `client_resources`
|
|
98
|
+
to define the resources for clients, and `actor` to define the actor
|
|
99
|
+
parameters. Values supported in <value> are those included by
|
|
95
100
|
`flwr.common.typing.ConfigsRecordValues`.
|
|
96
101
|
|
|
97
102
|
enable_tf_gpu_growth : bool (default: False)
|
|
@@ -103,7 +108,7 @@ def run_simulation(
|
|
|
103
108
|
works in the TensorFlow documentation: https://www.tensorflow.org/api/stable.
|
|
104
109
|
|
|
105
110
|
verbose_logging : bool (default: False)
|
|
106
|
-
When
|
|
111
|
+
When disabled, only INFO, WARNING and ERROR log messages will be shown. If
|
|
107
112
|
enabled, DEBUG-level logs will be displayed.
|
|
108
113
|
"""
|
|
109
114
|
_run_simulation(
|
|
@@ -132,7 +137,7 @@ def run_serverapp_th(
|
|
|
132
137
|
def server_th_with_start_checks( # type: ignore
|
|
133
138
|
tf_gpu_growth: bool, stop_event: asyncio.Event, **kwargs
|
|
134
139
|
) -> None:
|
|
135
|
-
"""Run SeverApp, after check if GPU memory
|
|
140
|
+
"""Run SeverApp, after check if GPU memory growth has to be set.
|
|
136
141
|
|
|
137
142
|
Upon exception, trigger stop event for Simulation Engine.
|
|
138
143
|
"""
|
|
@@ -168,6 +173,16 @@ def run_serverapp_th(
|
|
|
168
173
|
return serverapp_th
|
|
169
174
|
|
|
170
175
|
|
|
176
|
+
def _override_run_id(state: StateFactory, run_id_to_replace: int, run_id: int) -> None:
    """Re-key an already registered run so that it is stored under `run_id`.

    The `Run` stored under `run_id_to_replace` is removed from the state's
    `run_ids` mapping, its `run_id` field is updated, and it is stored again
    under the new key.
    """
    log(DEBUG, "Pre-registering run with id %s", run_id)
    # Take the existing entry out of the mapping
    replaced: Run = state.state().run_ids.pop(run_id_to_replace)  # type: ignore
    # Re-label it and register it under the requested id
    replaced.run_id = run_id
    state.state().run_ids[run_id] = replaced  # type: ignore
|
|
184
|
+
|
|
185
|
+
|
|
171
186
|
# pylint: disable=too-many-locals
|
|
172
187
|
def _main_loop(
|
|
173
188
|
num_supernodes: int,
|
|
@@ -175,6 +190,7 @@ def _main_loop(
|
|
|
175
190
|
backend_config_stream: str,
|
|
176
191
|
app_dir: str,
|
|
177
192
|
enable_tf_gpu_growth: bool,
|
|
193
|
+
run_id: Optional[int] = None,
|
|
178
194
|
client_app: Optional[ClientApp] = None,
|
|
179
195
|
client_app_attr: Optional[str] = None,
|
|
180
196
|
server_app: Optional[ServerApp] = None,
|
|
@@ -182,7 +198,7 @@ def _main_loop(
|
|
|
182
198
|
) -> None:
|
|
183
199
|
"""Launch SuperLink with Simulation Engine, then ServerApp on a separate thread.
|
|
184
200
|
|
|
185
|
-
Everything runs on the main thread or a separate one,
|
|
201
|
+
Everything runs on the main thread or a separate one, depending on whether the main
|
|
186
202
|
thread already contains a running Asyncio event loop. This is the case if running
|
|
187
203
|
the Simulation Engine on a Jupyter/Colab notebook.
|
|
188
204
|
"""
|
|
@@ -192,8 +208,15 @@ def _main_loop(
|
|
|
192
208
|
f_stop = asyncio.Event()
|
|
193
209
|
serverapp_th = None
|
|
194
210
|
try:
|
|
211
|
+
# Create run (with empty fab_id and fab_version)
|
|
212
|
+
run_id_ = state_factory.state().create_run("", "")
|
|
213
|
+
|
|
214
|
+
if run_id:
|
|
215
|
+
_override_run_id(state_factory, run_id_to_replace=run_id_, run_id=run_id)
|
|
216
|
+
run_id_ = run_id
|
|
217
|
+
|
|
195
218
|
# Initialize Driver
|
|
196
|
-
driver = InMemoryDriver(state_factory)
|
|
219
|
+
driver = InMemoryDriver(run_id=run_id_, state_factory=state_factory)
|
|
197
220
|
|
|
198
221
|
# Get and run ServerApp thread
|
|
199
222
|
serverapp_th = run_serverapp_th(
|
|
@@ -240,10 +263,11 @@ def _run_simulation(
|
|
|
240
263
|
client_app: Optional[ClientApp] = None,
|
|
241
264
|
server_app: Optional[ServerApp] = None,
|
|
242
265
|
backend_name: str = "ray",
|
|
243
|
-
backend_config: Optional[
|
|
266
|
+
backend_config: Optional[BackendConfig] = None,
|
|
244
267
|
client_app_attr: Optional[str] = None,
|
|
245
268
|
server_app_attr: Optional[str] = None,
|
|
246
269
|
app_dir: str = "",
|
|
270
|
+
run_id: Optional[int] = None,
|
|
247
271
|
enable_tf_gpu_growth: bool = False,
|
|
248
272
|
verbose_logging: bool = False,
|
|
249
273
|
) -> None:
|
|
@@ -266,9 +290,12 @@ def _run_simulation(
|
|
|
266
290
|
backend_name : str (default: ray)
|
|
267
291
|
A simulation backend that runs `ClientApp`s.
|
|
268
292
|
|
|
269
|
-
backend_config : Optional[
|
|
270
|
-
'A dictionary
|
|
271
|
-
backend.
|
|
293
|
+
backend_config : Optional[BackendConfig]
|
|
294
|
+
A dictionary to configure a backend. Separate dictionaries to configure
|
|
295
|
+
different elements of backend. Supported top-level keys are `init_args`
|
|
296
|
+
for values parsed to initialisation of backend, `client_resources`
|
|
297
|
+
to define the resources for clients, and `actor` to define the actor
|
|
298
|
+
parameters. Values supported in <value> are those included by
|
|
272
299
|
`flwr.common.typing.ConfigsRecordValues`.
|
|
273
300
|
|
|
274
301
|
client_app_attr : str
|
|
@@ -283,34 +310,41 @@ def _run_simulation(
|
|
|
283
310
|
Add specified directory to the PYTHONPATH and load `ClientApp` from there.
|
|
284
311
|
(Default: current working directory.)
|
|
285
312
|
|
|
313
|
+
run_id : Optional[int]
|
|
314
|
+
An integer specifying the ID of the run started when running this function.
|
|
315
|
+
|
|
286
316
|
enable_tf_gpu_growth : bool (default: False)
|
|
287
317
|
A boolean to indicate whether to enable GPU growth on the main thread. This is
|
|
288
318
|
desirable if you make use of a TensorFlow model on your `ServerApp` while
|
|
289
319
|
having your `ClientApp` running on the same GPU. Without enabling this, you
|
|
290
|
-
might encounter an out-of-memory error
|
|
320
|
+
might encounter an out-of-memory error because TensorFlow by default allocates
|
|
291
321
|
all GPU memory. Read more about how `tf.config.experimental.set_memory_growth()`
|
|
292
322
|
works in the TensorFlow documentation: https://www.tensorflow.org/api/stable.
|
|
293
323
|
|
|
294
324
|
verbose_logging : bool (default: False)
|
|
295
|
-
When
|
|
325
|
+
When disabled, only INFO, WARNING and ERROR log messages will be shown. If
|
|
296
326
|
enabled, DEBUG-level logs will be displayed.
|
|
297
327
|
"""
|
|
298
328
|
if backend_config is None:
|
|
299
329
|
backend_config = {}
|
|
300
330
|
|
|
331
|
+
if "init_args" not in backend_config:
|
|
332
|
+
backend_config["init_args"] = {}
|
|
333
|
+
|
|
301
334
|
# Set logging level
|
|
302
335
|
logger = logging.getLogger("flwr")
|
|
303
336
|
if verbose_logging:
|
|
304
337
|
update_console_handler(level=DEBUG, timestamps=True, colored=True)
|
|
305
338
|
else:
|
|
306
|
-
backend_config["
|
|
339
|
+
backend_config["init_args"]["logging_level"] = WARNING
|
|
340
|
+
backend_config["init_args"]["log_to_driver"] = True
|
|
307
341
|
|
|
308
342
|
if enable_tf_gpu_growth:
|
|
309
343
|
# Check that Backend config has also enabled using GPU growth
|
|
310
|
-
use_tf = backend_config.get("tensorflow", False)
|
|
344
|
+
use_tf = backend_config.get("actor", {}).get("tensorflow", False)
|
|
311
345
|
if not use_tf:
|
|
312
346
|
log(WARNING, "Enabling GPU growth for your backend.")
|
|
313
|
-
backend_config["tensorflow"] = True
|
|
347
|
+
backend_config["actor"]["tensorflow"] = True
|
|
314
348
|
|
|
315
349
|
# Convert config to original JSON-stream format
|
|
316
350
|
backend_config_stream = json.dumps(backend_config)
|
|
@@ -322,13 +356,14 @@ def _run_simulation(
|
|
|
322
356
|
backend_config_stream,
|
|
323
357
|
app_dir,
|
|
324
358
|
enable_tf_gpu_growth,
|
|
359
|
+
run_id,
|
|
325
360
|
client_app,
|
|
326
361
|
client_app_attr,
|
|
327
362
|
server_app,
|
|
328
363
|
server_app_attr,
|
|
329
364
|
)
|
|
330
365
|
# Detect if there is an Asyncio event loop already running.
|
|
331
|
-
# If yes, run everything on a separate thread. In
|
|
366
|
+
# If yes, run everything on a separate thread. In environments
|
|
332
367
|
# like Jupyter/Colab notebooks, there is an event loop present.
|
|
333
368
|
run_in_thread = False
|
|
334
369
|
try:
|
|
@@ -340,7 +375,7 @@ def _run_simulation(
|
|
|
340
375
|
run_in_thread = True
|
|
341
376
|
|
|
342
377
|
except RuntimeError:
|
|
343
|
-
log(DEBUG, "No asyncio event loop
|
|
378
|
+
log(DEBUG, "No asyncio event loop running")
|
|
344
379
|
|
|
345
380
|
finally:
|
|
346
381
|
if run_in_thread:
|
|
@@ -385,7 +420,8 @@ def _parse_args_run_simulation() -> argparse.ArgumentParser:
|
|
|
385
420
|
parser.add_argument(
|
|
386
421
|
"--backend-config",
|
|
387
422
|
type=str,
|
|
388
|
-
default='{"client_resources": {"num_cpus":2, "num_gpus":0.0},
|
|
423
|
+
default='{"client_resources": {"num_cpus":2, "num_gpus":0.0},'
|
|
424
|
+
'"actor": {"tensorflow": 0}}',
|
|
389
425
|
help='A JSON formatted stream, e.g \'{"<keyA>":<value>, "<keyB>":<value>}\' to '
|
|
390
426
|
"configure a backend. Values supported in <value> are those included by "
|
|
391
427
|
"`flwr.common.typing.ConfigsRecordValues`. ",
|
|
@@ -413,5 +449,10 @@ def _parse_args_run_simulation() -> argparse.ArgumentParser:
|
|
|
413
449
|
"ClientApp and ServerApp from there."
|
|
414
450
|
" Default: current working directory.",
|
|
415
451
|
)
|
|
452
|
+
parser.add_argument(
|
|
453
|
+
"--run-id",
|
|
454
|
+
type=int,
|
|
455
|
+
help="Sets the ID of the run started by the Simulation Engine.",
|
|
456
|
+
)
|
|
416
457
|
|
|
417
458
|
return parser
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# Copyright 2024 Flower Labs GmbH. All Rights Reserved.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
# ==============================================================================
|
|
15
|
+
"""Flower SuperExec service."""
|
|
16
|
+
|
|
17
|
+
from .app import run_superexec as run_superexec
|
|
18
|
+
|
|
19
|
+
__all__ = [
|
|
20
|
+
"run_superexec",
|
|
21
|
+
]
|