flwr-nightly 1.10.0.dev20240707__py3-none-any.whl → 1.11.0.dev20240724__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of flwr-nightly might be problematic. Click here for more details.
- flwr/cli/build.py +16 -2
- flwr/cli/config_utils.py +47 -27
- flwr/cli/install.py +17 -1
- flwr/cli/new/new.py +32 -21
- flwr/cli/new/templates/app/code/{client.hf.py.tpl → client.huggingface.py.tpl} +15 -5
- flwr/cli/new/templates/app/code/client.jax.py.tpl +2 -1
- flwr/cli/new/templates/app/code/client.mlx.py.tpl +36 -13
- flwr/cli/new/templates/app/code/client.numpy.py.tpl +2 -1
- flwr/cli/new/templates/app/code/client.pytorch.py.tpl +16 -5
- flwr/cli/new/templates/app/code/client.sklearn.py.tpl +6 -3
- flwr/cli/new/templates/app/code/client.tensorflow.py.tpl +25 -5
- flwr/cli/new/templates/app/code/flwr_tune/app.py.tpl +22 -19
- flwr/cli/new/templates/app/code/flwr_tune/client.py.tpl +5 -3
- flwr/cli/new/templates/app/code/flwr_tune/server.py.tpl +1 -1
- flwr/cli/new/templates/app/code/server.huggingface.py.tpl +23 -0
- flwr/cli/new/templates/app/code/server.jax.py.tpl +16 -8
- flwr/cli/new/templates/app/code/server.mlx.py.tpl +12 -7
- flwr/cli/new/templates/app/code/server.numpy.py.tpl +16 -8
- flwr/cli/new/templates/app/code/server.pytorch.py.tpl +15 -13
- flwr/cli/new/templates/app/code/server.sklearn.py.tpl +17 -10
- flwr/cli/new/templates/app/code/server.tensorflow.py.tpl +16 -13
- flwr/cli/new/templates/app/code/{task.hf.py.tpl → task.huggingface.py.tpl} +14 -2
- flwr/cli/new/templates/app/code/task.mlx.py.tpl +14 -2
- flwr/cli/new/templates/app/code/task.pytorch.py.tpl +14 -3
- flwr/cli/new/templates/app/code/task.tensorflow.py.tpl +13 -1
- flwr/cli/new/templates/app/pyproject.flowertune.toml.tpl +9 -12
- flwr/cli/new/templates/app/pyproject.huggingface.toml.tpl +38 -0
- flwr/cli/new/templates/app/pyproject.jax.toml.tpl +17 -11
- flwr/cli/new/templates/app/pyproject.mlx.toml.tpl +17 -12
- flwr/cli/new/templates/app/pyproject.numpy.toml.tpl +12 -12
- flwr/cli/new/templates/app/pyproject.pytorch.toml.tpl +13 -12
- flwr/cli/new/templates/app/pyproject.sklearn.toml.tpl +12 -12
- flwr/cli/new/templates/app/pyproject.tensorflow.toml.tpl +15 -12
- flwr/cli/run/run.py +133 -54
- flwr/client/app.py +56 -24
- flwr/client/client_app.py +28 -8
- flwr/client/grpc_adapter_client/connection.py +3 -2
- flwr/client/grpc_client/connection.py +3 -2
- flwr/client/grpc_rere_client/connection.py +17 -6
- flwr/client/message_handler/message_handler.py +1 -1
- flwr/client/node_state.py +59 -12
- flwr/client/node_state_tests.py +4 -3
- flwr/client/rest_client/connection.py +19 -8
- flwr/client/supernode/app.py +39 -39
- flwr/client/typing.py +2 -2
- flwr/common/config.py +92 -2
- flwr/common/constant.py +3 -0
- flwr/common/context.py +24 -9
- flwr/common/logger.py +25 -0
- flwr/common/object_ref.py +84 -21
- flwr/common/serde.py +45 -0
- flwr/common/telemetry.py +17 -0
- flwr/common/typing.py +5 -0
- flwr/proto/common_pb2.py +36 -0
- flwr/proto/common_pb2.pyi +121 -0
- flwr/proto/common_pb2_grpc.py +4 -0
- flwr/proto/common_pb2_grpc.pyi +4 -0
- flwr/proto/driver_pb2.py +24 -19
- flwr/proto/driver_pb2.pyi +21 -1
- flwr/proto/exec_pb2.py +20 -11
- flwr/proto/exec_pb2.pyi +41 -1
- flwr/proto/run_pb2.py +12 -7
- flwr/proto/run_pb2.pyi +22 -1
- flwr/proto/task_pb2.py +7 -8
- flwr/server/__init__.py +2 -0
- flwr/server/compat/legacy_context.py +5 -4
- flwr/server/driver/grpc_driver.py +82 -140
- flwr/server/run_serverapp.py +40 -18
- flwr/server/server_app.py +56 -10
- flwr/server/serverapp_components.py +52 -0
- flwr/server/superlink/driver/driver_servicer.py +18 -3
- flwr/server/superlink/fleet/message_handler/message_handler.py +13 -2
- flwr/server/superlink/fleet/vce/backend/__init__.py +1 -1
- flwr/server/superlink/fleet/vce/backend/backend.py +4 -4
- flwr/server/superlink/fleet/vce/backend/raybackend.py +10 -10
- flwr/server/superlink/fleet/vce/vce_api.py +149 -117
- flwr/server/superlink/state/in_memory_state.py +11 -3
- flwr/server/superlink/state/sqlite_state.py +23 -8
- flwr/server/superlink/state/state.py +7 -2
- flwr/server/typing.py +2 -0
- flwr/server/workflow/secure_aggregation/secaggplus_workflow.py +18 -2
- flwr/simulation/__init__.py +1 -1
- flwr/simulation/app.py +4 -3
- flwr/simulation/ray_transport/ray_actor.py +15 -19
- flwr/simulation/ray_transport/ray_client_proxy.py +22 -9
- flwr/simulation/run_simulation.py +269 -70
- flwr/superexec/app.py +17 -11
- flwr/superexec/deployment.py +111 -35
- flwr/superexec/exec_grpc.py +5 -1
- flwr/superexec/exec_servicer.py +6 -1
- flwr/superexec/executor.py +21 -0
- flwr/superexec/simulation.py +181 -0
- {flwr_nightly-1.10.0.dev20240707.dist-info → flwr_nightly-1.11.0.dev20240724.dist-info}/METADATA +3 -2
- {flwr_nightly-1.10.0.dev20240707.dist-info → flwr_nightly-1.11.0.dev20240724.dist-info}/RECORD +97 -91
- flwr/cli/new/templates/app/code/server.hf.py.tpl +0 -17
- flwr/cli/new/templates/app/pyproject.hf.toml.tpl +0 -37
- {flwr_nightly-1.10.0.dev20240707.dist-info → flwr_nightly-1.11.0.dev20240724.dist-info}/LICENSE +0 -0
- {flwr_nightly-1.10.0.dev20240707.dist-info → flwr_nightly-1.11.0.dev20240724.dist-info}/WHEEL +0 -0
- {flwr_nightly-1.10.0.dev20240707.dist-info → flwr_nightly-1.11.0.dev20240724.dist-info}/entry_points.txt +0 -0
|
@@ -18,45 +18,169 @@ import argparse
|
|
|
18
18
|
import asyncio
|
|
19
19
|
import json
|
|
20
20
|
import logging
|
|
21
|
+
import sys
|
|
21
22
|
import threading
|
|
22
23
|
import traceback
|
|
24
|
+
from argparse import Namespace
|
|
23
25
|
from logging import DEBUG, ERROR, INFO, WARNING
|
|
26
|
+
from pathlib import Path
|
|
24
27
|
from time import sleep
|
|
25
|
-
from typing import Optional
|
|
28
|
+
from typing import List, Optional
|
|
26
29
|
|
|
30
|
+
from flwr.cli.config_utils import load_and_validate
|
|
27
31
|
from flwr.client import ClientApp
|
|
28
32
|
from flwr.common import EventType, event, log
|
|
29
|
-
from flwr.common.
|
|
30
|
-
from flwr.common.
|
|
33
|
+
from flwr.common.config import get_fused_config_from_dir, parse_config_args
|
|
34
|
+
from flwr.common.constant import RUN_ID_NUM_BYTES
|
|
35
|
+
from flwr.common.logger import (
|
|
36
|
+
set_logger_propagation,
|
|
37
|
+
update_console_handler,
|
|
38
|
+
warn_deprecated_feature_with_example,
|
|
39
|
+
)
|
|
40
|
+
from flwr.common.typing import Run, UserConfig
|
|
31
41
|
from flwr.server.driver import Driver, InMemoryDriver
|
|
32
|
-
from flwr.server.run_serverapp import run
|
|
42
|
+
from flwr.server.run_serverapp import run as run_server_app
|
|
33
43
|
from flwr.server.server_app import ServerApp
|
|
34
44
|
from flwr.server.superlink.fleet import vce
|
|
35
45
|
from flwr.server.superlink.fleet.vce.backend.backend import BackendConfig
|
|
36
46
|
from flwr.server.superlink.state import StateFactory
|
|
47
|
+
from flwr.server.superlink.state.utils import generate_rand_int_from_bytes
|
|
37
48
|
from flwr.simulation.ray_transport.utils import (
|
|
38
49
|
enable_tf_gpu_growth as enable_gpu_growth,
|
|
39
50
|
)
|
|
40
51
|
|
|
41
52
|
|
|
53
|
+
def _check_args_do_not_interfere(args: Namespace) -> bool:
|
|
54
|
+
"""Ensure decoupling of flags for different ways to start the simulation."""
|
|
55
|
+
mode_one_args = ["app", "run_config"]
|
|
56
|
+
mode_two_args = ["client_app", "server_app"]
|
|
57
|
+
|
|
58
|
+
def _resolve_message(conflict_keys: List[str]) -> str:
|
|
59
|
+
return ",".join([f"`--{key}`".replace("_", "-") for key in conflict_keys])
|
|
60
|
+
|
|
61
|
+
# `--app` and `--app-dir` are mutually exclusive; passing both is an error
|
|
62
|
+
if args.app and args.app_dir:
|
|
63
|
+
log(ERROR, "Either `--app` or `--app-dir` can be set, but not both.")
|
|
64
|
+
return False
|
|
65
|
+
|
|
66
|
+
if any(getattr(args, key) for key in mode_one_args):
|
|
67
|
+
if any(getattr(args, key) for key in mode_two_args):
|
|
68
|
+
log(
|
|
69
|
+
ERROR,
|
|
70
|
+
"Passing any of {%s} alongside with any of {%s}",
|
|
71
|
+
_resolve_message(mode_one_args),
|
|
72
|
+
_resolve_message(mode_two_args),
|
|
73
|
+
)
|
|
74
|
+
return False
|
|
75
|
+
|
|
76
|
+
if not args.app:
|
|
77
|
+
log(ERROR, "You need to pass --app")
|
|
78
|
+
return False
|
|
79
|
+
|
|
80
|
+
return True
|
|
81
|
+
|
|
82
|
+
# Ensure all args are set (required for the non-FAB mode of execution)
|
|
83
|
+
if not all(getattr(args, key) for key in mode_two_args):
|
|
84
|
+
log(
|
|
85
|
+
ERROR,
|
|
86
|
+
"Passing all of %s keys are required.",
|
|
87
|
+
_resolve_message(mode_two_args),
|
|
88
|
+
)
|
|
89
|
+
return False
|
|
90
|
+
|
|
91
|
+
return True
|
|
92
|
+
|
|
93
|
+
|
|
42
94
|
# Entry point from CLI
|
|
95
|
+
# pylint: disable=too-many-locals
|
|
43
96
|
def run_simulation_from_cli() -> None:
|
|
44
97
|
"""Run Simulation Engine from the CLI."""
|
|
45
98
|
args = _parse_args_run_simulation().parse_args()
|
|
46
99
|
|
|
100
|
+
if args.enable_tf_gpu_growth:
|
|
101
|
+
warn_deprecated_feature_with_example(
|
|
102
|
+
"Passing `--enable-tf-gpu-growth` is deprecated.",
|
|
103
|
+
example_message="Instead, set the `TF_FORCE_GPU_ALLOW_GROWTH` environmnet "
|
|
104
|
+
"variable to true.",
|
|
105
|
+
code_example='TF_FORCE_GPU_ALLOW_GROWTH="true" flower-simulation <...>',
|
|
106
|
+
)
|
|
107
|
+
|
|
108
|
+
# We are supporting two modes for the CLI entrypoint:
|
|
109
|
+
# 1) Running an app dir containing a `pyproject.toml`
|
|
110
|
+
# 2) Running any ClientApp and ServerApp w/o pyproject.toml being present
|
|
111
|
+
# For 2), some CLI args are compulsory, but they are not required for 1)
|
|
112
|
+
# We first do these checks
|
|
113
|
+
args_check_pass = _check_args_do_not_interfere(args)
|
|
114
|
+
if not args_check_pass:
|
|
115
|
+
sys.exit("Simulation Engine cannot start.")
|
|
116
|
+
|
|
117
|
+
run_id = (
|
|
118
|
+
generate_rand_int_from_bytes(RUN_ID_NUM_BYTES)
|
|
119
|
+
if args.run_id is None
|
|
120
|
+
else args.run_id
|
|
121
|
+
)
|
|
122
|
+
if args.app:
|
|
123
|
+
# Mode 1
|
|
124
|
+
app_path = Path(args.app)
|
|
125
|
+
if not app_path.is_dir():
|
|
126
|
+
log(ERROR, "--app is not a directory")
|
|
127
|
+
sys.exit("Simulation Engine cannot start.")
|
|
128
|
+
|
|
129
|
+
# Load pyproject.toml
|
|
130
|
+
config, errors, warnings = load_and_validate(
|
|
131
|
+
app_path / "pyproject.toml", check_module=False
|
|
132
|
+
)
|
|
133
|
+
if errors:
|
|
134
|
+
raise ValueError(errors)
|
|
135
|
+
|
|
136
|
+
if warnings:
|
|
137
|
+
log(WARNING, warnings)
|
|
138
|
+
|
|
139
|
+
if config is None:
|
|
140
|
+
raise ValueError("Config extracted from FAB's pyproject.toml is not valid")
|
|
141
|
+
|
|
142
|
+
# Get ClientApp and ServerApp components
|
|
143
|
+
app_components = config["tool"]["flwr"]["app"]["components"]
|
|
144
|
+
client_app_attr = app_components["clientapp"]
|
|
145
|
+
server_app_attr = app_components["serverapp"]
|
|
146
|
+
|
|
147
|
+
override_config = parse_config_args([args.run_config])
|
|
148
|
+
fused_config = get_fused_config_from_dir(app_path, override_config)
|
|
149
|
+
app_dir = args.app
|
|
150
|
+
is_app = True
|
|
151
|
+
|
|
152
|
+
else:
|
|
153
|
+
# Mode 2
|
|
154
|
+
client_app_attr = args.client_app
|
|
155
|
+
server_app_attr = args.server_app
|
|
156
|
+
override_config = {}
|
|
157
|
+
fused_config = None
|
|
158
|
+
app_dir = args.app_dir
|
|
159
|
+
is_app = False
|
|
160
|
+
|
|
161
|
+
# Create run
|
|
162
|
+
run = Run(
|
|
163
|
+
run_id=run_id,
|
|
164
|
+
fab_id="",
|
|
165
|
+
fab_version="",
|
|
166
|
+
override_config=override_config,
|
|
167
|
+
)
|
|
168
|
+
|
|
47
169
|
# Load JSON config
|
|
48
170
|
backend_config_dict = json.loads(args.backend_config)
|
|
49
171
|
|
|
50
172
|
_run_simulation(
|
|
51
|
-
server_app_attr=
|
|
52
|
-
client_app_attr=
|
|
173
|
+
server_app_attr=server_app_attr,
|
|
174
|
+
client_app_attr=client_app_attr,
|
|
53
175
|
num_supernodes=args.num_supernodes,
|
|
54
176
|
backend_name=args.backend,
|
|
55
177
|
backend_config=backend_config_dict,
|
|
56
|
-
app_dir=
|
|
57
|
-
|
|
178
|
+
app_dir=app_dir,
|
|
179
|
+
run=run,
|
|
58
180
|
enable_tf_gpu_growth=args.enable_tf_gpu_growth,
|
|
59
181
|
verbose_logging=args.verbose,
|
|
182
|
+
server_app_run_config=fused_config,
|
|
183
|
+
is_app=is_app,
|
|
60
184
|
)
|
|
61
185
|
|
|
62
186
|
|
|
@@ -111,6 +235,15 @@ def run_simulation(
|
|
|
111
235
|
When disabled, only INFO, WARNING and ERROR log messages will be shown. If
|
|
112
236
|
enabled, DEBUG-level logs will be displayed.
|
|
113
237
|
"""
|
|
238
|
+
if enable_tf_gpu_growth:
|
|
239
|
+
warn_deprecated_feature_with_example(
|
|
240
|
+
"Passing `enable_tf_gpu_growth=True` is deprecated.",
|
|
241
|
+
example_message="Instead, set the `TF_FORCE_GPU_ALLOW_GROWTH` environmnet "
|
|
242
|
+
"variable to true.",
|
|
243
|
+
code_example='import os;os.environ["TF_FORCE_GPU_ALLOW_GROWTH"]="true"'
|
|
244
|
+
"\n\tflwr.simulation.run_simulationt(...)",
|
|
245
|
+
)
|
|
246
|
+
|
|
114
247
|
_run_simulation(
|
|
115
248
|
num_supernodes=num_supernodes,
|
|
116
249
|
client_app=client_app,
|
|
@@ -126,16 +259,25 @@ def run_simulation(
|
|
|
126
259
|
def run_serverapp_th(
|
|
127
260
|
server_app_attr: Optional[str],
|
|
128
261
|
server_app: Optional[ServerApp],
|
|
262
|
+
server_app_run_config: UserConfig,
|
|
129
263
|
driver: Driver,
|
|
130
264
|
app_dir: str,
|
|
131
|
-
f_stop:
|
|
265
|
+
f_stop: threading.Event,
|
|
266
|
+
has_exception: threading.Event,
|
|
132
267
|
enable_tf_gpu_growth: bool,
|
|
133
268
|
delay_launch: int = 3,
|
|
134
269
|
) -> threading.Thread:
|
|
135
270
|
"""Run ServerApp in a thread."""
|
|
136
271
|
|
|
137
|
-
def server_th_with_start_checks(
|
|
138
|
-
tf_gpu_growth: bool,
|
|
272
|
+
def server_th_with_start_checks(
|
|
273
|
+
tf_gpu_growth: bool,
|
|
274
|
+
stop_event: threading.Event,
|
|
275
|
+
exception_event: threading.Event,
|
|
276
|
+
_driver: Driver,
|
|
277
|
+
_server_app_dir: str,
|
|
278
|
+
_server_app_run_config: UserConfig,
|
|
279
|
+
_server_app_attr: Optional[str],
|
|
280
|
+
_server_app: Optional[ServerApp],
|
|
139
281
|
) -> None:
|
|
140
282
|
"""Run ServerApp, after checking if GPU memory growth has to be set.
|
|
141
283
|
|
|
@@ -143,14 +285,22 @@ def run_serverapp_th(
|
|
|
143
285
|
"""
|
|
144
286
|
try:
|
|
145
287
|
if tf_gpu_growth:
|
|
146
|
-
log(INFO, "Enabling GPU growth for Tensorflow on the
|
|
288
|
+
log(INFO, "Enabling GPU growth for Tensorflow on the server thread.")
|
|
147
289
|
enable_gpu_growth()
|
|
148
290
|
|
|
149
291
|
# Run ServerApp
|
|
150
|
-
|
|
292
|
+
run_server_app(
|
|
293
|
+
driver=_driver,
|
|
294
|
+
server_app_dir=_server_app_dir,
|
|
295
|
+
server_app_run_config=_server_app_run_config,
|
|
296
|
+
server_app_attr=_server_app_attr,
|
|
297
|
+
loaded_server_app=_server_app,
|
|
298
|
+
)
|
|
151
299
|
except Exception as ex: # pylint: disable=broad-exception-caught
|
|
152
300
|
log(ERROR, "ServerApp thread raised an exception: %s", ex)
|
|
153
301
|
log(ERROR, traceback.format_exc())
|
|
302
|
+
exception_event.set()
|
|
303
|
+
raise
|
|
154
304
|
finally:
|
|
155
305
|
log(DEBUG, "ServerApp finished running.")
|
|
156
306
|
# Upon completion, trigger stop event if one was passed
|
|
@@ -160,71 +310,66 @@ def run_serverapp_th(
|
|
|
160
310
|
|
|
161
311
|
serverapp_th = threading.Thread(
|
|
162
312
|
target=server_th_with_start_checks,
|
|
163
|
-
args=(
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
313
|
+
args=(
|
|
314
|
+
enable_tf_gpu_growth,
|
|
315
|
+
f_stop,
|
|
316
|
+
has_exception,
|
|
317
|
+
driver,
|
|
318
|
+
app_dir,
|
|
319
|
+
server_app_run_config,
|
|
320
|
+
server_app_attr,
|
|
321
|
+
server_app,
|
|
322
|
+
),
|
|
170
323
|
)
|
|
171
324
|
sleep(delay_launch)
|
|
172
325
|
serverapp_th.start()
|
|
173
326
|
return serverapp_th
|
|
174
327
|
|
|
175
328
|
|
|
176
|
-
def _override_run_id(state: StateFactory, run_id_to_replace: int, run_id: int) -> None:
|
|
177
|
-
"""Override the run_id of an existing Run."""
|
|
178
|
-
log(DEBUG, "Pre-registering run with id %s", run_id)
|
|
179
|
-
# Remove run
|
|
180
|
-
run_info: Run = state.state().run_ids.pop(run_id_to_replace) # type: ignore
|
|
181
|
-
# Update with new run_id and insert back in state
|
|
182
|
-
run_info.run_id = run_id
|
|
183
|
-
state.state().run_ids[run_id] = run_info # type: ignore
|
|
184
|
-
|
|
185
|
-
|
|
186
329
|
# pylint: disable=too-many-locals
|
|
187
330
|
def _main_loop(
|
|
188
331
|
num_supernodes: int,
|
|
189
332
|
backend_name: str,
|
|
190
333
|
backend_config_stream: str,
|
|
191
334
|
app_dir: str,
|
|
335
|
+
is_app: bool,
|
|
192
336
|
enable_tf_gpu_growth: bool,
|
|
193
|
-
|
|
337
|
+
run: Run,
|
|
338
|
+
flwr_dir: Optional[str] = None,
|
|
194
339
|
client_app: Optional[ClientApp] = None,
|
|
195
340
|
client_app_attr: Optional[str] = None,
|
|
196
341
|
server_app: Optional[ServerApp] = None,
|
|
197
342
|
server_app_attr: Optional[str] = None,
|
|
343
|
+
server_app_run_config: Optional[UserConfig] = None,
|
|
198
344
|
) -> None:
|
|
199
|
-
"""Launch SuperLink with Simulation Engine, then ServerApp on a separate thread.
|
|
200
|
-
|
|
201
|
-
Everything runs on the main thread or a separate one, depending on whether the main
|
|
202
|
-
thread already contains a running Asyncio event loop. This is the case if running
|
|
203
|
-
the Simulation Engine on a Jupyter/Colab notebook.
|
|
204
|
-
"""
|
|
345
|
+
"""Launch SuperLink with Simulation Engine, then ServerApp on a separate thread."""
|
|
205
346
|
# Initialize StateFactory
|
|
206
347
|
state_factory = StateFactory(":flwr-in-memory-state:")
|
|
207
348
|
|
|
208
|
-
f_stop =
|
|
349
|
+
f_stop = threading.Event()
|
|
350
|
+
# A Threading event to indicate if an exception was raised in the ServerApp thread
|
|
351
|
+
server_app_thread_has_exception = threading.Event()
|
|
209
352
|
serverapp_th = None
|
|
210
353
|
try:
|
|
211
|
-
#
|
|
212
|
-
|
|
354
|
+
# Register run
|
|
355
|
+
log(DEBUG, "Pre-registering run with id %s", run.run_id)
|
|
356
|
+
state_factory.state().run_ids[run.run_id] = run # type: ignore
|
|
213
357
|
|
|
214
|
-
if
|
|
215
|
-
|
|
216
|
-
run_id_ = run_id
|
|
358
|
+
if server_app_run_config is None:
|
|
359
|
+
server_app_run_config = {}
|
|
217
360
|
|
|
218
361
|
# Initialize Driver
|
|
219
|
-
driver = InMemoryDriver(run_id=
|
|
362
|
+
driver = InMemoryDriver(run_id=run.run_id, state_factory=state_factory)
|
|
220
363
|
|
|
221
364
|
# Get and run ServerApp thread
|
|
222
365
|
serverapp_th = run_serverapp_th(
|
|
223
366
|
server_app_attr=server_app_attr,
|
|
224
367
|
server_app=server_app,
|
|
368
|
+
server_app_run_config=server_app_run_config,
|
|
225
369
|
driver=driver,
|
|
226
370
|
app_dir=app_dir,
|
|
227
371
|
f_stop=f_stop,
|
|
372
|
+
has_exception=server_app_thread_has_exception,
|
|
228
373
|
enable_tf_gpu_growth=enable_tf_gpu_growth,
|
|
229
374
|
)
|
|
230
375
|
|
|
@@ -237,8 +382,11 @@ def _main_loop(
|
|
|
237
382
|
backend_name=backend_name,
|
|
238
383
|
backend_config_json_stream=backend_config_stream,
|
|
239
384
|
app_dir=app_dir,
|
|
385
|
+
is_app=is_app,
|
|
240
386
|
state_factory=state_factory,
|
|
241
387
|
f_stop=f_stop,
|
|
388
|
+
run=run,
|
|
389
|
+
flwr_dir=flwr_dir,
|
|
242
390
|
)
|
|
243
391
|
|
|
244
392
|
except Exception as ex:
|
|
@@ -253,6 +401,8 @@ def _main_loop(
|
|
|
253
401
|
event(EventType.RUN_SUPERLINK_LEAVE)
|
|
254
402
|
if serverapp_th:
|
|
255
403
|
serverapp_th.join()
|
|
404
|
+
if server_app_thread_has_exception.is_set():
|
|
405
|
+
raise RuntimeError("Exception in ServerApp thread")
|
|
256
406
|
|
|
257
407
|
log(DEBUG, "Stopping Simulation Engine now.")
|
|
258
408
|
|
|
@@ -266,10 +416,13 @@ def _run_simulation(
|
|
|
266
416
|
backend_config: Optional[BackendConfig] = None,
|
|
267
417
|
client_app_attr: Optional[str] = None,
|
|
268
418
|
server_app_attr: Optional[str] = None,
|
|
419
|
+
server_app_run_config: Optional[UserConfig] = None,
|
|
269
420
|
app_dir: str = "",
|
|
270
|
-
|
|
421
|
+
flwr_dir: Optional[str] = None,
|
|
422
|
+
run: Optional[Run] = None,
|
|
271
423
|
enable_tf_gpu_growth: bool = False,
|
|
272
424
|
verbose_logging: bool = False,
|
|
425
|
+
is_app: bool = False,
|
|
273
426
|
) -> None:
|
|
274
427
|
r"""Launch the Simulation Engine.
|
|
275
428
|
|
|
@@ -298,20 +451,27 @@ def _run_simulation(
|
|
|
298
451
|
parameters. Values supported in <value> are those included by
|
|
299
452
|
`flwr.common.typing.ConfigsRecordValues`.
|
|
300
453
|
|
|
301
|
-
client_app_attr : str
|
|
454
|
+
client_app_attr : Optional[str]
|
|
302
455
|
A path to a `ClientApp` module to be loaded: For example: `client:app` or
|
|
303
456
|
`project.package.module:wrapper.app`."
|
|
304
457
|
|
|
305
|
-
server_app_attr : str
|
|
458
|
+
server_app_attr : Optional[str]
|
|
306
459
|
A path to a `ServerApp` module to be loaded: For example: `server:app` or
|
|
307
460
|
`project.package.module:wrapper.app`."
|
|
308
461
|
|
|
462
|
+
server_app_run_config : Optional[UserConfig]
|
|
463
|
+
Config dictionary that parameterizes the run config. It will be made accessible
|
|
464
|
+
to the ServerApp.
|
|
465
|
+
|
|
309
466
|
app_dir : str
|
|
310
467
|
Add specified directory to the PYTHONPATH and load `ClientApp` from there.
|
|
311
468
|
(Default: current working directory.)
|
|
312
469
|
|
|
313
|
-
|
|
314
|
-
|
|
470
|
+
flwr_dir : Optional[str]
|
|
471
|
+
The path containing installed Flower Apps.
|
|
472
|
+
|
|
473
|
+
run : Optional[Run]
|
|
474
|
+
An object carrying details about the run.
|
|
315
475
|
|
|
316
476
|
enable_tf_gpu_growth : bool (default: False)
|
|
317
477
|
A boolean to indicate whether to enable GPU growth on the main thread. This is
|
|
@@ -324,6 +484,11 @@ def _run_simulation(
|
|
|
324
484
|
verbose_logging : bool (default: False)
|
|
325
485
|
When disabled, only INFO, WARNING and ERROR log messages will be shown. If
|
|
326
486
|
enabled, DEBUG-level logs will be displayed.
|
|
487
|
+
|
|
488
|
+
is_app : bool (default: False)
|
|
489
|
+
A flag that indicates whether the simulation is running an app or not. This is
|
|
490
|
+
needed in order to attempt loading an app's pyproject.toml when nodes register
|
|
491
|
+
a context object.
|
|
327
492
|
"""
|
|
328
493
|
if backend_config is None:
|
|
329
494
|
backend_config = {}
|
|
@@ -331,13 +496,25 @@ def _run_simulation(
|
|
|
331
496
|
if "init_args" not in backend_config:
|
|
332
497
|
backend_config["init_args"] = {}
|
|
333
498
|
|
|
499
|
+
# Set default client_resources if not passed
|
|
500
|
+
if "client_resources" not in backend_config:
|
|
501
|
+
backend_config["client_resources"] = {"num_cpus": 2, "num_gpus": 0}
|
|
502
|
+
|
|
503
|
+
# Initialization of backend config to enable GPU growth globally when set
|
|
504
|
+
if "actor" not in backend_config:
|
|
505
|
+
backend_config["actor"] = {"tensorflow": 0}
|
|
506
|
+
|
|
334
507
|
# Set logging level
|
|
335
508
|
logger = logging.getLogger("flwr")
|
|
336
509
|
if verbose_logging:
|
|
337
510
|
update_console_handler(level=DEBUG, timestamps=True, colored=True)
|
|
338
511
|
else:
|
|
339
|
-
backend_config["init_args"]["logging_level"] =
|
|
340
|
-
|
|
512
|
+
backend_config["init_args"]["logging_level"] = backend_config["init_args"].get(
|
|
513
|
+
"logging_level", WARNING
|
|
514
|
+
)
|
|
515
|
+
backend_config["init_args"]["log_to_driver"] = backend_config["init_args"].get(
|
|
516
|
+
"log_to_driver", True
|
|
517
|
+
)
|
|
341
518
|
|
|
342
519
|
if enable_tf_gpu_growth:
|
|
343
520
|
# Check that Backend config has also enabled using GPU growth
|
|
@@ -349,45 +526,47 @@ def _run_simulation(
|
|
|
349
526
|
# Convert config to original JSON-stream format
|
|
350
527
|
backend_config_stream = json.dumps(backend_config)
|
|
351
528
|
|
|
352
|
-
|
|
529
|
+
# If no `Run` object is set, create one
|
|
530
|
+
if run is None:
|
|
531
|
+
run_id = generate_rand_int_from_bytes(RUN_ID_NUM_BYTES)
|
|
532
|
+
run = Run(run_id=run_id, fab_id="", fab_version="", override_config={})
|
|
533
|
+
|
|
353
534
|
args = (
|
|
354
535
|
num_supernodes,
|
|
355
536
|
backend_name,
|
|
356
537
|
backend_config_stream,
|
|
357
538
|
app_dir,
|
|
539
|
+
is_app,
|
|
358
540
|
enable_tf_gpu_growth,
|
|
359
|
-
|
|
541
|
+
run,
|
|
542
|
+
flwr_dir,
|
|
360
543
|
client_app,
|
|
361
544
|
client_app_attr,
|
|
362
545
|
server_app,
|
|
363
546
|
server_app_attr,
|
|
547
|
+
server_app_run_config,
|
|
364
548
|
)
|
|
365
549
|
# Detect if there is an Asyncio event loop already running.
|
|
366
|
-
# If yes,
|
|
367
|
-
# like Jupyter/Colab notebooks,
|
|
368
|
-
|
|
550
|
+
# If yes, disable logger propagation. In environments
|
|
551
|
+
# like Jupyter/Colab notebooks, it's often better to do this.
|
|
552
|
+
asyncio_loop_running = False
|
|
369
553
|
try:
|
|
370
554
|
_ = (
|
|
371
555
|
asyncio.get_running_loop()
|
|
372
556
|
) # Raises RuntimeError if no event loop is present
|
|
373
557
|
log(DEBUG, "Asyncio event loop already running.")
|
|
374
558
|
|
|
375
|
-
|
|
559
|
+
asyncio_loop_running = True
|
|
376
560
|
|
|
377
561
|
except RuntimeError:
|
|
378
|
-
|
|
562
|
+
pass
|
|
379
563
|
|
|
380
564
|
finally:
|
|
381
|
-
if
|
|
565
|
+
if asyncio_loop_running:
|
|
382
566
|
# Set logger propagation to False to prevent duplicated log output in Colab.
|
|
383
567
|
logger = set_logger_propagation(logger, False)
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
simulation_engine_th.start()
|
|
387
|
-
simulation_engine_th.join()
|
|
388
|
-
else:
|
|
389
|
-
log(DEBUG, "Starting Simulation Engine on the main thread.")
|
|
390
|
-
_main_loop(*args)
|
|
568
|
+
|
|
569
|
+
_main_loop(*args)
|
|
391
570
|
|
|
392
571
|
|
|
393
572
|
def _parse_args_run_simulation() -> argparse.ArgumentParser:
|
|
@@ -397,12 +576,10 @@ def _parse_args_run_simulation() -> argparse.ArgumentParser:
|
|
|
397
576
|
)
|
|
398
577
|
parser.add_argument(
|
|
399
578
|
"--server-app",
|
|
400
|
-
required=True,
|
|
401
579
|
help="For example: `server:app` or `project.package.module:wrapper.app`",
|
|
402
580
|
)
|
|
403
581
|
parser.add_argument(
|
|
404
582
|
"--client-app",
|
|
405
|
-
required=True,
|
|
406
583
|
help="For example: `client:app` or `project.package.module:wrapper.app`",
|
|
407
584
|
)
|
|
408
585
|
parser.add_argument(
|
|
@@ -411,6 +588,18 @@ def _parse_args_run_simulation() -> argparse.ArgumentParser:
|
|
|
411
588
|
required=True,
|
|
412
589
|
help="Number of simulated SuperNodes.",
|
|
413
590
|
)
|
|
591
|
+
parser.add_argument(
|
|
592
|
+
"--app",
|
|
593
|
+
type=str,
|
|
594
|
+
default=None,
|
|
595
|
+
help="Path to a directory containing a FAB-like structure with a "
|
|
596
|
+
"pyproject.toml.",
|
|
597
|
+
)
|
|
598
|
+
parser.add_argument(
|
|
599
|
+
"--run-config",
|
|
600
|
+
default=None,
|
|
601
|
+
help="Override configuration key-value pairs.",
|
|
602
|
+
)
|
|
414
603
|
parser.add_argument(
|
|
415
604
|
"--backend",
|
|
416
605
|
default="ray",
|
|
@@ -420,8 +609,7 @@ def _parse_args_run_simulation() -> argparse.ArgumentParser:
|
|
|
420
609
|
parser.add_argument(
|
|
421
610
|
"--backend-config",
|
|
422
611
|
type=str,
|
|
423
|
-
default=
|
|
424
|
-
'"actor": {"tensorflow": 0}}',
|
|
612
|
+
default="{}",
|
|
425
613
|
help='A JSON formatted stream, e.g \'{"<keyA>":<value>, "<keyB>":<value>}\' to '
|
|
426
614
|
"configure a backend. Values supported in <value> are those included by "
|
|
427
615
|
"`flwr.common.typing.ConfigsRecordValues`. ",
|
|
@@ -449,6 +637,17 @@ def _parse_args_run_simulation() -> argparse.ArgumentParser:
|
|
|
449
637
|
"ClientApp and ServerApp from there."
|
|
450
638
|
" Default: current working directory.",
|
|
451
639
|
)
|
|
640
|
+
parser.add_argument(
|
|
641
|
+
"--flwr-dir",
|
|
642
|
+
default=None,
|
|
643
|
+
help="""The path containing installed Flower Apps.
|
|
644
|
+
By default, this value is equal to:
|
|
645
|
+
|
|
646
|
+
- `$FLWR_HOME/` if `$FLWR_HOME` is defined
|
|
647
|
+
- `$XDG_DATA_HOME/.flwr/` if `$XDG_DATA_HOME` is defined
|
|
648
|
+
- `$HOME/.flwr/` in all other cases
|
|
649
|
+
""",
|
|
650
|
+
)
|
|
452
651
|
parser.add_argument(
|
|
453
652
|
"--run-id",
|
|
454
653
|
type=int,
|
flwr/superexec/app.py
CHANGED
|
@@ -24,6 +24,7 @@ import grpc
|
|
|
24
24
|
|
|
25
25
|
from flwr.common import EventType, event, log
|
|
26
26
|
from flwr.common.address import parse_address
|
|
27
|
+
from flwr.common.config import parse_config_args
|
|
27
28
|
from flwr.common.constant import SUPEREXEC_DEFAULT_ADDRESS
|
|
28
29
|
from flwr.common.exit_handlers import register_exit_handlers
|
|
29
30
|
from flwr.common.object_ref import load_app, validate
|
|
@@ -55,6 +56,7 @@ def run_superexec() -> None:
|
|
|
55
56
|
address=address,
|
|
56
57
|
executor=_load_executor(args),
|
|
57
58
|
certificates=certificates,
|
|
59
|
+
config=parse_config_args([args.executor_config]),
|
|
58
60
|
)
|
|
59
61
|
|
|
60
62
|
grpc_servers = [superexec_server]
|
|
@@ -74,20 +76,27 @@ def _parse_args_run_superexec() -> argparse.ArgumentParser:
|
|
|
74
76
|
parser = argparse.ArgumentParser(
|
|
75
77
|
description="Start a Flower SuperExec",
|
|
76
78
|
)
|
|
77
|
-
parser.add_argument(
|
|
78
|
-
"executor",
|
|
79
|
-
help="For example: `deployment:exec` or `project.package.module:wrapper.exec`.",
|
|
80
|
-
)
|
|
81
79
|
parser.add_argument(
|
|
82
80
|
"--address",
|
|
83
81
|
help="SuperExec (gRPC) server address (IPv4, IPv6, or a domain name)",
|
|
84
82
|
default=SUPEREXEC_DEFAULT_ADDRESS,
|
|
85
83
|
)
|
|
84
|
+
parser.add_argument(
|
|
85
|
+
"--executor",
|
|
86
|
+
help="For example: `deployment:exec` or `project.package.module:wrapper.exec`.",
|
|
87
|
+
default="flwr.superexec.deployment:executor",
|
|
88
|
+
)
|
|
86
89
|
parser.add_argument(
|
|
87
90
|
"--executor-dir",
|
|
88
91
|
help="The directory for the executor.",
|
|
89
92
|
default=".",
|
|
90
93
|
)
|
|
94
|
+
parser.add_argument(
|
|
95
|
+
"--executor-config",
|
|
96
|
+
help="Key-value pairs for the executor config, separated by commas. "
|
|
97
|
+
'For example:\n\n`--executor-config superlink="superlink:9091",'
|
|
98
|
+
'root-certificates="certificates/superlink-ca.crt"`',
|
|
99
|
+
)
|
|
91
100
|
parser.add_argument(
|
|
92
101
|
"--insecure",
|
|
93
102
|
action="store_true",
|
|
@@ -126,11 +135,11 @@ def _try_obtain_certificates(
|
|
|
126
135
|
return None
|
|
127
136
|
# Check if certificates are provided
|
|
128
137
|
if args.ssl_certfile and args.ssl_keyfile and args.ssl_ca_certfile:
|
|
129
|
-
if not Path
|
|
138
|
+
if not Path(args.ssl_ca_certfile).is_file():
|
|
130
139
|
sys.exit("Path argument `--ssl-ca-certfile` does not point to a file.")
|
|
131
|
-
if not Path
|
|
140
|
+
if not Path(args.ssl_certfile).is_file():
|
|
132
141
|
sys.exit("Path argument `--ssl-certfile` does not point to a file.")
|
|
133
|
-
if not Path
|
|
142
|
+
if not Path(args.ssl_keyfile).is_file():
|
|
134
143
|
sys.exit("Path argument `--ssl-keyfile` does not point to a file.")
|
|
135
144
|
certificates = (
|
|
136
145
|
Path(args.ssl_ca_certfile).read_bytes(), # CA certificate
|
|
@@ -156,11 +165,8 @@ def _load_executor(
|
|
|
156
165
|
args: argparse.Namespace,
|
|
157
166
|
) -> Executor:
|
|
158
167
|
"""Get the executor plugin."""
|
|
159
|
-
if args.executor_dir is not None:
|
|
160
|
-
sys.path.insert(0, args.executor_dir)
|
|
161
|
-
|
|
162
168
|
executor_ref: str = args.executor
|
|
163
|
-
valid, error_msg = validate(executor_ref)
|
|
169
|
+
valid, error_msg = validate(executor_ref, project_dir=args.executor_dir)
|
|
164
170
|
if not valid and error_msg:
|
|
165
171
|
raise LoadExecutorError(error_msg) from None
|
|
166
172
|
|