flwr-nightly 1.10.0.dev20240707__py3-none-any.whl → 1.10.0.dev20240722__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of flwr-nightly might be problematic. Click here for more details.
- flwr/cli/build.py +16 -2
- flwr/cli/config_utils.py +36 -14
- flwr/cli/install.py +17 -1
- flwr/cli/new/new.py +31 -20
- flwr/cli/new/templates/app/code/client.hf.py.tpl +11 -3
- flwr/cli/new/templates/app/code/client.jax.py.tpl +2 -1
- flwr/cli/new/templates/app/code/client.mlx.py.tpl +15 -10
- flwr/cli/new/templates/app/code/client.numpy.py.tpl +2 -1
- flwr/cli/new/templates/app/code/client.pytorch.py.tpl +12 -3
- flwr/cli/new/templates/app/code/client.sklearn.py.tpl +6 -3
- flwr/cli/new/templates/app/code/client.tensorflow.py.tpl +13 -3
- flwr/cli/new/templates/app/code/flwr_tune/app.py.tpl +2 -2
- flwr/cli/new/templates/app/code/flwr_tune/server.py.tpl +1 -1
- flwr/cli/new/templates/app/code/server.hf.py.tpl +16 -11
- flwr/cli/new/templates/app/code/server.jax.py.tpl +15 -8
- flwr/cli/new/templates/app/code/server.mlx.py.tpl +11 -7
- flwr/cli/new/templates/app/code/server.numpy.py.tpl +15 -8
- flwr/cli/new/templates/app/code/server.pytorch.py.tpl +15 -13
- flwr/cli/new/templates/app/code/server.sklearn.py.tpl +16 -10
- flwr/cli/new/templates/app/code/server.tensorflow.py.tpl +16 -13
- flwr/cli/new/templates/app/code/task.hf.py.tpl +2 -2
- flwr/cli/new/templates/app/code/task.mlx.py.tpl +2 -2
- flwr/cli/new/templates/app/code/task.pytorch.py.tpl +1 -1
- flwr/cli/new/templates/app/pyproject.flowertune.toml.tpl +9 -12
- flwr/cli/new/templates/app/pyproject.hf.toml.tpl +17 -16
- flwr/cli/new/templates/app/pyproject.jax.toml.tpl +17 -11
- flwr/cli/new/templates/app/pyproject.mlx.toml.tpl +17 -12
- flwr/cli/new/templates/app/pyproject.numpy.toml.tpl +12 -12
- flwr/cli/new/templates/app/pyproject.pytorch.toml.tpl +13 -12
- flwr/cli/new/templates/app/pyproject.sklearn.toml.tpl +12 -12
- flwr/cli/new/templates/app/pyproject.tensorflow.toml.tpl +15 -12
- flwr/cli/run/run.py +128 -53
- flwr/client/app.py +56 -24
- flwr/client/client_app.py +28 -8
- flwr/client/grpc_adapter_client/connection.py +3 -2
- flwr/client/grpc_client/connection.py +3 -2
- flwr/client/grpc_rere_client/connection.py +17 -6
- flwr/client/message_handler/message_handler.py +1 -1
- flwr/client/node_state.py +59 -12
- flwr/client/node_state_tests.py +4 -3
- flwr/client/rest_client/connection.py +19 -8
- flwr/client/supernode/app.py +55 -24
- flwr/client/typing.py +2 -2
- flwr/common/config.py +87 -2
- flwr/common/constant.py +3 -0
- flwr/common/context.py +24 -9
- flwr/common/logger.py +25 -0
- flwr/common/serde.py +45 -0
- flwr/common/telemetry.py +17 -0
- flwr/common/typing.py +5 -0
- flwr/proto/common_pb2.py +36 -0
- flwr/proto/common_pb2.pyi +121 -0
- flwr/proto/common_pb2_grpc.py +4 -0
- flwr/proto/common_pb2_grpc.pyi +4 -0
- flwr/proto/driver_pb2.py +24 -19
- flwr/proto/driver_pb2.pyi +21 -1
- flwr/proto/exec_pb2.py +16 -11
- flwr/proto/exec_pb2.pyi +22 -1
- flwr/proto/run_pb2.py +12 -7
- flwr/proto/run_pb2.pyi +22 -1
- flwr/proto/task_pb2.py +7 -8
- flwr/server/__init__.py +2 -0
- flwr/server/compat/legacy_context.py +5 -4
- flwr/server/driver/grpc_driver.py +82 -140
- flwr/server/run_serverapp.py +40 -15
- flwr/server/server_app.py +56 -10
- flwr/server/serverapp_components.py +52 -0
- flwr/server/superlink/driver/driver_servicer.py +18 -3
- flwr/server/superlink/fleet/message_handler/message_handler.py +13 -2
- flwr/server/superlink/fleet/vce/backend/backend.py +4 -4
- flwr/server/superlink/fleet/vce/backend/raybackend.py +10 -10
- flwr/server/superlink/fleet/vce/vce_api.py +149 -117
- flwr/server/superlink/state/in_memory_state.py +11 -3
- flwr/server/superlink/state/sqlite_state.py +23 -8
- flwr/server/superlink/state/state.py +7 -2
- flwr/server/typing.py +2 -0
- flwr/server/workflow/secure_aggregation/secaggplus_workflow.py +18 -2
- flwr/simulation/app.py +4 -3
- flwr/simulation/ray_transport/ray_actor.py +15 -19
- flwr/simulation/ray_transport/ray_client_proxy.py +22 -9
- flwr/simulation/run_simulation.py +237 -66
- flwr/superexec/app.py +14 -7
- flwr/superexec/deployment.py +110 -33
- flwr/superexec/exec_grpc.py +5 -1
- flwr/superexec/exec_servicer.py +4 -1
- flwr/superexec/executor.py +18 -0
- flwr/superexec/simulation.py +151 -0
- {flwr_nightly-1.10.0.dev20240707.dist-info → flwr_nightly-1.10.0.dev20240722.dist-info}/METADATA +3 -2
- {flwr_nightly-1.10.0.dev20240707.dist-info → flwr_nightly-1.10.0.dev20240722.dist-info}/RECORD +92 -86
- {flwr_nightly-1.10.0.dev20240707.dist-info → flwr_nightly-1.10.0.dev20240722.dist-info}/LICENSE +0 -0
- {flwr_nightly-1.10.0.dev20240707.dist-info → flwr_nightly-1.10.0.dev20240722.dist-info}/WHEEL +0 -0
- {flwr_nightly-1.10.0.dev20240707.dist-info → flwr_nightly-1.10.0.dev20240722.dist-info}/entry_points.txt +0 -0
|
@@ -18,45 +18,157 @@ import argparse
|
|
|
18
18
|
import asyncio
|
|
19
19
|
import json
|
|
20
20
|
import logging
|
|
21
|
+
import sys
|
|
21
22
|
import threading
|
|
22
23
|
import traceback
|
|
24
|
+
from argparse import Namespace
|
|
23
25
|
from logging import DEBUG, ERROR, INFO, WARNING
|
|
26
|
+
from pathlib import Path
|
|
24
27
|
from time import sleep
|
|
25
|
-
from typing import Optional
|
|
28
|
+
from typing import List, Optional
|
|
26
29
|
|
|
30
|
+
from flwr.cli.config_utils import load_and_validate
|
|
27
31
|
from flwr.client import ClientApp
|
|
28
32
|
from flwr.common import EventType, event, log
|
|
33
|
+
from flwr.common.config import get_fused_config_from_dir, parse_config_args
|
|
34
|
+
from flwr.common.constant import RUN_ID_NUM_BYTES
|
|
29
35
|
from flwr.common.logger import set_logger_propagation, update_console_handler
|
|
30
|
-
from flwr.common.typing import Run
|
|
36
|
+
from flwr.common.typing import Run, UserConfig
|
|
31
37
|
from flwr.server.driver import Driver, InMemoryDriver
|
|
32
|
-
from flwr.server.run_serverapp import run
|
|
38
|
+
from flwr.server.run_serverapp import run as run_server_app
|
|
33
39
|
from flwr.server.server_app import ServerApp
|
|
34
40
|
from flwr.server.superlink.fleet import vce
|
|
35
41
|
from flwr.server.superlink.fleet.vce.backend.backend import BackendConfig
|
|
36
42
|
from flwr.server.superlink.state import StateFactory
|
|
43
|
+
from flwr.server.superlink.state.utils import generate_rand_int_from_bytes
|
|
37
44
|
from flwr.simulation.ray_transport.utils import (
|
|
38
45
|
enable_tf_gpu_growth as enable_gpu_growth,
|
|
39
46
|
)
|
|
40
47
|
|
|
41
48
|
|
|
49
|
+
def _check_args_do_not_interfere(args: Namespace) -> bool:
|
|
50
|
+
"""Ensure decoupling of flags for different ways to start the simulation."""
|
|
51
|
+
mode_one_args = ["app", "run_config"]
|
|
52
|
+
mode_two_args = ["client_app", "server_app"]
|
|
53
|
+
|
|
54
|
+
def _resolve_message(conflict_keys: List[str]) -> str:
|
|
55
|
+
return ",".join([f"`--{key}`".replace("_", "-") for key in conflict_keys])
|
|
56
|
+
|
|
57
|
+
# When passing `--app`, `--app-dir` is ignored
|
|
58
|
+
if args.app and args.app_dir:
|
|
59
|
+
log(ERROR, "Either `--app` or `--app-dir` can be set, but not both.")
|
|
60
|
+
return False
|
|
61
|
+
|
|
62
|
+
if any(getattr(args, key) for key in mode_one_args):
|
|
63
|
+
if any(getattr(args, key) for key in mode_two_args):
|
|
64
|
+
log(
|
|
65
|
+
ERROR,
|
|
66
|
+
"Passing any of {%s} alongside with any of {%s}",
|
|
67
|
+
_resolve_message(mode_one_args),
|
|
68
|
+
_resolve_message(mode_two_args),
|
|
69
|
+
)
|
|
70
|
+
return False
|
|
71
|
+
|
|
72
|
+
if not args.app:
|
|
73
|
+
log(ERROR, "You need to pass --app")
|
|
74
|
+
return False
|
|
75
|
+
|
|
76
|
+
return True
|
|
77
|
+
|
|
78
|
+
# Ensure all args are set (required for the non-FAB mode of execution)
|
|
79
|
+
if not all(getattr(args, key) for key in mode_two_args):
|
|
80
|
+
log(
|
|
81
|
+
ERROR,
|
|
82
|
+
"Passing all of %s keys are required.",
|
|
83
|
+
_resolve_message(mode_two_args),
|
|
84
|
+
)
|
|
85
|
+
return False
|
|
86
|
+
|
|
87
|
+
return True
|
|
88
|
+
|
|
89
|
+
|
|
42
90
|
# Entry point from CLI
|
|
91
|
+
# pylint: disable=too-many-locals
|
|
43
92
|
def run_simulation_from_cli() -> None:
|
|
44
93
|
"""Run Simulation Engine from the CLI."""
|
|
45
94
|
args = _parse_args_run_simulation().parse_args()
|
|
46
95
|
|
|
96
|
+
# We are supporting two modes for the CLI entrypoint:
|
|
97
|
+
# 1) Running an app dir containing a `pyproject.toml`
|
|
98
|
+
# 2) Running any ClientApp and ServerApp w/o pyproject.toml being present
|
|
99
|
+
# For 2), some CLI args are compulsory, but they are not required for 1)
|
|
100
|
+
# We first do these checks
|
|
101
|
+
args_check_pass = _check_args_do_not_interfere(args)
|
|
102
|
+
if not args_check_pass:
|
|
103
|
+
sys.exit("Simulation Engine cannot start.")
|
|
104
|
+
|
|
105
|
+
run_id = (
|
|
106
|
+
generate_rand_int_from_bytes(RUN_ID_NUM_BYTES)
|
|
107
|
+
if args.run_id is None
|
|
108
|
+
else args.run_id
|
|
109
|
+
)
|
|
110
|
+
if args.app:
|
|
111
|
+
# Mode 1
|
|
112
|
+
app_path = Path(args.app)
|
|
113
|
+
if not app_path.is_dir():
|
|
114
|
+
log(ERROR, "--app is not a directory")
|
|
115
|
+
sys.exit("Simulation Engine cannot start.")
|
|
116
|
+
|
|
117
|
+
# Load pyproject.toml
|
|
118
|
+
config, errors, warnings = load_and_validate(
|
|
119
|
+
app_path / "pyproject.toml", check_module=False
|
|
120
|
+
)
|
|
121
|
+
if errors:
|
|
122
|
+
raise ValueError(errors)
|
|
123
|
+
|
|
124
|
+
if warnings:
|
|
125
|
+
log(WARNING, warnings)
|
|
126
|
+
|
|
127
|
+
if config is None:
|
|
128
|
+
raise ValueError("Config extracted from FAB's pyproject.toml is not valid")
|
|
129
|
+
|
|
130
|
+
# Get ClientApp and ServerApp components
|
|
131
|
+
app_components = config["tool"]["flwr"]["app"]["components"]
|
|
132
|
+
client_app_attr = app_components["clientapp"]
|
|
133
|
+
server_app_attr = app_components["serverapp"]
|
|
134
|
+
|
|
135
|
+
override_config = parse_config_args([args.run_config])
|
|
136
|
+
fused_config = get_fused_config_from_dir(app_path, override_config)
|
|
137
|
+
app_dir = args.app
|
|
138
|
+
is_app = True
|
|
139
|
+
|
|
140
|
+
else:
|
|
141
|
+
# Mode 2
|
|
142
|
+
client_app_attr = args.client_app
|
|
143
|
+
server_app_attr = args.server_app
|
|
144
|
+
override_config = {}
|
|
145
|
+
fused_config = None
|
|
146
|
+
app_dir = args.app_dir
|
|
147
|
+
is_app = False
|
|
148
|
+
|
|
149
|
+
# Create run
|
|
150
|
+
run = Run(
|
|
151
|
+
run_id=run_id,
|
|
152
|
+
fab_id="",
|
|
153
|
+
fab_version="",
|
|
154
|
+
override_config=override_config,
|
|
155
|
+
)
|
|
156
|
+
|
|
47
157
|
# Load JSON config
|
|
48
158
|
backend_config_dict = json.loads(args.backend_config)
|
|
49
159
|
|
|
50
160
|
_run_simulation(
|
|
51
|
-
server_app_attr=
|
|
52
|
-
client_app_attr=
|
|
161
|
+
server_app_attr=server_app_attr,
|
|
162
|
+
client_app_attr=client_app_attr,
|
|
53
163
|
num_supernodes=args.num_supernodes,
|
|
54
164
|
backend_name=args.backend,
|
|
55
165
|
backend_config=backend_config_dict,
|
|
56
|
-
app_dir=
|
|
57
|
-
|
|
166
|
+
app_dir=app_dir,
|
|
167
|
+
run=run,
|
|
58
168
|
enable_tf_gpu_growth=args.enable_tf_gpu_growth,
|
|
59
169
|
verbose_logging=args.verbose,
|
|
170
|
+
server_app_run_config=fused_config,
|
|
171
|
+
is_app=is_app,
|
|
60
172
|
)
|
|
61
173
|
|
|
62
174
|
|
|
@@ -126,16 +238,25 @@ def run_simulation(
|
|
|
126
238
|
def run_serverapp_th(
|
|
127
239
|
server_app_attr: Optional[str],
|
|
128
240
|
server_app: Optional[ServerApp],
|
|
241
|
+
server_app_run_config: UserConfig,
|
|
129
242
|
driver: Driver,
|
|
130
243
|
app_dir: str,
|
|
131
|
-
f_stop:
|
|
244
|
+
f_stop: threading.Event,
|
|
245
|
+
has_exception: threading.Event,
|
|
132
246
|
enable_tf_gpu_growth: bool,
|
|
133
247
|
delay_launch: int = 3,
|
|
134
248
|
) -> threading.Thread:
|
|
135
249
|
"""Run ServerApp in a thread."""
|
|
136
250
|
|
|
137
|
-
def server_th_with_start_checks(
|
|
138
|
-
tf_gpu_growth: bool,
|
|
251
|
+
def server_th_with_start_checks(
|
|
252
|
+
tf_gpu_growth: bool,
|
|
253
|
+
stop_event: threading.Event,
|
|
254
|
+
exception_event: threading.Event,
|
|
255
|
+
_driver: Driver,
|
|
256
|
+
_server_app_dir: str,
|
|
257
|
+
_server_app_run_config: UserConfig,
|
|
258
|
+
_server_app_attr: Optional[str],
|
|
259
|
+
_server_app: Optional[ServerApp],
|
|
139
260
|
) -> None:
|
|
140
261
|
"""Run ServerApp, after checking if GPU memory growth has to be set.
|
|
141
262
|
|
|
@@ -147,10 +268,18 @@ def run_serverapp_th(
|
|
|
147
268
|
enable_gpu_growth()
|
|
148
269
|
|
|
149
270
|
# Run ServerApp
|
|
150
|
-
|
|
271
|
+
run_server_app(
|
|
272
|
+
driver=_driver,
|
|
273
|
+
server_app_dir=_server_app_dir,
|
|
274
|
+
server_app_run_config=_server_app_run_config,
|
|
275
|
+
server_app_attr=_server_app_attr,
|
|
276
|
+
loaded_server_app=_server_app,
|
|
277
|
+
)
|
|
151
278
|
except Exception as ex: # pylint: disable=broad-exception-caught
|
|
152
279
|
log(ERROR, "ServerApp thread raised an exception: %s", ex)
|
|
153
280
|
log(ERROR, traceback.format_exc())
|
|
281
|
+
exception_event.set()
|
|
282
|
+
raise
|
|
154
283
|
finally:
|
|
155
284
|
log(DEBUG, "ServerApp finished running.")
|
|
156
285
|
# Upon completion, trigger stop event if one was passed
|
|
@@ -160,71 +289,66 @@ def run_serverapp_th(
|
|
|
160
289
|
|
|
161
290
|
serverapp_th = threading.Thread(
|
|
162
291
|
target=server_th_with_start_checks,
|
|
163
|
-
args=(
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
292
|
+
args=(
|
|
293
|
+
enable_tf_gpu_growth,
|
|
294
|
+
f_stop,
|
|
295
|
+
has_exception,
|
|
296
|
+
driver,
|
|
297
|
+
app_dir,
|
|
298
|
+
server_app_run_config,
|
|
299
|
+
server_app_attr,
|
|
300
|
+
server_app,
|
|
301
|
+
),
|
|
170
302
|
)
|
|
171
303
|
sleep(delay_launch)
|
|
172
304
|
serverapp_th.start()
|
|
173
305
|
return serverapp_th
|
|
174
306
|
|
|
175
307
|
|
|
176
|
-
def _override_run_id(state: StateFactory, run_id_to_replace: int, run_id: int) -> None:
|
|
177
|
-
"""Override the run_id of an existing Run."""
|
|
178
|
-
log(DEBUG, "Pre-registering run with id %s", run_id)
|
|
179
|
-
# Remove run
|
|
180
|
-
run_info: Run = state.state().run_ids.pop(run_id_to_replace) # type: ignore
|
|
181
|
-
# Update with new run_id and insert back in state
|
|
182
|
-
run_info.run_id = run_id
|
|
183
|
-
state.state().run_ids[run_id] = run_info # type: ignore
|
|
184
|
-
|
|
185
|
-
|
|
186
308
|
# pylint: disable=too-many-locals
|
|
187
309
|
def _main_loop(
|
|
188
310
|
num_supernodes: int,
|
|
189
311
|
backend_name: str,
|
|
190
312
|
backend_config_stream: str,
|
|
191
313
|
app_dir: str,
|
|
314
|
+
is_app: bool,
|
|
192
315
|
enable_tf_gpu_growth: bool,
|
|
193
|
-
|
|
316
|
+
run: Run,
|
|
317
|
+
flwr_dir: Optional[str] = None,
|
|
194
318
|
client_app: Optional[ClientApp] = None,
|
|
195
319
|
client_app_attr: Optional[str] = None,
|
|
196
320
|
server_app: Optional[ServerApp] = None,
|
|
197
321
|
server_app_attr: Optional[str] = None,
|
|
322
|
+
server_app_run_config: Optional[UserConfig] = None,
|
|
198
323
|
) -> None:
|
|
199
|
-
"""Launch SuperLink with Simulation Engine, then ServerApp on a separate thread.
|
|
200
|
-
|
|
201
|
-
Everything runs on the main thread or a separate one, depending on whether the main
|
|
202
|
-
thread already contains a running Asyncio event loop. This is the case if running
|
|
203
|
-
the Simulation Engine on a Jupyter/Colab notebook.
|
|
204
|
-
"""
|
|
324
|
+
"""Launch SuperLink with Simulation Engine, then ServerApp on a separate thread."""
|
|
205
325
|
# Initialize StateFactory
|
|
206
326
|
state_factory = StateFactory(":flwr-in-memory-state:")
|
|
207
327
|
|
|
208
|
-
f_stop =
|
|
328
|
+
f_stop = threading.Event()
|
|
329
|
+
# A Threading event to indicate if an exception was raised in the ServerApp thread
|
|
330
|
+
server_app_thread_has_exception = threading.Event()
|
|
209
331
|
serverapp_th = None
|
|
210
332
|
try:
|
|
211
|
-
#
|
|
212
|
-
|
|
333
|
+
# Register run
|
|
334
|
+
log(DEBUG, "Pre-registering run with id %s", run.run_id)
|
|
335
|
+
state_factory.state().run_ids[run.run_id] = run # type: ignore
|
|
213
336
|
|
|
214
|
-
if
|
|
215
|
-
|
|
216
|
-
run_id_ = run_id
|
|
337
|
+
if server_app_run_config is None:
|
|
338
|
+
server_app_run_config = {}
|
|
217
339
|
|
|
218
340
|
# Initialize Driver
|
|
219
|
-
driver = InMemoryDriver(run_id=
|
|
341
|
+
driver = InMemoryDriver(run_id=run.run_id, state_factory=state_factory)
|
|
220
342
|
|
|
221
343
|
# Get and run ServerApp thread
|
|
222
344
|
serverapp_th = run_serverapp_th(
|
|
223
345
|
server_app_attr=server_app_attr,
|
|
224
346
|
server_app=server_app,
|
|
347
|
+
server_app_run_config=server_app_run_config,
|
|
225
348
|
driver=driver,
|
|
226
349
|
app_dir=app_dir,
|
|
227
350
|
f_stop=f_stop,
|
|
351
|
+
has_exception=server_app_thread_has_exception,
|
|
228
352
|
enable_tf_gpu_growth=enable_tf_gpu_growth,
|
|
229
353
|
)
|
|
230
354
|
|
|
@@ -237,8 +361,11 @@ def _main_loop(
|
|
|
237
361
|
backend_name=backend_name,
|
|
238
362
|
backend_config_json_stream=backend_config_stream,
|
|
239
363
|
app_dir=app_dir,
|
|
364
|
+
is_app=is_app,
|
|
240
365
|
state_factory=state_factory,
|
|
241
366
|
f_stop=f_stop,
|
|
367
|
+
run=run,
|
|
368
|
+
flwr_dir=flwr_dir,
|
|
242
369
|
)
|
|
243
370
|
|
|
244
371
|
except Exception as ex:
|
|
@@ -253,6 +380,8 @@ def _main_loop(
|
|
|
253
380
|
event(EventType.RUN_SUPERLINK_LEAVE)
|
|
254
381
|
if serverapp_th:
|
|
255
382
|
serverapp_th.join()
|
|
383
|
+
if server_app_thread_has_exception.is_set():
|
|
384
|
+
raise RuntimeError("Exception in ServerApp thread")
|
|
256
385
|
|
|
257
386
|
log(DEBUG, "Stopping Simulation Engine now.")
|
|
258
387
|
|
|
@@ -266,10 +395,13 @@ def _run_simulation(
|
|
|
266
395
|
backend_config: Optional[BackendConfig] = None,
|
|
267
396
|
client_app_attr: Optional[str] = None,
|
|
268
397
|
server_app_attr: Optional[str] = None,
|
|
398
|
+
server_app_run_config: Optional[UserConfig] = None,
|
|
269
399
|
app_dir: str = "",
|
|
270
|
-
|
|
400
|
+
flwr_dir: Optional[str] = None,
|
|
401
|
+
run: Optional[Run] = None,
|
|
271
402
|
enable_tf_gpu_growth: bool = False,
|
|
272
403
|
verbose_logging: bool = False,
|
|
404
|
+
is_app: bool = False,
|
|
273
405
|
) -> None:
|
|
274
406
|
r"""Launch the Simulation Engine.
|
|
275
407
|
|
|
@@ -298,20 +430,27 @@ def _run_simulation(
|
|
|
298
430
|
parameters. Values supported in <value> are those included by
|
|
299
431
|
`flwr.common.typing.ConfigsRecordValues`.
|
|
300
432
|
|
|
301
|
-
client_app_attr : str
|
|
433
|
+
client_app_attr : Optional[str]
|
|
302
434
|
A path to a `ClientApp` module to be loaded: For example: `client:app` or
|
|
303
435
|
`project.package.module:wrapper.app`."
|
|
304
436
|
|
|
305
|
-
server_app_attr : str
|
|
437
|
+
server_app_attr : Optional[str]
|
|
306
438
|
A path to a `ServerApp` module to be loaded: For example: `server:app` or
|
|
307
439
|
`project.package.module:wrapper.app`."
|
|
308
440
|
|
|
441
|
+
server_app_run_config : Optional[UserConfig]
|
|
442
|
+
Config dictionary that parameterizes the run config. It will be made accessible
|
|
443
|
+
to the ServerApp.
|
|
444
|
+
|
|
309
445
|
app_dir : str
|
|
310
446
|
Add specified directory to the PYTHONPATH and load `ClientApp` from there.
|
|
311
447
|
(Default: current working directory.)
|
|
312
448
|
|
|
313
|
-
|
|
314
|
-
|
|
449
|
+
flwr_dir : Optional[str]
|
|
450
|
+
The path containing installed Flower Apps.
|
|
451
|
+
|
|
452
|
+
run : Optional[Run]
|
|
453
|
+
An object carrying details about the run.
|
|
315
454
|
|
|
316
455
|
enable_tf_gpu_growth : bool (default: False)
|
|
317
456
|
A boolean to indicate whether to enable GPU growth on the main thread. This is
|
|
@@ -324,6 +463,11 @@ def _run_simulation(
|
|
|
324
463
|
verbose_logging : bool (default: False)
|
|
325
464
|
When disabled, only INFO, WARNING and ERROR log messages will be shown. If
|
|
326
465
|
enabled, DEBUG-level logs will be displayed.
|
|
466
|
+
|
|
467
|
+
is_app : bool (default: False)
|
|
468
|
+
A flag that indicates whether the simulation is running an app or not. This is
|
|
469
|
+
needed in order to attempt loading an app's pyproject.toml when nodes register
|
|
470
|
+
a context object.
|
|
327
471
|
"""
|
|
328
472
|
if backend_config is None:
|
|
329
473
|
backend_config = {}
|
|
@@ -336,8 +480,12 @@ def _run_simulation(
|
|
|
336
480
|
if verbose_logging:
|
|
337
481
|
update_console_handler(level=DEBUG, timestamps=True, colored=True)
|
|
338
482
|
else:
|
|
339
|
-
backend_config["init_args"]["logging_level"] =
|
|
340
|
-
|
|
483
|
+
backend_config["init_args"]["logging_level"] = backend_config["init_args"].get(
|
|
484
|
+
"logging_level", WARNING
|
|
485
|
+
)
|
|
486
|
+
backend_config["init_args"]["log_to_driver"] = backend_config["init_args"].get(
|
|
487
|
+
"log_to_driver", True
|
|
488
|
+
)
|
|
341
489
|
|
|
342
490
|
if enable_tf_gpu_growth:
|
|
343
491
|
# Check that Backend config has also enabled using GPU growth
|
|
@@ -349,45 +497,47 @@ def _run_simulation(
|
|
|
349
497
|
# Convert config to original JSON-stream format
|
|
350
498
|
backend_config_stream = json.dumps(backend_config)
|
|
351
499
|
|
|
352
|
-
|
|
500
|
+
# If no `Run` object is set, create one
|
|
501
|
+
if run is None:
|
|
502
|
+
run_id = generate_rand_int_from_bytes(RUN_ID_NUM_BYTES)
|
|
503
|
+
run = Run(run_id=run_id, fab_id="", fab_version="", override_config={})
|
|
504
|
+
|
|
353
505
|
args = (
|
|
354
506
|
num_supernodes,
|
|
355
507
|
backend_name,
|
|
356
508
|
backend_config_stream,
|
|
357
509
|
app_dir,
|
|
510
|
+
is_app,
|
|
358
511
|
enable_tf_gpu_growth,
|
|
359
|
-
|
|
512
|
+
run,
|
|
513
|
+
flwr_dir,
|
|
360
514
|
client_app,
|
|
361
515
|
client_app_attr,
|
|
362
516
|
server_app,
|
|
363
517
|
server_app_attr,
|
|
518
|
+
server_app_run_config,
|
|
364
519
|
)
|
|
365
520
|
# Detect if there is an Asyncio event loop already running.
|
|
366
|
-
# If yes,
|
|
367
|
-
# like Jupyter/Colab notebooks,
|
|
368
|
-
|
|
521
|
+
# If yes, disable logger propagation. In environments
|
|
522
|
+
# like Jupyter/Colab notebooks, it's often better to do this.
|
|
523
|
+
asyncio_loop_running = False
|
|
369
524
|
try:
|
|
370
525
|
_ = (
|
|
371
526
|
asyncio.get_running_loop()
|
|
372
527
|
) # Raises RuntimeError if no event loop is present
|
|
373
528
|
log(DEBUG, "Asyncio event loop already running.")
|
|
374
529
|
|
|
375
|
-
|
|
530
|
+
asyncio_loop_running = True
|
|
376
531
|
|
|
377
532
|
except RuntimeError:
|
|
378
|
-
|
|
533
|
+
pass
|
|
379
534
|
|
|
380
535
|
finally:
|
|
381
|
-
if
|
|
536
|
+
if asyncio_loop_running:
|
|
382
537
|
# Set logger propagation to False to prevent duplicated log output in Colab.
|
|
383
538
|
logger = set_logger_propagation(logger, False)
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
simulation_engine_th.start()
|
|
387
|
-
simulation_engine_th.join()
|
|
388
|
-
else:
|
|
389
|
-
log(DEBUG, "Starting Simulation Engine on the main thread.")
|
|
390
|
-
_main_loop(*args)
|
|
539
|
+
|
|
540
|
+
_main_loop(*args)
|
|
391
541
|
|
|
392
542
|
|
|
393
543
|
def _parse_args_run_simulation() -> argparse.ArgumentParser:
|
|
@@ -397,12 +547,10 @@ def _parse_args_run_simulation() -> argparse.ArgumentParser:
|
|
|
397
547
|
)
|
|
398
548
|
parser.add_argument(
|
|
399
549
|
"--server-app",
|
|
400
|
-
required=True,
|
|
401
550
|
help="For example: `server:app` or `project.package.module:wrapper.app`",
|
|
402
551
|
)
|
|
403
552
|
parser.add_argument(
|
|
404
553
|
"--client-app",
|
|
405
|
-
required=True,
|
|
406
554
|
help="For example: `client:app` or `project.package.module:wrapper.app`",
|
|
407
555
|
)
|
|
408
556
|
parser.add_argument(
|
|
@@ -411,6 +559,18 @@ def _parse_args_run_simulation() -> argparse.ArgumentParser:
|
|
|
411
559
|
required=True,
|
|
412
560
|
help="Number of simulated SuperNodes.",
|
|
413
561
|
)
|
|
562
|
+
parser.add_argument(
|
|
563
|
+
"--app",
|
|
564
|
+
type=str,
|
|
565
|
+
default=None,
|
|
566
|
+
help="Path to a directory containing a FAB-like structure with a "
|
|
567
|
+
"pyproject.toml.",
|
|
568
|
+
)
|
|
569
|
+
parser.add_argument(
|
|
570
|
+
"--run-config",
|
|
571
|
+
default=None,
|
|
572
|
+
help="Override configuration key-value pairs.",
|
|
573
|
+
)
|
|
414
574
|
parser.add_argument(
|
|
415
575
|
"--backend",
|
|
416
576
|
default="ray",
|
|
@@ -449,6 +609,17 @@ def _parse_args_run_simulation() -> argparse.ArgumentParser:
|
|
|
449
609
|
"ClientApp and ServerApp from there."
|
|
450
610
|
" Default: current working directory.",
|
|
451
611
|
)
|
|
612
|
+
parser.add_argument(
|
|
613
|
+
"--flwr-dir",
|
|
614
|
+
default=None,
|
|
615
|
+
help="""The path containing installed Flower Apps.
|
|
616
|
+
By default, this value is equal to:
|
|
617
|
+
|
|
618
|
+
- `$FLWR_HOME/` if `$FLWR_HOME` is defined
|
|
619
|
+
- `$XDG_DATA_HOME/.flwr/` if `$XDG_DATA_HOME` is defined
|
|
620
|
+
- `$HOME/.flwr/` in all other cases
|
|
621
|
+
""",
|
|
622
|
+
)
|
|
452
623
|
parser.add_argument(
|
|
453
624
|
"--run-id",
|
|
454
625
|
type=int,
|
flwr/superexec/app.py
CHANGED
|
@@ -24,6 +24,7 @@ import grpc
|
|
|
24
24
|
|
|
25
25
|
from flwr.common import EventType, event, log
|
|
26
26
|
from flwr.common.address import parse_address
|
|
27
|
+
from flwr.common.config import parse_config_args
|
|
27
28
|
from flwr.common.constant import SUPEREXEC_DEFAULT_ADDRESS
|
|
28
29
|
from flwr.common.exit_handlers import register_exit_handlers
|
|
29
30
|
from flwr.common.object_ref import load_app, validate
|
|
@@ -55,6 +56,7 @@ def run_superexec() -> None:
|
|
|
55
56
|
address=address,
|
|
56
57
|
executor=_load_executor(args),
|
|
57
58
|
certificates=certificates,
|
|
59
|
+
config=parse_config_args([args.executor_config]),
|
|
58
60
|
)
|
|
59
61
|
|
|
60
62
|
grpc_servers = [superexec_server]
|
|
@@ -74,20 +76,25 @@ def _parse_args_run_superexec() -> argparse.ArgumentParser:
|
|
|
74
76
|
parser = argparse.ArgumentParser(
|
|
75
77
|
description="Start a Flower SuperExec",
|
|
76
78
|
)
|
|
77
|
-
parser.add_argument(
|
|
78
|
-
"executor",
|
|
79
|
-
help="For example: `deployment:exec` or `project.package.module:wrapper.exec`.",
|
|
80
|
-
)
|
|
81
79
|
parser.add_argument(
|
|
82
80
|
"--address",
|
|
83
81
|
help="SuperExec (gRPC) server address (IPv4, IPv6, or a domain name)",
|
|
84
82
|
default=SUPEREXEC_DEFAULT_ADDRESS,
|
|
85
83
|
)
|
|
84
|
+
parser.add_argument(
|
|
85
|
+
"--executor",
|
|
86
|
+
help="For example: `deployment:exec` or `project.package.module:wrapper.exec`.",
|
|
87
|
+
default="flwr.superexec.deployment:executor",
|
|
88
|
+
)
|
|
86
89
|
parser.add_argument(
|
|
87
90
|
"--executor-dir",
|
|
88
91
|
help="The directory for the executor.",
|
|
89
92
|
default=".",
|
|
90
93
|
)
|
|
94
|
+
parser.add_argument(
|
|
95
|
+
"--executor-config",
|
|
96
|
+
help="Key-value pairs for the executor config, separated by commas.",
|
|
97
|
+
)
|
|
91
98
|
parser.add_argument(
|
|
92
99
|
"--insecure",
|
|
93
100
|
action="store_true",
|
|
@@ -126,11 +133,11 @@ def _try_obtain_certificates(
|
|
|
126
133
|
return None
|
|
127
134
|
# Check if certificates are provided
|
|
128
135
|
if args.ssl_certfile and args.ssl_keyfile and args.ssl_ca_certfile:
|
|
129
|
-
if not Path
|
|
136
|
+
if not Path(args.ssl_ca_certfile).is_file():
|
|
130
137
|
sys.exit("Path argument `--ssl-ca-certfile` does not point to a file.")
|
|
131
|
-
if not Path
|
|
138
|
+
if not Path(args.ssl_certfile).is_file():
|
|
132
139
|
sys.exit("Path argument `--ssl-certfile` does not point to a file.")
|
|
133
|
-
if not Path
|
|
140
|
+
if not Path(args.ssl_keyfile).is_file():
|
|
134
141
|
sys.exit("Path argument `--ssl-keyfile` does not point to a file.")
|
|
135
142
|
certificates = (
|
|
136
143
|
Path(args.ssl_ca_certfile).read_bytes(), # CA certificate
|