flwr-nightly 1.8.0.dev20240303__py3-none-any.whl → 1.8.0.dev20240305__py3-none-any.whl

@@ -219,16 +219,23 @@ async def run(
 
 # pylint: disable=too-many-arguments,unused-argument,too-many-locals
 def start_vce(
-    client_app_module_name: str,
     backend_name: str,
     backend_config_json_stream: str,
-    working_dir: str,
+    app_dir: str,
     f_stop: asyncio.Event,
+    client_app: Optional[ClientApp] = None,
+    client_app_attr: Optional[str] = None,
     num_supernodes: Optional[int] = None,
     state_factory: Optional[StateFactory] = None,
     existing_nodes_mapping: Optional[NodeToPartitionMapping] = None,
 ) -> None:
     """Start Fleet API with the Simulation Engine."""
+    if client_app_attr is not None and client_app is not None:
+        raise ValueError(
+            "Both `client_app_attr` and `client_app` are provided, "
+            "but only one is allowed."
+        )
+
     if num_supernodes is not None and existing_nodes_mapping is not None:
         raise ValueError(
             "Both `num_supernodes` and `existing_nodes_mapping` are provided, "
@@ -290,12 +297,17 @@ def start_vce
 
     def backend_fn() -> Backend:
         """Instantiate a Backend."""
-        return backend_type(backend_config, work_dir=working_dir)
+        return backend_type(backend_config, work_dir=app_dir)
 
-    log(INFO, "client_app_module_name = %s", client_app_module_name)
+    log(INFO, "client_app_attr = %s", client_app_attr)
 
+    # Load ClientApp if needed
     def _load() -> ClientApp:
-        app: ClientApp = load_client_app(client_app_module_name)
+
+        if client_app_attr:
+            app: ClientApp = load_client_app(client_app_attr)
+        if client_app:
+            app = client_app
         return app
 
     app_fn = _load
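The `_load` closure keeps `ClientApp` construction lazy: the backend receives a zero-argument callable and resolves the app only when a worker needs it. A toy sketch of that contract (the `handle_message` wiring is illustrative, not the actual backend code):

    from typing import Callable

    from flwr.client import ClientApp

    def handle_message(app_fn: Callable[[], ClientApp]) -> None:
        # Cheap if a loaded `ClientApp` was captured by the closure;
        # an import plus attribute lookup if only `client_app_attr` was given.
        app = app_fn()
        # ... hand `app` the incoming Message (omitted) ...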
@@ -17,7 +17,7 @@
 
 import importlib
 
-from flwr.simulation.run_simulation import run_simulation
+from flwr.simulation.run_simulation import run_simulation, run_simulation_from_cli
 
 is_ray_installed = importlib.util.find_spec("ray") is not None
 
@@ -36,7 +36,4 @@ To install the necessary dependencies, install `flwr` with the `simulation` extr
     raise ImportError(RAY_IMPORT_ERROR)
 
 
-__all__ = [
-    "start_simulation",
-    "run_simulation",
-]
+__all__ = ["start_simulation", "run_simulation_from_cli", "run_simulation"]
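After this change, all three entry points resolve from the package root:

    from flwr.simulation import (
        run_simulation,           # programmatic entry point (new)
        run_simulation_from_cli,  # `flower-simulation` CLI entry point (new)
        start_simulation,         # pre-existing entry point
    )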
@@ -17,39 +17,153 @@
 import argparse
 import asyncio
 import json
+import logging
 import threading
 import traceback
-from logging import ERROR, INFO, WARNING
+from logging import DEBUG, ERROR, INFO, WARNING
 from time import sleep
-from typing import Any, Callable
+from typing import Dict, Optional
 
 import grpc
 
+from flwr.client import ClientApp
 from flwr.common import EventType, event, log
+from flwr.common.typing import ConfigsRecordValues
 from flwr.server.driver.driver import Driver
 from flwr.server.run_serverapp import run
+from flwr.server.server_app import ServerApp
 from flwr.server.superlink.driver.driver_grpc import run_driver_api_grpc
 from flwr.server.superlink.fleet import vce
 from flwr.server.superlink.state import StateFactory
-from flwr.simulation.ray_transport.utils import enable_tf_gpu_growth
+from flwr.simulation.ray_transport.utils import (
+    enable_tf_gpu_growth as enable_gpu_growth,
+)
 
 
+# Entry point from CLI
+def run_simulation_from_cli() -> None:
+    """Run Simulation Engine from the CLI."""
+    args = _parse_args_run_simulation().parse_args()
+
+    # Load JSON config
+    backend_config_dict = json.loads(args.backend_config)
+
+    _run_simulation(
+        server_app_attr=args.server_app,
+        client_app_attr=args.client_app,
+        num_supernodes=args.num_supernodes,
+        backend_name=args.backend,
+        backend_config=backend_config_dict,
+        app_dir=args.app_dir,
+        driver_api_address=args.driver_api_address,
+        enable_tf_gpu_growth=args.enable_tf_gpu_growth,
+        verbose_logging=args.verbose,
+    )
+
+
+# Entry point from Python session (script or notebook)
+# pylint: disable=too-many-arguments
+def run_simulation(
+    server_app: ServerApp,
+    client_app: ClientApp,
+    num_supernodes: int,
+    backend_name: str = "ray",
+    backend_config: Optional[Dict[str, ConfigsRecordValues]] = None,
+    enable_tf_gpu_growth: bool = False,
+    verbose_logging: bool = False,
+) -> None:
+    r"""Run a Flower App using the Simulation Engine.
+
+    Parameters
+    ----------
+    server_app : ServerApp
+        The `ServerApp` to be executed. It will send messages to different `ClientApp`
+        instances running on different (virtual) SuperNodes.
+
+    client_app : ClientApp
+        The `ClientApp` to be executed by each of the SuperNodes. It will receive
+        messages sent by the `ServerApp`.
+
+    num_supernodes : int
+        Number of nodes that run a ClientApp. They can be sampled by a
+        Driver in the ServerApp and receive a Message describing what the ClientApp
+        should perform.
+
+    backend_name : str (default: ray)
+        A simulation backend that runs `ClientApp`s.
+
+    backend_config : Optional[Dict[str, ConfigsRecordValues]]
+        A dictionary, e.g. {"<keyA>": <value>, "<keyB>": <value>}, to configure a
+        backend. Values supported in <value> are those included by
+        `flwr.common.typing.ConfigsRecordValues`.
+
+    enable_tf_gpu_growth : bool (default: False)
+        A boolean to indicate whether to enable GPU growth on the main thread. This is
+        desirable if you make use of a TensorFlow model on your `ServerApp` while
+        having your `ClientApp` running on the same GPU. Without enabling this, you
+        might encounter an out-of-memory error because TensorFlow, by default, allocates
+        all GPU memory. Read more about how `tf.config.experimental.set_memory_growth()`
+        works in the TensorFlow documentation: https://www.tensorflow.org/api/stable.
+
+    verbose_logging : bool (default: False)
+        When disabled, only INFO, WARNING and ERROR log messages will be shown. If
+        enabled, DEBUG-level logs will be displayed.
+    """
+    _run_simulation(
+        num_supernodes=num_supernodes,
+        client_app=client_app,
+        server_app=server_app,
+        backend_name=backend_name,
+        backend_config=backend_config,
+        enable_tf_gpu_growth=enable_tf_gpu_growth,
+        verbose_logging=verbose_logging,
+    )
+
+
+# pylint: disable=too-many-arguments
 def run_serverapp_th(
-    server_app_attr: str,
+    server_app_attr: Optional[str],
+    server_app: Optional[ServerApp],
     driver: Driver,
-    server_app_dir: str,
+    app_dir: str,
     f_stop: asyncio.Event,
+    enable_tf_gpu_growth: bool,
     delay_launch: int = 3,
 ) -> threading.Thread:
     """Run ServerApp in a thread."""
+
+    def server_th_with_start_checks(  # type: ignore
+        tf_gpu_growth: bool, stop_event: asyncio.Event, **kwargs
+    ) -> None:
+        """Run ServerApp, after checking whether GPU memory growth has to be set.
+
+        Upon exception, trigger stop event for Simulation Engine.
+        """
+        try:
+            if tf_gpu_growth:
+                log(INFO, "Enabling GPU growth for Tensorflow on the main thread.")
+                enable_gpu_growth()
+
+            # Run ServerApp
+            run(**kwargs)
+        except Exception as ex:  # pylint: disable=broad-exception-caught
+            log(ERROR, "ServerApp thread raised an exception: %s", ex)
+            log(ERROR, traceback.format_exc())
+        finally:
+            log(DEBUG, "ServerApp finished running.")
+            # Upon completion, trigger stop event if one was passed
+            if stop_event is not None:
+                stop_event.set()
+                log(WARNING, "Triggered stop event for Simulation Engine.")
+
     serverapp_th = threading.Thread(
-        target=run,
+        target=server_th_with_start_checks,
+        args=(enable_tf_gpu_growth, f_stop),
         kwargs={
             "server_app_attr": server_app_attr,
+            "loaded_server_app": server_app,
             "driver": driver,
-            "server_app_dir": server_app_dir,
-            "stop_event": f_stop,  # will be set when `run()` finishes
-            # will trigger the shutdown of the Simulation Engine
+            "server_app_dir": app_dir,
         },
     )
     sleep(delay_launch)
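Taken together, this gives `run_simulation` the notebook/script-friendly signature its docstring describes. A minimal usage sketch, assuming `server_app` and `client_app` are existing `ServerApp`/`ClientApp` instances built elsewhere:

    from flwr.simulation import run_simulation

    # `server_app` and `client_app` are assumed to be pre-built
    # flwr.server.ServerApp / flwr.client.ClientApp instances.
    run_simulation(
        server_app=server_app,
        client_app=client_app,
        num_supernodes=100,
        backend_config={"client_resources": {"num_cpus": 2, "num_gpus": 0.0}},
    )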
@@ -57,52 +171,31 @@ def run_serverapp_th
     return serverapp_th
 
 
-def get_thread_exception_hook(stop_event: asyncio.Event) -> Callable[[Any], None]:
-    """Return a callback for when the ServerApp thread raises an exception."""
-
-    def execepthook(args: Any) -> None:
-        """Upon exception raised, log exception and trigger stop event."""
-        # log
-        log(
-            ERROR,
-            "The ServerApp thread triggered exception (%s): %s",
-            args.exc_type,
-            args.exc_value,
-        )
-        log(ERROR, traceback.format_exc())
-        # Set stop event
-        stop_event.set()
-        log(WARNING, "Triggered stop event for Simulation Engine.")
-
-    return execepthook
-
-
-def run_simulation() -> None:
-    """Run Simulation Engine."""
-    args = _parse_args_run_simulation().parse_args()
-
-    # Load JSON config
-    backend_config_dict = json.loads(args.backend_config)
-
-    # Enable GPU memory growth (relevant only for TF)
-    if args.enable_tf_gpu_growth:
-        log(INFO, "Enabling GPU growth for Tensorflow on the main thread.")
-        enable_tf_gpu_growth()
-        # Check that Backend config has also enabled using GPU growth
-        use_tf = backend_config_dict.get("tensorflow", False)
-        if not use_tf:
-            log(WARNING, "Enabling GPU growth for your backend.")
-            backend_config_dict["tensorflow"] = True
-
-    # Convert back to JSON stream
-    backend_config = json.dumps(backend_config_dict)
+# pylint: disable=too-many-locals
+def _main_loop(
+    num_supernodes: int,
+    backend_name: str,
+    backend_config_stream: str,
+    driver_api_address: str,
+    app_dir: str,
+    enable_tf_gpu_growth: bool,
+    client_app: Optional[ClientApp] = None,
+    client_app_attr: Optional[str] = None,
+    server_app: Optional[ServerApp] = None,
+    server_app_attr: Optional[str] = None,
+) -> None:
+    """Launch SuperLink with Simulation Engine, then ServerApp on a separate thread.
 
+    Everything runs on the main thread or a separate one, depending on whether the main
+    thread already contains a running Asyncio event loop. This is the case if running
+    the Simulation Engine on a Jupyter/Colab notebook.
+    """
     # Initialize StateFactory
     state_factory = StateFactory(":flwr-in-memory-state:")
 
     # Start Driver API
     driver_server: grpc.Server = run_driver_api_grpc(
-        address=args.driver_api_address,
+        address=driver_api_address,
         state_factory=state_factory,
         certificates=None,
     )
@@ -112,35 +205,39 @@ def run_simulation() -> None:
     try:
         # Initialize Driver
         driver = Driver(
-            driver_service_address=args.driver_api_address,
+            driver_service_address=driver_api_address,
             root_certificates=None,
         )
 
         # Get and run ServerApp thread
-        serverapp_th = run_serverapp_th(args.server_app, driver, args.dir, f_stop)
-        # Setup an exception hook
-        threading.excepthook = get_thread_exception_hook(f_stop)
+        serverapp_th = run_serverapp_th(
+            server_app_attr=server_app_attr,
+            server_app=server_app,
+            driver=driver,
+            app_dir=app_dir,
+            f_stop=f_stop,
+            enable_tf_gpu_growth=enable_tf_gpu_growth,
+        )
 
         # SuperLink with Simulation Engine
         event(EventType.RUN_SUPERLINK_ENTER)
         vce.start_vce(
-            num_supernodes=args.num_supernodes,
-            client_app_module_name=args.client_app,
-            backend_name=args.backend,
-            backend_config_json_stream=backend_config,
-            working_dir=args.dir,
+            num_supernodes=num_supernodes,
+            client_app_attr=client_app_attr,
+            client_app=client_app,
+            backend_name=backend_name,
+            backend_config_json_stream=backend_config_stream,
+            app_dir=app_dir,
             state_factory=state_factory,
             f_stop=f_stop,
         )
 
     except Exception as ex:
-
-        log(ERROR, "An exception occurred: %s", ex)
+        log(ERROR, "An exception occurred !! %s", ex)
         log(ERROR, traceback.format_exc())
         raise RuntimeError("An error was encountered. Ending simulation.") from ex
 
     finally:
-
         # Stop Driver
         driver_server.stop(grace=0)
         del driver
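The `f_stop` event is the only coordination channel here: the ServerApp thread sets it in its `finally` block, and the Simulation Engine shuts down once it fires. The pattern in isolation (generic names, no Flower wiring):

    import asyncio
    import threading

    def worker(stop_event: asyncio.Event) -> None:
        try:
            pass  # ... run the ServerApp ...
        finally:
            stop_event.set()  # always signal, on success or failure

    f_stop = asyncio.Event()
    th = threading.Thread(target=worker, args=(f_stop,))
    th.start()
    # The engine side watches `f_stop` (e.g. `await f_stop.wait()`) and tears down.
    th.join()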
@@ -154,20 +251,148 @@ def run_simulation() -> None:
     log(INFO, "Stopping Simulation Engine now.")
 
 
+# pylint: disable=too-many-arguments,too-many-locals
+def _run_simulation(
+    num_supernodes: int,
+    client_app: Optional[ClientApp] = None,
+    server_app: Optional[ServerApp] = None,
+    backend_name: str = "ray",
+    backend_config: Optional[Dict[str, ConfigsRecordValues]] = None,
+    client_app_attr: Optional[str] = None,
+    server_app_attr: Optional[str] = None,
+    app_dir: str = "",
+    driver_api_address: str = "0.0.0.0:9091",
+    enable_tf_gpu_growth: bool = False,
+    verbose_logging: bool = False,
+) -> None:
+    r"""Launch the Simulation Engine.
+
+    Parameters
+    ----------
+    num_supernodes : int
+        Number of nodes that run a ClientApp. They can be sampled by a
+        Driver in the ServerApp and receive a Message describing what the ClientApp
+        should perform.
+
+    client_app : Optional[ClientApp]
+        The `ClientApp` to be executed by each of the `SuperNodes`. It will receive
+        messages sent by the `ServerApp`.
+
+    server_app : Optional[ServerApp]
+        The `ServerApp` to be executed.
+
+    backend_name : str (default: ray)
+        A simulation backend that runs `ClientApp`s.
+
+    backend_config : Optional[Dict[str, ConfigsRecordValues]]
+        A dictionary, e.g. {"<keyA>":<value>, "<keyB>":<value>}, to configure a
+        backend. Values supported in <value> are those included by
+        `flwr.common.typing.ConfigsRecordValues`.
+
+    client_app_attr : str
+        A path to a `ClientApp` module to be loaded. For example: `client:app` or
+        `project.package.module:wrapper.app`.
+
+    server_app_attr : str
+        A path to a `ServerApp` module to be loaded. For example: `server:app` or
+        `project.package.module:wrapper.app`.
+
+    app_dir : str
+        Add specified directory to the PYTHONPATH and load `ClientApp` from there.
+        (Default: current working directory.)
+
+    driver_api_address : str (default: "0.0.0.0:9091")
+        Driver API (gRPC) server address (IPv4, IPv6, or a domain name)
+
+    enable_tf_gpu_growth : bool (default: False)
+        A boolean to indicate whether to enable GPU growth on the main thread. This is
+        desirable if you make use of a TensorFlow model on your `ServerApp` while
+        having your `ClientApp` running on the same GPU. Without enabling this, you
+        might encounter an out-of-memory error because TensorFlow, by default, allocates
+        all GPU memory. Read more about how `tf.config.experimental.set_memory_growth()`
+        works in the TensorFlow documentation: https://www.tensorflow.org/api/stable.
+
+    verbose_logging : bool (default: False)
+        When disabled, only INFO, WARNING and ERROR log messages will be shown. If
+        enabled, DEBUG-level logs will be displayed.
+    """
+    # Set logging level
+    if not verbose_logging:
+        logger = logging.getLogger("flwr")
+        logger.setLevel(INFO)
+
+    if backend_config is None:
+        backend_config = {}
+
+    if enable_tf_gpu_growth:
+        # Check that Backend config has also enabled using GPU growth
+        use_tf = backend_config.get("tensorflow", False)
+        if not use_tf:
+            log(WARNING, "Enabling GPU growth for your backend.")
+            backend_config["tensorflow"] = True
+
+    # Convert config to original JSON-stream format
+    backend_config_stream = json.dumps(backend_config)
+
+    simulation_engine_th = None
+    args = (
+        num_supernodes,
+        backend_name,
+        backend_config_stream,
+        driver_api_address,
+        app_dir,
+        enable_tf_gpu_growth,
+        client_app,
+        client_app_attr,
+        server_app,
+        server_app_attr,
+    )
+    # Detect if there is an Asyncio event loop already running.
+    # If yes, run everything on a separate thread. In environments
+    # like Jupyter/Colab notebooks, there is an event loop present.
+    run_in_thread = False
+    try:
+        _ = (
+            asyncio.get_running_loop()
+        )  # Raises RuntimeError if no event loop is present
+        log(DEBUG, "Asyncio event loop already running.")
+
+        run_in_thread = True
+
+    except RuntimeError:
+        log(DEBUG, "No asyncio event loop running")
+
+    finally:
+        if run_in_thread:
+            log(DEBUG, "Starting Simulation Engine on a new thread.")
+            simulation_engine_th = threading.Thread(target=_main_loop, args=args)
+            simulation_engine_th.start()
+            simulation_engine_th.join()
+        else:
+            log(DEBUG, "Starting Simulation Engine on the main thread.")
+            _main_loop(*args)
+
+
 def _parse_args_run_simulation() -> argparse.ArgumentParser:
     """Parse flower-simulation command line arguments."""
     parser = argparse.ArgumentParser(
         description="Start a Flower simulation",
     )
+    parser.add_argument(
+        "--server-app",
+        required=True,
+        help="For example: `server:app` or `project.package.module:wrapper.app`",
+    )
     parser.add_argument(
         "--client-app",
         required=True,
         help="For example: `client:app` or `project.package.module:wrapper.app`",
     )
     parser.add_argument(
-        "--server-app",
+        "--num-supernodes",
+        type=int,
         required=True,
-        help="For example: `server:app` or `project.package.module:wrapper.app`",
+        help="Number of simulated SuperNodes.",
     )
     parser.add_argument(
         "--driver-api-address",
@@ -175,18 +400,20 @@ def _parse_args_run_simulation() -> argparse.ArgumentParser:
         type=str,
         help="Driver API (gRPC) server address (IPv4, IPv6, or a domain name).",
     )
-    parser.add_argument(
-        "--num-supernodes",
-        type=int,
-        required=True,
-        help="Number of simulated SuperNodes.",
-    )
     parser.add_argument(
         "--backend",
         default="ray",
         type=str,
         help="Simulation backend that executes the ClientApp.",
     )
+    parser.add_argument(
+        "--backend-config",
+        type=str,
+        default='{"client_resources": {"num_cpus":2, "num_gpus":0.0}, "tensorflow": 0}',
+        help='A JSON formatted stream, e.g. \'{"<keyA>":<value>, "<keyB>":<value>}\' to '
+        "configure a backend. Values supported in <value> are those included by "
+        "`flwr.common.typing.ConfigsRecordValues`.",
+    )
     parser.add_argument(
         "--enable-tf-gpu-growth",
         action="store_true",
@@ -198,15 +425,13 @@ def _parse_args_run_simulation() -> argparse.ArgumentParser:
         "the TensorFlow documentation: https://www.tensorflow.org/api/stable.",
     )
     parser.add_argument(
-        "--backend-config",
-        type=str,
-        default='{"client_resources": {"num_cpus":2, "num_gpus":0.0}, "tensorflow": 0}',
-        help='A JSON formatted stream, e.g \'{"<keyA>":<value>, "<keyB>":<value>}\' to '
-        "configure a backend. Values supported in <value> are those included by "
-        "`flwr.common.typing.ConfigsRecordValues`. ",
+        "--verbose",
+        action="store_true",
+        help="When unset, only INFO, WARNING and ERROR log messages will be shown. "
+        "If set, DEBUG-level logs will be displayed.",
     )
     parser.add_argument(
-        "--dir",
+        "--app-dir",
         default="",
         help="Add specified directory to the PYTHONPATH and load "
         "ClientApp and ServerApp from there."
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: flwr-nightly
-Version: 1.8.0.dev20240303
+Version: 1.8.0.dev20240305
 Summary: Flower: A Friendly Federated Learning Framework
 Home-page: https://flower.ai
 License: Apache-2.0