flwr-nightly 1.11.0.dev20240822__py3-none-any.whl → 1.11.1.dev20240912__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of flwr-nightly might be problematic. Click here for more details.

Files changed (75)
  1. flwr/cli/app.py +0 -2
  2. flwr/cli/build.py +1 -1
  3. flwr/cli/new/new.py +41 -40
  4. flwr/cli/new/templates/app/LICENSE.tpl +202 -0
  5. flwr/cli/new/templates/app/README.baseline.md.tpl +127 -0
  6. flwr/cli/new/templates/app/README.flowertune.md.tpl +16 -6
  7. flwr/cli/new/templates/app/README.md.tpl +7 -30
  8. flwr/cli/new/templates/app/code/__init__.baseline.py.tpl +1 -0
  9. flwr/cli/new/templates/app/code/client.baseline.py.tpl +58 -0
  10. flwr/cli/new/templates/app/code/client.huggingface.py.tpl +19 -29
  11. flwr/cli/new/templates/app/code/client.tensorflow.py.tpl +0 -3
  12. flwr/cli/new/templates/app/code/dataset.baseline.py.tpl +36 -0
  13. flwr/cli/new/templates/app/code/flwr_tune/{client.py.tpl → client_app.py.tpl} +50 -40
  14. flwr/cli/new/templates/app/code/flwr_tune/dataset.py.tpl +32 -2
  15. flwr/cli/new/templates/app/code/flwr_tune/models.py.tpl +0 -3
  16. flwr/cli/new/templates/app/code/flwr_tune/server_app.py.tpl +95 -0
  17. flwr/cli/new/templates/app/code/flwr_tune/strategy.py.tpl +83 -0
  18. flwr/cli/new/templates/app/code/model.baseline.py.tpl +80 -0
  19. flwr/cli/new/templates/app/code/server.baseline.py.tpl +46 -0
  20. flwr/cli/new/templates/app/code/server.huggingface.py.tpl +18 -3
  21. flwr/cli/new/templates/app/code/strategy.baseline.py.tpl +1 -0
  22. flwr/cli/new/templates/app/code/task.huggingface.py.tpl +16 -13
  23. flwr/cli/new/templates/app/code/utils.baseline.py.tpl +1 -0
  24. flwr/cli/new/templates/app/pyproject.baseline.toml.tpl +138 -0
  25. flwr/cli/new/templates/app/pyproject.flowertune.toml.tpl +34 -7
  26. flwr/cli/new/templates/app/pyproject.huggingface.toml.tpl +9 -1
  27. flwr/cli/run/run.py +12 -2
  28. flwr/client/__init__.py +0 -4
  29. flwr/client/app.py +3 -4
  30. flwr/client/client.py +22 -1
  31. flwr/client/client_app.py +2 -2
  32. flwr/client/grpc_rere_client/client_interceptor.py +15 -7
  33. flwr/client/numpy_client.py +22 -1
  34. flwr/client/rest_client/connection.py +1 -1
  35. flwr/client/supernode/app.py +8 -7
  36. flwr/common/address.py +43 -0
  37. flwr/common/config.py +14 -11
  38. flwr/common/constant.py +12 -1
  39. flwr/common/record/recordset.py +1 -1
  40. flwr/common/record/typeddict.py +24 -1
  41. flwr/common/telemetry.py +36 -30
  42. flwr/server/__init__.py +0 -4
  43. flwr/server/app.py +27 -22
  44. flwr/server/compat/app.py +0 -5
  45. flwr/server/driver/grpc_driver.py +3 -6
  46. flwr/server/run_serverapp.py +20 -7
  47. flwr/server/superlink/fleet/grpc_adapter/grpc_adapter_servicer.py +15 -2
  48. flwr/server/superlink/fleet/grpc_bidi/grpc_server.py +5 -0
  49. flwr/server/superlink/fleet/grpc_rere/fleet_servicer.py +19 -8
  50. flwr/server/superlink/fleet/grpc_rere/server_interceptor.py +13 -12
  51. flwr/server/superlink/fleet/rest_rere/rest_api.py +71 -122
  52. flwr/server/superlink/fleet/vce/backend/backend.py +1 -2
  53. flwr/server/superlink/fleet/vce/backend/raybackend.py +33 -15
  54. flwr/server/superlink/fleet/vce/vce_api.py +2 -6
  55. flwr/server/superlink/state/in_memory_state.py +15 -15
  56. flwr/server/superlink/state/sqlite_state.py +10 -10
  57. flwr/server/superlink/state/state.py +8 -8
  58. flwr/server/workflow/secure_aggregation/secagg_workflow.py +1 -0
  59. flwr/server/workflow/secure_aggregation/secaggplus_workflow.py +1 -0
  60. flwr/simulation/ray_transport/ray_actor.py +2 -2
  61. flwr/simulation/run_simulation.py +85 -25
  62. flwr/superexec/__init__.py +0 -6
  63. flwr/superexec/app.py +5 -3
  64. flwr/superexec/deployment.py +2 -2
  65. flwr/superexec/simulation.py +20 -1
  66. {flwr_nightly-1.11.0.dev20240822.dist-info → flwr_nightly-1.11.1.dev20240912.dist-info}/METADATA +3 -3
  67. {flwr_nightly-1.11.0.dev20240822.dist-info → flwr_nightly-1.11.1.dev20240912.dist-info}/RECORD +70 -62
  68. flwr_nightly-1.11.1.dev20240912.dist-info/entry_points.txt +10 -0
  69. flwr/cli/new/templates/app/code/flwr_tune/app.py.tpl +0 -89
  70. flwr/cli/new/templates/app/code/flwr_tune/config.yaml.tpl +0 -34
  71. flwr/cli/new/templates/app/code/flwr_tune/server.py.tpl +0 -48
  72. flwr/cli/new/templates/app/code/flwr_tune/static_config.yaml.tpl +0 -11
  73. flwr_nightly-1.11.0.dev20240822.dist-info/entry_points.txt +0 -10
  74. {flwr_nightly-1.11.0.dev20240822.dist-info → flwr_nightly-1.11.1.dev20240912.dist-info}/LICENSE +0 -0
  75. {flwr_nightly-1.11.0.dev20240822.dist-info → flwr_nightly-1.11.1.dev20240912.dist-info}/WHEEL +0 -0
@@ -45,7 +45,7 @@ class InMemoryState(State): # pylint: disable=R0902,R0904
45
45
  self.task_ins_store: Dict[UUID, TaskIns] = {}
46
46
  self.task_res_store: Dict[UUID, TaskRes] = {}
47
47
 
48
- self.client_public_keys: Set[bytes] = set()
48
+ self.node_public_keys: Set[bytes] = set()
49
49
  self.server_public_key: Optional[bytes] = None
50
50
  self.server_private_key: Optional[bytes] = None
51
51
 
@@ -237,7 +237,7 @@ class InMemoryState(State): # pylint: disable=R0902,R0904
237
237
  return node_id
238
238
 
239
239
  def delete_node(self, node_id: int, public_key: Optional[bytes] = None) -> None:
240
- """Delete a client node."""
240
+ """Delete a node."""
241
241
  with self.lock:
242
242
  if node_id not in self.node_ids:
243
243
  raise ValueError(f"Node {node_id} not found")
@@ -254,7 +254,7 @@ class InMemoryState(State): # pylint: disable=R0902,R0904
254
254
  del self.node_ids[node_id]
255
255
 
256
256
  def get_nodes(self, run_id: int) -> Set[int]:
257
- """Return all available client nodes.
257
+ """Return all available nodes.
258
258
 
259
259
  Constraints
260
260
  -----------
@@ -271,9 +271,9 @@ class InMemoryState(State): # pylint: disable=R0902,R0904
271
271
  if online_until > current_time
272
272
  }
273
273
 
274
- def get_node_id(self, client_public_key: bytes) -> Optional[int]:
275
- """Retrieve stored `node_id` filtered by `client_public_keys`."""
276
- return self.public_key_to_node_id.get(client_public_key)
274
+ def get_node_id(self, node_public_key: bytes) -> Optional[int]:
275
+ """Retrieve stored `node_id` filtered by `node_public_keys`."""
276
+ return self.public_key_to_node_id.get(node_public_key)
277
277
 
278
278
  def create_run(
279
279
  self,
@@ -318,19 +318,19 @@ class InMemoryState(State): # pylint: disable=R0902,R0904
318
318
  """Retrieve `server_public_key` in urlsafe bytes."""
319
319
  return self.server_public_key
320
320
 
321
- def store_client_public_keys(self, public_keys: Set[bytes]) -> None:
322
- """Store a set of `client_public_keys` in state."""
321
+ def store_node_public_keys(self, public_keys: Set[bytes]) -> None:
322
+ """Store a set of `node_public_keys` in state."""
323
323
  with self.lock:
324
- self.client_public_keys = public_keys
324
+ self.node_public_keys = public_keys
325
325
 
326
- def store_client_public_key(self, public_key: bytes) -> None:
327
- """Store a `client_public_key` in state."""
326
+ def store_node_public_key(self, public_key: bytes) -> None:
327
+ """Store a `node_public_key` in state."""
328
328
  with self.lock:
329
- self.client_public_keys.add(public_key)
329
+ self.node_public_keys.add(public_key)
330
330
 
331
- def get_client_public_keys(self) -> Set[bytes]:
332
- """Retrieve all currently stored `client_public_keys` as a set."""
333
- return self.client_public_keys
331
+ def get_node_public_keys(self) -> Set[bytes]:
332
+ """Retrieve all currently stored `node_public_keys` as a set."""
333
+ return self.node_public_keys
334
334
 
335
335
  def get_run(self, run_id: int) -> Optional[Run]:
336
336
  """Retrieve information about the run with the specified `run_id`."""
@@ -569,7 +569,7 @@ class SqliteState(State): # pylint: disable=R0904
569
569
  return node_id
570
570
 
571
571
  def delete_node(self, node_id: int, public_key: Optional[bytes] = None) -> None:
572
- """Delete a client node."""
572
+ """Delete a node."""
573
573
  query = "DELETE FROM node WHERE node_id = ?"
574
574
  params = (node_id,)
575
575
 
@@ -607,10 +607,10 @@ class SqliteState(State): # pylint: disable=R0904
607
607
  result: Set[int] = {row["node_id"] for row in rows}
608
608
  return result
609
609
 
610
- def get_node_id(self, client_public_key: bytes) -> Optional[int]:
611
- """Retrieve stored `node_id` filtered by `client_public_keys`."""
610
+ def get_node_id(self, node_public_key: bytes) -> Optional[int]:
611
+ """Retrieve stored `node_id` filtered by `node_public_keys`."""
612
612
  query = "SELECT node_id FROM node WHERE public_key = :public_key;"
613
- row = self.query(query, {"public_key": client_public_key})
613
+ row = self.query(query, {"public_key": node_public_key})
614
614
  if len(row) > 0:
615
615
  node_id: int = row[0]["node_id"]
616
616
  return node_id
@@ -684,19 +684,19 @@ class SqliteState(State): # pylint: disable=R0904
684
684
  public_key = None
685
685
  return public_key
686
686
 
687
- def store_client_public_keys(self, public_keys: Set[bytes]) -> None:
688
- """Store a set of `client_public_keys` in state."""
687
+ def store_node_public_keys(self, public_keys: Set[bytes]) -> None:
688
+ """Store a set of `node_public_keys` in state."""
689
689
  query = "INSERT INTO public_key (public_key) VALUES (?)"
690
690
  data = [(key,) for key in public_keys]
691
691
  self.query(query, data)
692
692
 
693
- def store_client_public_key(self, public_key: bytes) -> None:
694
- """Store a `client_public_key` in state."""
693
+ def store_node_public_key(self, public_key: bytes) -> None:
694
+ """Store a `node_public_key` in state."""
695
695
  query = "INSERT INTO public_key (public_key) VALUES (:public_key)"
696
696
  self.query(query, {"public_key": public_key})
697
697
 
698
- def get_client_public_keys(self) -> Set[bytes]:
699
- """Retrieve all currently stored `client_public_keys` as a set."""
698
+ def get_node_public_keys(self) -> Set[bytes]:
699
+ """Retrieve all currently stored `node_public_keys` as a set."""
700
700
  query = "SELECT public_key FROM public_key"
701
701
  rows = self.query(query)
702
702
  result: Set[bytes] = {row["public_key"] for row in rows}
@@ -153,8 +153,8 @@ class State(abc.ABC): # pylint: disable=R0904
153
153
  """
154
154
 
155
155
  @abc.abstractmethod
156
- def get_node_id(self, client_public_key: bytes) -> Optional[int]:
157
- """Retrieve stored `node_id` filtered by `client_public_keys`."""
156
+ def get_node_id(self, node_public_key: bytes) -> Optional[int]:
157
+ """Retrieve stored `node_id` filtered by `node_public_keys`."""
158
158
 
159
159
  @abc.abstractmethod
160
160
  def create_run(
@@ -199,16 +199,16 @@ class State(abc.ABC): # pylint: disable=R0904
199
199
  """Retrieve `server_public_key` in urlsafe bytes."""
200
200
 
201
201
  @abc.abstractmethod
202
- def store_client_public_keys(self, public_keys: Set[bytes]) -> None:
203
- """Store a set of `client_public_keys` in state."""
202
+ def store_node_public_keys(self, public_keys: Set[bytes]) -> None:
203
+ """Store a set of `node_public_keys` in state."""
204
204
 
205
205
  @abc.abstractmethod
206
- def store_client_public_key(self, public_key: bytes) -> None:
207
- """Store a `client_public_key` in state."""
206
+ def store_node_public_key(self, public_key: bytes) -> None:
207
+ """Store a `node_public_key` in state."""
208
208
 
209
209
  @abc.abstractmethod
210
- def get_client_public_keys(self) -> Set[bytes]:
211
- """Retrieve all currently stored `client_public_keys` as a set."""
210
+ def get_node_public_keys(self) -> Set[bytes]:
211
+ """Retrieve all currently stored `node_public_keys` as a set."""
212
212
 
213
213
  @abc.abstractmethod
214
214
  def acknowledge_ping(self, node_id: int, ping_interval: float) -> bool:
@@ -35,6 +35,7 @@ class SecAggWorkflow(SecAggPlusWorkflow):
35
35
  contributions to compute the weighted average of model parameters.
36
36
 
37
37
  The protocol involves four main stages:
38
+
38
39
  - 'setup': Send SecAgg configuration to clients and collect their public keys.
39
40
  - 'share keys': Broadcast public keys among clients and collect encrypted secret
40
41
  key shares.
@@ -99,6 +99,7 @@ class SecAggPlusWorkflow:
99
99
  contributions to compute the weighted average of model parameters.
100
100
 
101
101
  The protocol involves four main stages:
102
+
102
103
  - 'setup': Send SecAgg+ configuration to clients and collect their public keys.
103
104
  - 'share keys': Broadcast public keys among clients and collect encrypted secret
104
105
  key shares.
@@ -124,14 +124,14 @@ def pool_size_from_resources(client_resources: Dict[str, Union[int, float]]) ->
124
124
  WARNING,
125
125
  "The ActorPool is empty. The system (CPUs=%s, GPUs=%s) "
126
126
  "does not meet the criteria to host at least one client with resources:"
127
- " %s. Lowering the `client_resources` could help.",
127
+ " %s. Lowering these resources could help.",
128
128
  num_cpus,
129
129
  num_gpus,
130
130
  client_resources,
131
131
  )
132
132
  raise ValueError(
133
133
  "ActorPool is empty. Stopping Simulation. "
134
- "Check 'client_resources' passed to `start_simulation`"
134
+ "Check `num_cpus` and/or `num_gpus` passed to the simulation engine"
135
135
  )
136
136
 
137
137
  return total_num_actors
@@ -25,7 +25,7 @@ from argparse import Namespace
25
25
  from logging import DEBUG, ERROR, INFO, WARNING
26
26
  from pathlib import Path
27
27
  from time import sleep
28
- from typing import List, Optional
28
+ from typing import Any, List, Optional
29
29
 
30
30
  from flwr.cli.config_utils import load_and_validate
31
31
  from flwr.client import ClientApp
@@ -35,6 +35,7 @@ from flwr.common.constant import RUN_ID_NUM_BYTES
35
35
  from flwr.common.logger import (
36
36
  set_logger_propagation,
37
37
  update_console_handler,
38
+ warn_deprecated_feature,
38
39
  warn_deprecated_feature_with_example,
39
40
  )
40
41
  from flwr.common.typing import Run, UserConfig
@@ -91,12 +92,41 @@ def _check_args_do_not_interfere(args: Namespace) -> bool:
91
92
  return True
92
93
 
93
94
 
95
+ def _replace_keys(d: Any, match: str, target: str) -> Any:
96
+ if isinstance(d, dict):
97
+ return {
98
+ k.replace(match, target): _replace_keys(v, match, target)
99
+ for k, v in d.items()
100
+ }
101
+ if isinstance(d, list):
102
+ return [_replace_keys(i, match, target) for i in d]
103
+ return d
104
+
105
+
94
106
  # Entry point from CLI
95
107
  # pylint: disable=too-many-locals
96
108
  def run_simulation_from_cli() -> None:
97
109
  """Run Simulation Engine from the CLI."""
98
110
  args = _parse_args_run_simulation().parse_args()
99
111
 
112
+ event(
113
+ EventType.CLI_FLOWER_SIMULATION_ENTER,
114
+ event_details={"backend": args.backend, "num-supernodes": args.num_supernodes},
115
+ )
116
+
117
+ # Add warnings for deprecated server_app and client_app arguments
118
+ if args.server_app:
119
+ warn_deprecated_feature(
120
+ "The `--server-app` argument is deprecated. "
121
+ "Please use the `--app` argument instead."
122
+ )
123
+
124
+ if args.client_app:
125
+ warn_deprecated_feature(
126
+ "The `--client-app` argument is deprecated. "
127
+ "Use the `--app` argument instead."
128
+ )
129
+
100
130
  if args.enable_tf_gpu_growth:
101
131
  warn_deprecated_feature_with_example(
102
132
  "Passing `--enable-tf-gpu-growth` is deprecated.",
@@ -105,6 +135,14 @@ def run_simulation_from_cli() -> None:
105
135
  code_example='TF_FORCE_GPU_ALLOW_GROWTH="true" flower-simulation <...>',
106
136
  )
107
137
 
138
+ # Load JSON config
139
+ backend_config_dict = json.loads(args.backend_config)
140
+
141
+ if backend_config_dict:
142
+ # Backend config internally operates with `_` not with `-`
143
+ backend_config_dict = _replace_keys(backend_config_dict, match="-", target="_")
144
+ log(DEBUG, "backend_config_dict: %s", backend_config_dict)
145
+
108
146
  # We are supporting two modes for the CLI entrypoint:
109
147
  # 1) Running an app dir containing a `pyproject.toml`
110
148
  # 2) Running any ClientApp and SeverApp w/o pyproject.toml being present
@@ -144,7 +182,9 @@ def run_simulation_from_cli() -> None:
144
182
  client_app_attr = app_components["clientapp"]
145
183
  server_app_attr = app_components["serverapp"]
146
184
 
147
- override_config = parse_config_args([args.run_config])
185
+ override_config = parse_config_args(
186
+ [args.run_config] if args.run_config else args.run_config
187
+ )
148
188
  fused_config = get_fused_config_from_dir(app_path, override_config)
149
189
  app_dir = args.app
150
190
  is_app = True
@@ -167,9 +207,6 @@ def run_simulation_from_cli() -> None:
167
207
  override_config=override_config,
168
208
  )
169
209
 
170
- # Load JSON config
171
- backend_config_dict = json.loads(args.backend_config)
172
-
173
210
  _run_simulation(
174
211
  server_app_attr=server_app_attr,
175
212
  client_app_attr=client_app_attr,
@@ -179,9 +216,11 @@ def run_simulation_from_cli() -> None:
179
216
  app_dir=app_dir,
180
217
  run=run,
181
218
  enable_tf_gpu_growth=args.enable_tf_gpu_growth,
219
+ delay_start=args.delay_start,
182
220
  verbose_logging=args.verbose,
183
221
  server_app_run_config=fused_config,
184
222
  is_app=is_app,
223
+ exit_event=EventType.CLI_FLOWER_SIMULATION_LEAVE,
185
224
  )
186
225
 
187
226
 
@@ -209,9 +248,8 @@ def run_simulation(
209
248
  messages sent by the `ServerApp`.
210
249
 
211
250
  num_supernodes : int
212
- Number of nodes that run a ClientApp. They can be sampled by a
213
- Driver in the ServerApp and receive a Message describing what the ClientApp
214
- should perform.
251
+ Number of nodes that run a ClientApp. They can be sampled by a Driver in the
252
+ ServerApp and receive a Message describing what the ClientApp should perform.
215
253
 
216
254
  backend_name : str (default: ray)
217
255
  A simulation backend that runs `ClientApp`s.
@@ -236,10 +274,15 @@ def run_simulation(
236
274
  When disabled, only INFO, WARNING and ERROR log messages will be shown. If
237
275
  enabled, DEBUG-level logs will be displayed.
238
276
  """
277
+ event(
278
+ EventType.PYTHON_API_RUN_SIMULATION_ENTER,
279
+ event_details={"backend": backend_name, "num-supernodes": num_supernodes},
280
+ )
281
+
239
282
  if enable_tf_gpu_growth:
240
283
  warn_deprecated_feature_with_example(
241
284
  "Passing `enable_tf_gpu_growth=True` is deprecated.",
242
- example_message="Instead, set the `TF_FORCE_GPU_ALLOW_GROWTH` environmnet "
285
+ example_message="Instead, set the `TF_FORCE_GPU_ALLOW_GROWTH` environment "
243
286
  "variable to true.",
244
287
  code_example='import os;os.environ["TF_FORCE_GPU_ALLOW_GROWTH"]="true"'
245
288
  "\n\tflwr.simulation.run_simulationt(...)",
@@ -253,6 +296,7 @@ def run_simulation(
253
296
  backend_config=backend_config,
254
297
  enable_tf_gpu_growth=enable_tf_gpu_growth,
255
298
  verbose_logging=verbose_logging,
299
+ exit_event=EventType.PYTHON_API_RUN_SIMULATION_LEAVE,
256
300
  )
257
301
 
258
302
 
@@ -266,7 +310,6 @@ def run_serverapp_th(
266
310
  f_stop: threading.Event,
267
311
  has_exception: threading.Event,
268
312
  enable_tf_gpu_growth: bool,
269
- delay_launch: int = 3,
270
313
  ) -> threading.Thread:
271
314
  """Run SeverApp in a thread."""
272
315
 
@@ -322,7 +365,6 @@ def run_serverapp_th(
322
365
  server_app,
323
366
  ),
324
367
  )
325
- sleep(delay_launch)
326
368
  serverapp_th.start()
327
369
  return serverapp_th
328
370
 
@@ -336,6 +378,8 @@ def _main_loop(
336
378
  is_app: bool,
337
379
  enable_tf_gpu_growth: bool,
338
380
  run: Run,
381
+ exit_event: EventType,
382
+ delay_start: int,
339
383
  flwr_dir: Optional[str] = None,
340
384
  client_app: Optional[ClientApp] = None,
341
385
  client_app_attr: Optional[str] = None,
@@ -343,7 +387,7 @@ def _main_loop(
343
387
  server_app_attr: Optional[str] = None,
344
388
  server_app_run_config: Optional[UserConfig] = None,
345
389
  ) -> None:
346
- """Launch SuperLink with Simulation Engine, then ServerApp on a separate thread."""
390
+ """Start ServerApp on a separate thread, then launch Simulation Engine."""
347
391
  # Initialize StateFactory
348
392
  state_factory = StateFactory(":flwr-in-memory-state:")
349
393
 
@@ -351,6 +395,7 @@ def _main_loop(
351
395
  # A Threading event to indicate if an exception was raised in the ServerApp thread
352
396
  server_app_thread_has_exception = threading.Event()
353
397
  serverapp_th = None
398
+ success = True
354
399
  try:
355
400
  # Register run
356
401
  log(DEBUG, "Pre-registering run with id %s", run.run_id)
@@ -374,8 +419,10 @@ def _main_loop(
374
419
  enable_tf_gpu_growth=enable_tf_gpu_growth,
375
420
  )
376
421
 
377
- # SuperLink with Simulation Engine
378
- event(EventType.RUN_SUPERLINK_ENTER)
422
+ # Buffer time so the `ServerApp` in separate thread is ready
423
+ log(DEBUG, "Buffer time delay: %ds", delay_start)
424
+ sleep(delay_start)
425
+ # Start Simulation Engine
379
426
  vce.start_vce(
380
427
  num_supernodes=num_supernodes,
381
428
  client_app_attr=client_app_attr,
@@ -393,13 +440,13 @@ def _main_loop(
393
440
  except Exception as ex:
394
441
  log(ERROR, "An exception occurred !! %s", ex)
395
442
  log(ERROR, traceback.format_exc())
443
+ success = False
396
444
  raise RuntimeError("An error was encountered. Ending simulation.") from ex
397
445
 
398
446
  finally:
399
447
  # Trigger stop event
400
448
  f_stop.set()
401
-
402
- event(EventType.RUN_SUPERLINK_LEAVE)
449
+ event(exit_event, event_details={"success": success})
403
450
  if serverapp_th:
404
451
  serverapp_th.join()
405
452
  if server_app_thread_has_exception.is_set():
@@ -411,6 +458,7 @@ def _main_loop(
411
458
  # pylint: disable=too-many-arguments,too-many-locals
412
459
  def _run_simulation(
413
460
  num_supernodes: int,
461
+ exit_event: EventType,
414
462
  client_app: Optional[ClientApp] = None,
415
463
  server_app: Optional[ServerApp] = None,
416
464
  backend_name: str = "ray",
@@ -422,6 +470,7 @@ def _run_simulation(
422
470
  flwr_dir: Optional[str] = None,
423
471
  run: Optional[Run] = None,
424
472
  enable_tf_gpu_growth: bool = False,
473
+ delay_start: int = 5,
425
474
  verbose_logging: bool = False,
426
475
  is_app: bool = False,
427
476
  ) -> None:
@@ -477,6 +526,8 @@ def _run_simulation(
477
526
  is_app,
478
527
  enable_tf_gpu_growth,
479
528
  run,
529
+ exit_event,
530
+ delay_start,
480
531
  flwr_dir,
481
532
  client_app,
482
533
  client_app_attr,
@@ -512,13 +563,22 @@ def _parse_args_run_simulation() -> argparse.ArgumentParser:
512
563
  parser = argparse.ArgumentParser(
513
564
  description="Start a Flower simulation",
514
565
  )
566
+ parser.add_argument(
567
+ "--app",
568
+ type=str,
569
+ default=None,
570
+ help="Path to a directory containing a FAB-like structure with a "
571
+ "pyproject.toml.",
572
+ )
515
573
  parser.add_argument(
516
574
  "--server-app",
517
- help="For example: `server:app` or `project.package.module:wrapper.app`",
575
+ help="(DEPRECATED: use --app instead) For example: `server:app` or "
576
+ "`project.package.module:wrapper.app`",
518
577
  )
519
578
  parser.add_argument(
520
579
  "--client-app",
521
- help="For example: `client:app` or `project.package.module:wrapper.app`",
580
+ help="(DEPRECATED: use --app instead) For example: `client:app` or "
581
+ "`project.package.module:wrapper.app`",
522
582
  )
523
583
  parser.add_argument(
524
584
  "--num-supernodes",
@@ -526,13 +586,6 @@ def _parse_args_run_simulation() -> argparse.ArgumentParser:
526
586
  required=True,
527
587
  help="Number of simulated SuperNodes.",
528
588
  )
529
- parser.add_argument(
530
- "--app",
531
- type=str,
532
- default=None,
533
- help="Path to a directory containing a FAB-like structure with a "
534
- "pyproject.toml.",
535
- )
536
589
  parser.add_argument(
537
590
  "--run-config",
538
591
  default=None,
@@ -562,6 +615,13 @@ def _parse_args_run_simulation() -> argparse.ArgumentParser:
562
615
  "Read more about how `tf.config.experimental.set_memory_growth()` works in "
563
616
  "the TensorFlow documentation: https://www.tensorflow.org/api/stable.",
564
617
  )
618
+ parser.add_argument(
619
+ "--delay-start",
620
+ type=int,
621
+ default=3,
622
+ help="Buffer time (in seconds) to delay the start the simulation engine after "
623
+ "the `ServerApp`, which runs in a separate thread, has been launched.",
624
+ )
565
625
  parser.add_argument(
566
626
  "--verbose",
567
627
  action="store_true",
@@ -13,9 +13,3 @@
13
13
  # limitations under the License.
14
14
  # ==============================================================================
15
15
  """Flower SuperExec service."""
16
-
17
- from .app import run_superexec as run_superexec
18
-
19
- __all__ = [
20
- "run_superexec",
21
- ]
flwr/superexec/app.py CHANGED
@@ -25,7 +25,7 @@ import grpc
25
25
  from flwr.common import EventType, event, log
26
26
  from flwr.common.address import parse_address
27
27
  from flwr.common.config import parse_config_args
28
- from flwr.common.constant import SUPEREXEC_DEFAULT_ADDRESS
28
+ from flwr.common.constant import EXEC_API_DEFAULT_ADDRESS
29
29
  from flwr.common.exit_handlers import register_exit_handlers
30
30
  from flwr.common.object_ref import load_app, validate
31
31
 
@@ -56,7 +56,9 @@ def run_superexec() -> None:
56
56
  address=address,
57
57
  executor=_load_executor(args),
58
58
  certificates=certificates,
59
- config=parse_config_args([args.executor_config]),
59
+ config=parse_config_args(
60
+ [args.executor_config] if args.executor_config else args.executor_config
61
+ ),
60
62
  )
61
63
 
62
64
  grpc_servers = [superexec_server]
@@ -79,7 +81,7 @@ def _parse_args_run_superexec() -> argparse.ArgumentParser:
79
81
  parser.add_argument(
80
82
  "--address",
81
83
  help="SuperExec (gRPC) server address (IPv4, IPv6, or a domain name)",
82
- default=SUPEREXEC_DEFAULT_ADDRESS,
84
+ default=EXEC_API_DEFAULT_ADDRESS,
83
85
  )
84
86
  parser.add_argument(
85
87
  "--executor",
@@ -23,13 +23,13 @@ from typing import Optional
23
23
  from typing_extensions import override
24
24
 
25
25
  from flwr.cli.install import install_from_fab
26
+ from flwr.common.constant import DRIVER_API_DEFAULT_ADDRESS
26
27
  from flwr.common.grpc import create_channel
27
28
  from flwr.common.logger import log
28
29
  from flwr.common.serde import fab_to_proto, user_config_to_proto
29
30
  from flwr.common.typing import Fab, UserConfig
30
31
  from flwr.proto.driver_pb2 import CreateRunRequest # pylint: disable=E0611
31
32
  from flwr.proto.driver_pb2_grpc import DriverStub
32
- from flwr.server.driver.grpc_driver import DEFAULT_SERVER_ADDRESS_DRIVER
33
33
 
34
34
  from .executor import Executor, RunTracker
35
35
 
@@ -50,7 +50,7 @@ class DeploymentEngine(Executor):
50
50
 
51
51
  def __init__(
52
52
  self,
53
- superlink: str = DEFAULT_SERVER_ADDRESS_DRIVER,
53
+ superlink: str = DRIVER_API_DEFAULT_ADDRESS,
54
54
  root_certificates: Optional[str] = None,
55
55
  flwr_dir: Optional[str] = None,
56
56
  ) -> None:
@@ -15,6 +15,7 @@
15
15
  """Simulation engine executor."""
16
16
 
17
17
 
18
+ import json
18
19
  import subprocess
19
20
  import sys
20
21
  from logging import ERROR, INFO, WARN
@@ -24,6 +25,7 @@ from typing_extensions import override
24
25
 
25
26
  from flwr.cli.config_utils import load_and_validate
26
27
  from flwr.cli.install import install_from_fab
28
+ from flwr.common.config import unflatten_dict
27
29
  from flwr.common.constant import RUN_ID_NUM_BYTES
28
30
  from flwr.common.logger import log
29
31
  from flwr.common.typing import UserConfig
@@ -108,6 +110,7 @@ class SimulationEngine(Executor):
108
110
  )
109
111
  self.verbose = verbose
110
112
 
113
+ # pylint: disable=too-many-locals
111
114
  @override
112
115
  def start_run(
113
116
  self,
@@ -152,6 +155,15 @@ class SimulationEngine(Executor):
152
155
  "Config extracted from FAB's pyproject.toml is not valid"
153
156
  )
154
157
 
158
+ # Flatten federated config
159
+ federation_config_flat = unflatten_dict(federation_config)
160
+
161
+ num_supernodes = federation_config_flat.get(
162
+ "num-supernodes", self.num_supernodes
163
+ )
164
+ backend_cfg = federation_config_flat.get("backend", {})
165
+ verbose: Optional[bool] = federation_config_flat.get("verbose")
166
+
155
167
  # In Simulation there is no SuperLink, still we create a run_id
156
168
  run_id = generate_rand_int_from_bytes(RUN_ID_NUM_BYTES)
157
169
  log(INFO, "Created run %s", str(run_id))
@@ -162,11 +174,18 @@ class SimulationEngine(Executor):
162
174
  "--app",
163
175
  f"{str(fab_path)}",
164
176
  "--num-supernodes",
165
- f"{federation_config.get('num-supernodes', self.num_supernodes)}",
177
+ f"{num_supernodes}",
166
178
  "--run-id",
167
179
  str(run_id),
168
180
  ]
169
181
 
182
+ if backend_cfg:
183
+ # Stringify as JSON
184
+ command.extend(["--backend-config", json.dumps(backend_cfg)])
185
+
186
+ if verbose:
187
+ command.extend(["--verbose"])
188
+
170
189
  if override_config:
171
190
  override_config_str = _user_config_to_str(override_config)
172
191
  command.extend(["--run-config", f"{override_config_str}"])
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: flwr-nightly
3
- Version: 1.11.0.dev20240822
3
+ Version: 1.11.1.dev20240912
4
4
  Summary: Flower: A Friendly Federated Learning Framework
5
5
  Home-page: https://flower.ai
6
6
  License: Apache-2.0
@@ -195,8 +195,8 @@ Other [examples](https://github.com/adap/flower/tree/main/examples):
195
195
  - [PyTorch: From Centralized to Federated](https://github.com/adap/flower/tree/main/examples/pytorch-from-centralized-to-federated)
196
196
  - [Vertical FL](https://github.com/adap/flower/tree/main/examples/vertical-fl)
197
197
  - [Federated Finetuning of OpenAI's Whisper](https://github.com/adap/flower/tree/main/examples/whisper-federated-finetuning)
198
- - [Federated Finetuning of Large Language Model](https://github.com/adap/flower/tree/main/examples/llm-flowertune)
199
- - [Federated Finetuning of a Vision Transformer](https://github.com/adap/flower/tree/main/examples/vit-finetune)
198
+ - [Federated Finetuning of Large Language Model](https://github.com/adap/flower/tree/main/examples/flowertune-llm)
199
+ - [Federated Finetuning of a Vision Transformer](https://github.com/adap/flower/tree/main/examples/flowertune-vit)
200
200
  - [Advanced Flower with TensorFlow/Keras](https://github.com/adap/flower/tree/main/examples/advanced-tensorflow)
201
201
  - [Advanced Flower with PyTorch](https://github.com/adap/flower/tree/main/examples/advanced-pytorch)
202
202
  - Single-Machine Simulation of Federated Learning Systems ([PyTorch](https://github.com/adap/flower/tree/main/examples/simulation-pytorch)) ([Tensorflow](https://github.com/adap/flower/tree/main/examples/simulation-tensorflow))