flwr-nightly 1.10.0.dev20240612__py3-none-any.whl → 1.10.0.dev20240624__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of flwr-nightly might be problematic. Click here for more details.

Files changed (130)
  1. flwr/cli/app.py +3 -0
  2. flwr/cli/build.py +6 -8
  3. flwr/cli/config_utils.py +53 -3
  4. flwr/cli/install.py +35 -20
  5. flwr/cli/new/new.py +104 -28
  6. flwr/cli/new/templates/app/README.flowertune.md.tpl +56 -0
  7. flwr/cli/new/templates/app/code/flwr_tune/__init__.py +15 -0
  8. flwr/cli/new/templates/app/code/flwr_tune/app.py.tpl +86 -0
  9. flwr/cli/new/templates/app/code/flwr_tune/client.py.tpl +124 -0
  10. flwr/cli/new/templates/app/code/flwr_tune/config.yaml.tpl +34 -0
  11. flwr/cli/new/templates/app/code/flwr_tune/dataset.py.tpl +57 -0
  12. flwr/cli/new/templates/app/code/flwr_tune/models.py.tpl +59 -0
  13. flwr/cli/new/templates/app/code/flwr_tune/server.py.tpl +48 -0
  14. flwr/cli/new/templates/app/code/flwr_tune/static_config.yaml.tpl +11 -0
  15. flwr/cli/new/templates/app/pyproject.flowertune.toml.tpl +42 -0
  16. flwr/cli/run/run.py +46 -2
  17. flwr/client/__init__.py +1 -1
  18. flwr/client/app.py +22 -10
  19. flwr/client/client_app.py +1 -1
  20. flwr/client/dpfedavg_numpy_client.py +1 -1
  21. flwr/client/grpc_adapter_client/__init__.py +15 -0
  22. flwr/client/grpc_adapter_client/connection.py +94 -0
  23. flwr/client/grpc_client/connection.py +5 -1
  24. flwr/client/grpc_rere_client/__init__.py +1 -1
  25. flwr/client/grpc_rere_client/connection.py +9 -2
  26. flwr/client/grpc_rere_client/grpc_adapter.py +133 -0
  27. flwr/client/message_handler/__init__.py +1 -1
  28. flwr/client/message_handler/message_handler.py +1 -1
  29. flwr/client/mod/__init__.py +4 -4
  30. flwr/client/mod/secure_aggregation/__init__.py +1 -1
  31. flwr/client/mod/utils.py +1 -1
  32. flwr/client/rest_client/__init__.py +1 -1
  33. flwr/client/rest_client/connection.py +10 -2
  34. flwr/client/supernode/app.py +141 -41
  35. flwr/common/__init__.py +12 -12
  36. flwr/common/address.py +1 -1
  37. flwr/common/config.py +73 -0
  38. flwr/common/constant.py +16 -1
  39. flwr/common/date.py +1 -1
  40. flwr/common/dp.py +1 -1
  41. flwr/common/grpc.py +1 -1
  42. flwr/common/object_ref.py +39 -5
  43. flwr/common/record/__init__.py +1 -1
  44. flwr/common/secure_aggregation/__init__.py +1 -1
  45. flwr/common/secure_aggregation/crypto/__init__.py +1 -1
  46. flwr/common/secure_aggregation/crypto/shamir.py +1 -1
  47. flwr/common/secure_aggregation/crypto/symmetric_encryption.py +1 -1
  48. flwr/common/secure_aggregation/ndarrays_arithmetic.py +1 -1
  49. flwr/common/secure_aggregation/quantization.py +1 -1
  50. flwr/common/secure_aggregation/secaggplus_constants.py +1 -1
  51. flwr/common/secure_aggregation/secaggplus_utils.py +1 -1
  52. flwr/common/telemetry.py +4 -0
  53. flwr/common/typing.py +9 -0
  54. flwr/common/version.py +14 -0
  55. flwr/proto/exec_pb2.py +34 -0
  56. flwr/proto/exec_pb2.pyi +55 -0
  57. flwr/proto/exec_pb2_grpc.py +101 -0
  58. flwr/proto/exec_pb2_grpc.pyi +41 -0
  59. flwr/proto/fab_pb2.py +30 -0
  60. flwr/proto/fab_pb2.pyi +56 -0
  61. flwr/proto/fab_pb2_grpc.py +4 -0
  62. flwr/proto/fab_pb2_grpc.pyi +4 -0
  63. flwr/server/__init__.py +2 -2
  64. flwr/server/app.py +62 -25
  65. flwr/server/compat/app.py +1 -1
  66. flwr/server/compat/app_utils.py +1 -1
  67. flwr/server/compat/driver_client_proxy.py +1 -1
  68. flwr/server/driver/driver.py +6 -0
  69. flwr/server/driver/grpc_driver.py +85 -63
  70. flwr/server/driver/inmemory_driver.py +28 -26
  71. flwr/server/run_serverapp.py +65 -20
  72. flwr/server/strategy/__init__.py +2 -2
  73. flwr/server/strategy/bulyan.py +1 -1
  74. flwr/server/strategy/dpfedavg_adaptive.py +1 -1
  75. flwr/server/strategy/dpfedavg_fixed.py +1 -1
  76. flwr/server/strategy/fedadagrad.py +1 -1
  77. flwr/server/strategy/fedadam.py +1 -1
  78. flwr/server/strategy/fedavg_android.py +1 -1
  79. flwr/server/strategy/fedavgm.py +1 -1
  80. flwr/server/strategy/fedmedian.py +1 -1
  81. flwr/server/strategy/fedopt.py +1 -1
  82. flwr/server/strategy/fedprox.py +1 -1
  83. flwr/server/strategy/fedxgb_bagging.py +1 -1
  84. flwr/server/strategy/fedxgb_cyclic.py +1 -1
  85. flwr/server/strategy/fedxgb_nn_avg.py +1 -1
  86. flwr/server/strategy/fedyogi.py +1 -1
  87. flwr/server/strategy/krum.py +1 -1
  88. flwr/server/strategy/qfedavg.py +1 -1
  89. flwr/server/superlink/driver/__init__.py +1 -1
  90. flwr/server/superlink/driver/driver_grpc.py +1 -1
  91. flwr/server/superlink/driver/driver_servicer.py +15 -3
  92. flwr/server/superlink/fleet/__init__.py +1 -1
  93. flwr/server/superlink/fleet/grpc_adapter/__init__.py +15 -0
  94. flwr/server/superlink/fleet/grpc_adapter/grpc_adapter_servicer.py +131 -0
  95. flwr/server/superlink/fleet/grpc_bidi/__init__.py +1 -1
  96. flwr/server/superlink/fleet/grpc_bidi/flower_service_servicer.py +1 -1
  97. flwr/server/superlink/fleet/grpc_bidi/grpc_bridge.py +1 -1
  98. flwr/server/superlink/fleet/grpc_bidi/grpc_client_proxy.py +1 -1
  99. flwr/server/superlink/fleet/grpc_bidi/grpc_server.py +5 -1
  100. flwr/server/superlink/fleet/grpc_rere/__init__.py +1 -1
  101. flwr/server/superlink/fleet/grpc_rere/fleet_servicer.py +1 -1
  102. flwr/server/superlink/fleet/message_handler/__init__.py +1 -1
  103. flwr/server/superlink/fleet/message_handler/message_handler.py +4 -4
  104. flwr/server/superlink/fleet/rest_rere/__init__.py +1 -1
  105. flwr/server/superlink/fleet/rest_rere/rest_api.py +1 -1
  106. flwr/server/superlink/fleet/vce/backend/raybackend.py +44 -25
  107. flwr/server/superlink/fleet/vce/vce_api.py +3 -1
  108. flwr/server/superlink/state/__init__.py +1 -1
  109. flwr/server/superlink/state/in_memory_state.py +9 -6
  110. flwr/server/superlink/state/sqlite_state.py +7 -4
  111. flwr/server/superlink/state/state.py +6 -5
  112. flwr/server/superlink/state/state_factory.py +11 -2
  113. flwr/server/utils/__init__.py +1 -1
  114. flwr/server/utils/tensorboard.py +1 -1
  115. flwr/simulation/__init__.py +5 -2
  116. flwr/simulation/app.py +1 -1
  117. flwr/simulation/ray_transport/__init__.py +1 -1
  118. flwr/simulation/ray_transport/ray_actor.py +0 -6
  119. flwr/simulation/ray_transport/ray_client_proxy.py +1 -1
  120. flwr/simulation/run_simulation.py +63 -22
  121. flwr/superexec/__init__.py +21 -0
  122. flwr/superexec/app.py +178 -0
  123. flwr/superexec/exec_grpc.py +51 -0
  124. flwr/superexec/exec_servicer.py +65 -0
  125. flwr/superexec/executor.py +54 -0
  126. {flwr_nightly-1.10.0.dev20240612.dist-info → flwr_nightly-1.10.0.dev20240624.dist-info}/METADATA +2 -1
  127. {flwr_nightly-1.10.0.dev20240612.dist-info → flwr_nightly-1.10.0.dev20240624.dist-info}/RECORD +130 -101
  128. {flwr_nightly-1.10.0.dev20240612.dist-info → flwr_nightly-1.10.0.dev20240624.dist-info}/entry_points.txt +1 -0
  129. {flwr_nightly-1.10.0.dev20240612.dist-info → flwr_nightly-1.10.0.dev20240624.dist-info}/LICENSE +0 -0
  130. {flwr_nightly-1.10.0.dev20240612.dist-info → flwr_nightly-1.10.0.dev20240624.dist-info}/WHEEL +0 -0
@@ -15,7 +15,7 @@
15
15
  """Ray backend for the Fleet API using the Simulation Engine."""
16
16
 
17
17
  import pathlib
18
- from logging import DEBUG, ERROR, WARNING
18
+ from logging import DEBUG, ERROR
19
19
  from typing import Callable, Dict, List, Tuple, Union
20
20
 
21
21
  import ray
@@ -24,16 +24,15 @@ from flwr.client.client_app import ClientApp
24
24
  from flwr.common.context import Context
25
25
  from flwr.common.logger import log
26
26
  from flwr.common.message import Message
27
- from flwr.simulation.ray_transport.ray_actor import (
28
- BasicActorPool,
29
- ClientAppActor,
30
- init_ray,
31
- )
27
+ from flwr.common.typing import ConfigsRecordValues
28
+ from flwr.simulation.ray_transport.ray_actor import BasicActorPool, ClientAppActor
32
29
  from flwr.simulation.ray_transport.utils import enable_tf_gpu_growth
33
30
 
34
31
  from .backend import Backend, BackendConfig
35
32
 
36
33
  ClientResourcesDict = Dict[str, Union[int, float]]
34
+ ActorArgsDict = Dict[str, Union[int, float, Callable[[], None]]]
35
+ RunTimeEnvDict = Dict[str, Union[str, List[str]]]
37
36
 
38
37
 
39
38
  class RayBackend(Backend):
@@ -51,40 +50,29 @@ class RayBackend(Backend):
51
50
  if not pathlib.Path(work_dir).exists():
52
51
  raise ValueError(f"Specified work_dir {work_dir} does not exist.")
53
52
 
54
- # Init ray and append working dir if needed
55
- runtime_env = (
56
- self._configure_runtime_env(work_dir=work_dir) if work_dir else None
57
- )
58
-
59
- if backend_config.get("mute_logging", False):
60
- init_ray(
61
- logging_level=WARNING, log_to_driver=False, runtime_env=runtime_env
62
- )
63
- elif backend_config.get("silent", False):
64
- init_ray(logging_level=WARNING, log_to_driver=True, runtime_env=runtime_env)
65
- else:
66
- init_ray(runtime_env=runtime_env)
53
+ # Initialise ray
54
+ self.init_args_key = "init_args"
55
+ self.init_ray(backend_config, work_dir)
67
56
 
68
57
  # Validate client resources
69
58
  self.client_resources_key = "client_resources"
59
+ client_resources = self._validate_client_resources(config=backend_config)
70
60
 
71
61
  # Create actor pool
72
- use_tf = backend_config.get("tensorflow", False)
73
- actor_kwargs = {"on_actor_init_fn": enable_tf_gpu_growth} if use_tf else {}
62
+ actor_kwargs = self._validate_actor_arguments(config=backend_config)
74
63
 
75
- client_resources = self._validate_client_resources(config=backend_config)
76
64
  self.pool = BasicActorPool(
77
65
  actor_type=ClientAppActor,
78
66
  client_resources=client_resources,
79
67
  actor_kwargs=actor_kwargs,
80
68
  )
81
69
 
82
- def _configure_runtime_env(self, work_dir: str) -> Dict[str, Union[str, List[str]]]:
70
+ def _configure_runtime_env(self, work_dir: str) -> RunTimeEnvDict:
83
71
  """Return list of files/subdirectories to exclude relative to work_dir.
84
72
 
85
73
  Without this, Ray will push everything to the Ray Cluster.
86
74
  """
87
- runtime_env: Dict[str, Union[str, List[str]]] = {"working_dir": work_dir}
75
+ runtime_env: RunTimeEnvDict = {"working_dir": work_dir}
88
76
 
89
77
  excludes = []
90
78
  path = pathlib.Path(work_dir)
@@ -125,6 +113,37 @@ class RayBackend(Backend):
125
113
 
126
114
  return client_resources
127
115
 
116
+ def _validate_actor_arguments(self, config: BackendConfig) -> ActorArgsDict:
117
+ actor_args_config = config.get("actor", False)
118
+ actor_args: ActorArgsDict = {}
119
+ if actor_args_config:
120
+ use_tf = actor_args.get("tensorflow", False)
121
+ if use_tf:
122
+ actor_args["on_actor_init_fn"] = enable_tf_gpu_growth
123
+ return actor_args
124
+
125
+ def init_ray(self, backend_config: BackendConfig, work_dir: str) -> None:
126
+ """Intialises Ray if not already initialised."""
127
+ if not ray.is_initialized():
128
+ # Init ray and append working dir if needed
129
+ runtime_env = (
130
+ self._configure_runtime_env(work_dir=work_dir) if work_dir else None
131
+ )
132
+
133
+ ray_init_args: Dict[
134
+ str,
135
+ Union[ConfigsRecordValues, RunTimeEnvDict],
136
+ ] = {}
137
+
138
+ if backend_config.get(self.init_args_key):
139
+ for k, v in backend_config[self.init_args_key].items():
140
+ ray_init_args[k] = v
141
+
142
+ if runtime_env is not None:
143
+ ray_init_args["runtime_env"] = runtime_env
144
+
145
+ ray.init(**ray_init_args)
146
+
128
147
  @property
129
148
  def num_workers(self) -> int:
130
149
  """Return number of actors in pool."""
@@ -152,7 +171,7 @@ class RayBackend(Backend):
152
171
  partition_id = message.metadata.partition_id
153
172
 
154
173
  try:
155
- # Submite a task to the pool
174
+ # Submit a task to the pool
156
175
  future = await self.pool.submit(
157
176
  lambda a, a_fn, mssg, cid, state: a.run.remote(a_fn, mssg, cid, state),
158
177
  (app, message, str(partition_id), context),
@@ -20,6 +20,7 @@ import sys
20
20
  import time
21
21
  import traceback
22
22
  from logging import DEBUG, ERROR, INFO, WARN
23
+ from pathlib import Path
23
24
  from typing import Callable, Dict, List, Optional
24
25
 
25
26
  from flwr.client.client_app import ClientApp, ClientAppException, LoadClientAppError
@@ -274,6 +275,7 @@ def start_vce(
274
275
  # Use mapping constructed externally. This also means nodes
275
276
  # have previously being registered.
276
277
  nodes_mapping = existing_nodes_mapping
278
+ app_dir = str(Path(app_dir).absolute())
277
279
 
278
280
  if not state_factory:
279
281
  log(INFO, "A StateFactory was not supplied to the SimulationEngine.")
@@ -323,7 +325,7 @@ def start_vce(
323
325
  if app_dir is not None:
324
326
  sys.path.insert(0, app_dir)
325
327
 
326
- app: ClientApp = load_app(client_app_attr, LoadClientAppError)
328
+ app: ClientApp = load_app(client_app_attr, LoadClientAppError, app_dir)
327
329
 
328
330
  if not isinstance(app, ClientApp):
329
331
  raise LoadClientAppError(
@@ -1,4 +1,4 @@
1
- # Copyright 2023 Flower Labs GmbH. All Rights Reserved.
1
+ # Copyright 2024 Flower Labs GmbH. All Rights Reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -1,4 +1,4 @@
1
- # Copyright 2023 Flower Labs GmbH. All Rights Reserved.
1
+ # Copyright 2024 Flower Labs GmbH. All Rights Reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -23,6 +23,7 @@ from typing import Dict, List, Optional, Set, Tuple
23
23
  from uuid import UUID, uuid4
24
24
 
25
25
  from flwr.common import log, now
26
+ from flwr.common.typing import Run
26
27
  from flwr.proto.task_pb2 import TaskIns, TaskRes # pylint: disable=E0611
27
28
  from flwr.server.superlink.state.state import State
28
29
  from flwr.server.utils import validate_task_ins_or_res
@@ -40,7 +41,7 @@ class InMemoryState(State): # pylint: disable=R0902,R0904
40
41
  self.public_key_to_node_id: Dict[bytes, int] = {}
41
42
 
42
43
  # Map run_id to (fab_id, fab_version)
43
- self.run_ids: Dict[int, Tuple[str, str]] = {}
44
+ self.run_ids: Dict[int, Run] = {}
44
45
  self.task_ins_store: Dict[UUID, TaskIns] = {}
45
46
  self.task_res_store: Dict[UUID, TaskRes] = {}
46
47
 
@@ -281,7 +282,9 @@ class InMemoryState(State): # pylint: disable=R0902,R0904
281
282
  run_id: int = int.from_bytes(os.urandom(8), "little", signed=True)
282
283
 
283
284
  if run_id not in self.run_ids:
284
- self.run_ids[run_id] = (fab_id, fab_version)
285
+ self.run_ids[run_id] = Run(
286
+ run_id=run_id, fab_id=fab_id, fab_version=fab_version
287
+ )
285
288
  return run_id
286
289
  log(ERROR, "Unexpected run creation failure.")
287
290
  return 0
@@ -319,13 +322,13 @@ class InMemoryState(State): # pylint: disable=R0902,R0904
319
322
  """Retrieve all currently stored `client_public_keys` as a set."""
320
323
  return self.client_public_keys
321
324
 
322
- def get_run(self, run_id: int) -> Tuple[int, str, str]:
325
+ def get_run(self, run_id: int) -> Optional[Run]:
323
326
  """Retrieve information about the run with the specified `run_id`."""
324
327
  with self.lock:
325
328
  if run_id not in self.run_ids:
326
329
  log(ERROR, "`run_id` is invalid")
327
- return 0, "", ""
328
- return run_id, *self.run_ids[run_id]
330
+ return None
331
+ return self.run_ids[run_id]
329
332
 
330
333
  def acknowledge_ping(self, node_id: int, ping_interval: float) -> bool:
331
334
  """Acknowledge a ping received from a node, serving as a heartbeat."""
@@ -1,4 +1,4 @@
1
- # Copyright 2023 Flower Labs GmbH. All Rights Reserved.
1
+ # Copyright 2024 Flower Labs GmbH. All Rights Reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -24,6 +24,7 @@ from typing import Any, Dict, List, Optional, Sequence, Set, Tuple, Union, cast
24
24
  from uuid import UUID, uuid4
25
25
 
26
26
  from flwr.common import log, now
27
+ from flwr.common.typing import Run
27
28
  from flwr.proto.node_pb2 import Node # pylint: disable=E0611
28
29
  from flwr.proto.recordset_pb2 import RecordSet # pylint: disable=E0611
29
30
  from flwr.proto.task_pb2 import Task, TaskIns, TaskRes # pylint: disable=E0611
@@ -680,15 +681,17 @@ class SqliteState(State): # pylint: disable=R0904
680
681
  result: Set[bytes] = {row["public_key"] for row in rows}
681
682
  return result
682
683
 
683
- def get_run(self, run_id: int) -> Tuple[int, str, str]:
684
+ def get_run(self, run_id: int) -> Optional[Run]:
684
685
  """Retrieve information about the run with the specified `run_id`."""
685
686
  query = "SELECT * FROM run WHERE run_id = ?;"
686
687
  try:
687
688
  row = self.query(query, (run_id,))[0]
688
- return run_id, row["fab_id"], row["fab_version"]
689
+ return Run(
690
+ run_id=run_id, fab_id=row["fab_id"], fab_version=row["fab_version"]
691
+ )
689
692
  except sqlite3.IntegrityError:
690
693
  log(ERROR, "`run_id` does not exist.")
691
- return 0, "", ""
694
+ return None
692
695
 
693
696
  def acknowledge_ping(self, node_id: int, ping_interval: float) -> bool:
694
697
  """Acknowledge a ping received from a node, serving as a heartbeat."""
@@ -1,4 +1,4 @@
1
- # Copyright 2022 Flower Labs GmbH. All Rights Reserved.
1
+ # Copyright 2024 Flower Labs GmbH. All Rights Reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -16,9 +16,10 @@
16
16
 
17
17
 
18
18
  import abc
19
- from typing import List, Optional, Set, Tuple
19
+ from typing import List, Optional, Set
20
20
  from uuid import UUID
21
21
 
22
+ from flwr.common.typing import Run
22
23
  from flwr.proto.task_pb2 import TaskIns, TaskRes # pylint: disable=E0611
23
24
 
24
25
 
@@ -160,7 +161,7 @@ class State(abc.ABC): # pylint: disable=R0904
160
161
  """Create a new run for the specified `fab_id` and `fab_version`."""
161
162
 
162
163
  @abc.abstractmethod
163
- def get_run(self, run_id: int) -> Tuple[int, str, str]:
164
+ def get_run(self, run_id: int) -> Optional[Run]:
164
165
  """Retrieve information about the run with the specified `run_id`.
165
166
 
166
167
  Parameters
@@ -170,8 +171,8 @@ class State(abc.ABC): # pylint: disable=R0904
170
171
 
171
172
  Returns
172
173
  -------
173
- Tuple[int, str, str]
174
- A tuple containing three elements:
174
+ Optional[Run]
175
+ A dataclass instance containing three elements if `run_id` is valid:
175
176
  - `run_id`: The identifier of the run, same as the specified `run_id`.
176
177
  - `fab_id`: The identifier of the FAB used in the specified run.
177
178
  - `fab_version`: The version of the FAB used in the specified run.
@@ -1,4 +1,4 @@
1
- # Copyright 2022 Flower Labs GmbH. All Rights Reserved.
1
+ # Copyright 2024 Flower Labs GmbH. All Rights Reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -26,7 +26,16 @@ from .state import State
26
26
 
27
27
 
28
28
  class StateFactory:
29
- """Factory class that creates State instances."""
29
+ """Factory class that creates State instances.
30
+
31
+ Parameters
32
+ ----------
33
+ database : str
34
+ A string representing the path to the database file that will be opened.
35
+ Note that passing ':memory:' will open a connection to a database that is
36
+ in RAM, instead of on disk. For more information on special in-memory
37
+ databases, please refer to https://sqlite.org/inmemorydb.html.
38
+ """
30
39
 
31
40
  def __init__(self, database: str) -> None:
32
41
  self.database = database
@@ -1,4 +1,4 @@
1
- # Copyright 2020 Flower Labs GmbH. All Rights Reserved.
1
+ # Copyright 2021 Flower Labs GmbH. All Rights Reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -1,4 +1,4 @@
1
- # Copyright 2020 Flower Labs GmbH. All Rights Reserved.
1
+ # Copyright 2021 Flower Labs GmbH. All Rights Reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -1,4 +1,4 @@
1
- # Copyright 2020 Flower Labs GmbH. All Rights Reserved.
1
+ # Copyright 2021 Flower Labs GmbH. All Rights Reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -36,4 +36,7 @@ To install the necessary dependencies, install `flwr` with the `simulation` extr
36
36
  raise ImportError(RAY_IMPORT_ERROR)
37
37
 
38
38
 
39
- __all__ = ["start_simulation", "run_simulation"]
39
+ __all__ = [
40
+ "run_simulation",
41
+ "start_simulation",
42
+ ]
flwr/simulation/app.py CHANGED
@@ -1,4 +1,4 @@
1
- # Copyright 2020 Flower Labs GmbH. All Rights Reserved.
1
+ # Copyright 2021 Flower Labs GmbH. All Rights Reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -1,4 +1,4 @@
1
- # Copyright 2020 Flower Labs GmbH. All Rights Reserved.
1
+ # Copyright 2021 Flower Labs GmbH. All Rights Reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -399,12 +399,6 @@ class VirtualClientEngineActorPool(ActorPool):
399
399
  return self._fetch_future_result(cid)
400
400
 
401
401
 
402
- def init_ray(*args: Any, **kwargs: Any) -> None:
403
- """Intialises Ray if not already initialised."""
404
- if not ray.is_initialized():
405
- ray.init(*args, **kwargs)
406
-
407
-
408
402
  class BasicActorPool:
409
403
  """A basic actor pool."""
410
404
 
@@ -1,4 +1,4 @@
1
- # Copyright 2020 Flower Labs GmbH. All Rights Reserved.
1
+ # Copyright 2021 Flower Labs GmbH. All Rights Reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -22,16 +22,17 @@ import threading
22
22
  import traceback
23
23
  from logging import DEBUG, ERROR, INFO, WARNING
24
24
  from time import sleep
25
- from typing import Dict, Optional
25
+ from typing import Optional
26
26
 
27
27
  from flwr.client import ClientApp
28
28
  from flwr.common import EventType, event, log
29
29
  from flwr.common.logger import set_logger_propagation, update_console_handler
30
- from flwr.common.typing import ConfigsRecordValues
30
+ from flwr.common.typing import Run
31
31
  from flwr.server.driver import Driver, InMemoryDriver
32
32
  from flwr.server.run_serverapp import run
33
33
  from flwr.server.server_app import ServerApp
34
34
  from flwr.server.superlink.fleet import vce
35
+ from flwr.server.superlink.fleet.vce.backend.backend import BackendConfig
35
36
  from flwr.server.superlink.state import StateFactory
36
37
  from flwr.simulation.ray_transport.utils import (
37
38
  enable_tf_gpu_growth as enable_gpu_growth,
@@ -53,6 +54,7 @@ def run_simulation_from_cli() -> None:
53
54
  backend_name=args.backend,
54
55
  backend_config=backend_config_dict,
55
56
  app_dir=args.app_dir,
57
+ run_id=args.run_id,
56
58
  enable_tf_gpu_growth=args.enable_tf_gpu_growth,
57
59
  verbose_logging=args.verbose,
58
60
  )
@@ -65,7 +67,7 @@ def run_simulation(
65
67
  client_app: ClientApp,
66
68
  num_supernodes: int,
67
69
  backend_name: str = "ray",
68
- backend_config: Optional[Dict[str, ConfigsRecordValues]] = None,
70
+ backend_config: Optional[BackendConfig] = None,
69
71
  enable_tf_gpu_growth: bool = False,
70
72
  verbose_logging: bool = False,
71
73
  ) -> None:
@@ -89,9 +91,12 @@ def run_simulation(
89
91
  backend_name : str (default: ray)
90
92
  A simulation backend that runs `ClientApp`s.
91
93
 
92
- backend_config : Optional[Dict[str, ConfigsRecordValues]]
93
- 'A dictionary, e.g {"<keyA>": <value>, "<keyB>": <value>} to configure a
94
- backend. Values supported in <value> are those included by
94
+ backend_config : Optional[BackendConfig]
95
+ 'A dictionary to configure a backend. Separate dictionaries to configure
96
+ different elements of backend. Supported top-level keys are `init_args`
97
+ for values parsed to initialisation of backend, `client_resources`
98
+ to define the resources for clients, and `actor` to define the actor
99
+ parameters. Values supported in <value> are those included by
95
100
  `flwr.common.typing.ConfigsRecordValues`.
96
101
 
97
102
  enable_tf_gpu_growth : bool (default: False)
@@ -103,7 +108,7 @@ def run_simulation(
103
108
  works in the TensorFlow documentation: https://www.tensorflow.org/api/stable.
104
109
 
105
110
  verbose_logging : bool (default: False)
106
- When diabled, only INFO, WARNING and ERROR log messages will be shown. If
111
+ When disabled, only INFO, WARNING and ERROR log messages will be shown. If
107
112
  enabled, DEBUG-level logs will be displayed.
108
113
  """
109
114
  _run_simulation(
@@ -132,7 +137,7 @@ def run_serverapp_th(
132
137
  def server_th_with_start_checks( # type: ignore
133
138
  tf_gpu_growth: bool, stop_event: asyncio.Event, **kwargs
134
139
  ) -> None:
135
- """Run SeverApp, after check if GPU memory grouwth has to be set.
140
+ """Run SeverApp, after check if GPU memory growth has to be set.
136
141
 
137
142
  Upon exception, trigger stop event for Simulation Engine.
138
143
  """
@@ -168,6 +173,16 @@ def run_serverapp_th(
168
173
  return serverapp_th
169
174
 
170
175
 
176
+ def _override_run_id(state: StateFactory, run_id_to_replace: int, run_id: int) -> None:
177
+ """Override the run_id of an existing Run."""
178
+ log(DEBUG, "Pre-registering run with id %s", run_id)
179
+ # Remove run
180
+ run_info: Run = state.state().run_ids.pop(run_id_to_replace) # type: ignore
181
+ # Update with new run_id and insert back in state
182
+ run_info.run_id = run_id
183
+ state.state().run_ids[run_id] = run_info # type: ignore
184
+
185
+
171
186
  # pylint: disable=too-many-locals
172
187
  def _main_loop(
173
188
  num_supernodes: int,
@@ -175,6 +190,7 @@ def _main_loop(
175
190
  backend_config_stream: str,
176
191
  app_dir: str,
177
192
  enable_tf_gpu_growth: bool,
193
+ run_id: Optional[int] = None,
178
194
  client_app: Optional[ClientApp] = None,
179
195
  client_app_attr: Optional[str] = None,
180
196
  server_app: Optional[ServerApp] = None,
@@ -182,7 +198,7 @@ def _main_loop(
182
198
  ) -> None:
183
199
  """Launch SuperLink with Simulation Engine, then ServerApp on a separate thread.
184
200
 
185
- Everything runs on the main thread or a separate one, depening on whether the main
201
+ Everything runs on the main thread or a separate one, depending on whether the main
186
202
  thread already contains a running Asyncio event loop. This is the case if running
187
203
  the Simulation Engine on a Jupyter/Colab notebook.
188
204
  """
@@ -192,8 +208,15 @@ def _main_loop(
192
208
  f_stop = asyncio.Event()
193
209
  serverapp_th = None
194
210
  try:
211
+ # Create run (with empty fab_id and fab_version)
212
+ run_id_ = state_factory.state().create_run("", "")
213
+
214
+ if run_id:
215
+ _override_run_id(state_factory, run_id_to_replace=run_id_, run_id=run_id)
216
+ run_id_ = run_id
217
+
195
218
  # Initialize Driver
196
- driver = InMemoryDriver(state_factory)
219
+ driver = InMemoryDriver(run_id=run_id_, state_factory=state_factory)
197
220
 
198
221
  # Get and run ServerApp thread
199
222
  serverapp_th = run_serverapp_th(
@@ -240,10 +263,11 @@ def _run_simulation(
240
263
  client_app: Optional[ClientApp] = None,
241
264
  server_app: Optional[ServerApp] = None,
242
265
  backend_name: str = "ray",
243
- backend_config: Optional[Dict[str, ConfigsRecordValues]] = None,
266
+ backend_config: Optional[BackendConfig] = None,
244
267
  client_app_attr: Optional[str] = None,
245
268
  server_app_attr: Optional[str] = None,
246
269
  app_dir: str = "",
270
+ run_id: Optional[int] = None,
247
271
  enable_tf_gpu_growth: bool = False,
248
272
  verbose_logging: bool = False,
249
273
  ) -> None:
@@ -266,9 +290,12 @@ def _run_simulation(
266
290
  backend_name : str (default: ray)
267
291
  A simulation backend that runs `ClientApp`s.
268
292
 
269
- backend_config : Optional[Dict[str, ConfigsRecordValues]]
270
- 'A dictionary, e.g {"<keyA>":<value>, "<keyB>":<value>} to configure a
271
- backend. Values supported in <value> are those included by
293
+ backend_config : Optional[BackendConfig]
294
+ 'A dictionary to configure a backend. Separate dictionaries to configure
295
+ different elements of backend. Supported top-level keys are `init_args`
296
+ for values parsed to initialisation of backend, `client_resources`
297
+ to define the resources for clients, and `actor` to define the actor
298
+ parameters. Values supported in <value> are those included by
272
299
  `flwr.common.typing.ConfigsRecordValues`.
273
300
 
274
301
  client_app_attr : str
@@ -283,34 +310,41 @@ def _run_simulation(
283
310
  Add specified directory to the PYTHONPATH and load `ClientApp` from there.
284
311
  (Default: current working directory.)
285
312
 
313
+ run_id : Optional[int]
314
+ An integer specifying the ID of the run started when running this function.
315
+
286
316
  enable_tf_gpu_growth : bool (default: False)
287
317
  A boolean to indicate whether to enable GPU growth on the main thread. This is
288
318
  desirable if you make use of a TensorFlow model on your `ServerApp` while
289
319
  having your `ClientApp` running on the same GPU. Without enabling this, you
290
- might encounter an out-of-memory error becasue TensorFlow by default allocates
320
+ might encounter an out-of-memory error because TensorFlow by default allocates
291
321
  all GPU memory. Read mor about how `tf.config.experimental.set_memory_growth()`
292
322
  works in the TensorFlow documentation: https://www.tensorflow.org/api/stable.
293
323
 
294
324
  verbose_logging : bool (default: False)
295
- When diabled, only INFO, WARNING and ERROR log messages will be shown. If
325
+ When disabled, only INFO, WARNING and ERROR log messages will be shown. If
296
326
  enabled, DEBUG-level logs will be displayed.
297
327
  """
298
328
  if backend_config is None:
299
329
  backend_config = {}
300
330
 
331
+ if "init_args" not in backend_config:
332
+ backend_config["init_args"] = {}
333
+
301
334
  # Set logging level
302
335
  logger = logging.getLogger("flwr")
303
336
  if verbose_logging:
304
337
  update_console_handler(level=DEBUG, timestamps=True, colored=True)
305
338
  else:
306
- backend_config["silent"] = True
339
+ backend_config["init_args"]["logging_level"] = WARNING
340
+ backend_config["init_args"]["log_to_driver"] = True
307
341
 
308
342
  if enable_tf_gpu_growth:
309
343
  # Check that Backend config has also enabled using GPU growth
310
- use_tf = backend_config.get("tensorflow", False)
344
+ use_tf = backend_config.get("actor", {}).get("tensorflow", False)
311
345
  if not use_tf:
312
346
  log(WARNING, "Enabling GPU growth for your backend.")
313
- backend_config["tensorflow"] = True
347
+ backend_config["actor"]["tensorflow"] = True
314
348
 
315
349
  # Convert config to original JSON-stream format
316
350
  backend_config_stream = json.dumps(backend_config)
@@ -322,13 +356,14 @@ def _run_simulation(
322
356
  backend_config_stream,
323
357
  app_dir,
324
358
  enable_tf_gpu_growth,
359
+ run_id,
325
360
  client_app,
326
361
  client_app_attr,
327
362
  server_app,
328
363
  server_app_attr,
329
364
  )
330
365
  # Detect if there is an Asyncio event loop already running.
331
- # If yes, run everything on a separate thread. In environmnets
366
+ # If yes, run everything on a separate thread. In environments
332
367
  # like Jupyter/Colab notebooks, there is an event loop present.
333
368
  run_in_thread = False
334
369
  try:
@@ -340,7 +375,7 @@ def _run_simulation(
340
375
  run_in_thread = True
341
376
 
342
377
  except RuntimeError:
343
- log(DEBUG, "No asyncio event loop runnig")
378
+ log(DEBUG, "No asyncio event loop running")
344
379
 
345
380
  finally:
346
381
  if run_in_thread:
@@ -385,7 +420,8 @@ def _parse_args_run_simulation() -> argparse.ArgumentParser:
385
420
  parser.add_argument(
386
421
  "--backend-config",
387
422
  type=str,
388
- default='{"client_resources": {"num_cpus":2, "num_gpus":0.0}, "tensorflow": 0}',
423
+ default='{"client_resources": {"num_cpus":2, "num_gpus":0.0},'
424
+ '"actor": {"tensorflow": 0}}',
389
425
  help='A JSON formatted stream, e.g \'{"<keyA>":<value>, "<keyB>":<value>}\' to '
390
426
  "configure a backend. Values supported in <value> are those included by "
391
427
  "`flwr.common.typing.ConfigsRecordValues`. ",
@@ -413,5 +449,10 @@ def _parse_args_run_simulation() -> argparse.ArgumentParser:
413
449
  "ClientApp and ServerApp from there."
414
450
  " Default: current working directory.",
415
451
  )
452
+ parser.add_argument(
453
+ "--run-id",
454
+ type=int,
455
+ help="Sets the ID of the run started by the Simulation Engine.",
456
+ )
416
457
 
417
458
  return parser
@@ -0,0 +1,21 @@
1
+ # Copyright 2024 Flower Labs GmbH. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+ """Flower SuperExec service."""
16
+
17
+ from .app import run_superexec as run_superexec
18
+
19
+ __all__ = [
20
+ "run_superexec",
21
+ ]