flwr-nightly 1.8.0.dev20240314__py3-none-any.whl → 1.11.0.dev20240813__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of flwr-nightly might be problematic. Click here for more details.

Files changed (237)
  1. flwr/cli/app.py +7 -0
  2. flwr/cli/build.py +150 -0
  3. flwr/cli/config_utils.py +219 -0
  4. flwr/cli/example.py +3 -1
  5. flwr/cli/install.py +227 -0
  6. flwr/cli/new/new.py +179 -48
  7. flwr/cli/new/templates/app/.gitignore.tpl +160 -0
  8. flwr/cli/new/templates/app/README.flowertune.md.tpl +56 -0
  9. flwr/cli/new/templates/app/README.md.tpl +1 -5
  10. flwr/cli/new/templates/app/code/__init__.py.tpl +1 -1
  11. flwr/cli/new/templates/app/code/client.huggingface.py.tpl +65 -0
  12. flwr/cli/new/templates/app/code/client.jax.py.tpl +56 -0
  13. flwr/cli/new/templates/app/code/client.mlx.py.tpl +93 -0
  14. flwr/cli/new/templates/app/code/client.numpy.py.tpl +3 -2
  15. flwr/cli/new/templates/app/code/client.pytorch.py.tpl +23 -11
  16. flwr/cli/new/templates/app/code/client.sklearn.py.tpl +97 -0
  17. flwr/cli/new/templates/app/code/client.tensorflow.py.tpl +60 -1
  18. flwr/cli/new/templates/app/code/flwr_tune/__init__.py +15 -0
  19. flwr/cli/new/templates/app/code/flwr_tune/app.py.tpl +89 -0
  20. flwr/cli/new/templates/app/code/flwr_tune/client.py.tpl +126 -0
  21. flwr/cli/new/templates/app/code/flwr_tune/config.yaml.tpl +34 -0
  22. flwr/cli/new/templates/app/code/flwr_tune/dataset.py.tpl +57 -0
  23. flwr/cli/new/templates/app/code/flwr_tune/models.py.tpl +59 -0
  24. flwr/cli/new/templates/app/code/flwr_tune/server.py.tpl +48 -0
  25. flwr/cli/new/templates/app/code/flwr_tune/static_config.yaml.tpl +11 -0
  26. flwr/cli/new/templates/app/code/server.huggingface.py.tpl +23 -0
  27. flwr/cli/new/templates/app/code/server.jax.py.tpl +20 -0
  28. flwr/cli/new/templates/app/code/server.mlx.py.tpl +20 -0
  29. flwr/cli/new/templates/app/code/server.numpy.py.tpl +17 -9
  30. flwr/cli/new/templates/app/code/server.pytorch.py.tpl +21 -18
  31. flwr/cli/new/templates/app/code/server.sklearn.py.tpl +24 -0
  32. flwr/cli/new/templates/app/code/server.tensorflow.py.tpl +29 -1
  33. flwr/cli/new/templates/app/code/task.huggingface.py.tpl +99 -0
  34. flwr/cli/new/templates/app/code/task.jax.py.tpl +57 -0
  35. flwr/cli/new/templates/app/code/task.mlx.py.tpl +102 -0
  36. flwr/cli/new/templates/app/code/task.pytorch.py.tpl +28 -23
  37. flwr/cli/new/templates/app/code/task.tensorflow.py.tpl +53 -0
  38. flwr/cli/new/templates/app/pyproject.flowertune.toml.tpl +39 -0
  39. flwr/cli/new/templates/app/pyproject.huggingface.toml.tpl +38 -0
  40. flwr/cli/new/templates/app/pyproject.jax.toml.tpl +34 -0
  41. flwr/cli/new/templates/app/pyproject.mlx.toml.tpl +39 -0
  42. flwr/cli/new/templates/app/pyproject.numpy.toml.tpl +25 -12
  43. flwr/cli/new/templates/app/pyproject.pytorch.toml.tpl +29 -14
  44. flwr/cli/new/templates/app/pyproject.sklearn.toml.tpl +33 -0
  45. flwr/cli/new/templates/app/pyproject.tensorflow.toml.tpl +29 -14
  46. flwr/cli/run/run.py +168 -17
  47. flwr/cli/utils.py +75 -4
  48. flwr/client/__init__.py +6 -1
  49. flwr/client/app.py +239 -248
  50. flwr/client/client_app.py +70 -9
  51. flwr/client/dpfedavg_numpy_client.py +1 -1
  52. flwr/client/grpc_adapter_client/__init__.py +15 -0
  53. flwr/client/grpc_adapter_client/connection.py +97 -0
  54. flwr/client/grpc_client/connection.py +18 -5
  55. flwr/client/grpc_rere_client/__init__.py +1 -1
  56. flwr/client/grpc_rere_client/client_interceptor.py +158 -0
  57. flwr/client/grpc_rere_client/connection.py +127 -33
  58. flwr/client/grpc_rere_client/grpc_adapter.py +140 -0
  59. flwr/client/heartbeat.py +74 -0
  60. flwr/client/message_handler/__init__.py +1 -1
  61. flwr/client/message_handler/message_handler.py +7 -7
  62. flwr/client/mod/__init__.py +5 -5
  63. flwr/client/mod/centraldp_mods.py +4 -2
  64. flwr/client/mod/comms_mods.py +4 -4
  65. flwr/client/mod/localdp_mod.py +9 -4
  66. flwr/client/mod/secure_aggregation/__init__.py +1 -1
  67. flwr/client/mod/secure_aggregation/secaggplus_mod.py +1 -1
  68. flwr/client/mod/utils.py +1 -1
  69. flwr/client/node_state.py +60 -10
  70. flwr/client/node_state_tests.py +4 -3
  71. flwr/client/rest_client/__init__.py +1 -1
  72. flwr/client/rest_client/connection.py +177 -157
  73. flwr/client/supernode/__init__.py +26 -0
  74. flwr/client/supernode/app.py +464 -0
  75. flwr/client/typing.py +1 -0
  76. flwr/common/__init__.py +13 -11
  77. flwr/common/address.py +1 -1
  78. flwr/common/config.py +193 -0
  79. flwr/common/constant.py +42 -1
  80. flwr/common/context.py +26 -1
  81. flwr/common/date.py +1 -1
  82. flwr/common/dp.py +1 -1
  83. flwr/common/grpc.py +6 -2
  84. flwr/common/logger.py +79 -8
  85. flwr/common/message.py +167 -105
  86. flwr/common/object_ref.py +126 -25
  87. flwr/common/record/__init__.py +1 -1
  88. flwr/common/record/parametersrecord.py +0 -1
  89. flwr/common/record/recordset.py +78 -27
  90. flwr/common/recordset_compat.py +8 -1
  91. flwr/common/retry_invoker.py +25 -13
  92. flwr/common/secure_aggregation/__init__.py +1 -1
  93. flwr/common/secure_aggregation/crypto/__init__.py +1 -1
  94. flwr/common/secure_aggregation/crypto/shamir.py +1 -1
  95. flwr/common/secure_aggregation/crypto/symmetric_encryption.py +21 -2
  96. flwr/common/secure_aggregation/ndarrays_arithmetic.py +1 -1
  97. flwr/common/secure_aggregation/quantization.py +1 -1
  98. flwr/common/secure_aggregation/secaggplus_constants.py +1 -1
  99. flwr/common/secure_aggregation/secaggplus_utils.py +1 -1
  100. flwr/common/serde.py +209 -3
  101. flwr/common/telemetry.py +25 -0
  102. flwr/common/typing.py +38 -0
  103. flwr/common/version.py +14 -0
  104. flwr/proto/clientappio_pb2.py +41 -0
  105. flwr/proto/clientappio_pb2.pyi +110 -0
  106. flwr/proto/clientappio_pb2_grpc.py +101 -0
  107. flwr/proto/clientappio_pb2_grpc.pyi +40 -0
  108. flwr/proto/common_pb2.py +36 -0
  109. flwr/proto/common_pb2.pyi +121 -0
  110. flwr/proto/common_pb2_grpc.py +4 -0
  111. flwr/proto/common_pb2_grpc.pyi +4 -0
  112. flwr/proto/driver_pb2.py +26 -19
  113. flwr/proto/driver_pb2.pyi +34 -0
  114. flwr/proto/driver_pb2_grpc.py +70 -0
  115. flwr/proto/driver_pb2_grpc.pyi +28 -0
  116. flwr/proto/exec_pb2.py +43 -0
  117. flwr/proto/exec_pb2.pyi +95 -0
  118. flwr/proto/exec_pb2_grpc.py +101 -0
  119. flwr/proto/exec_pb2_grpc.pyi +41 -0
  120. flwr/proto/fab_pb2.py +30 -0
  121. flwr/proto/fab_pb2.pyi +56 -0
  122. flwr/proto/fab_pb2_grpc.py +4 -0
  123. flwr/proto/fab_pb2_grpc.pyi +4 -0
  124. flwr/proto/fleet_pb2.py +29 -23
  125. flwr/proto/fleet_pb2.pyi +33 -0
  126. flwr/proto/fleet_pb2_grpc.py +102 -0
  127. flwr/proto/fleet_pb2_grpc.pyi +35 -0
  128. flwr/proto/grpcadapter_pb2.py +32 -0
  129. flwr/proto/grpcadapter_pb2.pyi +43 -0
  130. flwr/proto/grpcadapter_pb2_grpc.py +66 -0
  131. flwr/proto/grpcadapter_pb2_grpc.pyi +24 -0
  132. flwr/proto/message_pb2.py +41 -0
  133. flwr/proto/message_pb2.pyi +122 -0
  134. flwr/proto/message_pb2_grpc.py +4 -0
  135. flwr/proto/message_pb2_grpc.pyi +4 -0
  136. flwr/proto/run_pb2.py +35 -0
  137. flwr/proto/run_pb2.pyi +76 -0
  138. flwr/proto/run_pb2_grpc.py +4 -0
  139. flwr/proto/run_pb2_grpc.pyi +4 -0
  140. flwr/proto/task_pb2.py +7 -8
  141. flwr/proto/task_pb2.pyi +8 -5
  142. flwr/server/__init__.py +4 -8
  143. flwr/server/app.py +298 -350
  144. flwr/server/compat/app.py +6 -57
  145. flwr/server/compat/app_utils.py +5 -4
  146. flwr/server/compat/driver_client_proxy.py +29 -48
  147. flwr/server/compat/legacy_context.py +5 -4
  148. flwr/server/driver/__init__.py +2 -0
  149. flwr/server/driver/driver.py +22 -132
  150. flwr/server/driver/grpc_driver.py +224 -74
  151. flwr/server/driver/inmemory_driver.py +183 -0
  152. flwr/server/history.py +20 -20
  153. flwr/server/run_serverapp.py +121 -34
  154. flwr/server/server.py +11 -7
  155. flwr/server/server_app.py +59 -10
  156. flwr/server/serverapp_components.py +52 -0
  157. flwr/server/strategy/__init__.py +2 -2
  158. flwr/server/strategy/bulyan.py +1 -1
  159. flwr/server/strategy/dp_adaptive_clipping.py +3 -3
  160. flwr/server/strategy/dp_fixed_clipping.py +4 -3
  161. flwr/server/strategy/dpfedavg_adaptive.py +1 -1
  162. flwr/server/strategy/dpfedavg_fixed.py +1 -1
  163. flwr/server/strategy/fedadagrad.py +1 -1
  164. flwr/server/strategy/fedadam.py +1 -1
  165. flwr/server/strategy/fedavg_android.py +1 -1
  166. flwr/server/strategy/fedavgm.py +1 -1
  167. flwr/server/strategy/fedmedian.py +1 -1
  168. flwr/server/strategy/fedopt.py +1 -1
  169. flwr/server/strategy/fedprox.py +1 -1
  170. flwr/server/strategy/fedxgb_bagging.py +1 -1
  171. flwr/server/strategy/fedxgb_cyclic.py +1 -1
  172. flwr/server/strategy/fedxgb_nn_avg.py +1 -1
  173. flwr/server/strategy/fedyogi.py +1 -1
  174. flwr/server/strategy/krum.py +1 -1
  175. flwr/server/strategy/qfedavg.py +1 -1
  176. flwr/server/superlink/driver/__init__.py +1 -1
  177. flwr/server/superlink/driver/driver_grpc.py +1 -1
  178. flwr/server/superlink/driver/driver_servicer.py +51 -4
  179. flwr/server/superlink/ffs/__init__.py +24 -0
  180. flwr/server/superlink/ffs/disk_ffs.py +104 -0
  181. flwr/server/superlink/ffs/ffs.py +79 -0
  182. flwr/server/superlink/fleet/__init__.py +1 -1
  183. flwr/server/superlink/fleet/grpc_adapter/__init__.py +15 -0
  184. flwr/server/superlink/fleet/grpc_adapter/grpc_adapter_servicer.py +131 -0
  185. flwr/server/superlink/fleet/grpc_bidi/__init__.py +1 -1
  186. flwr/server/superlink/fleet/grpc_bidi/flower_service_servicer.py +1 -1
  187. flwr/server/superlink/fleet/grpc_bidi/grpc_bridge.py +1 -1
  188. flwr/server/superlink/fleet/grpc_bidi/grpc_client_proxy.py +1 -1
  189. flwr/server/superlink/fleet/grpc_bidi/grpc_server.py +8 -2
  190. flwr/server/superlink/fleet/grpc_rere/__init__.py +1 -1
  191. flwr/server/superlink/fleet/grpc_rere/fleet_servicer.py +30 -2
  192. flwr/server/superlink/fleet/grpc_rere/server_interceptor.py +214 -0
  193. flwr/server/superlink/fleet/message_handler/__init__.py +1 -1
  194. flwr/server/superlink/fleet/message_handler/message_handler.py +42 -2
  195. flwr/server/superlink/fleet/rest_rere/__init__.py +1 -1
  196. flwr/server/superlink/fleet/rest_rere/rest_api.py +59 -1
  197. flwr/server/superlink/fleet/vce/backend/__init__.py +1 -1
  198. flwr/server/superlink/fleet/vce/backend/backend.py +5 -5
  199. flwr/server/superlink/fleet/vce/backend/raybackend.py +53 -56
  200. flwr/server/superlink/fleet/vce/vce_api.py +190 -127
  201. flwr/server/superlink/state/__init__.py +1 -1
  202. flwr/server/superlink/state/in_memory_state.py +159 -42
  203. flwr/server/superlink/state/sqlite_state.py +243 -39
  204. flwr/server/superlink/state/state.py +81 -6
  205. flwr/server/superlink/state/state_factory.py +11 -2
  206. flwr/server/superlink/state/utils.py +62 -0
  207. flwr/server/typing.py +2 -0
  208. flwr/server/utils/__init__.py +1 -1
  209. flwr/server/utils/tensorboard.py +1 -1
  210. flwr/server/utils/validator.py +23 -9
  211. flwr/server/workflow/default_workflows.py +67 -25
  212. flwr/server/workflow/secure_aggregation/secaggplus_workflow.py +18 -6
  213. flwr/simulation/__init__.py +7 -4
  214. flwr/simulation/app.py +67 -36
  215. flwr/simulation/ray_transport/__init__.py +1 -1
  216. flwr/simulation/ray_transport/ray_actor.py +20 -46
  217. flwr/simulation/ray_transport/ray_client_proxy.py +36 -16
  218. flwr/simulation/run_simulation.py +308 -92
  219. flwr/superexec/__init__.py +21 -0
  220. flwr/superexec/app.py +184 -0
  221. flwr/superexec/deployment.py +185 -0
  222. flwr/superexec/exec_grpc.py +55 -0
  223. flwr/superexec/exec_servicer.py +70 -0
  224. flwr/superexec/executor.py +75 -0
  225. flwr/superexec/simulation.py +193 -0
  226. {flwr_nightly-1.8.0.dev20240314.dist-info → flwr_nightly-1.11.0.dev20240813.dist-info}/METADATA +10 -6
  227. flwr_nightly-1.11.0.dev20240813.dist-info/RECORD +288 -0
  228. flwr_nightly-1.11.0.dev20240813.dist-info/entry_points.txt +10 -0
  229. flwr/cli/flower_toml.py +0 -140
  230. flwr/cli/new/templates/app/flower.toml.tpl +0 -13
  231. flwr/cli/new/templates/app/requirements.numpy.txt.tpl +0 -2
  232. flwr/cli/new/templates/app/requirements.pytorch.txt.tpl +0 -4
  233. flwr/cli/new/templates/app/requirements.tensorflow.txt.tpl +0 -4
  234. flwr_nightly-1.8.0.dev20240314.dist-info/RECORD +0 -211
  235. flwr_nightly-1.8.0.dev20240314.dist-info/entry_points.txt +0 -9
  236. {flwr_nightly-1.8.0.dev20240314.dist-info → flwr_nightly-1.11.0.dev20240813.dist-info}/LICENSE +0 -0
  237. {flwr_nightly-1.8.0.dev20240314.dist-info → flwr_nightly-1.11.0.dev20240813.dist-info}/WHEEL +0 -0
@@ -18,46 +18,169 @@ import argparse
18
18
  import asyncio
19
19
  import json
20
20
  import logging
21
+ import sys
21
22
  import threading
22
23
  import traceback
24
+ from argparse import Namespace
23
25
  from logging import DEBUG, ERROR, INFO, WARNING
26
+ from pathlib import Path
24
27
  from time import sleep
25
- from typing import Dict, Optional
26
-
27
- import grpc
28
+ from typing import List, Optional
28
29
 
30
+ from flwr.cli.config_utils import load_and_validate
29
31
  from flwr.client import ClientApp
30
32
  from flwr.common import EventType, event, log
31
- from flwr.common.typing import ConfigsRecordValues
32
- from flwr.server.driver.driver import Driver
33
- from flwr.server.run_serverapp import run
33
+ from flwr.common.config import get_fused_config_from_dir, parse_config_args
34
+ from flwr.common.constant import RUN_ID_NUM_BYTES
35
+ from flwr.common.logger import (
36
+ set_logger_propagation,
37
+ update_console_handler,
38
+ warn_deprecated_feature_with_example,
39
+ )
40
+ from flwr.common.typing import Run, UserConfig
41
+ from flwr.server.driver import Driver, InMemoryDriver
42
+ from flwr.server.run_serverapp import run as run_server_app
34
43
  from flwr.server.server_app import ServerApp
35
- from flwr.server.superlink.driver.driver_grpc import run_driver_api_grpc
36
44
  from flwr.server.superlink.fleet import vce
45
+ from flwr.server.superlink.fleet.vce.backend.backend import BackendConfig
37
46
  from flwr.server.superlink.state import StateFactory
47
+ from flwr.server.superlink.state.utils import generate_rand_int_from_bytes
38
48
  from flwr.simulation.ray_transport.utils import (
39
49
  enable_tf_gpu_growth as enable_gpu_growth,
40
50
  )
41
51
 
42
52
 
53
+ def _check_args_do_not_interfere(args: Namespace) -> bool:
54
+ """Ensure decoupling of flags for different ways to start the simulation."""
55
+ mode_one_args = ["app", "run_config"]
56
+ mode_two_args = ["client_app", "server_app"]
57
+
58
+ def _resolve_message(conflict_keys: List[str]) -> str:
59
+ return ",".join([f"`--{key}`".replace("_", "-") for key in conflict_keys])
60
+
61
+ # When passing `--app`, `--app-dir` is ignored
62
+ if args.app and args.app_dir:
63
+ log(ERROR, "Either `--app` or `--app-dir` can be set, but not both.")
64
+ return False
65
+
66
+ if any(getattr(args, key) for key in mode_one_args):
67
+ if any(getattr(args, key) for key in mode_two_args):
68
+ log(
69
+ ERROR,
70
+ "Passing any of {%s} alongside with any of {%s}",
71
+ _resolve_message(mode_one_args),
72
+ _resolve_message(mode_two_args),
73
+ )
74
+ return False
75
+
76
+ if not args.app:
77
+ log(ERROR, "You need to pass --app")
78
+ return False
79
+
80
+ return True
81
+
82
+ # Ensure all args are set (required for the non-FAB mode of execution)
83
+ if not all(getattr(args, key) for key in mode_two_args):
84
+ log(
85
+ ERROR,
86
+ "Passing all of %s keys are required.",
87
+ _resolve_message(mode_two_args),
88
+ )
89
+ return False
90
+
91
+ return True
92
+
93
+
43
94
  # Entry point from CLI
95
+ # pylint: disable=too-many-locals
44
96
  def run_simulation_from_cli() -> None:
45
97
  """Run Simulation Engine from the CLI."""
46
98
  args = _parse_args_run_simulation().parse_args()
47
99
 
100
+ if args.enable_tf_gpu_growth:
101
+ warn_deprecated_feature_with_example(
102
+ "Passing `--enable-tf-gpu-growth` is deprecated.",
103
+ example_message="Instead, set the `TF_FORCE_GPU_ALLOW_GROWTH` environmnet "
104
+ "variable to true.",
105
+ code_example='TF_FORCE_GPU_ALLOW_GROWTH="true" flower-simulation <...>',
106
+ )
107
+
108
+ # We are supporting two modes for the CLI entrypoint:
109
+ # 1) Running an app dir containing a `pyproject.toml`
110
+ # 2) Running any ClientApp and SeverApp w/o pyproject.toml being present
111
+ # For 2), some CLI args are compulsory, but they are not required for 1)
112
+ # We first do these checks
113
+ args_check_pass = _check_args_do_not_interfere(args)
114
+ if not args_check_pass:
115
+ sys.exit("Simulation Engine cannot start.")
116
+
117
+ run_id = (
118
+ generate_rand_int_from_bytes(RUN_ID_NUM_BYTES)
119
+ if args.run_id is None
120
+ else args.run_id
121
+ )
122
+ if args.app:
123
+ # Mode 1
124
+ app_path = Path(args.app)
125
+ if not app_path.is_dir():
126
+ log(ERROR, "--app is not a directory")
127
+ sys.exit("Simulation Engine cannot start.")
128
+
129
+ # Load pyproject.toml
130
+ config, errors, warnings = load_and_validate(
131
+ app_path / "pyproject.toml", check_module=False
132
+ )
133
+ if errors:
134
+ raise ValueError(errors)
135
+
136
+ if warnings:
137
+ log(WARNING, warnings)
138
+
139
+ if config is None:
140
+ raise ValueError("Config extracted from FAB's pyproject.toml is not valid")
141
+
142
+ # Get ClientApp and SeverApp components
143
+ app_components = config["tool"]["flwr"]["app"]["components"]
144
+ client_app_attr = app_components["clientapp"]
145
+ server_app_attr = app_components["serverapp"]
146
+
147
+ override_config = parse_config_args([args.run_config])
148
+ fused_config = get_fused_config_from_dir(app_path, override_config)
149
+ app_dir = args.app
150
+ is_app = True
151
+
152
+ else:
153
+ # Mode 2
154
+ client_app_attr = args.client_app
155
+ server_app_attr = args.server_app
156
+ override_config = {}
157
+ fused_config = None
158
+ app_dir = args.app_dir
159
+ is_app = False
160
+
161
+ # Create run
162
+ run = Run(
163
+ run_id=run_id,
164
+ fab_id="",
165
+ fab_version="",
166
+ override_config=override_config,
167
+ )
168
+
48
169
  # Load JSON config
49
170
  backend_config_dict = json.loads(args.backend_config)
50
171
 
51
172
  _run_simulation(
52
- server_app_attr=args.server_app,
53
- client_app_attr=args.client_app,
173
+ server_app_attr=server_app_attr,
174
+ client_app_attr=client_app_attr,
54
175
  num_supernodes=args.num_supernodes,
55
176
  backend_name=args.backend,
56
177
  backend_config=backend_config_dict,
57
- app_dir=args.app_dir,
58
- driver_api_address=args.driver_api_address,
178
+ app_dir=app_dir,
179
+ run=run,
59
180
  enable_tf_gpu_growth=args.enable_tf_gpu_growth,
60
181
  verbose_logging=args.verbose,
182
+ server_app_run_config=fused_config,
183
+ is_app=is_app,
61
184
  )
62
185
 
63
186
 
@@ -68,7 +191,7 @@ def run_simulation(
68
191
  client_app: ClientApp,
69
192
  num_supernodes: int,
70
193
  backend_name: str = "ray",
71
- backend_config: Optional[Dict[str, ConfigsRecordValues]] = None,
194
+ backend_config: Optional[BackendConfig] = None,
72
195
  enable_tf_gpu_growth: bool = False,
73
196
  verbose_logging: bool = False,
74
197
  ) -> None:
@@ -92,9 +215,12 @@ def run_simulation(
92
215
  backend_name : str (default: ray)
93
216
  A simulation backend that runs `ClientApp`s.
94
217
 
95
- backend_config : Optional[Dict[str, ConfigsRecordValues]]
96
- 'A dictionary, e.g {"<keyA>": <value>, "<keyB>": <value>} to configure a
97
- backend. Values supported in <value> are those included by
218
+ backend_config : Optional[BackendConfig]
219
+ 'A dictionary to configure a backend. Separate dictionaries to configure
220
+ different elements of backend. Supported top-level keys are `init_args`
221
+ for values parsed to initialisation of backend, `client_resources`
222
+ to define the resources for clients, and `actor` to define the actor
223
+ parameters. Values supported in <value> are those included by
98
224
  `flwr.common.typing.ConfigsRecordValues`.
99
225
 
100
226
  enable_tf_gpu_growth : bool (default: False)
@@ -106,9 +232,18 @@ def run_simulation(
106
232
  works in the TensorFlow documentation: https://www.tensorflow.org/api/stable.
107
233
 
108
234
  verbose_logging : bool (default: False)
109
- When diabled, only INFO, WARNING and ERROR log messages will be shown. If
235
+ When disabled, only INFO, WARNING and ERROR log messages will be shown. If
110
236
  enabled, DEBUG-level logs will be displayed.
111
237
  """
238
+ if enable_tf_gpu_growth:
239
+ warn_deprecated_feature_with_example(
240
+ "Passing `enable_tf_gpu_growth=True` is deprecated.",
241
+ example_message="Instead, set the `TF_FORCE_GPU_ALLOW_GROWTH` environmnet "
242
+ "variable to true.",
243
+ code_example='import os;os.environ["TF_FORCE_GPU_ALLOW_GROWTH"]="true"'
244
+ "\n\tflwr.simulation.run_simulationt(...)",
245
+ )
246
+
112
247
  _run_simulation(
113
248
  num_supernodes=num_supernodes,
114
249
  client_app=client_app,
@@ -124,47 +259,67 @@ def run_simulation(
124
259
  def run_serverapp_th(
125
260
  server_app_attr: Optional[str],
126
261
  server_app: Optional[ServerApp],
262
+ server_app_run_config: UserConfig,
127
263
  driver: Driver,
128
264
  app_dir: str,
129
- f_stop: asyncio.Event,
265
+ f_stop: threading.Event,
266
+ has_exception: threading.Event,
130
267
  enable_tf_gpu_growth: bool,
131
268
  delay_launch: int = 3,
132
269
  ) -> threading.Thread:
133
270
  """Run SeverApp in a thread."""
134
271
 
135
- def server_th_with_start_checks( # type: ignore
136
- tf_gpu_growth: bool, stop_event: asyncio.Event, **kwargs
272
+ def server_th_with_start_checks(
273
+ tf_gpu_growth: bool,
274
+ stop_event: threading.Event,
275
+ exception_event: threading.Event,
276
+ _driver: Driver,
277
+ _server_app_dir: str,
278
+ _server_app_run_config: UserConfig,
279
+ _server_app_attr: Optional[str],
280
+ _server_app: Optional[ServerApp],
137
281
  ) -> None:
138
- """Run SeverApp, after check if GPU memory grouwth has to be set.
282
+ """Run SeverApp, after check if GPU memory growth has to be set.
139
283
 
140
284
  Upon exception, trigger stop event for Simulation Engine.
141
285
  """
142
286
  try:
143
287
  if tf_gpu_growth:
144
- log(INFO, "Enabling GPU growth for Tensorflow on the main thread.")
288
+ log(INFO, "Enabling GPU growth for Tensorflow on the server thread.")
145
289
  enable_gpu_growth()
146
290
 
147
291
  # Run ServerApp
148
- run(**kwargs)
292
+ run_server_app(
293
+ driver=_driver,
294
+ server_app_dir=_server_app_dir,
295
+ server_app_run_config=_server_app_run_config,
296
+ server_app_attr=_server_app_attr,
297
+ loaded_server_app=_server_app,
298
+ )
149
299
  except Exception as ex: # pylint: disable=broad-exception-caught
150
300
  log(ERROR, "ServerApp thread raised an exception: %s", ex)
151
301
  log(ERROR, traceback.format_exc())
302
+ exception_event.set()
303
+ raise
152
304
  finally:
153
305
  log(DEBUG, "ServerApp finished running.")
154
306
  # Upon completion, trigger stop event if one was passed
155
307
  if stop_event is not None:
156
308
  stop_event.set()
157
- log(WARNING, "Triggered stop event for Simulation Engine.")
309
+ log(DEBUG, "Triggered stop event for Simulation Engine.")
158
310
 
159
311
  serverapp_th = threading.Thread(
160
312
  target=server_th_with_start_checks,
161
- args=(enable_tf_gpu_growth, f_stop),
162
- kwargs={
163
- "server_app_attr": server_app_attr,
164
- "loaded_server_app": server_app,
165
- "driver": driver,
166
- "server_app_dir": app_dir,
167
- },
313
+ args=(
314
+ enable_tf_gpu_growth,
315
+ f_stop,
316
+ has_exception,
317
+ driver,
318
+ app_dir,
319
+ server_app_run_config,
320
+ server_app_attr,
321
+ server_app,
322
+ ),
168
323
  )
169
324
  sleep(delay_launch)
170
325
  serverapp_th.start()
@@ -176,46 +331,45 @@ def _main_loop(
176
331
  num_supernodes: int,
177
332
  backend_name: str,
178
333
  backend_config_stream: str,
179
- driver_api_address: str,
180
334
  app_dir: str,
335
+ is_app: bool,
181
336
  enable_tf_gpu_growth: bool,
337
+ run: Run,
338
+ flwr_dir: Optional[str] = None,
182
339
  client_app: Optional[ClientApp] = None,
183
340
  client_app_attr: Optional[str] = None,
184
341
  server_app: Optional[ServerApp] = None,
185
342
  server_app_attr: Optional[str] = None,
343
+ server_app_run_config: Optional[UserConfig] = None,
186
344
  ) -> None:
187
- """Launch SuperLink with Simulation Engine, then ServerApp on a separate thread.
188
-
189
- Everything runs on the main thread or a separate one, depening on whether the main
190
- thread already contains a running Asyncio event loop. This is the case if running
191
- the Simulation Engine on a Jupyter/Colab notebook.
192
- """
345
+ """Launch SuperLink with Simulation Engine, then ServerApp on a separate thread."""
193
346
  # Initialize StateFactory
194
347
  state_factory = StateFactory(":flwr-in-memory-state:")
195
348
 
196
- # Start Driver API
197
- driver_server: grpc.Server = run_driver_api_grpc(
198
- address=driver_api_address,
199
- state_factory=state_factory,
200
- certificates=None,
201
- )
202
-
203
- f_stop = asyncio.Event()
349
+ f_stop = threading.Event()
350
+ # A Threading event to indicate if an exception was raised in the ServerApp thread
351
+ server_app_thread_has_exception = threading.Event()
204
352
  serverapp_th = None
205
353
  try:
354
+ # Register run
355
+ log(DEBUG, "Pre-registering run with id %s", run.run_id)
356
+ state_factory.state().run_ids[run.run_id] = run # type: ignore
357
+
358
+ if server_app_run_config is None:
359
+ server_app_run_config = {}
360
+
206
361
  # Initialize Driver
207
- driver = Driver(
208
- driver_service_address=driver_api_address,
209
- root_certificates=None,
210
- )
362
+ driver = InMemoryDriver(run_id=run.run_id, state_factory=state_factory)
211
363
 
212
364
  # Get and run ServerApp thread
213
365
  serverapp_th = run_serverapp_th(
214
366
  server_app_attr=server_app_attr,
215
367
  server_app=server_app,
368
+ server_app_run_config=server_app_run_config,
216
369
  driver=driver,
217
370
  app_dir=app_dir,
218
371
  f_stop=f_stop,
372
+ has_exception=server_app_thread_has_exception,
219
373
  enable_tf_gpu_growth=enable_tf_gpu_growth,
220
374
  )
221
375
 
@@ -228,8 +382,11 @@ def _main_loop(
228
382
  backend_name=backend_name,
229
383
  backend_config_json_stream=backend_config_stream,
230
384
  app_dir=app_dir,
385
+ is_app=is_app,
231
386
  state_factory=state_factory,
232
387
  f_stop=f_stop,
388
+ run=run,
389
+ flwr_dir=flwr_dir,
233
390
  )
234
391
 
235
392
  except Exception as ex:
@@ -238,17 +395,16 @@ def _main_loop(
238
395
  raise RuntimeError("An error was encountered. Ending simulation.") from ex
239
396
 
240
397
  finally:
241
- # Stop Driver
242
- driver_server.stop(grace=0)
243
- driver.close()
244
398
  # Trigger stop event
245
399
  f_stop.set()
246
400
 
247
401
  event(EventType.RUN_SUPERLINK_LEAVE)
248
402
  if serverapp_th:
249
403
  serverapp_th.join()
404
+ if server_app_thread_has_exception.is_set():
405
+ raise RuntimeError("Exception in ServerApp thread")
250
406
 
251
- log(INFO, "Stopping Simulation Engine now.")
407
+ log(DEBUG, "Stopping Simulation Engine now.")
252
408
 
253
409
 
254
410
  # pylint: disable=too-many-arguments,too-many-locals
@@ -257,13 +413,16 @@ def _run_simulation(
257
413
  client_app: Optional[ClientApp] = None,
258
414
  server_app: Optional[ServerApp] = None,
259
415
  backend_name: str = "ray",
260
- backend_config: Optional[Dict[str, ConfigsRecordValues]] = None,
416
+ backend_config: Optional[BackendConfig] = None,
261
417
  client_app_attr: Optional[str] = None,
262
418
  server_app_attr: Optional[str] = None,
419
+ server_app_run_config: Optional[UserConfig] = None,
263
420
  app_dir: str = "",
264
- driver_api_address: str = "0.0.0.0:9091",
421
+ flwr_dir: Optional[str] = None,
422
+ run: Optional[Run] = None,
265
423
  enable_tf_gpu_growth: bool = False,
266
424
  verbose_logging: bool = False,
425
+ is_app: bool = False,
267
426
  ) -> None:
268
427
  r"""Launch the Simulation Engine.
269
428
 
@@ -284,93 +443,130 @@ def _run_simulation(
284
443
  backend_name : str (default: ray)
285
444
  A simulation backend that runs `ClientApp`s.
286
445
 
287
- backend_config : Optional[Dict[str, ConfigsRecordValues]]
288
- 'A dictionary, e.g {"<keyA>":<value>, "<keyB>":<value>} to configure a
289
- backend. Values supported in <value> are those included by
446
+ backend_config : Optional[BackendConfig]
447
+ 'A dictionary to configure a backend. Separate dictionaries to configure
448
+ different elements of backend. Supported top-level keys are `init_args`
449
+ for values passed to initialisation of backend, `client_resources`
450
+ to define the resources for clients, and `actor` to define the actor
451
+ parameters. Values supported in <value> are those included by
290
452
  `flwr.common.typing.ConfigsRecordValues`.
291
453
 
292
- client_app_attr : str
454
+ client_app_attr : Optional[str]
293
455
  A path to a `ClientApp` module to be loaded: For example: `client:app` or
294
456
  `project.package.module:wrapper.app`."
295
457
 
296
- server_app_attr : str
458
+ server_app_attr : Optional[str]
297
459
  A path to a `ServerApp` module to be loaded: For example: `server:app` or
298
460
  `project.package.module:wrapper.app`."
299
461
 
462
+ server_app_run_config : Optional[UserConfig]
463
+ Config dictionary that parameterizes the run config. It will be made accessible
464
+ to the ServerApp.
465
+
300
466
  app_dir : str
301
467
  Add specified directory to the PYTHONPATH and load `ClientApp` from there.
302
468
  (Default: current working directory.)
303
469
 
304
- driver_api_address : str (default: "0.0.0.0:9091")
305
- Driver API (gRPC) server address (IPv4, IPv6, or a domain name)
470
+ flwr_dir : Optional[str]
471
+ The path containing installed Flower Apps.
472
+
473
+ run : Optional[Run]
474
+ An object carrying details about the run.
306
475
 
307
476
  enable_tf_gpu_growth : bool (default: False)
308
477
  A boolean to indicate whether to enable GPU growth on the main thread. This is
309
478
  desirable if you make use of a TensorFlow model on your `ServerApp` while
310
479
  having your `ClientApp` running on the same GPU. Without enabling this, you
311
- might encounter an out-of-memory error becasue TensorFlow by default allocates
480
+ might encounter an out-of-memory error because TensorFlow by default allocates
312
481
  all GPU memory. Read more about how `tf.config.experimental.set_memory_growth()`
313
482
  works in the TensorFlow documentation: https://www.tensorflow.org/api/stable.
314
483
 
315
484
  verbose_logging : bool (default: False)
316
- When diabled, only INFO, WARNING and ERROR log messages will be shown. If
485
+ When disabled, only INFO, WARNING and ERROR log messages will be shown. If
317
486
  enabled, DEBUG-level logs will be displayed.
318
- """
319
- # Set logging level
320
- if not verbose_logging:
321
- logger = logging.getLogger("flwr")
322
- logger.setLevel(INFO)
323
487
 
488
+ is_app : bool (default: False)
489
+ A flag that indicates whether the simulation is running an app or not. This is
490
+ needed in order to attempt loading an app's pyproject.toml when nodes register
491
+ a context object.
492
+ """
324
493
  if backend_config is None:
325
494
  backend_config = {}
326
495
 
496
+ if "init_args" not in backend_config:
497
+ backend_config["init_args"] = {}
498
+
499
+ # Set default client_resources if not passed
500
+ if "client_resources" not in backend_config:
501
+ backend_config["client_resources"] = {"num_cpus": 2, "num_gpus": 0}
502
+
503
+ # Initialization of backend config to enable GPU growth globally when set
504
+ if "actor" not in backend_config:
505
+ backend_config["actor"] = {"tensorflow": 0}
506
+
507
+ # Set logging level
508
+ logger = logging.getLogger("flwr")
509
+ if verbose_logging:
510
+ update_console_handler(level=DEBUG, timestamps=True, colored=True)
511
+ else:
512
+ backend_config["init_args"]["logging_level"] = backend_config["init_args"].get(
513
+ "logging_level", WARNING
514
+ )
515
+ backend_config["init_args"]["log_to_driver"] = backend_config["init_args"].get(
516
+ "log_to_driver", True
517
+ )
518
+
327
519
  if enable_tf_gpu_growth:
328
520
  # Check that Backend config has also enabled using GPU growth
329
- use_tf = backend_config.get("tensorflow", False)
521
+ use_tf = backend_config.get("actor", {}).get("tensorflow", False)
330
522
  if not use_tf:
331
523
  log(WARNING, "Enabling GPU growth for your backend.")
332
- backend_config["tensorflow"] = True
524
+ backend_config["actor"]["tensorflow"] = True
333
525
 
334
526
  # Convert config to original JSON-stream format
335
527
  backend_config_stream = json.dumps(backend_config)
336
528
 
337
- simulation_engine_th = None
529
+ # If no `Run` object is set, create one
530
+ if run is None:
531
+ run_id = generate_rand_int_from_bytes(RUN_ID_NUM_BYTES)
532
+ run = Run(run_id=run_id, fab_id="", fab_version="", override_config={})
533
+
338
534
  args = (
339
535
  num_supernodes,
340
536
  backend_name,
341
537
  backend_config_stream,
342
- driver_api_address,
343
538
  app_dir,
539
+ is_app,
344
540
  enable_tf_gpu_growth,
541
+ run,
542
+ flwr_dir,
345
543
  client_app,
346
544
  client_app_attr,
347
545
  server_app,
348
546
  server_app_attr,
547
+ server_app_run_config,
349
548
  )
350
549
  # Detect if there is an Asyncio event loop already running.
351
- # If yes, run everything on a separate thread. In environmnets
352
- # like Jupyter/Colab notebooks, there is an event loop present.
353
- run_in_thread = False
550
+ # If yes, disable logger propagation. In environments
551
+ # like Jupyter/Colab notebooks, it's often better to do this.
552
+ asyncio_loop_running = False
354
553
  try:
355
554
  _ = (
356
555
  asyncio.get_running_loop()
357
556
  ) # Raises RuntimeError if no event loop is present
358
557
  log(DEBUG, "Asyncio event loop already running.")
359
558
 
360
- run_in_thread = True
559
+ asyncio_loop_running = True
361
560
 
362
561
  except RuntimeError:
363
- log(DEBUG, "No asyncio event loop runnig")
562
+ pass
364
563
 
365
564
  finally:
366
- if run_in_thread:
367
- log(DEBUG, "Starting Simulation Engine on a new thread.")
368
- simulation_engine_th = threading.Thread(target=_main_loop, args=args)
369
- simulation_engine_th.start()
370
- simulation_engine_th.join()
371
- else:
372
- log(DEBUG, "Starting Simulation Engine on the main thread.")
373
- _main_loop(*args)
565
+ if asyncio_loop_running:
566
+ # Set logger propagation to False to prevent duplicated log output in Colab.
567
+ logger = set_logger_propagation(logger, False)
568
+
569
+ _main_loop(*args)
374
570
 
375
571
 
376
572
  def _parse_args_run_simulation() -> argparse.ArgumentParser:
@@ -380,12 +576,10 @@ def _parse_args_run_simulation() -> argparse.ArgumentParser:
380
576
  )
381
577
  parser.add_argument(
382
578
  "--server-app",
383
- required=True,
384
579
  help="For example: `server:app` or `project.package.module:wrapper.app`",
385
580
  )
386
581
  parser.add_argument(
387
582
  "--client-app",
388
- required=True,
389
583
  help="For example: `client:app` or `project.package.module:wrapper.app`",
390
584
  )
391
585
  parser.add_argument(
@@ -395,10 +589,16 @@ def _parse_args_run_simulation() -> argparse.ArgumentParser:
395
589
  help="Number of simulated SuperNodes.",
396
590
  )
397
591
  parser.add_argument(
398
- "--driver-api-address",
399
- default="0.0.0.0:9091",
592
+ "--app",
400
593
  type=str,
401
- help="For example: `server:app` or `project.package.module:wrapper.app`",
594
+ default=None,
595
+ help="Path to a directory containing a FAB-like structure with a "
596
+ "pyproject.toml.",
597
+ )
598
+ parser.add_argument(
599
+ "--run-config",
600
+ default=None,
601
+ help="Override configuration key-value pairs.",
402
602
  )
403
603
  parser.add_argument(
404
604
  "--backend",
@@ -409,7 +609,7 @@ def _parse_args_run_simulation() -> argparse.ArgumentParser:
409
609
  parser.add_argument(
410
610
  "--backend-config",
411
611
  type=str,
412
- default='{"client_resources": {"num_cpus":2, "num_gpus":0.0}, "tensorflow": 0}',
612
+ default="{}",
413
613
  help='A JSON formatted stream, e.g \'{"<keyA>":<value>, "<keyB>":<value>}\' to '
414
614
  "configure a backend. Values supported in <value> are those included by "
415
615
  "`flwr.common.typing.ConfigsRecordValues`. ",
@@ -437,5 +637,21 @@ def _parse_args_run_simulation() -> argparse.ArgumentParser:
437
637
  "ClientApp and ServerApp from there."
438
638
  " Default: current working directory.",
439
639
  )
640
+ parser.add_argument(
641
+ "--flwr-dir",
642
+ default=None,
643
+ help="""The path containing installed Flower Apps.
644
+ By default, this value is equal to:
645
+
646
+ - `$FLWR_HOME/` if `$FLWR_HOME` is defined
647
+ - `$XDG_DATA_HOME/.flwr/` if `$XDG_DATA_HOME` is defined
648
+ - `$HOME/.flwr/` in all other cases
649
+ """,
650
+ )
651
+ parser.add_argument(
652
+ "--run-id",
653
+ type=int,
654
+ help="Sets the ID of the run started by the Simulation Engine.",
655
+ )
440
656
 
441
657
  return parser
@@ -0,0 +1,21 @@
1
+ # Copyright 2024 Flower Labs GmbH. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+ """Flower SuperExec service."""
16
+
17
+ from .app import run_superexec as run_superexec
18
+
19
+ __all__ = [
20
+ "run_superexec",
21
+ ]