flwr-nightly 1.8.0.dev20240314__py3-none-any.whl → 1.15.0.dev20250114__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (311) hide show
  1. flwr/cli/app.py +16 -2
  2. flwr/cli/build.py +181 -0
  3. flwr/cli/cli_user_auth_interceptor.py +90 -0
  4. flwr/cli/config_utils.py +343 -0
  5. flwr/cli/example.py +4 -1
  6. flwr/cli/install.py +253 -0
  7. flwr/cli/log.py +182 -0
  8. flwr/{server/superlink/state → cli/login}/__init__.py +4 -10
  9. flwr/cli/login/login.py +88 -0
  10. flwr/cli/ls.py +327 -0
  11. flwr/cli/new/__init__.py +1 -0
  12. flwr/cli/new/new.py +210 -66
  13. flwr/cli/new/templates/app/.gitignore.tpl +163 -0
  14. flwr/cli/new/templates/app/LICENSE.tpl +202 -0
  15. flwr/cli/new/templates/app/README.baseline.md.tpl +127 -0
  16. flwr/cli/new/templates/app/README.flowertune.md.tpl +66 -0
  17. flwr/cli/new/templates/app/README.md.tpl +16 -32
  18. flwr/cli/new/templates/app/code/__init__.baseline.py.tpl +1 -0
  19. flwr/cli/new/templates/app/code/__init__.py.tpl +1 -1
  20. flwr/cli/new/templates/app/code/client.baseline.py.tpl +58 -0
  21. flwr/cli/new/templates/app/code/client.huggingface.py.tpl +55 -0
  22. flwr/cli/new/templates/app/code/client.jax.py.tpl +50 -0
  23. flwr/cli/new/templates/app/code/client.mlx.py.tpl +73 -0
  24. flwr/cli/new/templates/app/code/client.numpy.py.tpl +7 -7
  25. flwr/cli/new/templates/app/code/client.pytorch.py.tpl +30 -21
  26. flwr/cli/new/templates/app/code/client.sklearn.py.tpl +63 -0
  27. flwr/cli/new/templates/app/code/client.tensorflow.py.tpl +57 -1
  28. flwr/cli/new/templates/app/code/dataset.baseline.py.tpl +36 -0
  29. flwr/cli/new/templates/app/code/flwr_tune/__init__.py +15 -0
  30. flwr/cli/new/templates/app/code/flwr_tune/client_app.py.tpl +126 -0
  31. flwr/cli/new/templates/app/code/flwr_tune/dataset.py.tpl +87 -0
  32. flwr/cli/new/templates/app/code/flwr_tune/models.py.tpl +78 -0
  33. flwr/cli/new/templates/app/code/flwr_tune/server_app.py.tpl +94 -0
  34. flwr/cli/new/templates/app/code/flwr_tune/strategy.py.tpl +83 -0
  35. flwr/cli/new/templates/app/code/model.baseline.py.tpl +80 -0
  36. flwr/cli/new/templates/app/code/server.baseline.py.tpl +46 -0
  37. flwr/cli/new/templates/app/code/server.huggingface.py.tpl +38 -0
  38. flwr/cli/new/templates/app/code/server.jax.py.tpl +26 -0
  39. flwr/cli/new/templates/app/code/server.mlx.py.tpl +31 -0
  40. flwr/cli/new/templates/app/code/server.numpy.py.tpl +22 -9
  41. flwr/cli/new/templates/app/code/server.pytorch.py.tpl +21 -18
  42. flwr/cli/new/templates/app/code/server.sklearn.py.tpl +36 -0
  43. flwr/cli/new/templates/app/code/server.tensorflow.py.tpl +29 -1
  44. flwr/cli/new/templates/app/code/strategy.baseline.py.tpl +1 -0
  45. flwr/cli/new/templates/app/code/task.huggingface.py.tpl +102 -0
  46. flwr/cli/new/templates/app/code/task.jax.py.tpl +57 -0
  47. flwr/cli/new/templates/app/code/task.mlx.py.tpl +102 -0
  48. flwr/cli/new/templates/app/code/task.numpy.py.tpl +7 -0
  49. flwr/cli/new/templates/app/code/task.pytorch.py.tpl +29 -24
  50. flwr/cli/new/templates/app/code/task.sklearn.py.tpl +67 -0
  51. flwr/cli/new/templates/app/code/task.tensorflow.py.tpl +53 -0
  52. flwr/cli/new/templates/app/code/utils.baseline.py.tpl +1 -0
  53. flwr/cli/new/templates/app/pyproject.baseline.toml.tpl +138 -0
  54. flwr/cli/new/templates/app/pyproject.flowertune.toml.tpl +68 -0
  55. flwr/cli/new/templates/app/pyproject.huggingface.toml.tpl +46 -0
  56. flwr/cli/new/templates/app/pyproject.jax.toml.tpl +35 -0
  57. flwr/cli/new/templates/app/pyproject.mlx.toml.tpl +39 -0
  58. flwr/cli/new/templates/app/pyproject.numpy.toml.tpl +25 -12
  59. flwr/cli/new/templates/app/pyproject.pytorch.toml.tpl +29 -14
  60. flwr/cli/new/templates/app/pyproject.sklearn.toml.tpl +35 -0
  61. flwr/cli/new/templates/app/pyproject.tensorflow.toml.tpl +29 -14
  62. flwr/cli/run/__init__.py +1 -0
  63. flwr/cli/run/run.py +212 -34
  64. flwr/cli/stop.py +130 -0
  65. flwr/cli/utils.py +240 -5
  66. flwr/client/__init__.py +3 -2
  67. flwr/client/app.py +432 -255
  68. flwr/client/client.py +1 -11
  69. flwr/client/client_app.py +74 -13
  70. flwr/client/clientapp/__init__.py +22 -0
  71. flwr/client/clientapp/app.py +259 -0
  72. flwr/client/clientapp/clientappio_servicer.py +244 -0
  73. flwr/client/clientapp/utils.py +115 -0
  74. flwr/client/dpfedavg_numpy_client.py +7 -8
  75. flwr/client/grpc_adapter_client/__init__.py +15 -0
  76. flwr/client/grpc_adapter_client/connection.py +98 -0
  77. flwr/client/grpc_client/connection.py +21 -7
  78. flwr/client/grpc_rere_client/__init__.py +1 -1
  79. flwr/client/grpc_rere_client/client_interceptor.py +176 -0
  80. flwr/client/grpc_rere_client/connection.py +163 -56
  81. flwr/client/grpc_rere_client/grpc_adapter.py +167 -0
  82. flwr/client/heartbeat.py +74 -0
  83. flwr/client/message_handler/__init__.py +1 -1
  84. flwr/client/message_handler/message_handler.py +10 -11
  85. flwr/client/mod/__init__.py +5 -5
  86. flwr/client/mod/centraldp_mods.py +4 -2
  87. flwr/client/mod/comms_mods.py +5 -4
  88. flwr/client/mod/localdp_mod.py +10 -5
  89. flwr/client/mod/secure_aggregation/__init__.py +1 -1
  90. flwr/client/mod/secure_aggregation/secaggplus_mod.py +26 -26
  91. flwr/client/mod/utils.py +2 -4
  92. flwr/client/nodestate/__init__.py +26 -0
  93. flwr/client/nodestate/in_memory_nodestate.py +38 -0
  94. flwr/client/nodestate/nodestate.py +31 -0
  95. flwr/client/nodestate/nodestate_factory.py +38 -0
  96. flwr/client/numpy_client.py +8 -31
  97. flwr/client/rest_client/__init__.py +1 -1
  98. flwr/client/rest_client/connection.py +199 -176
  99. flwr/client/run_info_store.py +112 -0
  100. flwr/client/supernode/__init__.py +24 -0
  101. flwr/client/supernode/app.py +321 -0
  102. flwr/client/typing.py +1 -0
  103. flwr/common/__init__.py +17 -11
  104. flwr/common/address.py +47 -3
  105. flwr/common/args.py +153 -0
  106. flwr/common/auth_plugin/__init__.py +24 -0
  107. flwr/common/auth_plugin/auth_plugin.py +121 -0
  108. flwr/common/config.py +243 -0
  109. flwr/common/constant.py +132 -1
  110. flwr/common/context.py +32 -2
  111. flwr/common/date.py +22 -4
  112. flwr/common/differential_privacy.py +2 -2
  113. flwr/common/dp.py +2 -4
  114. flwr/common/exit_handlers.py +3 -3
  115. flwr/common/grpc.py +164 -5
  116. flwr/common/logger.py +230 -12
  117. flwr/common/message.py +191 -106
  118. flwr/common/object_ref.py +179 -44
  119. flwr/common/pyproject.py +1 -0
  120. flwr/common/record/__init__.py +2 -1
  121. flwr/common/record/configsrecord.py +58 -18
  122. flwr/common/record/metricsrecord.py +57 -17
  123. flwr/common/record/parametersrecord.py +88 -20
  124. flwr/common/record/recordset.py +153 -30
  125. flwr/common/record/typeddict.py +30 -55
  126. flwr/common/recordset_compat.py +31 -12
  127. flwr/common/retry_invoker.py +123 -30
  128. flwr/common/secure_aggregation/__init__.py +1 -1
  129. flwr/common/secure_aggregation/crypto/__init__.py +1 -1
  130. flwr/common/secure_aggregation/crypto/shamir.py +11 -11
  131. flwr/common/secure_aggregation/crypto/symmetric_encryption.py +68 -4
  132. flwr/common/secure_aggregation/ndarrays_arithmetic.py +17 -17
  133. flwr/common/secure_aggregation/quantization.py +8 -8
  134. flwr/common/secure_aggregation/secaggplus_constants.py +1 -1
  135. flwr/common/secure_aggregation/secaggplus_utils.py +10 -12
  136. flwr/common/serde.py +298 -19
  137. flwr/common/telemetry.py +65 -29
  138. flwr/common/typing.py +120 -19
  139. flwr/common/version.py +17 -3
  140. flwr/proto/clientappio_pb2.py +45 -0
  141. flwr/proto/clientappio_pb2.pyi +132 -0
  142. flwr/proto/clientappio_pb2_grpc.py +135 -0
  143. flwr/proto/clientappio_pb2_grpc.pyi +53 -0
  144. flwr/proto/exec_pb2.py +62 -0
  145. flwr/proto/exec_pb2.pyi +212 -0
  146. flwr/proto/exec_pb2_grpc.py +237 -0
  147. flwr/proto/exec_pb2_grpc.pyi +93 -0
  148. flwr/proto/fab_pb2.py +31 -0
  149. flwr/proto/fab_pb2.pyi +65 -0
  150. flwr/proto/fab_pb2_grpc.py +4 -0
  151. flwr/proto/fab_pb2_grpc.pyi +4 -0
  152. flwr/proto/fleet_pb2.py +42 -23
  153. flwr/proto/fleet_pb2.pyi +123 -1
  154. flwr/proto/fleet_pb2_grpc.py +170 -0
  155. flwr/proto/fleet_pb2_grpc.pyi +61 -0
  156. flwr/proto/grpcadapter_pb2.py +32 -0
  157. flwr/proto/grpcadapter_pb2.pyi +43 -0
  158. flwr/proto/grpcadapter_pb2_grpc.py +66 -0
  159. flwr/proto/grpcadapter_pb2_grpc.pyi +24 -0
  160. flwr/proto/log_pb2.py +29 -0
  161. flwr/proto/log_pb2.pyi +39 -0
  162. flwr/proto/log_pb2_grpc.py +4 -0
  163. flwr/proto/log_pb2_grpc.pyi +4 -0
  164. flwr/proto/message_pb2.py +41 -0
  165. flwr/proto/message_pb2.pyi +128 -0
  166. flwr/proto/message_pb2_grpc.py +4 -0
  167. flwr/proto/message_pb2_grpc.pyi +4 -0
  168. flwr/proto/node_pb2.py +1 -1
  169. flwr/proto/recordset_pb2.py +35 -33
  170. flwr/proto/recordset_pb2.pyi +40 -14
  171. flwr/proto/run_pb2.py +64 -0
  172. flwr/proto/run_pb2.pyi +268 -0
  173. flwr/proto/run_pb2_grpc.py +4 -0
  174. flwr/proto/run_pb2_grpc.pyi +4 -0
  175. flwr/proto/serverappio_pb2.py +52 -0
  176. flwr/proto/{driver_pb2.pyi → serverappio_pb2.pyi} +62 -20
  177. flwr/proto/serverappio_pb2_grpc.py +410 -0
  178. flwr/proto/serverappio_pb2_grpc.pyi +160 -0
  179. flwr/proto/simulationio_pb2.py +38 -0
  180. flwr/proto/simulationio_pb2.pyi +65 -0
  181. flwr/proto/simulationio_pb2_grpc.py +239 -0
  182. flwr/proto/simulationio_pb2_grpc.pyi +94 -0
  183. flwr/proto/task_pb2.py +7 -8
  184. flwr/proto/task_pb2.pyi +8 -5
  185. flwr/proto/transport_pb2.py +8 -8
  186. flwr/proto/transport_pb2.pyi +9 -6
  187. flwr/server/__init__.py +2 -10
  188. flwr/server/app.py +579 -402
  189. flwr/server/client_manager.py +8 -6
  190. flwr/server/compat/app.py +6 -62
  191. flwr/server/compat/app_utils.py +14 -8
  192. flwr/server/compat/driver_client_proxy.py +25 -58
  193. flwr/server/compat/legacy_context.py +5 -4
  194. flwr/server/driver/__init__.py +2 -0
  195. flwr/server/driver/driver.py +36 -131
  196. flwr/server/driver/grpc_driver.py +217 -81
  197. flwr/server/driver/inmemory_driver.py +182 -0
  198. flwr/server/history.py +28 -29
  199. flwr/server/run_serverapp.py +15 -126
  200. flwr/server/server.py +50 -44
  201. flwr/server/server_app.py +59 -10
  202. flwr/server/serverapp/__init__.py +22 -0
  203. flwr/server/serverapp/app.py +256 -0
  204. flwr/server/serverapp_components.py +52 -0
  205. flwr/server/strategy/__init__.py +2 -2
  206. flwr/server/strategy/aggregate.py +37 -23
  207. flwr/server/strategy/bulyan.py +9 -9
  208. flwr/server/strategy/dp_adaptive_clipping.py +25 -25
  209. flwr/server/strategy/dp_fixed_clipping.py +23 -22
  210. flwr/server/strategy/dpfedavg_adaptive.py +8 -8
  211. flwr/server/strategy/dpfedavg_fixed.py +13 -12
  212. flwr/server/strategy/fault_tolerant_fedavg.py +11 -11
  213. flwr/server/strategy/fedadagrad.py +9 -9
  214. flwr/server/strategy/fedadam.py +20 -10
  215. flwr/server/strategy/fedavg.py +16 -16
  216. flwr/server/strategy/fedavg_android.py +17 -17
  217. flwr/server/strategy/fedavgm.py +9 -9
  218. flwr/server/strategy/fedmedian.py +5 -5
  219. flwr/server/strategy/fedopt.py +6 -6
  220. flwr/server/strategy/fedprox.py +7 -7
  221. flwr/server/strategy/fedtrimmedavg.py +8 -8
  222. flwr/server/strategy/fedxgb_bagging.py +12 -12
  223. flwr/server/strategy/fedxgb_cyclic.py +10 -10
  224. flwr/server/strategy/fedxgb_nn_avg.py +6 -6
  225. flwr/server/strategy/fedyogi.py +9 -9
  226. flwr/server/strategy/krum.py +9 -9
  227. flwr/server/strategy/qfedavg.py +16 -16
  228. flwr/server/strategy/strategy.py +10 -10
  229. flwr/server/superlink/driver/__init__.py +2 -2
  230. flwr/server/superlink/driver/serverappio_grpc.py +61 -0
  231. flwr/server/superlink/driver/serverappio_servicer.py +363 -0
  232. flwr/server/superlink/ffs/__init__.py +24 -0
  233. flwr/server/superlink/ffs/disk_ffs.py +108 -0
  234. flwr/server/superlink/ffs/ffs.py +79 -0
  235. flwr/server/superlink/ffs/ffs_factory.py +47 -0
  236. flwr/server/superlink/fleet/__init__.py +1 -1
  237. flwr/server/superlink/fleet/grpc_adapter/__init__.py +15 -0
  238. flwr/server/superlink/fleet/grpc_adapter/grpc_adapter_servicer.py +162 -0
  239. flwr/server/superlink/fleet/grpc_bidi/__init__.py +1 -1
  240. flwr/server/superlink/fleet/grpc_bidi/flower_service_servicer.py +4 -2
  241. flwr/server/superlink/fleet/grpc_bidi/grpc_bridge.py +3 -2
  242. flwr/server/superlink/fleet/grpc_bidi/grpc_client_proxy.py +1 -1
  243. flwr/server/superlink/fleet/grpc_bidi/grpc_server.py +5 -154
  244. flwr/server/superlink/fleet/grpc_rere/__init__.py +1 -1
  245. flwr/server/superlink/fleet/grpc_rere/fleet_servicer.py +120 -13
  246. flwr/server/superlink/fleet/grpc_rere/server_interceptor.py +228 -0
  247. flwr/server/superlink/fleet/message_handler/__init__.py +1 -1
  248. flwr/server/superlink/fleet/message_handler/message_handler.py +153 -9
  249. flwr/server/superlink/fleet/rest_rere/__init__.py +1 -1
  250. flwr/server/superlink/fleet/rest_rere/rest_api.py +119 -81
  251. flwr/server/superlink/fleet/vce/__init__.py +1 -0
  252. flwr/server/superlink/fleet/vce/backend/__init__.py +4 -4
  253. flwr/server/superlink/fleet/vce/backend/backend.py +8 -9
  254. flwr/server/superlink/fleet/vce/backend/raybackend.py +87 -68
  255. flwr/server/superlink/fleet/vce/vce_api.py +208 -146
  256. flwr/server/superlink/linkstate/__init__.py +28 -0
  257. flwr/server/superlink/linkstate/in_memory_linkstate.py +581 -0
  258. flwr/server/superlink/linkstate/linkstate.py +389 -0
  259. flwr/server/superlink/{state/state_factory.py → linkstate/linkstate_factory.py} +19 -10
  260. flwr/server/superlink/linkstate/sqlite_linkstate.py +1236 -0
  261. flwr/server/superlink/linkstate/utils.py +389 -0
  262. flwr/server/superlink/simulation/__init__.py +15 -0
  263. flwr/server/superlink/simulation/simulationio_grpc.py +65 -0
  264. flwr/server/superlink/simulation/simulationio_servicer.py +186 -0
  265. flwr/server/superlink/utils.py +65 -0
  266. flwr/server/typing.py +2 -0
  267. flwr/server/utils/__init__.py +1 -1
  268. flwr/server/utils/tensorboard.py +5 -5
  269. flwr/server/utils/validator.py +31 -11
  270. flwr/server/workflow/default_workflows.py +70 -26
  271. flwr/server/workflow/secure_aggregation/secagg_workflow.py +1 -0
  272. flwr/server/workflow/secure_aggregation/secaggplus_workflow.py +40 -27
  273. flwr/simulation/__init__.py +12 -5
  274. flwr/simulation/app.py +247 -315
  275. flwr/simulation/legacy_app.py +402 -0
  276. flwr/simulation/ray_transport/__init__.py +1 -1
  277. flwr/simulation/ray_transport/ray_actor.py +42 -67
  278. flwr/simulation/ray_transport/ray_client_proxy.py +37 -17
  279. flwr/simulation/ray_transport/utils.py +1 -0
  280. flwr/simulation/run_simulation.py +306 -163
  281. flwr/simulation/simulationio_connection.py +89 -0
  282. flwr/superexec/__init__.py +15 -0
  283. flwr/superexec/app.py +59 -0
  284. flwr/superexec/deployment.py +188 -0
  285. flwr/superexec/exec_grpc.py +80 -0
  286. flwr/superexec/exec_servicer.py +231 -0
  287. flwr/superexec/exec_user_auth_interceptor.py +101 -0
  288. flwr/superexec/executor.py +96 -0
  289. flwr/superexec/simulation.py +124 -0
  290. {flwr_nightly-1.8.0.dev20240314.dist-info → flwr_nightly-1.15.0.dev20250114.dist-info}/METADATA +33 -26
  291. flwr_nightly-1.15.0.dev20250114.dist-info/RECORD +328 -0
  292. flwr_nightly-1.15.0.dev20250114.dist-info/entry_points.txt +12 -0
  293. flwr/cli/flower_toml.py +0 -140
  294. flwr/cli/new/templates/app/flower.toml.tpl +0 -13
  295. flwr/cli/new/templates/app/requirements.numpy.txt.tpl +0 -2
  296. flwr/cli/new/templates/app/requirements.pytorch.txt.tpl +0 -4
  297. flwr/cli/new/templates/app/requirements.tensorflow.txt.tpl +0 -4
  298. flwr/client/node_state.py +0 -48
  299. flwr/client/node_state_tests.py +0 -65
  300. flwr/proto/driver_pb2.py +0 -44
  301. flwr/proto/driver_pb2_grpc.py +0 -169
  302. flwr/proto/driver_pb2_grpc.pyi +0 -66
  303. flwr/server/superlink/driver/driver_grpc.py +0 -54
  304. flwr/server/superlink/driver/driver_servicer.py +0 -129
  305. flwr/server/superlink/state/in_memory_state.py +0 -230
  306. flwr/server/superlink/state/sqlite_state.py +0 -630
  307. flwr/server/superlink/state/state.py +0 -154
  308. flwr_nightly-1.8.0.dev20240314.dist-info/RECORD +0 -211
  309. flwr_nightly-1.8.0.dev20240314.dist-info/entry_points.txt +0 -9
  310. {flwr_nightly-1.8.0.dev20240314.dist-info → flwr_nightly-1.15.0.dev20250114.dist-info}/LICENSE +0 -0
  311. {flwr_nightly-1.8.0.dev20240314.dist-info → flwr_nightly-1.15.0.dev20250114.dist-info}/WHEEL +0 -0
flwr/simulation/app.py CHANGED
@@ -1,4 +1,4 @@
1
- # Copyright 2020 Flower Labs GmbH. All Rights Reserved.
1
+ # Copyright 2021 Flower Labs GmbH. All Rights Reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -12,340 +12,272 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
  # ==============================================================================
15
- """Flower simulation app."""
15
+ """Flower Simulation process."""
16
16
 
17
17
 
18
+ import argparse
18
19
  import sys
19
- import threading
20
- import traceback
21
- import warnings
22
- from logging import ERROR, INFO
23
- from typing import Any, Dict, List, Optional, Type, Union
24
-
25
- import ray
26
- from ray.util.scheduling_strategies import NodeAffinitySchedulingStrategy
27
-
28
- from flwr.client import ClientFn
20
+ from logging import DEBUG, ERROR, INFO
21
+ from queue import Queue
22
+ from time import sleep
23
+ from typing import Optional
24
+
25
+ from flwr.cli.config_utils import get_fab_metadata
26
+ from flwr.cli.install import install_from_fab
27
+ from flwr.cli.utils import get_sha256_hash
29
28
  from flwr.common import EventType, event
30
- from flwr.common.logger import log
31
- from flwr.server.client_manager import ClientManager
32
- from flwr.server.history import History
33
- from flwr.server.server import Server, init_defaults, run_fl
34
- from flwr.server.server_config import ServerConfig
35
- from flwr.server.strategy import Strategy
36
- from flwr.simulation.ray_transport.ray_actor import (
37
- ClientAppActor,
38
- VirtualClientEngineActor,
39
- VirtualClientEngineActorPool,
40
- pool_size_from_resources,
29
+ from flwr.common.args import add_args_flwr_app_common
30
+ from flwr.common.config import (
31
+ get_flwr_dir,
32
+ get_fused_config_from_dir,
33
+ get_project_config,
34
+ get_project_dir,
35
+ unflatten_dict,
41
36
  )
42
- from flwr.simulation.ray_transport.ray_client_proxy import RayActorClientProxy
43
-
44
- INVALID_ARGUMENTS_START_SIMULATION = """
45
- INVALID ARGUMENTS ERROR
46
-
47
- Invalid Arguments in method:
48
-
49
- `start_simulation(
50
- *,
51
- client_fn: ClientFn,
52
- num_clients: Optional[int] = None,
53
- clients_ids: Optional[List[str]] = None,
54
- client_resources: Optional[Dict[str, float]] = None,
55
- server: Optional[Server] = None,
56
- config: ServerConfig = None,
57
- strategy: Optional[Strategy] = None,
58
- client_manager: Optional[ClientManager] = None,
59
- ray_init_args: Optional[Dict[str, Any]] = None,
60
- ) -> None:`
61
-
62
- REASON:
63
- Method requires:
64
- - Either `num_clients`[int] or `clients_ids`[List[str]]
65
- to be set exclusively.
66
- OR
67
- - `len(clients_ids)` == `num_clients`
68
-
69
- """
70
-
71
-
72
- # pylint: disable=too-many-arguments,too-many-statements,too-many-branches
73
- def start_simulation(
74
- *,
75
- client_fn: ClientFn,
76
- num_clients: Optional[int] = None,
77
- clients_ids: Optional[List[str]] = None,
78
- client_resources: Optional[Dict[str, float]] = None,
79
- server: Optional[Server] = None,
80
- config: Optional[ServerConfig] = None,
81
- strategy: Optional[Strategy] = None,
82
- client_manager: Optional[ClientManager] = None,
83
- ray_init_args: Optional[Dict[str, Any]] = None,
84
- keep_initialised: Optional[bool] = False,
85
- actor_type: Type[VirtualClientEngineActor] = ClientAppActor,
86
- actor_kwargs: Optional[Dict[str, Any]] = None,
87
- actor_scheduling: Union[str, NodeAffinitySchedulingStrategy] = "DEFAULT",
88
- ) -> History:
89
- """Start a Ray-based Flower simulation server.
90
-
91
- Parameters
92
- ----------
93
- client_fn : ClientFn
94
- A function creating client instances. The function must take a single
95
- `str` argument called `cid`. It should return a single client instance
96
- of type Client. Note that the created client instances are ephemeral
97
- and will often be destroyed after a single method invocation. Since client
98
- instances are not long-lived, they should not attempt to carry state over
99
- method invocations. Any state required by the instance (model, dataset,
100
- hyperparameters, ...) should be (re-)created in either the call to `client_fn`
101
- or the call to any of the client methods (e.g., load evaluation data in the
102
- `evaluate` method itself).
103
- num_clients : Optional[int]
104
- The total number of clients in this simulation. This must be set if
105
- `clients_ids` is not set and vice-versa.
106
- clients_ids : Optional[List[str]]
107
- List `client_id`s for each client. This is only required if
108
- `num_clients` is not set. Setting both `num_clients` and `clients_ids`
109
- with `len(clients_ids)` not equal to `num_clients` generates an error.
110
- client_resources : Optional[Dict[str, float]] (default: `{"num_cpus": 1, "num_gpus": 0.0}`)
111
- CPU and GPU resources for a single client. Supported keys
112
- are `num_cpus` and `num_gpus`. To understand the GPU utilization caused by
113
- `num_gpus`, as well as using custom resources, please consult the Ray
114
- documentation.
115
- server : Optional[flwr.server.Server] (default: None).
116
- An implementation of the abstract base class `flwr.server.Server`. If no
117
- instance is provided, then `start_server` will create one.
118
- config: ServerConfig (default: None).
119
- Currently supported values are `num_rounds` (int, default: 1) and
120
- `round_timeout` in seconds (float, default: None).
121
- strategy : Optional[flwr.server.Strategy] (default: None)
122
- An implementation of the abstract base class `flwr.server.Strategy`. If
123
- no strategy is provided, then `start_server` will use
124
- `flwr.server.strategy.FedAvg`.
125
- client_manager : Optional[flwr.server.ClientManager] (default: None)
126
- An implementation of the abstract base class `flwr.server.ClientManager`.
127
- If no implementation is provided, then `start_simulation` will use
128
- `flwr.server.client_manager.SimpleClientManager`.
129
- ray_init_args : Optional[Dict[str, Any]] (default: None)
130
- Optional dictionary containing arguments for the call to `ray.init`.
131
- If ray_init_args is None (the default), Ray will be initialized with
132
- the following default args:
133
-
134
- { "ignore_reinit_error": True, "include_dashboard": False }
135
-
136
- An empty dictionary can be used (ray_init_args={}) to prevent any
137
- arguments from being passed to ray.init.
138
- keep_initialised: Optional[bool] (default: False)
139
- Set to True to prevent `ray.shutdown()` in case `ray.is_initialized()=True`.
140
-
141
- actor_type: VirtualClientEngineActor (default: ClientAppActor)
142
- Optionally specify the type of actor to use. The actor object, which
143
- persists throughout the simulation, will be the process in charge of
144
- executing a ClientApp wrapping input argument `client_fn`.
145
-
146
- actor_kwargs: Optional[Dict[str, Any]] (default: None)
147
- If you want to create your own Actor classes, you might need to pass
148
- some input argument. You can use this dictionary for such purpose.
149
-
150
- actor_scheduling: Optional[Union[str, NodeAffinitySchedulingStrategy]]
151
- (default: "DEFAULT")
152
- Optional string ("DEFAULT" or "SPREAD") for the VCE to choose in which
153
- node the actor is placed. If you are an advanced user needing more control
154
- you can use lower-level scheduling strategies to pin actors to specific
155
- compute nodes (e.g. via NodeAffinitySchedulingStrategy). Please note this
156
- is an advanced feature. For all details, please refer to the Ray documentation:
157
- https://docs.ray.io/en/latest/ray-core/scheduling/index.html
158
-
159
- Returns
160
- -------
161
- hist : flwr.server.history.History
162
- Object containing metrics from training.
163
- """ # noqa: E501
164
- # pylint: disable-msg=too-many-locals
165
- event(
166
- EventType.START_SIMULATION_ENTER,
167
- {"num_clients": len(clients_ids) if clients_ids is not None else num_clients},
168
- )
37
+ from flwr.common.constant import (
38
+ SIMULATIONIO_API_DEFAULT_CLIENT_ADDRESS,
39
+ Status,
40
+ SubStatus,
41
+ )
42
+ from flwr.common.logger import (
43
+ log,
44
+ mirror_output_to_queue,
45
+ restore_output,
46
+ start_log_uploader,
47
+ stop_log_uploader,
48
+ )
49
+ from flwr.common.serde import (
50
+ configs_record_from_proto,
51
+ context_from_proto,
52
+ context_to_proto,
53
+ fab_from_proto,
54
+ run_from_proto,
55
+ run_status_to_proto,
56
+ )
57
+ from flwr.common.typing import RunStatus
58
+ from flwr.proto.run_pb2 import ( # pylint: disable=E0611
59
+ GetFederationOptionsRequest,
60
+ GetFederationOptionsResponse,
61
+ UpdateRunStatusRequest,
62
+ )
63
+ from flwr.proto.simulationio_pb2 import ( # pylint: disable=E0611
64
+ PullSimulationInputsRequest,
65
+ PullSimulationInputsResponse,
66
+ PushSimulationOutputsRequest,
67
+ )
68
+ from flwr.server.superlink.fleet.vce.backend.backend import BackendConfig
69
+ from flwr.simulation.run_simulation import _run_simulation
70
+ from flwr.simulation.simulationio_connection import SimulationIoConnection
169
71
 
170
- # Initialize server and server config
171
- initialized_server, initialized_config = init_defaults(
172
- server=server,
173
- config=config,
174
- strategy=strategy,
175
- client_manager=client_manager,
176
- )
177
72
 
178
- log(
179
- INFO,
180
- "Starting Flower simulation, config: %s",
181
- initialized_config,
182
- )
73
+ def flwr_simulation() -> None:
74
+ """Run process-isolated Flower Simulation."""
75
+ # Capture stdout/stderr
76
+ log_queue: Queue[Optional[str]] = Queue()
77
+ mirror_output_to_queue(log_queue)
183
78
 
184
- # clients_ids takes precedence
185
- cids: List[str]
186
- if clients_ids is not None:
187
- if (num_clients is not None) and (len(clients_ids) != num_clients):
188
- log(ERROR, INVALID_ARGUMENTS_START_SIMULATION)
189
- sys.exit()
190
- else:
191
- cids = clients_ids
192
- else:
193
- if num_clients is None:
194
- log(ERROR, INVALID_ARGUMENTS_START_SIMULATION)
195
- sys.exit()
196
- else:
197
- cids = [str(x) for x in range(num_clients)]
198
-
199
- # Default arguments for Ray initialization
200
- if not ray_init_args:
201
- ray_init_args = {
202
- "ignore_reinit_error": True,
203
- "include_dashboard": False,
204
- }
205
-
206
- # Shut down Ray if it has already been initialized (unless asked not to)
207
- if ray.is_initialized() and not keep_initialised:
208
- ray.shutdown()
209
-
210
- # Initialize Ray
211
- ray.init(**ray_init_args)
212
- cluster_resources = ray.cluster_resources()
213
- log(
214
- INFO,
215
- "Flower VCE: Ray initialized with resources: %s",
216
- cluster_resources,
217
- )
79
+ args = _parse_args_run_flwr_simulation().parse_args()
218
80
 
219
- log(
220
- INFO,
221
- "Optimize your simulation with Flower VCE: "
222
- "https://flower.ai/docs/framework/how-to-run-simulations.html",
223
- )
81
+ log(INFO, "Starting Flower Simulation")
224
82
 
225
- # Log the resources that a single client will be able to use
226
- if client_resources is None:
83
+ if not args.insecure:
227
84
  log(
228
- INFO,
229
- "No `client_resources` specified. Using minimal resources for clients.",
230
- )
231
- client_resources = {"num_cpus": 1, "num_gpus": 0.0}
232
-
233
- # Each client needs at the very least one CPU
234
- if "num_cpus" not in client_resources:
235
- warnings.warn(
236
- "No `num_cpus` specified in `client_resources`. "
237
- "Using `num_cpus=1` for each client.",
238
- stacklevel=2,
85
+ ERROR,
86
+ "`flwr-simulation` does not support TLS yet. "
87
+ "Please use the '--insecure' flag.",
239
88
  )
240
- client_resources["num_cpus"] = 1
89
+ sys.exit(1)
241
90
 
242
91
  log(
243
- INFO,
244
- "Flower VCE: Resources for each Virtual Client: %s",
245
- client_resources,
92
+ DEBUG,
93
+ "Starting isolated `Simulation` connected to SuperLink SimulationAppIo API "
94
+ "at %s",
95
+ args.simulationio_api_address,
96
+ )
97
+ run_simulation_process(
98
+ simulationio_api_address=args.simulationio_api_address,
99
+ log_queue=log_queue,
100
+ run_once=args.run_once,
101
+ flwr_dir_=args.flwr_dir,
102
+ certificates=None,
246
103
  )
247
104
 
248
- actor_args = {} if actor_kwargs is None else actor_kwargs
249
-
250
- # An actor factory. This is called N times to add N actors
251
- # to the pool. If at some point the pool can accommodate more actors
252
- # this will be called again.
253
- def create_actor_fn() -> Type[VirtualClientEngineActor]:
254
- return actor_type.options( # type: ignore
255
- **client_resources,
256
- scheduling_strategy=actor_scheduling,
257
- ).remote(**actor_args)
258
-
259
- # Instantiate ActorPool
260
- pool = VirtualClientEngineActorPool(
261
- create_actor_fn=create_actor_fn,
262
- client_resources=client_resources,
105
+ # Restore stdout/stderr
106
+ restore_output()
107
+
108
+
109
+ def run_simulation_process( # pylint: disable=R0914, disable=W0212, disable=R0915
110
+ simulationio_api_address: str,
111
+ log_queue: Queue[Optional[str]],
112
+ run_once: bool,
113
+ flwr_dir_: Optional[str] = None,
114
+ certificates: Optional[bytes] = None,
115
+ ) -> None:
116
+ """Run Flower Simulation process."""
117
+ conn = SimulationIoConnection(
118
+ simulationio_service_address=simulationio_api_address,
119
+ root_certificates=certificates,
263
120
  )
264
121
 
265
- f_stop = threading.Event()
266
-
267
- # Periodically, check if the cluster has grown (i.e. a new
268
- # node has been added). If this happens, we likely want to grow
269
- # the actor pool by adding more Actors to it.
270
- def update_resources(f_stop: threading.Event) -> None:
271
- """Periodically check if more actors can be added to the pool.
272
-
273
- If so, extend the pool.
274
- """
275
- if not f_stop.is_set():
276
- num_max_actors = pool_size_from_resources(client_resources)
277
- if num_max_actors > pool.num_actors:
278
- num_new = num_max_actors - pool.num_actors
279
- log(
280
- INFO, "The cluster expanded. Adding %s actors to the pool.", num_new
122
+ # Resolve directory where FABs are installed
123
+ flwr_dir = get_flwr_dir(flwr_dir_)
124
+ log_uploader = None
125
+
126
+ while True:
127
+
128
+ try:
129
+ # Pull SimulationInputs from LinkState
130
+ req = PullSimulationInputsRequest()
131
+ res: PullSimulationInputsResponse = conn._stub.PullSimulationInputs(req)
132
+ if not res.HasField("run"):
133
+ sleep(3)
134
+ run_status = None
135
+ continue
136
+
137
+ context = context_from_proto(res.context)
138
+ run = run_from_proto(res.run)
139
+ fab = fab_from_proto(res.fab)
140
+
141
+ # Start log uploader for this run
142
+ log_uploader = start_log_uploader(
143
+ log_queue=log_queue,
144
+ node_id=context.node_id,
145
+ run_id=run.run_id,
146
+ stub=conn._stub,
147
+ )
148
+
149
+ log(DEBUG, "Simulation process starts FAB installation.")
150
+ install_from_fab(fab.content, flwr_dir=flwr_dir, skip_prompt=True)
151
+
152
+ fab_id, fab_version = get_fab_metadata(fab.content)
153
+
154
+ app_path = get_project_dir(fab_id, fab_version, fab.hash_str, flwr_dir)
155
+ config = get_project_config(app_path)
156
+
157
+ # Get ClientApp and ServerApp components
158
+ app_components = config["tool"]["flwr"]["app"]["components"]
159
+ client_app_attr = app_components["clientapp"]
160
+ server_app_attr = app_components["serverapp"]
161
+ fused_config = get_fused_config_from_dir(app_path, run.override_config)
162
+
163
+ # Update run_config in context
164
+ context.run_config = fused_config
165
+
166
+ log(
167
+ DEBUG,
168
+ "Flower will load ServerApp `%s` in %s",
169
+ server_app_attr,
170
+ app_path,
171
+ )
172
+ log(
173
+ DEBUG,
174
+ "Flower will load ClientApp `%s` in %s",
175
+ client_app_attr,
176
+ app_path,
177
+ )
178
+
179
+ # Change status to Running
180
+ run_status_proto = run_status_to_proto(RunStatus(Status.RUNNING, "", ""))
181
+ conn._stub.UpdateRunStatus(
182
+ UpdateRunStatusRequest(run_id=run.run_id, run_status=run_status_proto)
183
+ )
184
+
185
+ # Pull Federation Options
186
+ fed_opt_res: GetFederationOptionsResponse = conn._stub.GetFederationOptions(
187
+ GetFederationOptionsRequest(run_id=run.run_id)
188
+ )
189
+ federation_options = configs_record_from_proto(
190
+ fed_opt_res.federation_options
191
+ )
192
+
193
+ # Unflatten underlying dict
194
+ fed_opt = unflatten_dict({**federation_options})
195
+
196
+ # Extract configs values of interest
197
+ num_supernodes = fed_opt.get("num-supernodes")
198
+ if num_supernodes is None:
199
+ raise ValueError(
200
+ "Federation options expects `num-supernodes` to be set."
201
+ )
202
+ backend_config: BackendConfig = fed_opt.get("backend", {})
203
+ verbose: bool = fed_opt.get("verbose", False)
204
+ enable_tf_gpu_growth: bool = fed_opt.get("enable_tf_gpu_growth", False)
205
+
206
+ event(
207
+ EventType.FLWR_SIMULATION_RUN_ENTER,
208
+ event_details={
209
+ "backend": "ray",
210
+ "num-supernodes": num_supernodes,
211
+ "run-id-hash": get_sha256_hash(run.run_id),
212
+ },
213
+ )
214
+
215
+ # Launch the simulation
216
+ updated_context = _run_simulation(
217
+ server_app_attr=server_app_attr,
218
+ client_app_attr=client_app_attr,
219
+ num_supernodes=num_supernodes,
220
+ backend_config=backend_config,
221
+ app_dir=str(app_path),
222
+ run=run,
223
+ enable_tf_gpu_growth=enable_tf_gpu_growth,
224
+ verbose_logging=verbose,
225
+ server_app_run_config=fused_config,
226
+ is_app=True,
227
+ exit_event=EventType.FLWR_SIMULATION_RUN_LEAVE,
228
+ )
229
+
230
+ # Send resulting context
231
+ context_proto = context_to_proto(updated_context)
232
+ out_req = PushSimulationOutputsRequest(
233
+ run_id=run.run_id, context=context_proto
234
+ )
235
+ _ = conn._stub.PushSimulationOutputs(out_req)
236
+
237
+ run_status = RunStatus(Status.FINISHED, SubStatus.COMPLETED, "")
238
+
239
+ except Exception as ex: # pylint: disable=broad-exception-caught
240
+ exc_entity = "Simulation"
241
+ log(ERROR, "%s raised an exception", exc_entity, exc_info=ex)
242
+ run_status = RunStatus(Status.FINISHED, SubStatus.FAILED, str(ex))
243
+
244
+ finally:
245
+ # Stop log uploader for this run and upload final logs
246
+ if log_uploader:
247
+ stop_log_uploader(log_queue, log_uploader)
248
+ log_uploader = None
249
+
250
+ # Update run status
251
+ if run_status:
252
+ run_status_proto = run_status_to_proto(run_status)
253
+ conn._stub.UpdateRunStatus(
254
+ UpdateRunStatusRequest(
255
+ run_id=run.run_id, run_status=run_status_proto
256
+ )
281
257
  )
282
- pool.add_actors_to_pool(num_actors=num_new)
283
258
 
284
- threading.Timer(10, update_resources, [f_stop]).start()
259
+ # Stop the loop if `flwr-simulation` is expected to process a single run
260
+ if run_once:
261
+ break
285
262
 
286
- update_resources(f_stop)
287
263
 
288
- log(
289
- INFO,
290
- "Flower VCE: Creating %s with %s actors",
291
- pool.__class__.__name__,
292
- pool.num_actors,
264
+ def _parse_args_run_flwr_simulation() -> argparse.ArgumentParser:
265
+ """Parse flwr-simulation command line arguments."""
266
+ parser = argparse.ArgumentParser(
267
+ description="Run a Flower Simulation",
293
268
  )
294
-
295
- # Register one RayClientProxy object for each client with the ClientManager
296
- for cid in cids:
297
- client_proxy = RayActorClientProxy(
298
- client_fn=client_fn,
299
- cid=cid,
300
- actor_pool=pool,
301
- )
302
- initialized_server.client_manager().register(client=client_proxy)
303
-
304
- hist = History()
305
- # pylint: disable=broad-except
306
- try:
307
- # Start training
308
- hist = run_fl(
309
- server=initialized_server,
310
- config=initialized_config,
311
- )
312
- except Exception as ex:
313
- log(ERROR, ex)
314
- log(ERROR, traceback.format_exc())
315
- log(
316
- ERROR,
317
- "Your simulation crashed :(. This could be because of several reasons. "
318
- "The most common are: "
319
- "\n\t > Sometimes, issues in the simulation code itself can cause crashes. "
320
- "It's always a good idea to double-check your code for any potential bugs "
321
- "or inconsistencies that might be contributing to the problem. "
322
- "For example: "
323
- "\n\t\t - You might be using a class attribute in your clients that "
324
- "hasn't been defined."
325
- "\n\t\t - There could be an incorrect method call to a 3rd party library "
326
- "(e.g., PyTorch)."
327
- "\n\t\t - The return types of methods in your clients/strategies might be "
328
- "incorrect."
329
- "\n\t > Your system couldn't fit a single VirtualClient: try lowering "
330
- "`client_resources`."
331
- "\n\t > All the actors in your pool crashed. This could be because: "
332
- "\n\t\t - You clients hit an out-of-memory (OOM) error and actors couldn't "
333
- "recover from it. Try launching your simulation with more generous "
334
- "`client_resources` setting (i.e. it seems %s is "
335
- "not enough for your run). Use fewer concurrent actors. "
336
- "\n\t\t - You were running a multi-node simulation and all worker nodes "
337
- "disconnected. The head node might still be alive but cannot accommodate "
338
- "any actor with resources: %s."
339
- "\nTake a look at the Flower simulation examples for guidance "
340
- "<https://flower.ai/docs/framework/how-to-run-simulations.html>.",
341
- client_resources,
342
- client_resources,
343
- )
344
- raise RuntimeError("Simulation crashed.") from ex
345
-
346
- finally:
347
- # Stop time monitoring resources in cluster
348
- f_stop.set()
349
- event(EventType.START_SIMULATION_LEAVE)
350
-
351
- return hist
269
+ parser.add_argument(
270
+ "--simulationio-api-address",
271
+ default=SIMULATIONIO_API_DEFAULT_CLIENT_ADDRESS,
272
+ type=str,
273
+ help="Address of SuperLink's SimulationIO API (IPv4, IPv6, or a domain name). "
274
+ f"By default, it is set to {SIMULATIONIO_API_DEFAULT_CLIENT_ADDRESS}.",
275
+ )
276
+ parser.add_argument(
277
+ "--run-once",
278
+ action="store_true",
279
+ help="When set, this process will start a single simulation "
280
+ "for a pending Run. If there is no pending run, the process will exit.",
281
+ )
282
+ add_args_flwr_app_common(parser=parser)
283
+ return parser