flwr-nightly 1.8.0.dev20240315__py3-none-any.whl → 1.15.0.dev20250115__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (312) hide show
  1. flwr/cli/app.py +16 -2
  2. flwr/cli/build.py +181 -0
  3. flwr/cli/cli_user_auth_interceptor.py +90 -0
  4. flwr/cli/config_utils.py +343 -0
  5. flwr/cli/example.py +4 -1
  6. flwr/cli/install.py +253 -0
  7. flwr/cli/log.py +182 -0
  8. flwr/{server/superlink/state → cli/login}/__init__.py +4 -10
  9. flwr/cli/login/login.py +88 -0
  10. flwr/cli/ls.py +327 -0
  11. flwr/cli/new/__init__.py +1 -0
  12. flwr/cli/new/new.py +210 -66
  13. flwr/cli/new/templates/app/.gitignore.tpl +163 -0
  14. flwr/cli/new/templates/app/LICENSE.tpl +202 -0
  15. flwr/cli/new/templates/app/README.baseline.md.tpl +127 -0
  16. flwr/cli/new/templates/app/README.flowertune.md.tpl +66 -0
  17. flwr/cli/new/templates/app/README.md.tpl +16 -32
  18. flwr/cli/new/templates/app/code/__init__.baseline.py.tpl +1 -0
  19. flwr/cli/new/templates/app/code/__init__.py.tpl +1 -1
  20. flwr/cli/new/templates/app/code/client.baseline.py.tpl +58 -0
  21. flwr/cli/new/templates/app/code/client.huggingface.py.tpl +55 -0
  22. flwr/cli/new/templates/app/code/client.jax.py.tpl +50 -0
  23. flwr/cli/new/templates/app/code/client.mlx.py.tpl +73 -0
  24. flwr/cli/new/templates/app/code/client.numpy.py.tpl +7 -7
  25. flwr/cli/new/templates/app/code/client.pytorch.py.tpl +30 -21
  26. flwr/cli/new/templates/app/code/client.sklearn.py.tpl +63 -0
  27. flwr/cli/new/templates/app/code/client.tensorflow.py.tpl +57 -1
  28. flwr/cli/new/templates/app/code/dataset.baseline.py.tpl +36 -0
  29. flwr/cli/new/templates/app/code/flwr_tune/__init__.py +15 -0
  30. flwr/cli/new/templates/app/code/flwr_tune/client_app.py.tpl +126 -0
  31. flwr/cli/new/templates/app/code/flwr_tune/dataset.py.tpl +87 -0
  32. flwr/cli/new/templates/app/code/flwr_tune/models.py.tpl +78 -0
  33. flwr/cli/new/templates/app/code/flwr_tune/server_app.py.tpl +94 -0
  34. flwr/cli/new/templates/app/code/flwr_tune/strategy.py.tpl +83 -0
  35. flwr/cli/new/templates/app/code/model.baseline.py.tpl +80 -0
  36. flwr/cli/new/templates/app/code/server.baseline.py.tpl +46 -0
  37. flwr/cli/new/templates/app/code/server.huggingface.py.tpl +38 -0
  38. flwr/cli/new/templates/app/code/server.jax.py.tpl +26 -0
  39. flwr/cli/new/templates/app/code/server.mlx.py.tpl +31 -0
  40. flwr/cli/new/templates/app/code/server.numpy.py.tpl +22 -9
  41. flwr/cli/new/templates/app/code/server.pytorch.py.tpl +21 -18
  42. flwr/cli/new/templates/app/code/server.sklearn.py.tpl +36 -0
  43. flwr/cli/new/templates/app/code/server.tensorflow.py.tpl +29 -1
  44. flwr/cli/new/templates/app/code/strategy.baseline.py.tpl +1 -0
  45. flwr/cli/new/templates/app/code/task.huggingface.py.tpl +102 -0
  46. flwr/cli/new/templates/app/code/task.jax.py.tpl +57 -0
  47. flwr/cli/new/templates/app/code/task.mlx.py.tpl +102 -0
  48. flwr/cli/new/templates/app/code/task.numpy.py.tpl +7 -0
  49. flwr/cli/new/templates/app/code/task.pytorch.py.tpl +29 -24
  50. flwr/cli/new/templates/app/code/task.sklearn.py.tpl +67 -0
  51. flwr/cli/new/templates/app/code/task.tensorflow.py.tpl +53 -0
  52. flwr/cli/new/templates/app/code/utils.baseline.py.tpl +1 -0
  53. flwr/cli/new/templates/app/pyproject.baseline.toml.tpl +138 -0
  54. flwr/cli/new/templates/app/pyproject.flowertune.toml.tpl +68 -0
  55. flwr/cli/new/templates/app/pyproject.huggingface.toml.tpl +46 -0
  56. flwr/cli/new/templates/app/pyproject.jax.toml.tpl +35 -0
  57. flwr/cli/new/templates/app/pyproject.mlx.toml.tpl +39 -0
  58. flwr/cli/new/templates/app/pyproject.numpy.toml.tpl +25 -12
  59. flwr/cli/new/templates/app/pyproject.pytorch.toml.tpl +29 -14
  60. flwr/cli/new/templates/app/pyproject.sklearn.toml.tpl +35 -0
  61. flwr/cli/new/templates/app/pyproject.tensorflow.toml.tpl +29 -14
  62. flwr/cli/run/__init__.py +1 -0
  63. flwr/cli/run/run.py +212 -34
  64. flwr/cli/stop.py +130 -0
  65. flwr/cli/utils.py +240 -5
  66. flwr/client/__init__.py +3 -2
  67. flwr/client/app.py +432 -255
  68. flwr/client/client.py +1 -11
  69. flwr/client/client_app.py +74 -13
  70. flwr/client/clientapp/__init__.py +22 -0
  71. flwr/client/clientapp/app.py +259 -0
  72. flwr/client/clientapp/clientappio_servicer.py +244 -0
  73. flwr/client/clientapp/utils.py +115 -0
  74. flwr/client/dpfedavg_numpy_client.py +7 -8
  75. flwr/client/grpc_adapter_client/__init__.py +15 -0
  76. flwr/client/grpc_adapter_client/connection.py +98 -0
  77. flwr/client/grpc_client/connection.py +21 -7
  78. flwr/client/grpc_rere_client/__init__.py +1 -1
  79. flwr/client/grpc_rere_client/client_interceptor.py +176 -0
  80. flwr/client/grpc_rere_client/connection.py +163 -56
  81. flwr/client/grpc_rere_client/grpc_adapter.py +167 -0
  82. flwr/client/heartbeat.py +74 -0
  83. flwr/client/message_handler/__init__.py +1 -1
  84. flwr/client/message_handler/message_handler.py +10 -11
  85. flwr/client/mod/__init__.py +5 -5
  86. flwr/client/mod/centraldp_mods.py +4 -2
  87. flwr/client/mod/comms_mods.py +5 -4
  88. flwr/client/mod/localdp_mod.py +10 -5
  89. flwr/client/mod/secure_aggregation/__init__.py +1 -1
  90. flwr/client/mod/secure_aggregation/secaggplus_mod.py +26 -26
  91. flwr/client/mod/utils.py +2 -4
  92. flwr/client/nodestate/__init__.py +26 -0
  93. flwr/client/nodestate/in_memory_nodestate.py +38 -0
  94. flwr/client/nodestate/nodestate.py +31 -0
  95. flwr/client/nodestate/nodestate_factory.py +38 -0
  96. flwr/client/numpy_client.py +8 -31
  97. flwr/client/rest_client/__init__.py +1 -1
  98. flwr/client/rest_client/connection.py +199 -176
  99. flwr/client/run_info_store.py +112 -0
  100. flwr/client/supernode/__init__.py +24 -0
  101. flwr/client/supernode/app.py +321 -0
  102. flwr/client/typing.py +1 -0
  103. flwr/common/__init__.py +17 -11
  104. flwr/common/address.py +47 -3
  105. flwr/common/args.py +153 -0
  106. flwr/common/auth_plugin/__init__.py +24 -0
  107. flwr/common/auth_plugin/auth_plugin.py +121 -0
  108. flwr/common/config.py +243 -0
  109. flwr/common/constant.py +135 -1
  110. flwr/common/context.py +32 -2
  111. flwr/common/date.py +22 -4
  112. flwr/common/differential_privacy.py +2 -2
  113. flwr/common/dp.py +2 -4
  114. flwr/common/exit_handlers.py +3 -3
  115. flwr/common/grpc.py +164 -5
  116. flwr/common/logger.py +230 -12
  117. flwr/common/message.py +191 -106
  118. flwr/common/object_ref.py +179 -44
  119. flwr/common/pyproject.py +1 -0
  120. flwr/common/record/__init__.py +2 -1
  121. flwr/common/record/configsrecord.py +58 -18
  122. flwr/common/record/metricsrecord.py +57 -17
  123. flwr/common/record/parametersrecord.py +88 -20
  124. flwr/common/record/recordset.py +153 -30
  125. flwr/common/record/typeddict.py +30 -55
  126. flwr/common/recordset_compat.py +31 -12
  127. flwr/common/retry_invoker.py +123 -30
  128. flwr/common/secure_aggregation/__init__.py +1 -1
  129. flwr/common/secure_aggregation/crypto/__init__.py +1 -1
  130. flwr/common/secure_aggregation/crypto/shamir.py +11 -11
  131. flwr/common/secure_aggregation/crypto/symmetric_encryption.py +68 -4
  132. flwr/common/secure_aggregation/ndarrays_arithmetic.py +17 -17
  133. flwr/common/secure_aggregation/quantization.py +8 -8
  134. flwr/common/secure_aggregation/secaggplus_constants.py +1 -1
  135. flwr/common/secure_aggregation/secaggplus_utils.py +10 -12
  136. flwr/common/serde.py +304 -23
  137. flwr/common/telemetry.py +65 -29
  138. flwr/common/typing.py +120 -19
  139. flwr/common/version.py +17 -3
  140. flwr/proto/clientappio_pb2.py +45 -0
  141. flwr/proto/clientappio_pb2.pyi +132 -0
  142. flwr/proto/clientappio_pb2_grpc.py +135 -0
  143. flwr/proto/clientappio_pb2_grpc.pyi +53 -0
  144. flwr/proto/exec_pb2.py +62 -0
  145. flwr/proto/exec_pb2.pyi +212 -0
  146. flwr/proto/exec_pb2_grpc.py +237 -0
  147. flwr/proto/exec_pb2_grpc.pyi +93 -0
  148. flwr/proto/fab_pb2.py +31 -0
  149. flwr/proto/fab_pb2.pyi +65 -0
  150. flwr/proto/fab_pb2_grpc.py +4 -0
  151. flwr/proto/fab_pb2_grpc.pyi +4 -0
  152. flwr/proto/fleet_pb2.py +42 -23
  153. flwr/proto/fleet_pb2.pyi +123 -1
  154. flwr/proto/fleet_pb2_grpc.py +170 -0
  155. flwr/proto/fleet_pb2_grpc.pyi +61 -0
  156. flwr/proto/grpcadapter_pb2.py +32 -0
  157. flwr/proto/grpcadapter_pb2.pyi +43 -0
  158. flwr/proto/grpcadapter_pb2_grpc.py +66 -0
  159. flwr/proto/grpcadapter_pb2_grpc.pyi +24 -0
  160. flwr/proto/log_pb2.py +29 -0
  161. flwr/proto/log_pb2.pyi +39 -0
  162. flwr/proto/log_pb2_grpc.py +4 -0
  163. flwr/proto/log_pb2_grpc.pyi +4 -0
  164. flwr/proto/message_pb2.py +41 -0
  165. flwr/proto/message_pb2.pyi +128 -0
  166. flwr/proto/message_pb2_grpc.py +4 -0
  167. flwr/proto/message_pb2_grpc.pyi +4 -0
  168. flwr/proto/node_pb2.py +2 -2
  169. flwr/proto/node_pb2.pyi +1 -4
  170. flwr/proto/recordset_pb2.py +35 -33
  171. flwr/proto/recordset_pb2.pyi +40 -14
  172. flwr/proto/run_pb2.py +64 -0
  173. flwr/proto/run_pb2.pyi +268 -0
  174. flwr/proto/run_pb2_grpc.py +4 -0
  175. flwr/proto/run_pb2_grpc.pyi +4 -0
  176. flwr/proto/serverappio_pb2.py +52 -0
  177. flwr/proto/{driver_pb2.pyi → serverappio_pb2.pyi} +62 -20
  178. flwr/proto/serverappio_pb2_grpc.py +410 -0
  179. flwr/proto/serverappio_pb2_grpc.pyi +160 -0
  180. flwr/proto/simulationio_pb2.py +38 -0
  181. flwr/proto/simulationio_pb2.pyi +65 -0
  182. flwr/proto/simulationio_pb2_grpc.py +239 -0
  183. flwr/proto/simulationio_pb2_grpc.pyi +94 -0
  184. flwr/proto/task_pb2.py +7 -8
  185. flwr/proto/task_pb2.pyi +8 -5
  186. flwr/proto/transport_pb2.py +8 -8
  187. flwr/proto/transport_pb2.pyi +9 -6
  188. flwr/server/__init__.py +2 -10
  189. flwr/server/app.py +579 -402
  190. flwr/server/client_manager.py +8 -6
  191. flwr/server/compat/app.py +6 -62
  192. flwr/server/compat/app_utils.py +14 -9
  193. flwr/server/compat/driver_client_proxy.py +25 -59
  194. flwr/server/compat/legacy_context.py +5 -4
  195. flwr/server/driver/__init__.py +2 -0
  196. flwr/server/driver/driver.py +36 -131
  197. flwr/server/driver/grpc_driver.py +220 -81
  198. flwr/server/driver/inmemory_driver.py +183 -0
  199. flwr/server/history.py +28 -29
  200. flwr/server/run_serverapp.py +15 -126
  201. flwr/server/server.py +50 -44
  202. flwr/server/server_app.py +59 -10
  203. flwr/server/serverapp/__init__.py +22 -0
  204. flwr/server/serverapp/app.py +256 -0
  205. flwr/server/serverapp_components.py +52 -0
  206. flwr/server/strategy/__init__.py +2 -2
  207. flwr/server/strategy/aggregate.py +37 -23
  208. flwr/server/strategy/bulyan.py +9 -9
  209. flwr/server/strategy/dp_adaptive_clipping.py +25 -25
  210. flwr/server/strategy/dp_fixed_clipping.py +23 -22
  211. flwr/server/strategy/dpfedavg_adaptive.py +8 -8
  212. flwr/server/strategy/dpfedavg_fixed.py +13 -12
  213. flwr/server/strategy/fault_tolerant_fedavg.py +11 -11
  214. flwr/server/strategy/fedadagrad.py +9 -9
  215. flwr/server/strategy/fedadam.py +20 -10
  216. flwr/server/strategy/fedavg.py +16 -16
  217. flwr/server/strategy/fedavg_android.py +17 -17
  218. flwr/server/strategy/fedavgm.py +9 -9
  219. flwr/server/strategy/fedmedian.py +5 -5
  220. flwr/server/strategy/fedopt.py +6 -6
  221. flwr/server/strategy/fedprox.py +7 -7
  222. flwr/server/strategy/fedtrimmedavg.py +8 -8
  223. flwr/server/strategy/fedxgb_bagging.py +12 -12
  224. flwr/server/strategy/fedxgb_cyclic.py +10 -10
  225. flwr/server/strategy/fedxgb_nn_avg.py +6 -6
  226. flwr/server/strategy/fedyogi.py +9 -9
  227. flwr/server/strategy/krum.py +9 -9
  228. flwr/server/strategy/qfedavg.py +16 -16
  229. flwr/server/strategy/strategy.py +10 -10
  230. flwr/server/superlink/driver/__init__.py +2 -2
  231. flwr/server/superlink/driver/serverappio_grpc.py +61 -0
  232. flwr/server/superlink/driver/serverappio_servicer.py +361 -0
  233. flwr/server/superlink/ffs/__init__.py +24 -0
  234. flwr/server/superlink/ffs/disk_ffs.py +108 -0
  235. flwr/server/superlink/ffs/ffs.py +79 -0
  236. flwr/server/superlink/ffs/ffs_factory.py +47 -0
  237. flwr/server/superlink/fleet/__init__.py +1 -1
  238. flwr/server/superlink/fleet/grpc_adapter/__init__.py +15 -0
  239. flwr/server/superlink/fleet/grpc_adapter/grpc_adapter_servicer.py +162 -0
  240. flwr/server/superlink/fleet/grpc_bidi/__init__.py +1 -1
  241. flwr/server/superlink/fleet/grpc_bidi/flower_service_servicer.py +4 -2
  242. flwr/server/superlink/fleet/grpc_bidi/grpc_bridge.py +3 -2
  243. flwr/server/superlink/fleet/grpc_bidi/grpc_client_proxy.py +1 -1
  244. flwr/server/superlink/fleet/grpc_bidi/grpc_server.py +5 -154
  245. flwr/server/superlink/fleet/grpc_rere/__init__.py +1 -1
  246. flwr/server/superlink/fleet/grpc_rere/fleet_servicer.py +120 -13
  247. flwr/server/superlink/fleet/grpc_rere/server_interceptor.py +228 -0
  248. flwr/server/superlink/fleet/message_handler/__init__.py +1 -1
  249. flwr/server/superlink/fleet/message_handler/message_handler.py +156 -13
  250. flwr/server/superlink/fleet/rest_rere/__init__.py +1 -1
  251. flwr/server/superlink/fleet/rest_rere/rest_api.py +119 -81
  252. flwr/server/superlink/fleet/vce/__init__.py +1 -0
  253. flwr/server/superlink/fleet/vce/backend/__init__.py +4 -4
  254. flwr/server/superlink/fleet/vce/backend/backend.py +8 -9
  255. flwr/server/superlink/fleet/vce/backend/raybackend.py +87 -68
  256. flwr/server/superlink/fleet/vce/vce_api.py +208 -146
  257. flwr/server/superlink/linkstate/__init__.py +28 -0
  258. flwr/server/superlink/linkstate/in_memory_linkstate.py +569 -0
  259. flwr/server/superlink/linkstate/linkstate.py +376 -0
  260. flwr/server/superlink/{state/state_factory.py → linkstate/linkstate_factory.py} +19 -10
  261. flwr/server/superlink/linkstate/sqlite_linkstate.py +1196 -0
  262. flwr/server/superlink/linkstate/utils.py +399 -0
  263. flwr/server/superlink/simulation/__init__.py +15 -0
  264. flwr/server/superlink/simulation/simulationio_grpc.py +65 -0
  265. flwr/server/superlink/simulation/simulationio_servicer.py +186 -0
  266. flwr/server/superlink/utils.py +65 -0
  267. flwr/server/typing.py +2 -0
  268. flwr/server/utils/__init__.py +1 -1
  269. flwr/server/utils/tensorboard.py +5 -5
  270. flwr/server/utils/validator.py +40 -45
  271. flwr/server/workflow/default_workflows.py +70 -26
  272. flwr/server/workflow/secure_aggregation/secagg_workflow.py +1 -0
  273. flwr/server/workflow/secure_aggregation/secaggplus_workflow.py +40 -27
  274. flwr/simulation/__init__.py +12 -5
  275. flwr/simulation/app.py +247 -315
  276. flwr/simulation/legacy_app.py +404 -0
  277. flwr/simulation/ray_transport/__init__.py +1 -1
  278. flwr/simulation/ray_transport/ray_actor.py +42 -67
  279. flwr/simulation/ray_transport/ray_client_proxy.py +37 -17
  280. flwr/simulation/ray_transport/utils.py +1 -0
  281. flwr/simulation/run_simulation.py +306 -163
  282. flwr/simulation/simulationio_connection.py +89 -0
  283. flwr/superexec/__init__.py +15 -0
  284. flwr/superexec/app.py +59 -0
  285. flwr/superexec/deployment.py +188 -0
  286. flwr/superexec/exec_grpc.py +80 -0
  287. flwr/superexec/exec_servicer.py +231 -0
  288. flwr/superexec/exec_user_auth_interceptor.py +101 -0
  289. flwr/superexec/executor.py +96 -0
  290. flwr/superexec/simulation.py +124 -0
  291. {flwr_nightly-1.8.0.dev20240315.dist-info → flwr_nightly-1.15.0.dev20250115.dist-info}/METADATA +33 -26
  292. flwr_nightly-1.15.0.dev20250115.dist-info/RECORD +328 -0
  293. flwr_nightly-1.15.0.dev20250115.dist-info/entry_points.txt +12 -0
  294. flwr/cli/flower_toml.py +0 -140
  295. flwr/cli/new/templates/app/flower.toml.tpl +0 -13
  296. flwr/cli/new/templates/app/requirements.numpy.txt.tpl +0 -2
  297. flwr/cli/new/templates/app/requirements.pytorch.txt.tpl +0 -4
  298. flwr/cli/new/templates/app/requirements.tensorflow.txt.tpl +0 -4
  299. flwr/client/node_state.py +0 -48
  300. flwr/client/node_state_tests.py +0 -65
  301. flwr/proto/driver_pb2.py +0 -44
  302. flwr/proto/driver_pb2_grpc.py +0 -169
  303. flwr/proto/driver_pb2_grpc.pyi +0 -66
  304. flwr/server/superlink/driver/driver_grpc.py +0 -54
  305. flwr/server/superlink/driver/driver_servicer.py +0 -129
  306. flwr/server/superlink/state/in_memory_state.py +0 -230
  307. flwr/server/superlink/state/sqlite_state.py +0 -630
  308. flwr/server/superlink/state/state.py +0 -154
  309. flwr_nightly-1.8.0.dev20240315.dist-info/RECORD +0 -211
  310. flwr_nightly-1.8.0.dev20240315.dist-info/entry_points.txt +0 -9
  311. {flwr_nightly-1.8.0.dev20240315.dist-info → flwr_nightly-1.15.0.dev20250115.dist-info}/LICENSE +0 -0
  312. {flwr_nightly-1.8.0.dev20240315.dist-info → flwr_nightly-1.15.0.dev20250115.dist-info}/WHEEL +0 -0
@@ -15,151 +15,175 @@
15
15
  """Fleet Simulation Engine API."""
16
16
 
17
17
 
18
- import asyncio
19
18
  import json
19
+ import threading
20
+ import time
20
21
  import traceback
22
+ from concurrent.futures import ThreadPoolExecutor
21
23
  from logging import DEBUG, ERROR, INFO, WARN
22
- from typing import Callable, Dict, List, Optional
23
-
24
- from flwr.client.client_app import ClientApp, LoadClientAppError
25
- from flwr.client.node_state import NodeState
24
+ from pathlib import Path
25
+ from queue import Empty, Queue
26
+ from time import sleep
27
+ from typing import Callable, Optional
28
+
29
+ from flwr.client.client_app import ClientApp, ClientAppException, LoadClientAppError
30
+ from flwr.client.clientapp.utils import get_load_client_app_fn
31
+ from flwr.client.run_info_store import DeprecatedRunInfoStore
32
+ from flwr.common.constant import (
33
+ NUM_PARTITIONS_KEY,
34
+ PARTITION_ID_KEY,
35
+ PING_MAX_INTERVAL,
36
+ ErrorCode,
37
+ )
26
38
  from flwr.common.logger import log
27
- from flwr.common.object_ref import load_app
39
+ from flwr.common.message import Error
28
40
  from flwr.common.serde import message_from_taskins, message_to_taskres
29
- from flwr.proto.task_pb2 import TaskIns # pylint: disable=E0611
30
- from flwr.server.superlink.state import StateFactory
41
+ from flwr.common.typing import Run
42
+ from flwr.proto.task_pb2 import TaskIns, TaskRes # pylint: disable=E0611
43
+ from flwr.server.superlink.linkstate import LinkState, LinkStateFactory
31
44
 
32
45
  from .backend import Backend, error_messages_backends, supported_backends
33
46
 
34
- NodeToPartitionMapping = Dict[int, int]
47
+ NodeToPartitionMapping = dict[int, int]
35
48
 
36
49
 
37
50
  def _register_nodes(
38
- num_nodes: int, state_factory: StateFactory
51
+ num_nodes: int, state_factory: LinkStateFactory
39
52
  ) -> NodeToPartitionMapping:
40
53
  """Register nodes with the StateFactory and create node-id:partition-id mapping."""
41
54
  nodes_mapping: NodeToPartitionMapping = {}
42
55
  state = state_factory.state()
43
56
  for i in range(num_nodes):
44
- node_id = state.create_node()
57
+ node_id = state.create_node(ping_interval=PING_MAX_INTERVAL)
45
58
  nodes_mapping[node_id] = i
46
- log(INFO, "Registered %i nodes", len(nodes_mapping))
59
+ log(DEBUG, "Registered %i nodes", len(nodes_mapping))
47
60
  return nodes_mapping
48
61
 
49
62
 
50
- # pylint: disable=too-many-arguments,too-many-locals
51
- async def worker(
52
- app_fn: Callable[[], ClientApp],
53
- queue: "asyncio.Queue[TaskIns]",
54
- node_states: Dict[int, NodeState],
55
- state_factory: StateFactory,
63
+ def _register_node_info_stores(
56
64
  nodes_mapping: NodeToPartitionMapping,
65
+ run: Run,
66
+ app_dir: Optional[str] = None,
67
+ ) -> dict[int, DeprecatedRunInfoStore]:
68
+ """Create DeprecatedRunInfoStore objects and register the context for the run."""
69
+ node_info_store: dict[int, DeprecatedRunInfoStore] = {}
70
+ num_partitions = len(set(nodes_mapping.values()))
71
+ for node_id, partition_id in nodes_mapping.items():
72
+ node_info_store[node_id] = DeprecatedRunInfoStore(
73
+ node_id=node_id,
74
+ node_config={
75
+ PARTITION_ID_KEY: partition_id,
76
+ NUM_PARTITIONS_KEY: num_partitions,
77
+ },
78
+ )
79
+
80
+ # Pre-register Context objects
81
+ node_info_store[node_id].register_context(
82
+ run_id=run.run_id, run=run, app_dir=app_dir
83
+ )
84
+
85
+ return node_info_store
86
+
87
+
88
+ # pylint: disable=too-many-arguments,too-many-locals
89
+ def worker(
90
+ taskins_queue: "Queue[TaskIns]",
91
+ taskres_queue: "Queue[TaskRes]",
92
+ node_info_store: dict[int, DeprecatedRunInfoStore],
57
93
  backend: Backend,
94
+ f_stop: threading.Event,
58
95
  ) -> None:
59
96
  """Get TaskIns from queue and pass it to an actor in the pool to execute it."""
60
- state = state_factory.state()
61
- while True:
97
+ while not f_stop.is_set():
98
+ out_mssg = None
62
99
  try:
63
- task_ins: TaskIns = await queue.get()
100
+ # Fetch from queue with timeout. We use a timeout so
101
+ # the stopping event can be evaluated even when the queue is empty.
102
+ task_ins: TaskIns = taskins_queue.get(timeout=1.0)
64
103
  node_id = task_ins.task.consumer.node_id
65
104
 
66
- # Register and retrieve runstate
67
- node_states[node_id].register_context(run_id=task_ins.run_id)
68
- context = node_states[node_id].retrieve_context(run_id=task_ins.run_id)
105
+ # Retrieve context
106
+ context = node_info_store[node_id].retrieve_context(run_id=task_ins.run_id)
69
107
 
70
108
  # Convert TaskIns to Message
71
109
  message = message_from_taskins(task_ins)
72
- # Set partition_id
73
- message.metadata.partition_id = nodes_mapping[node_id]
74
110
 
75
111
  # Let backend process message
76
- out_mssg, updated_context = await backend.process_message(
77
- app_fn, message, context
78
- )
112
+ out_mssg, updated_context = backend.process_message(message, context)
79
113
 
80
114
  # Update Context
81
- node_states[node_id].update_context(
115
+ node_info_store[node_id].update_context(
82
116
  task_ins.run_id, context=updated_context
83
117
  )
84
-
85
- # Convert to TaskRes
86
- task_res = message_to_taskres(out_mssg)
87
- # Store TaskRes in state
88
- state.store_task_res(task_res)
89
-
90
- except asyncio.CancelledError as e:
91
- log(DEBUG, "Async worker: %s", e)
92
- break
93
-
94
- except LoadClientAppError as app_ex:
95
- log(ERROR, "Async worker: %s", app_ex)
96
- log(ERROR, traceback.format_exc())
97
- raise
98
-
118
+ except Empty:
119
+ # An exception raised if queue.get times out
120
+ pass
121
+ # Exceptions aren't raised but reported as an error message
99
122
  except Exception as ex: # pylint: disable=broad-exception-caught
100
123
  log(ERROR, ex)
101
124
  log(ERROR, traceback.format_exc())
102
- break
103
125
 
126
+ if isinstance(ex, ClientAppException):
127
+ e_code = ErrorCode.CLIENT_APP_RAISED_EXCEPTION
128
+ elif isinstance(ex, LoadClientAppError):
129
+ e_code = ErrorCode.LOAD_CLIENT_APP_EXCEPTION
130
+ else:
131
+ e_code = ErrorCode.UNKNOWN
132
+
133
+ reason = str(type(ex)) + ":<'" + str(ex) + "'>"
134
+ out_mssg = message.create_error_reply(
135
+ error=Error(code=e_code, reason=reason)
136
+ )
137
+
138
+ finally:
139
+ if out_mssg:
140
+ # Convert to TaskRes
141
+ task_res = message_to_taskres(out_mssg)
142
+ # Store TaskRes in state
143
+ task_res.task.pushed_at = time.time()
144
+ taskres_queue.put(task_res)
104
145
 
105
- async def add_taskins_to_queue(
106
- queue: "asyncio.Queue[TaskIns]",
107
- state_factory: StateFactory,
146
+
147
+ def add_taskins_to_queue(
148
+ state: LinkState,
149
+ queue: "Queue[TaskIns]",
108
150
  nodes_mapping: NodeToPartitionMapping,
109
- backend: Backend,
110
- consumers: List["asyncio.Task[None]"],
111
- f_stop: asyncio.Event,
151
+ f_stop: threading.Event,
112
152
  ) -> None:
113
- """Retrieve TaskIns and add it to the queue."""
114
- state = state_factory.state()
115
- num_initial_consumers = len(consumers)
153
+ """Put TaskIns in a queue from State."""
116
154
  while not f_stop.is_set():
117
155
  for node_id in nodes_mapping.keys():
118
- task_ins = state.get_task_ins(node_id=node_id, limit=1)
119
- if task_ins:
120
- await queue.put(task_ins[0])
121
-
122
- # Count consumers that are running
123
- num_active = sum(not (cc.done()) for cc in consumers)
124
-
125
- # Alert if number of consumers decreased by half
126
- if num_active < num_initial_consumers // 2:
127
- log(
128
- WARN,
129
- "Number of active workers has more than halved: (%i/%i active)",
130
- num_active,
131
- num_initial_consumers,
132
- )
156
+ task_ins_list = state.get_task_ins(node_id=node_id, limit=1)
157
+ for task_ins in task_ins_list:
158
+ queue.put(task_ins)
159
+ sleep(0.1)
133
160
 
134
- # Break if consumers died
135
- if num_active == 0:
136
- raise RuntimeError("All workers have died. Ending Simulation.")
137
161
 
138
- # Log some stats
139
- log(
140
- DEBUG,
141
- "Simulation Engine stats: "
142
- "Active workers: (%i/%i) | %s (%i workers) | Tasks in queue: %i)",
143
- num_active,
144
- num_initial_consumers,
145
- backend.__class__.__name__,
146
- backend.num_workers,
147
- queue.qsize(),
148
- )
149
- await asyncio.sleep(1.0)
150
- log(DEBUG, "Async producer: Stopped pulling from StateFactory.")
162
+ def put_taskres_into_state(
163
+ state: LinkState, queue: "Queue[TaskRes]", f_stop: threading.Event
164
+ ) -> None:
165
+ """Put TaskRes into State from a queue."""
166
+ while not f_stop.is_set():
167
+ try:
168
+ taskres = queue.get(timeout=1.0)
169
+ state.store_task_res(taskres)
170
+ except Empty:
171
+ # queue is empty when timeout was triggered
172
+ pass
151
173
 
152
174
 
153
- async def run(
175
+ # pylint: disable=too-many-positional-arguments
176
+ def run_api(
154
177
  app_fn: Callable[[], ClientApp],
155
178
  backend_fn: Callable[[], Backend],
156
179
  nodes_mapping: NodeToPartitionMapping,
157
- state_factory: StateFactory,
158
- node_states: Dict[int, NodeState],
159
- f_stop: asyncio.Event,
180
+ state_factory: LinkStateFactory,
181
+ node_info_stores: dict[int, DeprecatedRunInfoStore],
182
+ f_stop: threading.Event,
160
183
  ) -> None:
161
- """Run the VCE async."""
162
- queue: "asyncio.Queue[TaskIns]" = asyncio.Queue(128)
184
+ """Run the VCE."""
185
+ taskins_queue: Queue[TaskIns] = Queue()
186
+ taskres_queue: Queue[TaskRes] = Queue()
163
187
 
164
188
  try:
165
189
 
@@ -167,29 +191,47 @@ async def run(
167
191
  backend = backend_fn()
168
192
 
169
193
  # Build backend
170
- await backend.build()
194
+ backend.build(app_fn)
171
195
 
172
196
  # Add workers (they submit Messages to Backend)
173
- worker_tasks = [
174
- asyncio.create_task(
175
- worker(
176
- app_fn, queue, node_states, state_factory, nodes_mapping, backend
177
- )
178
- )
179
- for _ in range(backend.num_workers)
180
- ]
181
- # Create producer (adds TaskIns into Queue)
182
- producer = asyncio.create_task(
183
- add_taskins_to_queue(
184
- queue, state_factory, nodes_mapping, backend, worker_tasks, f_stop
185
- )
197
+ state = state_factory.state()
198
+
199
+ extractor_th = threading.Thread(
200
+ target=add_taskins_to_queue,
201
+ args=(
202
+ state,
203
+ taskins_queue,
204
+ nodes_mapping,
205
+ f_stop,
206
+ ),
207
+ )
208
+ extractor_th.start()
209
+
210
+ injector_th = threading.Thread(
211
+ target=put_taskres_into_state,
212
+ args=(
213
+ state,
214
+ taskres_queue,
215
+ f_stop,
216
+ ),
186
217
  )
218
+ injector_th.start()
219
+
220
+ with ThreadPoolExecutor() as executor:
221
+ _ = [
222
+ executor.submit(
223
+ worker,
224
+ taskins_queue,
225
+ taskres_queue,
226
+ node_info_stores,
227
+ backend,
228
+ f_stop,
229
+ )
230
+ for _ in range(backend.num_workers)
231
+ ]
187
232
 
188
- # Wait for producer to finish
189
- # The producer runs forever until f_stop is set or until
190
- # all worker (consumer) coroutines are completed. Workers
191
- # also run forever and only end if an exception is raised.
192
- await asyncio.gather(producer)
233
+ extractor_th.join()
234
+ injector_th.join()
193
235
 
194
236
  except Exception as ex:
195
237
 
@@ -204,33 +246,30 @@ async def run(
204
246
  raise RuntimeError("Simulation Engine crashed.") from ex
205
247
 
206
248
  finally:
207
- # Produced task terminated, now cancel worker tasks
208
- for w_t in worker_tasks:
209
- _ = w_t.cancel()
210
-
211
- while not all(w_t.done() for w_t in worker_tasks):
212
- log(DEBUG, "Terminating async workers...")
213
- await asyncio.sleep(0.5)
214
-
215
- await asyncio.gather(*[w_t for w_t in worker_tasks if not w_t.done()])
216
249
 
217
250
  # Terminate backend
218
- await backend.terminate()
251
+ backend.terminate()
219
252
 
220
253
 
221
- # pylint: disable=too-many-arguments,unused-argument,too-many-locals
254
+ # pylint: disable=too-many-arguments,unused-argument,too-many-locals,too-many-branches
255
+ # pylint: disable=too-many-statements,too-many-positional-arguments
222
256
  def start_vce(
223
257
  backend_name: str,
224
258
  backend_config_json_stream: str,
225
259
  app_dir: str,
226
- f_stop: asyncio.Event,
260
+ is_app: bool,
261
+ f_stop: threading.Event,
262
+ run: Run,
263
+ flwr_dir: Optional[str] = None,
227
264
  client_app: Optional[ClientApp] = None,
228
265
  client_app_attr: Optional[str] = None,
229
266
  num_supernodes: Optional[int] = None,
230
- state_factory: Optional[StateFactory] = None,
267
+ state_factory: Optional[LinkStateFactory] = None,
231
268
  existing_nodes_mapping: Optional[NodeToPartitionMapping] = None,
232
269
  ) -> None:
233
270
  """Start Fleet API with the Simulation Engine."""
271
+ nodes_mapping = {}
272
+
234
273
  if client_app_attr is not None and client_app is not None:
235
274
  raise ValueError(
236
275
  "Both `client_app_attr` and `client_app` are provided, "
@@ -259,11 +298,12 @@ def start_vce(
259
298
  # Use mapping constructed externally. This also means nodes
260
299
  # have previously been registered.
261
300
  nodes_mapping = existing_nodes_mapping
301
+ app_dir = str(Path(app_dir).absolute())
262
302
 
263
303
  if not state_factory:
264
304
  log(INFO, "A StateFactory was not supplied to the SimulationEngine.")
265
305
  # Create an empty in-memory state factory
266
- state_factory = StateFactory(":flwr-in-memory-state:")
306
+ state_factory = LinkStateFactory(":flwr-in-memory-state:")
267
307
  log(INFO, "Created new %s.", state_factory.__class__.__name__)
268
308
 
269
309
  if num_supernodes:
@@ -272,13 +312,13 @@ def start_vce(
272
312
  num_nodes=num_supernodes, state_factory=state_factory
273
313
  )
274
314
 
275
- # Construct mapping of NodeStates
276
- node_states: Dict[int, NodeState] = {}
277
- for node_id in nodes_mapping:
278
- node_states[node_id] = NodeState()
315
+ # Construct mapping of DeprecatedRunInfoStore
316
+ node_info_stores = _register_node_info_stores(
317
+ nodes_mapping=nodes_mapping, run=run, app_dir=app_dir if is_app else None
318
+ )
279
319
 
280
320
  # Load backend config
281
- log(INFO, "Supported backends: %s", list(supported_backends.keys()))
321
+ log(DEBUG, "Supported backends: %s", list(supported_backends.keys()))
282
322
  backend_config = json.loads(backend_config_json_stream)
283
323
 
284
324
  try:
@@ -298,34 +338,56 @@ def start_vce(
298
338
 
299
339
  def backend_fn() -> Backend:
300
340
  """Instantiate a Backend."""
301
- return backend_type(backend_config, work_dir=app_dir)
302
-
303
- log(INFO, "client_app_attr = %s", client_app_attr)
341
+ return backend_type(backend_config)
304
342
 
305
343
  # Load ClientApp if needed
306
344
  def _load() -> ClientApp:
307
345
 
346
+ if client_app:
347
+ return client_app
308
348
  if client_app_attr:
309
- app: ClientApp = load_app(client_app_attr, LoadClientAppError)
349
+ return get_load_client_app_fn(
350
+ default_app_ref=client_app_attr,
351
+ app_path=app_dir,
352
+ flwr_dir=flwr_dir,
353
+ multi_app=False,
354
+ )(run.fab_id, run.fab_version, run.fab_hash)
310
355
 
311
- if not isinstance(app, ClientApp):
312
- raise LoadClientAppError(
313
- f"Attribute {client_app_attr} is not of type {ClientApp}",
314
- ) from None
315
-
316
- if client_app:
317
- app = client_app
318
- return app
356
+ raise ValueError("Either `client_app_attr` or `client_app` must be provided")
319
357
 
320
358
  app_fn = _load
321
359
 
322
- asyncio.run(
323
- run(
360
+ try:
361
+ # Test if ClientApp can be loaded
362
+ client_app = app_fn()
363
+
364
+ # Cache `ClientApp`
365
+ if client_app_attr:
366
+ # Now wrap the loaded ClientApp in a dummy function
367
+ # this prevents unnecessary low-level loading of ClientApp
368
+ def _load_client_app() -> ClientApp:
369
+ return client_app
370
+
371
+ app_fn = _load_client_app
372
+
373
+ # Run main simulation loop
374
+ run_api(
324
375
  app_fn,
325
376
  backend_fn,
326
377
  nodes_mapping,
327
378
  state_factory,
328
- node_states,
379
+ node_info_stores,
329
380
  f_stop,
330
381
  )
331
- )
382
+ except LoadClientAppError as loadapp_ex:
383
+ f_stop_delay = 10
384
+ log(
385
+ ERROR,
386
+ "LoadClientAppError exception encountered. Terminating simulation in %is",
387
+ f_stop_delay,
388
+ )
389
+ time.sleep(f_stop_delay)
390
+ f_stop.set() # set termination event
391
+ raise loadapp_ex
392
+ except Exception as ex:
393
+ raise ex
@@ -0,0 +1,28 @@
1
+ # Copyright 2024 Flower Labs GmbH. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+ """Flower LinkState."""
16
+
17
+
18
+ from .in_memory_linkstate import InMemoryLinkState as InMemoryLinkState
19
+ from .linkstate import LinkState as LinkState
20
+ from .linkstate_factory import LinkStateFactory as LinkStateFactory
21
+ from .sqlite_linkstate import SqliteLinkState as SqliteLinkState
22
+
23
+ __all__ = [
24
+ "InMemoryLinkState",
25
+ "LinkState",
26
+ "LinkStateFactory",
27
+ "SqliteLinkState",
28
+ ]