flwr 1.23.0__py3-none-any.whl → 1.24.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (292)
  1. flwr/__init__.py +16 -5
  2. flwr/app/error.py +2 -2
  3. flwr/app/exception.py +3 -3
  4. flwr/cli/app.py +19 -0
  5. flwr/cli/app_cmd/__init__.py +23 -0
  6. flwr/cli/app_cmd/publish.py +285 -0
  7. flwr/cli/app_cmd/review.py +252 -0
  8. flwr/cli/auth_plugin/auth_plugin.py +4 -5
  9. flwr/cli/auth_plugin/noop_auth_plugin.py +54 -11
  10. flwr/cli/auth_plugin/oidc_cli_plugin.py +32 -9
  11. flwr/cli/build.py +60 -18
  12. flwr/cli/cli_account_auth_interceptor.py +24 -7
  13. flwr/cli/config_utils.py +101 -13
  14. flwr/cli/federation/__init__.py +24 -0
  15. flwr/cli/federation/ls.py +140 -0
  16. flwr/cli/federation/show.py +317 -0
  17. flwr/cli/install.py +91 -13
  18. flwr/cli/log.py +52 -9
  19. flwr/cli/login/login.py +7 -4
  20. flwr/cli/ls.py +170 -130
  21. flwr/cli/new/new.py +33 -50
  22. flwr/cli/new/templates/app/code/task.pytorch.py.tpl +1 -0
  23. flwr/cli/new/templates/app/pyproject.baseline.toml.tpl +1 -1
  24. flwr/cli/new/templates/app/pyproject.flowertune.toml.tpl +1 -1
  25. flwr/cli/new/templates/app/pyproject.huggingface.toml.tpl +1 -1
  26. flwr/cli/new/templates/app/pyproject.jax.toml.tpl +1 -1
  27. flwr/cli/new/templates/app/pyproject.mlx.toml.tpl +1 -1
  28. flwr/cli/new/templates/app/pyproject.numpy.toml.tpl +1 -1
  29. flwr/cli/new/templates/app/pyproject.pytorch.toml.tpl +3 -3
  30. flwr/cli/new/templates/app/pyproject.pytorch_legacy_api.toml.tpl +1 -1
  31. flwr/cli/new/templates/app/pyproject.sklearn.toml.tpl +1 -1
  32. flwr/cli/new/templates/app/pyproject.tensorflow.toml.tpl +2 -2
  33. flwr/cli/new/templates/app/pyproject.xgboost.toml.tpl +1 -1
  34. flwr/cli/pull.py +10 -5
  35. flwr/cli/run/run.py +77 -30
  36. flwr/cli/run_utils.py +130 -0
  37. flwr/cli/stop.py +25 -7
  38. flwr/cli/supernode/ls.py +16 -8
  39. flwr/cli/supernode/register.py +9 -4
  40. flwr/cli/supernode/unregister.py +5 -3
  41. flwr/cli/utils.py +376 -16
  42. flwr/client/__init__.py +1 -1
  43. flwr/client/dpfedavg_numpy_client.py +4 -1
  44. flwr/client/grpc_adapter_client/connection.py +6 -7
  45. flwr/client/grpc_rere_client/connection.py +10 -11
  46. flwr/client/grpc_rere_client/grpc_adapter.py +6 -2
  47. flwr/client/grpc_rere_client/node_auth_client_interceptor.py +2 -1
  48. flwr/client/message_handler/message_handler.py +2 -2
  49. flwr/client/mod/secure_aggregation/secaggplus_mod.py +3 -3
  50. flwr/client/numpy_client.py +1 -1
  51. flwr/client/rest_client/connection.py +12 -14
  52. flwr/client/run_info_store.py +4 -5
  53. flwr/client/typing.py +1 -1
  54. flwr/clientapp/client_app.py +9 -10
  55. flwr/clientapp/mod/centraldp_mods.py +16 -17
  56. flwr/clientapp/mod/localdp_mod.py +8 -9
  57. flwr/clientapp/typing.py +1 -1
  58. flwr/clientapp/utils.py +3 -3
  59. flwr/common/address.py +1 -2
  60. flwr/common/args.py +3 -4
  61. flwr/common/config.py +13 -16
  62. flwr/common/constant.py +5 -2
  63. flwr/common/differential_privacy.py +3 -4
  64. flwr/common/event_log_plugin/event_log_plugin.py +3 -4
  65. flwr/common/exit/exit.py +15 -2
  66. flwr/common/exit/exit_code.py +19 -0
  67. flwr/common/exit/exit_handler.py +6 -2
  68. flwr/common/exit/signal_handler.py +5 -5
  69. flwr/common/grpc.py +6 -6
  70. flwr/common/inflatable_protobuf_utils.py +1 -1
  71. flwr/common/inflatable_utils.py +38 -21
  72. flwr/common/logger.py +19 -19
  73. flwr/common/message.py +4 -4
  74. flwr/common/object_ref.py +7 -7
  75. flwr/common/record/array.py +3 -3
  76. flwr/common/record/arrayrecord.py +18 -30
  77. flwr/common/record/configrecord.py +3 -3
  78. flwr/common/record/recorddict.py +5 -5
  79. flwr/common/record/typeddict.py +9 -2
  80. flwr/common/recorddict_compat.py +7 -10
  81. flwr/common/retry_invoker.py +20 -20
  82. flwr/common/secure_aggregation/ndarrays_arithmetic.py +3 -3
  83. flwr/common/serde.py +5 -4
  84. flwr/common/serde_utils.py +2 -2
  85. flwr/common/telemetry.py +9 -5
  86. flwr/common/typing.py +52 -37
  87. flwr/compat/client/app.py +38 -37
  88. flwr/compat/client/grpc_client/connection.py +11 -11
  89. flwr/compat/server/app.py +5 -6
  90. flwr/proto/appio_pb2.py +13 -3
  91. flwr/proto/appio_pb2.pyi +134 -65
  92. flwr/proto/appio_pb2_grpc.py +20 -0
  93. flwr/proto/appio_pb2_grpc.pyi +27 -0
  94. flwr/proto/clientappio_pb2.py +17 -7
  95. flwr/proto/clientappio_pb2.pyi +15 -0
  96. flwr/proto/clientappio_pb2_grpc.py +206 -40
  97. flwr/proto/clientappio_pb2_grpc.pyi +168 -53
  98. flwr/proto/control_pb2.py +71 -52
  99. flwr/proto/control_pb2.pyi +277 -111
  100. flwr/proto/control_pb2_grpc.py +249 -40
  101. flwr/proto/control_pb2_grpc.pyi +185 -52
  102. flwr/proto/error_pb2.py +13 -3
  103. flwr/proto/error_pb2.pyi +24 -6
  104. flwr/proto/error_pb2_grpc.py +20 -0
  105. flwr/proto/error_pb2_grpc.pyi +27 -0
  106. flwr/proto/fab_pb2.py +14 -4
  107. flwr/proto/fab_pb2.pyi +59 -31
  108. flwr/proto/fab_pb2_grpc.py +20 -0
  109. flwr/proto/fab_pb2_grpc.pyi +27 -0
  110. flwr/proto/federation_pb2.py +38 -0
  111. flwr/proto/federation_pb2.pyi +56 -0
  112. flwr/proto/federation_pb2_grpc.py +24 -0
  113. flwr/proto/federation_pb2_grpc.pyi +31 -0
  114. flwr/proto/fleet_pb2.py +14 -4
  115. flwr/proto/fleet_pb2.pyi +137 -61
  116. flwr/proto/fleet_pb2_grpc.py +189 -48
  117. flwr/proto/fleet_pb2_grpc.pyi +175 -61
  118. flwr/proto/grpcadapter_pb2.py +14 -4
  119. flwr/proto/grpcadapter_pb2.pyi +38 -16
  120. flwr/proto/grpcadapter_pb2_grpc.py +35 -4
  121. flwr/proto/grpcadapter_pb2_grpc.pyi +38 -7
  122. flwr/proto/heartbeat_pb2.py +17 -7
  123. flwr/proto/heartbeat_pb2.pyi +51 -22
  124. flwr/proto/heartbeat_pb2_grpc.py +20 -0
  125. flwr/proto/heartbeat_pb2_grpc.pyi +27 -0
  126. flwr/proto/log_pb2.py +13 -3
  127. flwr/proto/log_pb2.pyi +34 -11
  128. flwr/proto/log_pb2_grpc.py +20 -0
  129. flwr/proto/log_pb2_grpc.pyi +27 -0
  130. flwr/proto/message_pb2.py +15 -5
  131. flwr/proto/message_pb2.pyi +154 -86
  132. flwr/proto/message_pb2_grpc.py +20 -0
  133. flwr/proto/message_pb2_grpc.pyi +27 -0
  134. flwr/proto/node_pb2.py +15 -5
  135. flwr/proto/node_pb2.pyi +50 -25
  136. flwr/proto/node_pb2_grpc.py +20 -0
  137. flwr/proto/node_pb2_grpc.pyi +27 -0
  138. flwr/proto/recorddict_pb2.py +13 -3
  139. flwr/proto/recorddict_pb2.pyi +184 -107
  140. flwr/proto/recorddict_pb2_grpc.py +20 -0
  141. flwr/proto/recorddict_pb2_grpc.pyi +27 -0
  142. flwr/proto/run_pb2.py +40 -31
  143. flwr/proto/run_pb2.pyi +149 -84
  144. flwr/proto/run_pb2_grpc.py +20 -0
  145. flwr/proto/run_pb2_grpc.pyi +27 -0
  146. flwr/proto/serverappio_pb2.py +13 -3
  147. flwr/proto/serverappio_pb2.pyi +32 -8
  148. flwr/proto/serverappio_pb2_grpc.py +246 -65
  149. flwr/proto/serverappio_pb2_grpc.pyi +221 -85
  150. flwr/proto/simulationio_pb2.py +16 -8
  151. flwr/proto/simulationio_pb2.pyi +15 -0
  152. flwr/proto/simulationio_pb2_grpc.py +162 -41
  153. flwr/proto/simulationio_pb2_grpc.pyi +149 -55
  154. flwr/proto/transport_pb2.py +20 -10
  155. flwr/proto/transport_pb2.pyi +249 -160
  156. flwr/proto/transport_pb2_grpc.py +35 -4
  157. flwr/proto/transport_pb2_grpc.pyi +38 -8
  158. flwr/server/app.py +38 -17
  159. flwr/server/client_manager.py +4 -5
  160. flwr/server/client_proxy.py +10 -11
  161. flwr/server/compat/app.py +4 -5
  162. flwr/server/compat/app_utils.py +2 -1
  163. flwr/server/compat/grid_client_proxy.py +10 -12
  164. flwr/server/compat/legacy_context.py +3 -4
  165. flwr/server/fleet_event_log_interceptor.py +2 -1
  166. flwr/server/grid/grid.py +2 -3
  167. flwr/server/grid/grpc_grid.py +10 -8
  168. flwr/server/grid/inmemory_grid.py +4 -4
  169. flwr/server/run_serverapp.py +2 -3
  170. flwr/server/server.py +34 -39
  171. flwr/server/server_app.py +7 -8
  172. flwr/server/server_config.py +1 -2
  173. flwr/server/serverapp/app.py +34 -28
  174. flwr/server/serverapp_components.py +4 -5
  175. flwr/server/strategy/aggregate.py +9 -8
  176. flwr/server/strategy/bulyan.py +13 -11
  177. flwr/server/strategy/dp_adaptive_clipping.py +16 -20
  178. flwr/server/strategy/dp_fixed_clipping.py +12 -17
  179. flwr/server/strategy/dpfedavg_adaptive.py +3 -4
  180. flwr/server/strategy/dpfedavg_fixed.py +6 -10
  181. flwr/server/strategy/fault_tolerant_fedavg.py +14 -13
  182. flwr/server/strategy/fedadagrad.py +18 -14
  183. flwr/server/strategy/fedadam.py +16 -14
  184. flwr/server/strategy/fedavg.py +16 -17
  185. flwr/server/strategy/fedavg_android.py +15 -15
  186. flwr/server/strategy/fedavgm.py +21 -18
  187. flwr/server/strategy/fedmedian.py +2 -3
  188. flwr/server/strategy/fedopt.py +11 -10
  189. flwr/server/strategy/fedprox.py +10 -9
  190. flwr/server/strategy/fedtrimmedavg.py +12 -11
  191. flwr/server/strategy/fedxgb_bagging.py +13 -11
  192. flwr/server/strategy/fedxgb_cyclic.py +6 -6
  193. flwr/server/strategy/fedxgb_nn_avg.py +4 -4
  194. flwr/server/strategy/fedyogi.py +16 -14
  195. flwr/server/strategy/krum.py +12 -11
  196. flwr/server/strategy/qfedavg.py +16 -15
  197. flwr/server/strategy/strategy.py +6 -9
  198. flwr/server/superlink/fleet/grpc_adapter/grpc_adapter_servicer.py +2 -1
  199. flwr/server/superlink/fleet/grpc_bidi/flower_service_servicer.py +1 -2
  200. flwr/server/superlink/fleet/grpc_bidi/grpc_bridge.py +3 -4
  201. flwr/server/superlink/fleet/grpc_bidi/grpc_client_proxy.py +10 -12
  202. flwr/server/superlink/fleet/grpc_bidi/grpc_server.py +1 -3
  203. flwr/server/superlink/fleet/grpc_rere/fleet_servicer.py +4 -4
  204. flwr/server/superlink/fleet/grpc_rere/node_auth_server_interceptor.py +3 -2
  205. flwr/server/superlink/fleet/message_handler/message_handler.py +34 -28
  206. flwr/server/superlink/fleet/rest_rere/rest_api.py +2 -2
  207. flwr/server/superlink/fleet/vce/backend/backend.py +1 -1
  208. flwr/server/superlink/fleet/vce/backend/raybackend.py +5 -5
  209. flwr/server/superlink/fleet/vce/vce_api.py +15 -9
  210. flwr/server/superlink/linkstate/in_memory_linkstate.py +115 -150
  211. flwr/server/superlink/linkstate/linkstate.py +59 -43
  212. flwr/server/superlink/linkstate/linkstate_factory.py +22 -5
  213. flwr/server/superlink/linkstate/sqlite_linkstate.py +447 -438
  214. flwr/server/superlink/linkstate/utils.py +6 -6
  215. flwr/server/superlink/serverappio/serverappio_grpc.py +1 -2
  216. flwr/server/superlink/serverappio/serverappio_servicer.py +26 -21
  217. flwr/server/superlink/simulation/simulationio_grpc.py +1 -2
  218. flwr/server/superlink/simulation/simulationio_servicer.py +18 -13
  219. flwr/server/superlink/utils.py +4 -6
  220. flwr/server/typing.py +1 -1
  221. flwr/server/utils/tensorboard.py +15 -8
  222. flwr/server/workflow/default_workflows.py +5 -5
  223. flwr/server/workflow/secure_aggregation/secagg_workflow.py +2 -4
  224. flwr/server/workflow/secure_aggregation/secaggplus_workflow.py +8 -8
  225. flwr/serverapp/strategy/bulyan.py +16 -15
  226. flwr/serverapp/strategy/dp_adaptive_clipping.py +12 -11
  227. flwr/serverapp/strategy/dp_fixed_clipping.py +11 -14
  228. flwr/serverapp/strategy/fedadagrad.py +10 -11
  229. flwr/serverapp/strategy/fedadam.py +10 -11
  230. flwr/serverapp/strategy/fedavg.py +9 -10
  231. flwr/serverapp/strategy/fedavgm.py +17 -16
  232. flwr/serverapp/strategy/fedmedian.py +2 -2
  233. flwr/serverapp/strategy/fedopt.py +10 -11
  234. flwr/serverapp/strategy/fedprox.py +7 -8
  235. flwr/serverapp/strategy/fedtrimmedavg.py +9 -9
  236. flwr/serverapp/strategy/fedxgb_bagging.py +3 -3
  237. flwr/serverapp/strategy/fedxgb_cyclic.py +9 -9
  238. flwr/serverapp/strategy/fedyogi.py +9 -11
  239. flwr/serverapp/strategy/krum.py +7 -7
  240. flwr/serverapp/strategy/multikrum.py +9 -9
  241. flwr/serverapp/strategy/qfedavg.py +17 -16
  242. flwr/serverapp/strategy/strategy.py +6 -9
  243. flwr/serverapp/strategy/strategy_utils.py +7 -8
  244. flwr/simulation/app.py +46 -42
  245. flwr/simulation/legacy_app.py +12 -12
  246. flwr/simulation/ray_transport/ray_actor.py +10 -11
  247. flwr/simulation/ray_transport/ray_client_proxy.py +11 -12
  248. flwr/simulation/run_simulation.py +43 -43
  249. flwr/simulation/simulationio_connection.py +4 -4
  250. flwr/supercore/cli/flower_superexec.py +3 -4
  251. flwr/supercore/constant.py +31 -1
  252. flwr/supercore/corestate/corestate.py +24 -3
  253. flwr/supercore/corestate/in_memory_corestate.py +138 -0
  254. flwr/supercore/corestate/sqlite_corestate.py +157 -0
  255. flwr/supercore/ffs/disk_ffs.py +1 -2
  256. flwr/supercore/ffs/ffs.py +1 -2
  257. flwr/supercore/ffs/ffs_factory.py +1 -2
  258. flwr/{common → supercore}/heartbeat.py +20 -25
  259. flwr/supercore/object_store/in_memory_object_store.py +1 -2
  260. flwr/supercore/object_store/object_store.py +1 -2
  261. flwr/supercore/object_store/object_store_factory.py +1 -2
  262. flwr/supercore/object_store/sqlite_object_store.py +8 -7
  263. flwr/supercore/primitives/asymmetric.py +1 -1
  264. flwr/supercore/primitives/asymmetric_ed25519.py +11 -1
  265. flwr/supercore/sqlite_mixin.py +37 -34
  266. flwr/supercore/superexec/plugin/base_exec_plugin.py +1 -2
  267. flwr/supercore/superexec/plugin/exec_plugin.py +3 -3
  268. flwr/supercore/superexec/run_superexec.py +9 -13
  269. flwr/superlink/artifact_provider/artifact_provider.py +1 -2
  270. flwr/superlink/auth_plugin/auth_plugin.py +6 -9
  271. flwr/superlink/auth_plugin/noop_auth_plugin.py +6 -9
  272. flwr/superlink/federation/__init__.py +24 -0
  273. flwr/superlink/federation/federation_manager.py +64 -0
  274. flwr/superlink/federation/noop_federation_manager.py +71 -0
  275. flwr/superlink/servicer/control/control_account_auth_interceptor.py +22 -13
  276. flwr/superlink/servicer/control/control_event_log_interceptor.py +7 -7
  277. flwr/superlink/servicer/control/control_grpc.py +5 -6
  278. flwr/superlink/servicer/control/control_license_interceptor.py +3 -3
  279. flwr/superlink/servicer/control/control_servicer.py +102 -18
  280. flwr/supernode/cli/flower_supernode.py +58 -3
  281. flwr/supernode/nodestate/in_memory_nodestate.py +60 -49
  282. flwr/supernode/nodestate/nodestate.py +7 -8
  283. flwr/supernode/nodestate/nodestate_factory.py +7 -4
  284. flwr/supernode/runtime/run_clientapp.py +41 -22
  285. flwr/supernode/servicer/clientappio/clientappio_servicer.py +40 -10
  286. flwr/supernode/start_client_internal.py +158 -42
  287. {flwr-1.23.0.dist-info → flwr-1.24.0.dist-info}/METADATA +8 -8
  288. flwr-1.24.0.dist-info/RECORD +454 -0
  289. flwr/supercore/object_store/utils.py +0 -43
  290. flwr-1.23.0.dist-info/RECORD +0 -439
  291. {flwr-1.23.0.dist-info → flwr-1.24.0.dist-info}/WHEEL +0 -0
  292. {flwr-1.23.0.dist-info → flwr-1.24.0.dist-info}/entry_points.txt +0 -0
@@ -15,18 +15,27 @@
15
15
  """In-memory NodeState implementation."""
16
16
 
17
17
 
18
- import secrets
19
18
  from collections.abc import Sequence
20
19
  from dataclasses import dataclass
21
- from threading import Lock
22
- from typing import Optional
23
-
24
- from flwr.common import Context, Message
25
- from flwr.common.constant import FLWR_APP_TOKEN_LENGTH
20
+ from threading import Lock, RLock
21
+
22
+ from flwr.common import Context, Error, Message
23
+ from flwr.common.constant import ErrorCode
24
+ from flwr.common.inflatable import (
25
+ get_all_nested_objects,
26
+ get_object_tree,
27
+ no_object_id_recompute,
28
+ )
26
29
  from flwr.common.typing import Run
30
+ from flwr.supercore.corestate.in_memory_corestate import InMemoryCoreState
31
+ from flwr.supercore.object_store import ObjectStore
27
32
 
28
33
  from .nodestate import NodeState
29
34
 
35
+ CLIENT_APP_CRASHED_ERROR = Error(
36
+ ErrorCode.CLIENT_APP_CRASHED, "ClientApp stopped responding."
37
+ )
38
+
30
39
 
31
40
  @dataclass
32
41
  class MessageEntry:
@@ -36,27 +45,26 @@ class MessageEntry:
36
45
  is_retrieved: bool = False
37
46
 
38
47
 
39
- class InMemoryNodeState(NodeState): # pylint: disable=too-many-instance-attributes
48
+ class InMemoryNodeState(
49
+ NodeState, InMemoryCoreState
50
+ ): # pylint: disable=too-many-instance-attributes
40
51
  """In-memory NodeState implementation."""
41
52
 
42
- def __init__(self) -> None:
53
+ def __init__(self, object_store: ObjectStore) -> None:
54
+ super().__init__(object_store)
43
55
  # Store node_id
44
- self.node_id: Optional[int] = None
56
+ self.node_id: int | None = None
45
57
  # Store Object ID to MessageEntry mapping
46
58
  self.msg_store: dict[str, MessageEntry] = {}
47
- self.lock_msg_store = Lock()
59
+ self.lock_msg_store = RLock()
48
60
  # Store run ID to Run mapping
49
61
  self.run_store: dict[int, Run] = {}
50
62
  self.lock_run_store = Lock()
51
63
  # Store run ID to Context mapping
52
64
  self.ctx_store: dict[int, Context] = {}
53
65
  self.lock_ctx_store = Lock()
54
- # Store run ID to token mapping and token to run ID mapping
55
- self.token_store: dict[int, str] = {}
56
- self.token_to_run_id: dict[str, int] = {}
57
- self.lock_token_store = Lock()
58
66
 
59
- def set_node_id(self, node_id: Optional[int]) -> None:
67
+ def set_node_id(self, node_id: int | None) -> None:
60
68
  """Set the node ID."""
61
69
  self.node_id = node_id
62
70
 
@@ -66,8 +74,10 @@ class InMemoryNodeState(NodeState): # pylint: disable=too-many-instance-attribu
66
74
  raise ValueError("Node ID not set")
67
75
  return self.node_id
68
76
 
69
- def store_message(self, message: Message) -> Optional[str]:
77
+ def store_message(self, message: Message) -> str | None:
70
78
  """Store a message."""
79
+ # No need to check for expired tokens here
80
+ # The ClientAppIo servicer will first verify the token before storing messages
71
81
  with self.lock_msg_store:
72
82
  msg_id = message.metadata.message_id
73
83
  if msg_id == "" or msg_id in self.msg_store:
@@ -78,13 +88,14 @@ class InMemoryNodeState(NodeState): # pylint: disable=too-many-instance-attribu
78
88
  def get_messages(
79
89
  self,
80
90
  *,
81
- run_ids: Optional[Sequence[int]] = None,
82
- is_reply: Optional[bool] = None,
83
- limit: Optional[int] = None,
91
+ run_ids: Sequence[int] | None = None,
92
+ is_reply: bool | None = None,
93
+ limit: int | None = None,
84
94
  ) -> Sequence[Message]:
85
95
  """Retrieve messages based on the specified filters."""
86
- selected_messages: list[Message] = []
96
+ self._cleanup_expired_tokens()
87
97
 
98
+ selected_messages: list[Message] = []
88
99
  with self.lock_msg_store:
89
100
  # Iterate through all messages in the store
90
101
  for object_id in list(self.msg_store.keys()):
@@ -122,7 +133,7 @@ class InMemoryNodeState(NodeState): # pylint: disable=too-many-instance-attribu
122
133
  def delete_messages(
123
134
  self,
124
135
  *,
125
- message_ids: Optional[Sequence[str]] = None,
136
+ message_ids: Sequence[str] | None = None,
126
137
  ) -> None:
127
138
  """Delete messages based on the specified filters."""
128
139
  with self.lock_msg_store:
@@ -140,7 +151,7 @@ class InMemoryNodeState(NodeState): # pylint: disable=too-many-instance-attribu
140
151
  with self.lock_run_store:
141
152
  self.run_store[run.run_id] = run
142
153
 
143
- def get_run(self, run_id: int) -> Optional[Run]:
154
+ def get_run(self, run_id: int) -> Run | None:
144
155
  """Retrieve a run by its ID."""
145
156
  with self.lock_run_store:
146
157
  return self.run_store.get(run_id)
@@ -150,7 +161,7 @@ class InMemoryNodeState(NodeState): # pylint: disable=too-many-instance-attribu
150
161
  with self.lock_ctx_store:
151
162
  self.ctx_store[context.run_id] = context
152
163
 
153
- def get_context(self, run_id: int) -> Optional[Context]:
164
+ def get_context(self, run_id: int) -> Context | None:
154
165
  """Retrieve a context by its run ID."""
155
166
  with self.lock_ctx_store:
156
167
  return self.ctx_store.get(run_id)
@@ -171,29 +182,29 @@ class InMemoryNodeState(NodeState): # pylint: disable=too-many-instance-attribu
171
182
  ret -= set(self.token_store.keys())
172
183
  return list(ret)
173
184
 
174
- def create_token(self, run_id: int) -> Optional[str]:
175
- """Create a token for the given run ID."""
176
- token = secrets.token_hex(FLWR_APP_TOKEN_LENGTH) # Generate a random token
177
- with self.lock_token_store:
178
- if run_id in self.token_store:
179
- return None # Token already created for this run ID
180
- self.token_store[run_id] = token
181
- self.token_to_run_id[token] = run_id
182
- return token
183
-
184
- def verify_token(self, run_id: int, token: str) -> bool:
185
- """Verify a token for the given run ID."""
186
- with self.lock_token_store:
187
- return self.token_store.get(run_id) == token
188
-
189
- def delete_token(self, run_id: int) -> None:
190
- """Delete the token for the given run ID."""
191
- with self.lock_token_store:
192
- token = self.token_store.pop(run_id, None)
193
- if token is not None:
194
- self.token_to_run_id.pop(token, None)
195
-
196
- def get_run_id_by_token(self, token: str) -> Optional[int]:
197
- """Get the run ID associated with a given token."""
198
- with self.lock_token_store:
199
- return self.token_to_run_id.get(token)
185
+ def _on_tokens_expired(self, expired_records: list[tuple[int, float]]) -> None:
186
+ """Insert error replies for messages associated with expired tokens."""
187
+ with self.lock_msg_store:
188
+ # Find all retrieved messages associated with expired run IDs
189
+ expired_run_ids = {run_id for run_id, _ in expired_records}
190
+ messages_to_reply: list[Message] = []
191
+ for entry in self.msg_store.values():
192
+ msg = entry.message
193
+ if msg.metadata.run_id in expired_run_ids and entry.is_retrieved:
194
+ messages_to_reply.append(msg)
195
+
196
+ # Create and store error replies for each message
197
+ for msg in messages_to_reply:
198
+ error_reply = Message(CLIENT_APP_CRASHED_ERROR, reply_to=msg)
199
+
200
+ # Insert objects of the error reply into the object store
201
+ with no_object_id_recompute():
202
+ # pylint: disable-next=W0212
203
+ error_reply.metadata._message_id = error_reply.object_id # type: ignore
204
+ object_tree = get_object_tree(error_reply)
205
+ self.object_store.preregister(msg.metadata.run_id, object_tree)
206
+ for obj_id, obj in get_all_nested_objects(error_reply).items():
207
+ self.object_store.put(obj_id, obj.deflate())
208
+
209
+ # Store the error reply message
210
+ self.store_message(error_reply)
@@ -17,7 +17,6 @@
17
17
 
18
18
  from abc import abstractmethod
19
19
  from collections.abc import Sequence
20
- from typing import Optional
21
20
 
22
21
  from flwr.common import Context, Message
23
22
  from flwr.common.typing import Run
@@ -36,7 +35,7 @@ class NodeState(CoreState):
36
35
  """Get the node ID."""
37
36
 
38
37
  @abstractmethod
39
- def store_message(self, message: Message) -> Optional[str]:
38
+ def store_message(self, message: Message) -> str | None:
40
39
  """Store a message.
41
40
 
42
41
  Parameters
@@ -54,9 +53,9 @@ class NodeState(CoreState):
54
53
  def get_messages(
55
54
  self,
56
55
  *,
57
- run_ids: Optional[Sequence[int]] = None,
58
- is_reply: Optional[bool] = None,
59
- limit: Optional[int] = None,
56
+ run_ids: Sequence[int] | None = None,
57
+ is_reply: bool | None = None,
58
+ limit: int | None = None,
60
59
  ) -> Sequence[Message]:
61
60
  """Retrieve messages based on the specified filters.
62
61
 
@@ -89,7 +88,7 @@ class NodeState(CoreState):
89
88
  def delete_messages(
90
89
  self,
91
90
  *,
92
- message_ids: Optional[Sequence[str]] = None,
91
+ message_ids: Sequence[str] | None = None,
93
92
  ) -> None:
94
93
  """Delete messages based on the specified filters.
95
94
 
@@ -118,7 +117,7 @@ class NodeState(CoreState):
118
117
  """
119
118
 
120
119
  @abstractmethod
121
- def get_run(self, run_id: int) -> Optional[Run]:
120
+ def get_run(self, run_id: int) -> Run | None:
122
121
  """Retrieve a run by its ID.
123
122
 
124
123
  Parameters
@@ -143,7 +142,7 @@ class NodeState(CoreState):
143
142
  """
144
143
 
145
144
  @abstractmethod
146
- def get_context(self, run_id: int) -> Optional[Context]:
145
+ def get_context(self, run_id: int) -> Context | None:
147
146
  """Retrieve a context by its run ID.
148
147
 
149
148
  Parameters
@@ -16,7 +16,8 @@
16
16
 
17
17
 
18
18
  import threading
19
- from typing import Optional
19
+
20
+ from flwr.supercore.object_store import ObjectStoreFactory
20
21
 
21
22
  from .in_memory_nodestate import InMemoryNodeState
22
23
  from .nodestate import NodeState
@@ -25,8 +26,9 @@ from .nodestate import NodeState
25
26
  class NodeStateFactory:
26
27
  """Factory class that creates NodeState instances."""
27
28
 
28
- def __init__(self) -> None:
29
- self.state_instance: Optional[NodeState] = None
29
+ def __init__(self, objectstore_factory: ObjectStoreFactory) -> None:
30
+ self.objectstore_factory = objectstore_factory
31
+ self.state_instance: NodeState | None = None
30
32
  self.lock = threading.RLock()
31
33
 
32
34
  def state(self) -> NodeState:
@@ -34,5 +36,6 @@ class NodeStateFactory:
34
36
  # Lock access to NodeStateFactory to prevent returning different instances
35
37
  with self.lock:
36
38
  if self.state_instance is None:
37
- self.state_instance = InMemoryNodeState()
39
+ object_store = self.objectstore_factory.store()
40
+ self.state_instance = InMemoryNodeState(object_store)
38
41
  return self.state_instance
@@ -15,9 +15,7 @@
15
15
  """Flower ClientApp process."""
16
16
 
17
17
 
18
- import gc
19
18
  from logging import DEBUG, ERROR, INFO
20
- from typing import Optional
21
19
 
22
20
  import grpc
23
21
 
@@ -28,6 +26,7 @@ from flwr.clientapp.utils import get_load_client_app_fn
28
26
  from flwr.common import Context, Message
29
27
  from flwr.common.config import get_flwr_dir
30
28
  from flwr.common.constant import ErrorCode
29
+ from flwr.common.exit import ExitCode, flwr_exit, register_signal_handlers
31
30
  from flwr.common.grpc import create_channel, on_channel_state_change
32
31
  from flwr.common.inflatable import (
33
32
  get_all_nested_objects,
@@ -50,6 +49,7 @@ from flwr.common.serde import (
50
49
  message_to_proto,
51
50
  run_from_proto,
52
51
  )
52
+ from flwr.common.telemetry import EventType, event
53
53
  from flwr.common.typing import Fab, Run
54
54
  from flwr.proto.appio_pb2 import ( # pylint: disable=E0611
55
55
  PullAppInputsRequest,
@@ -63,27 +63,41 @@ from flwr.proto.appio_pb2 import ( # pylint: disable=E0611
63
63
  from flwr.proto.clientappio_pb2_grpc import ClientAppIoStub
64
64
  from flwr.proto.node_pb2 import Node # pylint: disable=E0611
65
65
  from flwr.supercore.app_utils import start_parent_process_monitor
66
+ from flwr.supercore.heartbeat import HeartbeatSender, make_app_heartbeat_fn_grpc
66
67
  from flwr.supercore.utils import mask_string
67
68
 
68
69
 
69
70
  def run_clientapp( # pylint: disable=R0913, R0914, R0917
70
71
  clientappio_api_address: str,
71
72
  token: str,
72
- flwr_dir: Optional[str] = None,
73
- certificates: Optional[bytes] = None,
74
- parent_pid: Optional[int] = None,
73
+ flwr_dir: str | None = None,
74
+ certificates: bytes | None = None,
75
+ parent_pid: int | None = None,
75
76
  ) -> None:
76
77
  """Run Flower ClientApp process."""
77
78
  # Monitor the main process in case of SIGKILL
78
79
  if parent_pid is not None:
79
80
  start_parent_process_monitor(parent_pid)
80
81
 
82
+ event(EventType.FLWR_CLIENTAPP_RUN_ENTER)
83
+
81
84
  channel = create_channel(
82
85
  server_address=clientappio_api_address,
83
86
  insecure=(certificates is None),
84
87
  root_certificates=certificates,
85
88
  )
86
89
  channel.subscribe(on_channel_state_change)
90
+ heartbeat_sender = None
91
+
92
+ def on_exit() -> None:
93
+ if heartbeat_sender is not None and heartbeat_sender.is_running:
94
+ heartbeat_sender.stop()
95
+ channel.close()
96
+
97
+ register_signal_handlers(
98
+ event_type=EventType.FLWR_CLIENTAPP_RUN_LEAVE,
99
+ exit_handlers=[on_exit],
100
+ )
87
101
 
88
102
  # Resolve directory where FABs are installed
89
103
  flwr_dir_ = get_flwr_dir(flwr_dir)
@@ -91,22 +105,27 @@ def run_clientapp( # pylint: disable=R0913, R0914, R0917
91
105
  stub = ClientAppIoStub(channel)
92
106
  _wrap_stub(stub, _make_simple_grpc_retry_invoker())
93
107
 
108
+ # Start app heartbeat
109
+ heartbeat_sender = HeartbeatSender(make_app_heartbeat_fn_grpc(stub, token))
110
+ heartbeat_sender.start()
111
+
94
112
  # Pull Message, Context, Run and (optional) FAB from SuperNode
95
113
  message, context, run, fab = pull_clientappinputs(stub=stub, token=token)
96
114
 
97
- # Install FAB, if provided
98
- if fab:
99
- log(DEBUG, "[flwr-clientapp] Start FAB installation.")
100
- install_from_fab(fab.content, flwr_dir=flwr_dir_, skip_prompt=True)
115
+ try:
101
116
 
102
- load_client_app_fn = get_load_client_app_fn(
103
- default_app_ref="",
104
- app_path=None,
105
- multi_app=True,
106
- flwr_dir=str(flwr_dir_),
107
- )
117
+ # Install FAB, if provided
118
+ if fab:
119
+ log(DEBUG, "[flwr-clientapp] Start FAB installation.")
120
+ install_from_fab(fab.content, flwr_dir=flwr_dir_, skip_prompt=True)
121
+
122
+ load_client_app_fn = get_load_client_app_fn(
123
+ default_app_ref="",
124
+ app_path=None,
125
+ multi_app=True,
126
+ flwr_dir=str(flwr_dir_),
127
+ )
108
128
 
109
- try:
110
129
  # Load ClientApp
111
130
  log(DEBUG, "[flwr-clientapp] Start `ClientApp` Loading.")
112
131
  client_app: ClientApp = load_client_app_fn(
@@ -137,18 +156,18 @@ def run_clientapp( # pylint: disable=R0913, R0914, R0917
137
156
  stub=stub, token=token, message=reply_message, context=context
138
157
  )
139
158
 
140
- del client_app, message, context, run, fab, reply_message
141
- gc.collect()
142
-
143
159
  except grpc.RpcError as e:
144
160
  log(ERROR, "GRPC error occurred: %s", str(e))
145
- finally:
146
- channel.close()
161
+
162
+ flwr_exit(
163
+ code=ExitCode.SUCCESS,
164
+ event_type=EventType.FLWR_CLIENTAPP_RUN_LEAVE,
165
+ )
147
166
 
148
167
 
149
168
  def pull_clientappinputs(
150
169
  stub: ClientAppIoStub, token: str
151
- ) -> tuple[Message, Context, Run, Optional[Fab]]:
170
+ ) -> tuple[Message, Context, Run, Fab | None]:
152
171
  """Pull ClientAppInputs from SuperNode."""
153
172
  masked_token = mask_string(token)
154
173
  log(INFO, "[flwr-clientapp] Pull `ClientAppInputs` for token %s", masked_token)
@@ -35,7 +35,7 @@ from flwr.common.typing import Fab, Run
35
35
 
36
36
  # pylint: disable=E0611
37
37
  from flwr.proto import clientappio_pb2_grpc
38
- from flwr.proto.appio_pb2 import ( # pylint: disable=E0401
38
+ from flwr.proto.appio_pb2 import (
39
39
  ListAppsToLaunchRequest,
40
40
  ListAppsToLaunchResponse,
41
41
  PullAppInputsRequest,
@@ -49,6 +49,7 @@ from flwr.proto.appio_pb2 import ( # pylint: disable=E0401
49
49
  RequestTokenRequest,
50
50
  RequestTokenResponse,
51
51
  )
52
+ from flwr.proto.heartbeat_pb2 import SendAppHeartbeatRequest, SendAppHeartbeatResponse
52
53
  from flwr.proto.message_pb2 import (
53
54
  ConfirmMessageReceivedRequest,
54
55
  ConfirmMessageReceivedResponse,
@@ -57,12 +58,11 @@ from flwr.proto.message_pb2 import (
57
58
  PushObjectRequest,
58
59
  PushObjectResponse,
59
60
  )
60
- from flwr.proto.run_pb2 import GetRunRequest, GetRunResponse # pylint: disable=E0611
61
+ from flwr.proto.run_pb2 import GetRunRequest, GetRunResponse
61
62
 
62
63
  # pylint: disable=E0601
63
64
  from flwr.supercore.ffs import FfsFactory
64
65
  from flwr.supercore.object_store import NoObjectInStoreError, ObjectStoreFactory
65
- from flwr.supercore.object_store.utils import store_mapping_and_register_objects
66
66
  from flwr.supernode.nodestate import NodeStateFactory
67
67
 
68
68
 
@@ -151,7 +151,24 @@ class ClientAppIoServicer(clientappio_pb2_grpc.ClientAppIoServicer):
151
151
  # Retrieve context, run and fab for this run
152
152
  context = cast(Context, state.get_context(run_id))
153
153
  run = cast(Run, state.get_run(run_id))
154
- fab = Fab(run.fab_hash, ffs.get(run.fab_hash)[0], ffs.get(run.fab_hash)[1]) # type: ignore
154
+
155
+ # Retrieve FAB from FFS
156
+ if result := ffs.get(run.fab_hash):
157
+ content, verifications = result
158
+ log(
159
+ DEBUG,
160
+ "Retrieved FAB: hash=%s, content_len=%d, verifications=%s",
161
+ run.fab_hash,
162
+ len(content),
163
+ verifications,
164
+ )
165
+ fab = Fab(run.fab_hash, content, verifications)
166
+ else:
167
+ context.abort(
168
+ grpc.StatusCode.NOT_FOUND,
169
+ f"FAB with hash {run.fab_hash} not found in FFS.",
170
+ )
171
+ raise RuntimeError("This line should never be reached.")
155
172
 
156
173
  return PullAppInputsResponse(
157
174
  context=context_to_proto(context),
@@ -231,19 +248,32 @@ class ClientAppIoServicer(clientappio_pb2_grpc.ClientAppIoServicer):
231
248
  )
232
249
  raise RuntimeError("This line should never be reached.")
233
250
 
251
+ # Store Message object to descendants mapping and preregister objects
252
+ objects_to_push: set[str] = set()
253
+ for object_tree in request.message_object_trees:
254
+ objects_to_push |= set(store.preregister(run_id, object_tree))
234
255
  # Save the message to the state
235
256
  state.store_message(message_from_proto(request.messages_list[0]))
236
257
 
237
- # Store Message object to descendants mapping and preregister objects
238
- objects_to_push = store_mapping_and_register_objects(store, request=request)
239
-
240
258
  return PushAppMessagesResponse(objects_to_push=objects_to_push)
241
259
 
260
+ def SendAppHeartbeat(
261
+ self, request: SendAppHeartbeatRequest, context: grpc.ServicerContext
262
+ ) -> SendAppHeartbeatResponse:
263
+ """Handle a heartbeat from an app process."""
264
+ log(DEBUG, "ClientAppIoServicer.SendAppHeartbeat")
265
+ # Initialize state
266
+ state = self.state_factory.state()
267
+
268
+ # Acknowledge the heartbeat
269
+ success = state.acknowledge_app_heartbeat(request.token)
270
+ return SendAppHeartbeatResponse(success=success)
271
+
242
272
  def PushObject(
243
273
  self, request: PushObjectRequest, context: grpc.ServicerContext
244
274
  ) -> PushObjectResponse:
245
275
  """Push an object to the ObjectStore."""
246
- log(DEBUG, "ServerAppIoServicer.PushObject")
276
+ log(DEBUG, "ClientAppIoServicer.PushObject")
247
277
 
248
278
  # Init state and store
249
279
  store = self.objectstore_factory.store()
@@ -265,7 +295,7 @@ class ClientAppIoServicer(clientappio_pb2_grpc.ClientAppIoServicer):
265
295
  self, request: PullObjectRequest, context: grpc.ServicerContext
266
296
  ) -> PullObjectResponse:
267
297
  """Pull an object from the ObjectStore."""
268
- log(DEBUG, "ServerAppIoServicer.PullObject")
298
+ log(DEBUG, "ClientAppIoServicer.PullObject")
269
299
 
270
300
  # Init state and store
271
301
  store = self.objectstore_factory.store()
@@ -285,7 +315,7 @@ class ClientAppIoServicer(clientappio_pb2_grpc.ClientAppIoServicer):
285
315
  self, request: ConfirmMessageReceivedRequest, context: grpc.ServicerContext
286
316
  ) -> ConfirmMessageReceivedResponse:
287
317
  """Confirm message received."""
288
- log(DEBUG, "ServerAppIoServicer.ConfirmMessageReceived")
318
+ log(DEBUG, "ClientAppIoServicer.ConfirmMessageReceived")
289
319
 
290
320
  # Init state and store
291
321
  store = self.objectstore_factory.store()