flwr 1.19.0__py3-none-any.whl → 1.21.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (167)
  1. flwr/__init__.py +4 -1
  2. flwr/app/__init__.py +28 -0
  3. flwr/app/exception.py +31 -0
  4. flwr/cli/auth_plugin/oidc_cli_plugin.py +4 -4
  5. flwr/cli/build.py +15 -5
  6. flwr/cli/cli_user_auth_interceptor.py +1 -1
  7. flwr/cli/config_utils.py +3 -3
  8. flwr/cli/constant.py +25 -8
  9. flwr/cli/log.py +9 -9
  10. flwr/cli/login/login.py +3 -3
  11. flwr/cli/ls.py +5 -5
  12. flwr/cli/new/new.py +23 -4
  13. flwr/cli/new/templates/app/README.flowertune.md.tpl +2 -0
  14. flwr/cli/new/templates/app/README.md.tpl +5 -0
  15. flwr/cli/new/templates/app/code/__init__.pytorch_msg_api.py.tpl +1 -0
  16. flwr/cli/new/templates/app/code/client.pytorch_msg_api.py.tpl +80 -0
  17. flwr/cli/new/templates/app/code/server.pytorch_msg_api.py.tpl +41 -0
  18. flwr/cli/new/templates/app/code/task.pytorch_msg_api.py.tpl +98 -0
  19. flwr/cli/new/templates/app/pyproject.baseline.toml.tpl +14 -3
  20. flwr/cli/new/templates/app/pyproject.flowertune.toml.tpl +13 -1
  21. flwr/cli/new/templates/app/pyproject.huggingface.toml.tpl +21 -2
  22. flwr/cli/new/templates/app/pyproject.jax.toml.tpl +18 -1
  23. flwr/cli/new/templates/app/pyproject.mlx.toml.tpl +19 -2
  24. flwr/cli/new/templates/app/pyproject.numpy.toml.tpl +18 -1
  25. flwr/cli/new/templates/app/pyproject.pytorch.toml.tpl +20 -3
  26. flwr/cli/new/templates/app/pyproject.pytorch_msg_api.toml.tpl +53 -0
  27. flwr/cli/new/templates/app/pyproject.sklearn.toml.tpl +18 -1
  28. flwr/cli/new/templates/app/pyproject.tensorflow.toml.tpl +18 -1
  29. flwr/cli/run/run.py +53 -50
  30. flwr/cli/stop.py +7 -4
  31. flwr/cli/utils.py +29 -11
  32. flwr/client/grpc_adapter_client/connection.py +11 -4
  33. flwr/client/grpc_rere_client/connection.py +93 -129
  34. flwr/client/rest_client/connection.py +134 -164
  35. flwr/clientapp/__init__.py +10 -0
  36. flwr/clientapp/mod/__init__.py +26 -0
  37. flwr/clientapp/mod/centraldp_mods.py +132 -0
  38. flwr/common/args.py +20 -6
  39. flwr/common/auth_plugin/__init__.py +4 -4
  40. flwr/common/auth_plugin/auth_plugin.py +7 -7
  41. flwr/common/constant.py +26 -5
  42. flwr/common/event_log_plugin/event_log_plugin.py +1 -1
  43. flwr/common/exit/__init__.py +4 -0
  44. flwr/common/exit/exit.py +8 -1
  45. flwr/common/exit/exit_code.py +42 -8
  46. flwr/common/exit/exit_handler.py +62 -0
  47. flwr/common/{exit_handlers.py → exit/signal_handler.py} +20 -37
  48. flwr/common/grpc.py +1 -1
  49. flwr/common/{inflatable_grpc_utils.py → inflatable_protobuf_utils.py} +52 -10
  50. flwr/common/inflatable_utils.py +191 -24
  51. flwr/common/logger.py +1 -1
  52. flwr/common/record/array.py +101 -22
  53. flwr/common/record/arraychunk.py +59 -0
  54. flwr/common/retry_invoker.py +30 -11
  55. flwr/common/serde.py +0 -28
  56. flwr/common/telemetry.py +4 -0
  57. flwr/compat/client/app.py +14 -31
  58. flwr/compat/server/app.py +2 -2
  59. flwr/proto/appio_pb2.py +51 -0
  60. flwr/proto/appio_pb2.pyi +195 -0
  61. flwr/proto/appio_pb2_grpc.py +4 -0
  62. flwr/proto/appio_pb2_grpc.pyi +4 -0
  63. flwr/proto/clientappio_pb2.py +4 -19
  64. flwr/proto/clientappio_pb2.pyi +0 -125
  65. flwr/proto/clientappio_pb2_grpc.py +269 -29
  66. flwr/proto/clientappio_pb2_grpc.pyi +114 -21
  67. flwr/proto/control_pb2.py +62 -0
  68. flwr/proto/{exec_pb2_grpc.py → control_pb2_grpc.py} +54 -54
  69. flwr/proto/{exec_pb2_grpc.pyi → control_pb2_grpc.pyi} +28 -28
  70. flwr/proto/fleet_pb2.py +12 -20
  71. flwr/proto/fleet_pb2.pyi +6 -36
  72. flwr/proto/serverappio_pb2.py +8 -31
  73. flwr/proto/serverappio_pb2.pyi +0 -152
  74. flwr/proto/serverappio_pb2_grpc.py +107 -38
  75. flwr/proto/serverappio_pb2_grpc.pyi +47 -20
  76. flwr/proto/simulationio_pb2.py +4 -11
  77. flwr/proto/simulationio_pb2.pyi +0 -58
  78. flwr/proto/simulationio_pb2_grpc.py +129 -27
  79. flwr/proto/simulationio_pb2_grpc.pyi +52 -13
  80. flwr/server/app.py +130 -153
  81. flwr/server/fleet_event_log_interceptor.py +4 -0
  82. flwr/server/grid/grpc_grid.py +94 -54
  83. flwr/server/grid/inmemory_grid.py +1 -0
  84. flwr/server/serverapp/app.py +165 -144
  85. flwr/server/superlink/fleet/grpc_adapter/grpc_adapter_servicer.py +8 -0
  86. flwr/server/superlink/fleet/grpc_rere/fleet_servicer.py +1 -1
  87. flwr/server/superlink/fleet/grpc_rere/server_interceptor.py +2 -5
  88. flwr/server/superlink/fleet/message_handler/message_handler.py +10 -16
  89. flwr/server/superlink/fleet/rest_rere/rest_api.py +1 -2
  90. flwr/server/superlink/fleet/vce/backend/raybackend.py +3 -1
  91. flwr/server/superlink/fleet/vce/vce_api.py +6 -6
  92. flwr/server/superlink/linkstate/in_memory_linkstate.py +34 -0
  93. flwr/server/superlink/linkstate/linkstate.py +2 -1
  94. flwr/server/superlink/linkstate/sqlite_linkstate.py +45 -0
  95. flwr/server/superlink/serverappio/serverappio_grpc.py +2 -2
  96. flwr/server/superlink/serverappio/serverappio_servicer.py +95 -48
  97. flwr/server/superlink/simulation/simulationio_grpc.py +1 -1
  98. flwr/server/superlink/simulation/simulationio_servicer.py +98 -22
  99. flwr/server/superlink/utils.py +0 -35
  100. flwr/serverapp/__init__.py +12 -0
  101. flwr/serverapp/dp_fixed_clipping.py +352 -0
  102. flwr/serverapp/exception.py +38 -0
  103. flwr/serverapp/strategy/__init__.py +38 -0
  104. flwr/serverapp/strategy/dp_fixed_clipping.py +352 -0
  105. flwr/serverapp/strategy/fedadagrad.py +162 -0
  106. flwr/serverapp/strategy/fedadam.py +181 -0
  107. flwr/serverapp/strategy/fedavg.py +295 -0
  108. flwr/serverapp/strategy/fedopt.py +218 -0
  109. flwr/serverapp/strategy/fedyogi.py +173 -0
  110. flwr/serverapp/strategy/result.py +105 -0
  111. flwr/serverapp/strategy/strategy.py +285 -0
  112. flwr/serverapp/strategy/strategy_utils.py +251 -0
  113. flwr/serverapp/strategy/strategy_utils_tests.py +304 -0
  114. flwr/simulation/app.py +159 -154
  115. flwr/simulation/run_simulation.py +17 -0
  116. flwr/supercore/app_utils.py +58 -0
  117. flwr/supercore/cli/__init__.py +22 -0
  118. flwr/supercore/cli/flower_superexec.py +141 -0
  119. flwr/supercore/corestate/__init__.py +22 -0
  120. flwr/supercore/corestate/corestate.py +81 -0
  121. flwr/{server/superlink → supercore}/ffs/disk_ffs.py +1 -1
  122. flwr/supercore/grpc_health/__init__.py +25 -0
  123. flwr/supercore/grpc_health/health_server.py +53 -0
  124. flwr/supercore/grpc_health/simple_health_servicer.py +38 -0
  125. flwr/supercore/license_plugin/__init__.py +22 -0
  126. flwr/supercore/license_plugin/license_plugin.py +26 -0
  127. flwr/supercore/object_store/in_memory_object_store.py +31 -31
  128. flwr/supercore/object_store/object_store.py +20 -42
  129. flwr/supercore/object_store/utils.py +43 -0
  130. flwr/{superexec → supercore/superexec}/__init__.py +1 -1
  131. flwr/supercore/superexec/plugin/__init__.py +28 -0
  132. flwr/supercore/superexec/plugin/base_exec_plugin.py +53 -0
  133. flwr/supercore/superexec/plugin/clientapp_exec_plugin.py +28 -0
  134. flwr/supercore/superexec/plugin/exec_plugin.py +71 -0
  135. flwr/supercore/superexec/plugin/serverapp_exec_plugin.py +28 -0
  136. flwr/supercore/superexec/plugin/simulation_exec_plugin.py +28 -0
  137. flwr/supercore/superexec/run_superexec.py +185 -0
  138. flwr/supercore/utils.py +32 -0
  139. flwr/superlink/servicer/__init__.py +15 -0
  140. flwr/superlink/servicer/control/__init__.py +22 -0
  141. flwr/{superexec/exec_event_log_interceptor.py → superlink/servicer/control/control_event_log_interceptor.py} +9 -5
  142. flwr/{superexec/exec_grpc.py → superlink/servicer/control/control_grpc.py} +39 -28
  143. flwr/superlink/servicer/control/control_license_interceptor.py +82 -0
  144. flwr/{superexec/exec_servicer.py → superlink/servicer/control/control_servicer.py} +79 -31
  145. flwr/{superexec/exec_user_auth_interceptor.py → superlink/servicer/control/control_user_auth_interceptor.py} +18 -10
  146. flwr/supernode/cli/flower_supernode.py +3 -7
  147. flwr/supernode/cli/flwr_clientapp.py +20 -16
  148. flwr/supernode/nodestate/in_memory_nodestate.py +13 -4
  149. flwr/supernode/nodestate/nodestate.py +3 -44
  150. flwr/supernode/runtime/run_clientapp.py +129 -115
  151. flwr/supernode/servicer/clientappio/__init__.py +1 -3
  152. flwr/supernode/servicer/clientappio/clientappio_servicer.py +217 -165
  153. flwr/supernode/start_client_internal.py +205 -148
  154. {flwr-1.19.0.dist-info → flwr-1.21.0.dist-info}/METADATA +5 -3
  155. {flwr-1.19.0.dist-info → flwr-1.21.0.dist-info}/RECORD +161 -117
  156. {flwr-1.19.0.dist-info → flwr-1.21.0.dist-info}/entry_points.txt +1 -0
  157. flwr/common/inflatable_rest_utils.py +0 -99
  158. flwr/proto/exec_pb2.py +0 -62
  159. flwr/superexec/app.py +0 -45
  160. flwr/superexec/deployment.py +0 -192
  161. flwr/superexec/executor.py +0 -100
  162. flwr/superexec/simulation.py +0 -130
  163. /flwr/proto/{exec_pb2.pyi → control_pb2.pyi} +0 -0
  164. /flwr/{server/superlink → supercore}/ffs/__init__.py +0 -0
  165. /flwr/{server/superlink → supercore}/ffs/ffs.py +0 -0
  166. /flwr/{server/superlink → supercore}/ffs/ffs_factory.py +0 -0
  167. {flwr-1.19.0.dist-info → flwr-1.21.0.dist-info}/WHEEL +0 -0
@@ -22,31 +22,43 @@ from typing import Optional, cast
22
22
 
23
23
  import grpc
24
24
 
25
- from flwr.common import Message, RecordDict
25
+ from flwr.app.error import Error
26
+ from flwr.common import Message, Metadata, RecordDict, now
26
27
  from flwr.common.constant import (
27
28
  SERVERAPPIO_API_DEFAULT_CLIENT_ADDRESS,
28
29
  SUPERLINK_NODE_ID,
30
+ ErrorCode,
31
+ MessageType,
29
32
  )
30
33
  from flwr.common.grpc import create_channel, on_channel_state_change
31
34
  from flwr.common.inflatable import (
35
+ InflatableObject,
32
36
  get_all_nested_objects,
33
37
  get_object_tree,
38
+ iterate_object_tree,
34
39
  no_object_id_recompute,
35
40
  )
36
- from flwr.common.inflatable_grpc_utils import (
37
- make_pull_object_fn_grpc,
38
- make_push_object_fn_grpc,
41
+ from flwr.common.inflatable_protobuf_utils import (
42
+ make_pull_object_fn_protobuf,
43
+ make_push_object_fn_protobuf,
39
44
  )
40
45
  from flwr.common.inflatable_utils import (
46
+ ObjectUnavailableError,
41
47
  inflate_object_from_contents,
42
48
  pull_objects,
43
49
  push_objects,
44
50
  )
45
51
  from flwr.common.logger import log, warn_deprecated_feature
46
- from flwr.common.message import remove_content_from_message
52
+ from flwr.common.message import make_message, remove_content_from_message
47
53
  from flwr.common.retry_invoker import _make_simple_grpc_retry_invoker, _wrap_stub
48
54
  from flwr.common.serde import message_to_proto, run_from_proto
49
55
  from flwr.common.typing import Run
56
+ from flwr.proto.appio_pb2 import ( # pylint: disable=E0611
57
+ PullAppMessagesRequest,
58
+ PullAppMessagesResponse,
59
+ PushAppMessagesRequest,
60
+ PushAppMessagesResponse,
61
+ )
50
62
  from flwr.proto.message_pb2 import ( # pylint: disable=E0611
51
63
  ConfirmMessageReceivedRequest,
52
64
  )
@@ -55,10 +67,6 @@ from flwr.proto.run_pb2 import GetRunRequest, GetRunResponse # pylint: disable=
55
67
  from flwr.proto.serverappio_pb2 import ( # pylint: disable=E0611
56
68
  GetNodesRequest,
57
69
  GetNodesResponse,
58
- PullResMessagesRequest,
59
- PullResMessagesResponse,
60
- PushInsMessagesRequest,
61
- PushInsMessagesResponse,
62
70
  )
63
71
  from flwr.proto.serverappio_pb2_grpc import ServerAppIoStub # pylint: disable=E0611
64
72
 
@@ -215,37 +223,38 @@ class GrpcGrid(Grid):
215
223
  )
216
224
  return [node.node_id for node in res.nodes]
217
225
 
218
- def _try_push_message(self, run_id: int, message: Message) -> str:
219
- """Push one message and its associated objects."""
220
- # Compute mapping of message descendants
221
- all_objects = get_all_nested_objects(message)
222
- msg_id = message.object_id
223
- object_tree = get_object_tree(message)
226
+ def _try_push_messages(self, run_id: int, messages: Iterable[Message]) -> list[str]:
227
+ """Push all messages and its associated objects."""
228
+ # Prepare all Messages to be sent in a single request
229
+ proto_messages = []
230
+ object_trees = []
231
+ all_objects: dict[str, InflatableObject] = {}
232
+ for msg in messages:
233
+ proto_messages.append(message_to_proto(remove_content_from_message(msg)))
234
+ all_objects.update(get_all_nested_objects(msg))
235
+ object_trees.append(get_object_tree(msg))
236
+ del msg
224
237
 
225
238
  # Call GrpcServerAppIoStub method
226
- res: PushInsMessagesResponse = self._stub.PushMessages(
227
- PushInsMessagesRequest(
228
- messages_list=[message_to_proto(remove_content_from_message(message))],
239
+ res: PushAppMessagesResponse = self._stub.PushMessages(
240
+ PushAppMessagesRequest(
241
+ messages_list=proto_messages,
229
242
  run_id=run_id,
230
- message_object_trees=[object_tree],
243
+ message_object_trees=object_trees,
231
244
  )
232
245
  )
233
246
 
234
247
  # Push objects
235
- # If Message was added to the LinkState correctly
236
- if msg_id is not None:
237
- obj_ids_to_push = set(res.objects_to_push[msg_id].object_ids)
238
- # Push only object that are not in the store
239
- push_objects(
240
- all_objects,
241
- push_object_fn=make_push_object_fn_grpc(
242
- push_object_grpc=self._stub.PushObject,
243
- node=self.node,
244
- run_id=run_id,
245
- ),
246
- object_ids_to_push=obj_ids_to_push,
247
- )
248
- return msg_id
248
+ push_objects(
249
+ all_objects,
250
+ push_object_fn=make_push_object_fn_protobuf(
251
+ push_object_protobuf=self._stub.PushObject,
252
+ node=self.node,
253
+ run_id=run_id,
254
+ ),
255
+ object_ids_to_push=set(res.objects_to_push),
256
+ )
257
+ return cast(list[str], res.message_ids)
249
258
 
250
259
  def push_messages(self, messages: Iterable[Message]) -> Iterable[str]:
251
260
  """Push messages to specified node IDs.
@@ -256,17 +265,19 @@ class GrpcGrid(Grid):
256
265
  # Construct Messages
257
266
  run_id = cast(Run, self._run).run_id
258
267
  message_ids: list[str] = []
268
+ if not messages:
269
+ return message_ids
259
270
  try:
260
- for msg in messages:
261
- # Populate metadata
262
- msg.metadata.__dict__["_run_id"] = run_id
263
- msg.metadata.__dict__["_src_node_id"] = self.node.node_id
264
- msg.metadata.__dict__["_message_id"] = msg.object_id
265
- # Check message
266
- self._check_message(msg)
267
- # Try pushing message and its objects
268
- with no_object_id_recompute():
269
- message_ids.append(self._try_push_message(run_id, msg))
271
+ with no_object_id_recompute():
272
+ for msg in messages:
273
+ # Populate metadata
274
+ msg.metadata.__dict__["_run_id"] = run_id
275
+ msg.metadata.__dict__["_src_node_id"] = self.node.node_id
276
+ msg.metadata.__dict__["_message_id"] = msg.object_id
277
+ # Check message
278
+ self._check_message(msg)
279
+ # Try pushing messages and their objects
280
+ message_ids = self._try_push_messages(run_id, messages)
270
281
 
271
282
  except grpc.RpcError as e:
272
283
  if e.code() == grpc.StatusCode.RESOURCE_EXHAUSTED: # pylint: disable=E1101
@@ -294,24 +305,52 @@ class GrpcGrid(Grid):
294
305
  run_id = cast(Run, self._run).run_id
295
306
  try:
296
307
  # Pull Messages
297
- res: PullResMessagesResponse = self._stub.PullMessages(
298
- PullResMessagesRequest(
308
+ res: PullAppMessagesResponse = self._stub.PullMessages(
309
+ PullAppMessagesRequest(
299
310
  message_ids=message_ids,
300
311
  run_id=run_id,
301
312
  )
302
313
  )
303
314
  # Pull Messages from store
304
315
  inflated_msgs: list[Message] = []
305
- for msg_proto in res.messages_list:
316
+ for msg_proto, msg_tree in zip(res.messages_list, res.message_object_trees):
306
317
  msg_id = msg_proto.metadata.message_id
307
- all_object_contents = pull_objects(
308
- list(res.objects_to_pull[msg_id].object_ids) + [msg_id],
309
- pull_object_fn=make_pull_object_fn_grpc(
310
- pull_object_grpc=self._stub.PullObject,
311
- node=self.node,
312
- run_id=run_id,
313
- ),
314
- )
318
+ try:
319
+ all_object_contents = pull_objects(
320
+ object_ids=[
321
+ tree.object_id for tree in iterate_object_tree(msg_tree)
322
+ ],
323
+ pull_object_fn=make_pull_object_fn_protobuf(
324
+ pull_object_protobuf=self._stub.PullObject,
325
+ node=self.node,
326
+ run_id=run_id,
327
+ ),
328
+ )
329
+ except ObjectUnavailableError as e:
330
+ # An ObjectUnavailableError indicates that the object is not yet
331
+ # available. If this point has been reached, it means that the
332
+ # Grid has tried to pull the object for the maximum number of times
333
+ # or for the maximum time allowed, so we return an inflated message
334
+ # with an error
335
+ inflated_msgs.append(
336
+ make_message(
337
+ metadata=Metadata(
338
+ run_id=run_id,
339
+ message_id="",
340
+ src_node_id=self.node.node_id,
341
+ dst_node_id=self.node.node_id,
342
+ message_type=MessageType.SYSTEM,
343
+ group_id="",
344
+ ttl=0,
345
+ reply_to_message_id=msg_proto.metadata.reply_to_message_id,
346
+ created_at=now().timestamp(),
347
+ ),
348
+ error=Error(
349
+ code=ErrorCode.MESSAGE_UNAVAILABLE, reason=(str(e))
350
+ ),
351
+ )
352
+ )
353
+ continue
315
354
 
316
355
  # Confirm that the message has been received
317
356
  self._stub.ConfirmMessageReceived(
@@ -347,6 +386,7 @@ class GrpcGrid(Grid):
347
386
  """
348
387
  # Push messages
349
388
  msg_ids = set(self.push_messages(messages))
389
+ del messages
350
390
 
351
391
  # Pull messages
352
392
  end_time = time.time() + (timeout if timeout is not None else 0.0)
@@ -153,6 +153,7 @@ class InMemoryGrid(Grid):
153
153
  """
154
154
  # Push messages
155
155
  msg_ids = set(self.push_messages(messages))
156
+ del messages
156
157
 
157
158
  # Pull messages
158
159
  end_time = time.time() + (timeout if timeout is not None else 0.0)
@@ -19,9 +19,9 @@ import argparse
19
19
  from logging import DEBUG, ERROR, INFO
20
20
  from pathlib import Path
21
21
  from queue import Queue
22
- from time import sleep
23
22
  from typing import Optional
24
23
 
24
+ from flwr.app.exception import AppExitException
25
25
  from flwr.cli.config_utils import get_fab_metadata
26
26
  from flwr.cli.install import install_from_fab
27
27
  from flwr.cli.utils import get_sha256_hash
@@ -34,10 +34,11 @@ from flwr.common.config import (
34
34
  )
35
35
  from flwr.common.constant import (
36
36
  SERVERAPPIO_API_DEFAULT_CLIENT_ADDRESS,
37
+ ExecPluginType,
37
38
  Status,
38
39
  SubStatus,
39
40
  )
40
- from flwr.common.exit import ExitCode, flwr_exit
41
+ from flwr.common.exit import ExitCode, add_exit_handler, flwr_exit
41
42
  from flwr.common.heartbeat import HeartbeatSender, get_grpc_app_heartbeat_fn
42
43
  from flwr.common.logger import (
43
44
  log,
@@ -55,14 +56,18 @@ from flwr.common.serde import (
55
56
  )
56
57
  from flwr.common.telemetry import EventType, event
57
58
  from flwr.common.typing import RunNotRunningException, RunStatus
58
- from flwr.proto.run_pb2 import UpdateRunStatusRequest # pylint: disable=E0611
59
- from flwr.proto.serverappio_pb2 import ( # pylint: disable=E0611
60
- PullServerAppInputsRequest,
61
- PullServerAppInputsResponse,
62
- PushServerAppOutputsRequest,
59
+ from flwr.proto.appio_pb2 import ( # pylint: disable=E0611
60
+ PullAppInputsRequest,
61
+ PullAppInputsResponse,
62
+ PushAppOutputsRequest,
63
63
  )
64
+ from flwr.proto.run_pb2 import UpdateRunStatusRequest # pylint: disable=E0611
65
+ from flwr.proto.serverappio_pb2_grpc import ServerAppIoStub
64
66
  from flwr.server.grid.grpc_grid import GrpcGrid
65
67
  from flwr.server.run_serverapp import run as run_
68
+ from flwr.supercore.app_utils import start_parent_process_monitor
69
+ from flwr.supercore.superexec.plugin import ServerAppExecPlugin
70
+ from flwr.supercore.superexec.run_superexec import run_with_deprecation_warning
66
71
 
67
72
 
68
73
  def flwr_serverapp() -> None:
@@ -73,14 +78,27 @@ def flwr_serverapp() -> None:
73
78
 
74
79
  args = _parse_args_run_flwr_serverapp().parse_args()
75
80
 
76
- log(INFO, "Start `flwr-serverapp` process")
77
-
78
81
  if not args.insecure:
79
82
  flwr_exit(
80
83
  ExitCode.COMMON_TLS_NOT_SUPPORTED,
81
84
  "`flwr-serverapp` does not support TLS yet.",
82
85
  )
83
86
 
87
+ # Disallow long-running `flwr-serverapp` processes
88
+ if args.token is None:
89
+ run_with_deprecation_warning(
90
+ cmd="flwr-serverapp",
91
+ plugin_type=ExecPluginType.SERVER_APP,
92
+ plugin_class=ServerAppExecPlugin,
93
+ stub_class=ServerAppIoStub,
94
+ appio_api_address=args.serverappio_api_address,
95
+ flwr_dir=args.flwr_dir,
96
+ parent_pid=args.parent_pid,
97
+ warn_run_once=args.run_once,
98
+ )
99
+ return
100
+
101
+ log(INFO, "Start `flwr-serverapp` process")
84
102
  log(
85
103
  DEBUG,
86
104
  "`flwr-serverapp` will attempt to connect to SuperLink's "
@@ -90,168 +108,177 @@ def flwr_serverapp() -> None:
90
108
  run_serverapp(
91
109
  serverappio_api_address=args.serverappio_api_address,
92
110
  log_queue=log_queue,
93
- run_once=args.run_once,
111
+ token=args.token,
94
112
  flwr_dir=args.flwr_dir,
95
113
  certificates=None,
114
+ parent_pid=args.parent_pid,
96
115
  )
97
116
 
98
117
  # Restore stdout/stderr
99
118
  restore_output()
100
119
 
101
120
 
102
- def run_serverapp( # pylint: disable=R0914, disable=W0212, disable=R0915
121
+ def run_serverapp( # pylint: disable=R0913, R0914, R0915, R0917, W0212
103
122
  serverappio_api_address: str,
104
123
  log_queue: Queue[Optional[str]],
105
- run_once: bool,
124
+ token: str,
106
125
  flwr_dir: Optional[str] = None,
107
126
  certificates: Optional[bytes] = None,
127
+ parent_pid: Optional[int] = None,
108
128
  ) -> None:
109
129
  """Run Flower ServerApp process."""
110
- grid = GrpcGrid(
111
- serverappio_service_address=serverappio_api_address,
112
- root_certificates=certificates,
113
- )
130
+ # Monitor the main process in case of SIGKILL
131
+ if parent_pid is not None:
132
+ start_parent_process_monitor(parent_pid)
114
133
 
115
134
  # Resolve directory where FABs are installed
116
135
  flwr_dir_ = get_flwr_dir(flwr_dir)
117
136
  log_uploader = None
118
- success = True
119
137
  hash_run_id = None
120
138
  run_status = None
121
139
  heartbeat_sender = None
122
- while True:
123
-
124
- try:
125
- # Pull ServerAppInputs from LinkState
126
- req = PullServerAppInputsRequest()
127
- log(DEBUG, "[flwr-serverapp] Pull ServerAppInputs")
128
- res: PullServerAppInputsResponse = grid._stub.PullServerAppInputs(req)
129
- if not res.HasField("run"):
130
- sleep(3)
131
- run_status = None
132
- continue
133
-
134
- context = context_from_proto(res.context)
135
- run = run_from_proto(res.run)
136
- fab = fab_from_proto(res.fab)
137
-
138
- hash_run_id = get_sha256_hash(run.run_id)
139
-
140
- grid.set_run(run.run_id)
141
-
142
- # Start log uploader for this run
143
- log_uploader = start_log_uploader(
144
- log_queue=log_queue,
145
- node_id=0,
146
- run_id=run.run_id,
147
- stub=grid._stub,
140
+ grid = None
141
+ context = None
142
+ exit_code = ExitCode.SUCCESS
143
+
144
+ def on_exit() -> None:
145
+ # Stop heartbeat sender
146
+ if heartbeat_sender:
147
+ heartbeat_sender.stop()
148
+
149
+ # Stop log uploader for this run and upload final logs
150
+ if log_uploader:
151
+ stop_log_uploader(log_queue, log_uploader)
152
+
153
+ # Update run status
154
+ if run_status and grid:
155
+ run_status_proto = run_status_to_proto(run_status)
156
+ grid._stub.UpdateRunStatus(
157
+ UpdateRunStatusRequest(run_id=run.run_id, run_status=run_status_proto)
148
158
  )
149
159
 
150
- log(DEBUG, "[flwr-serverapp] Start FAB installation.")
151
- install_from_fab(fab.content, flwr_dir=flwr_dir_, skip_prompt=True)
160
+ # Close the Grpc connection
161
+ if grid:
162
+ grid.close()
152
163
 
153
- fab_id, fab_version = get_fab_metadata(fab.content)
164
+ add_exit_handler(on_exit)
154
165
 
155
- app_path = str(
156
- get_project_dir(fab_id, fab_version, fab.hash_str, flwr_dir_)
157
- )
158
- config = get_project_config(app_path)
166
+ try:
167
+ # Initialize the GrpcGrid
168
+ grid = GrpcGrid(
169
+ serverappio_service_address=serverappio_api_address,
170
+ root_certificates=certificates,
171
+ )
159
172
 
160
- # Obtain server app reference and the run config
161
- server_app_attr = config["tool"]["flwr"]["app"]["components"]["serverapp"]
162
- server_app_run_config = get_fused_config_from_dir(
163
- Path(app_path), run.override_config
164
- )
173
+ # Pull ServerAppInputs from LinkState
174
+ req = PullAppInputsRequest(token=token)
175
+ log(DEBUG, "[flwr-serverapp] Pull ServerAppInputs")
176
+ res: PullAppInputsResponse = grid._stub.PullAppInputs(req)
177
+ context = context_from_proto(res.context)
178
+ run = run_from_proto(res.run)
179
+ fab = fab_from_proto(res.fab)
165
180
 
166
- # Update run_config in context
167
- context.run_config = server_app_run_config
181
+ hash_run_id = get_sha256_hash(run.run_id)
168
182
 
169
- log(
170
- DEBUG,
171
- "[flwr-serverapp] Will load ServerApp `%s` in %s",
172
- server_app_attr,
173
- app_path,
174
- )
183
+ grid.set_run(run.run_id)
175
184
 
176
- # Change status to Running
177
- run_status_proto = run_status_to_proto(RunStatus(Status.RUNNING, "", ""))
178
- grid._stub.UpdateRunStatus(
179
- UpdateRunStatusRequest(run_id=run.run_id, run_status=run_status_proto)
180
- )
185
+ # Start log uploader for this run
186
+ log_uploader = start_log_uploader(
187
+ log_queue=log_queue,
188
+ node_id=0,
189
+ run_id=run.run_id,
190
+ stub=grid._stub,
191
+ )
181
192
 
182
- event(
183
- EventType.FLWR_SERVERAPP_RUN_ENTER,
184
- event_details={"run-id-hash": hash_run_id},
185
- )
193
+ log(DEBUG, "[flwr-serverapp] Start FAB installation.")
194
+ install_from_fab(fab.content, flwr_dir=flwr_dir_, skip_prompt=True)
186
195
 
187
- # Set up heartbeat sender
188
- heartbeat_fn = get_grpc_app_heartbeat_fn(
189
- grid._stub,
190
- run.run_id,
191
- failure_message="Heartbeat failed unexpectedly. The SuperLink could "
192
- "not find the provided run ID, or the run status is invalid.",
193
- )
194
- heartbeat_sender = HeartbeatSender(heartbeat_fn)
195
- heartbeat_sender.start()
196
-
197
- # Load and run the ServerApp with the Grid
198
- updated_context = run_(
199
- grid=grid,
200
- server_app_dir=app_path,
201
- server_app_attr=server_app_attr,
202
- context=context,
203
- )
196
+ fab_id, fab_version = get_fab_metadata(fab.content)
204
197
 
205
- # Send resulting context
206
- context_proto = context_to_proto(updated_context)
207
- log(DEBUG, "[flwr-serverapp] Will push ServerAppOutputs")
208
- out_req = PushServerAppOutputsRequest(
209
- run_id=run.run_id, context=context_proto
210
- )
211
- _ = grid._stub.PushServerAppOutputs(out_req)
212
-
213
- run_status = RunStatus(Status.FINISHED, SubStatus.COMPLETED, "")
214
- except RunNotRunningException:
215
- log(INFO, "")
216
- log(INFO, "Run ID %s stopped.", run.run_id)
217
- log(INFO, "")
218
- run_status = None
219
- success = False
220
-
221
- except Exception as ex: # pylint: disable=broad-exception-caught
222
- exc_entity = "ServerApp"
223
- log(ERROR, "%s raised an exception", exc_entity, exc_info=ex)
224
- run_status = RunStatus(Status.FINISHED, SubStatus.FAILED, str(ex))
225
- success = False
226
-
227
- finally:
228
- # Stop heartbeat sender
229
- if heartbeat_sender:
230
- heartbeat_sender.stop()
231
- heartbeat_sender = None
232
-
233
- # Stop log uploader for this run and upload final logs
234
- if log_uploader:
235
- stop_log_uploader(log_queue, log_uploader)
236
- log_uploader = None
237
-
238
- # Update run status
239
- if run_status:
240
- run_status_proto = run_status_to_proto(run_status)
241
- grid._stub.UpdateRunStatus(
242
- UpdateRunStatusRequest(
243
- run_id=run.run_id, run_status=run_status_proto
244
- )
245
- )
246
-
247
- event(
248
- EventType.FLWR_SERVERAPP_RUN_LEAVE,
249
- event_details={"run-id-hash": hash_run_id, "success": success},
250
- )
198
+ app_path = str(get_project_dir(fab_id, fab_version, fab.hash_str, flwr_dir_))
199
+ config = get_project_config(app_path)
200
+
201
+ # Obtain server app reference and the run config
202
+ server_app_attr = config["tool"]["flwr"]["app"]["components"]["serverapp"]
203
+ server_app_run_config = get_fused_config_from_dir(
204
+ Path(app_path), run.override_config
205
+ )
251
206
 
252
- # Stop the loop if `flwr-serverapp` is expected to process a single run
253
- if run_once:
254
- break
207
+ # Update run_config in context
208
+ context.run_config = server_app_run_config
209
+
210
+ log(
211
+ DEBUG,
212
+ "[flwr-serverapp] Will load ServerApp `%s` in %s",
213
+ server_app_attr,
214
+ app_path,
215
+ )
216
+
217
+ # Change status to Running
218
+ run_status_proto = run_status_to_proto(RunStatus(Status.RUNNING, "", ""))
219
+ grid._stub.UpdateRunStatus(
220
+ UpdateRunStatusRequest(run_id=run.run_id, run_status=run_status_proto)
221
+ )
222
+
223
+ event(
224
+ EventType.FLWR_SERVERAPP_RUN_ENTER,
225
+ event_details={"run-id-hash": hash_run_id},
226
+ )
227
+
228
+ # Set up heartbeat sender
229
+ heartbeat_fn = get_grpc_app_heartbeat_fn(
230
+ grid._stub,
231
+ run.run_id,
232
+ failure_message="Heartbeat failed unexpectedly. The SuperLink could "
233
+ "not find the provided run ID, or the run status is invalid.",
234
+ )
235
+ heartbeat_sender = HeartbeatSender(heartbeat_fn)
236
+ heartbeat_sender.start()
237
+
238
+ # Load and run the ServerApp with the Grid
239
+ updated_context = run_(
240
+ grid=grid,
241
+ server_app_dir=app_path,
242
+ server_app_attr=server_app_attr,
243
+ context=context,
244
+ )
245
+
246
+ # Send resulting context
247
+ context_proto = context_to_proto(updated_context)
248
+ log(DEBUG, "[flwr-serverapp] Will push ServerAppOutputs")
249
+ out_req = PushAppOutputsRequest(
250
+ token=token, run_id=run.run_id, context=context_proto
251
+ )
252
+ _ = grid._stub.PushAppOutputs(out_req)
253
+
254
+ run_status = RunStatus(Status.FINISHED, SubStatus.COMPLETED, "")
255
+
256
+ # Raised when the run is already stopped by the user
257
+ except RunNotRunningException:
258
+ log(INFO, "")
259
+ log(INFO, "Run ID %s stopped.", run.run_id)
260
+ log(INFO, "")
261
+ run_status = None
262
+ # No need to update the exit code since this is expected behavior
263
+
264
+ except Exception as ex: # pylint: disable=broad-exception-caught
265
+ exc_entity = "ServerApp"
266
+ log(ERROR, "%s raised an exception", exc_entity, exc_info=ex)
267
+ run_status = RunStatus(Status.FINISHED, SubStatus.FAILED, str(ex))
268
+
269
+ # Set exit code
270
+ exit_code = ExitCode.SERVERAPP_EXCEPTION # General exit code
271
+ if isinstance(ex, AppExitException):
272
+ exit_code = ex.exit_code
273
+
274
+ flwr_exit(
275
+ code=exit_code,
276
+ event_type=EventType.FLWR_SERVERAPP_RUN_LEAVE,
277
+ event_details={
278
+ "run-id-hash": hash_run_id,
279
+ "success": exit_code == ExitCode.SUCCESS,
280
+ },
281
+ )
255
282
 
256
283
 
257
284
  def _parse_args_run_flwr_serverapp() -> argparse.ArgumentParser:
@@ -266,11 +293,5 @@ def _parse_args_run_flwr_serverapp() -> argparse.ArgumentParser:
266
293
  help="Address of SuperLink's ServerAppIo API (IPv4, IPv6, or a domain name)."
267
294
  f"By default, it is set to {SERVERAPPIO_API_DEFAULT_CLIENT_ADDRESS}.",
268
295
  )
269
- parser.add_argument(
270
- "--run-once",
271
- action="store_true",
272
- help="When set, this process will start a single ServerApp for a pending Run. "
273
- "If there is no pending Run, the process will exit.",
274
- )
275
296
  add_args_flwr_app_common(parser=parser)
276
297
  return parser
@@ -41,6 +41,7 @@ from flwr.proto.fleet_pb2 import ( # pylint: disable=E0611
41
41
  from flwr.proto.grpcadapter_pb2 import MessageContainer # pylint: disable=E0611
42
42
  from flwr.proto.heartbeat_pb2 import SendNodeHeartbeatRequest # pylint: disable=E0611
43
43
  from flwr.proto.message_pb2 import ( # pylint: disable=E0611
44
+ ConfirmMessageReceivedRequest,
44
45
  PullObjectRequest,
45
46
  PushObjectRequest,
46
47
  )
@@ -101,4 +102,11 @@ class GrpcAdapterServicer(grpcadapter_pb2_grpc.GrpcAdapterServicer, FleetService
101
102
  return _handle(request, context, PushObjectRequest, self.PushObject)
102
103
  if request.grpc_message_name == PullObjectRequest.__qualname__:
103
104
  return _handle(request, context, PullObjectRequest, self.PullObject)
105
+ if request.grpc_message_name == ConfirmMessageReceivedRequest.__qualname__:
106
+ return _handle(
107
+ request,
108
+ context,
109
+ ConfirmMessageReceivedRequest,
110
+ self.ConfirmMessageReceived,
111
+ )
104
112
  raise ValueError(f"Invalid grpc_message_name: {request.grpc_message_name}")
@@ -48,10 +48,10 @@ from flwr.proto.message_pb2 import ( # pylint: disable=E0611
48
48
  PushObjectResponse,
49
49
  )
50
50
  from flwr.proto.run_pb2 import GetRunRequest, GetRunResponse # pylint: disable=E0611
51
- from flwr.server.superlink.ffs.ffs_factory import FfsFactory
52
51
  from flwr.server.superlink.fleet.message_handler import message_handler
53
52
  from flwr.server.superlink.linkstate import LinkStateFactory
54
53
  from flwr.server.superlink.utils import abort_grpc_context
54
+ from flwr.supercore.ffs import FfsFactory
55
55
  from flwr.supercore.object_store import ObjectStoreFactory
56
56
 
57
57