flwr-1.20.0-py3-none-any.whl → flwr-1.21.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132)
  1. flwr/__init__.py +4 -1
  2. flwr/app/__init__.py +28 -0
  3. flwr/app/exception.py +31 -0
  4. flwr/cli/auth_plugin/oidc_cli_plugin.py +4 -4
  5. flwr/cli/cli_user_auth_interceptor.py +1 -1
  6. flwr/cli/config_utils.py +3 -3
  7. flwr/cli/constant.py +25 -8
  8. flwr/cli/log.py +9 -9
  9. flwr/cli/login/login.py +3 -3
  10. flwr/cli/ls.py +5 -5
  11. flwr/cli/new/new.py +11 -0
  12. flwr/cli/new/templates/app/code/__init__.pytorch_msg_api.py.tpl +1 -0
  13. flwr/cli/new/templates/app/code/client.pytorch_msg_api.py.tpl +80 -0
  14. flwr/cli/new/templates/app/code/server.pytorch_msg_api.py.tpl +41 -0
  15. flwr/cli/new/templates/app/code/task.pytorch_msg_api.py.tpl +98 -0
  16. flwr/cli/new/templates/app/pyproject.baseline.toml.tpl +1 -1
  17. flwr/cli/new/templates/app/pyproject.flowertune.toml.tpl +1 -1
  18. flwr/cli/new/templates/app/pyproject.huggingface.toml.tpl +1 -1
  19. flwr/cli/new/templates/app/pyproject.jax.toml.tpl +1 -1
  20. flwr/cli/new/templates/app/pyproject.mlx.toml.tpl +1 -1
  21. flwr/cli/new/templates/app/pyproject.numpy.toml.tpl +1 -1
  22. flwr/cli/new/templates/app/pyproject.pytorch.toml.tpl +1 -1
  23. flwr/cli/new/templates/app/pyproject.pytorch_msg_api.toml.tpl +53 -0
  24. flwr/cli/new/templates/app/pyproject.sklearn.toml.tpl +1 -1
  25. flwr/cli/new/templates/app/pyproject.tensorflow.toml.tpl +1 -1
  26. flwr/cli/run/run.py +9 -13
  27. flwr/cli/stop.py +7 -4
  28. flwr/cli/utils.py +19 -8
  29. flwr/client/grpc_rere_client/connection.py +1 -12
  30. flwr/client/rest_client/connection.py +3 -0
  31. flwr/clientapp/__init__.py +10 -0
  32. flwr/clientapp/mod/__init__.py +26 -0
  33. flwr/clientapp/mod/centraldp_mods.py +132 -0
  34. flwr/common/args.py +20 -6
  35. flwr/common/auth_plugin/__init__.py +4 -4
  36. flwr/common/auth_plugin/auth_plugin.py +7 -7
  37. flwr/common/constant.py +23 -4
  38. flwr/common/event_log_plugin/event_log_plugin.py +1 -1
  39. flwr/common/exit/__init__.py +4 -0
  40. flwr/common/exit/exit.py +8 -1
  41. flwr/common/exit/exit_code.py +26 -7
  42. flwr/common/exit/exit_handler.py +62 -0
  43. flwr/common/{exit_handlers.py → exit/signal_handler.py} +20 -37
  44. flwr/common/grpc.py +0 -11
  45. flwr/common/inflatable_utils.py +1 -1
  46. flwr/common/logger.py +1 -1
  47. flwr/common/retry_invoker.py +30 -11
  48. flwr/common/telemetry.py +4 -0
  49. flwr/compat/server/app.py +2 -2
  50. flwr/proto/appio_pb2.py +25 -17
  51. flwr/proto/appio_pb2.pyi +46 -2
  52. flwr/proto/clientappio_pb2.py +3 -11
  53. flwr/proto/clientappio_pb2.pyi +0 -47
  54. flwr/proto/clientappio_pb2_grpc.py +19 -20
  55. flwr/proto/clientappio_pb2_grpc.pyi +10 -11
  56. flwr/proto/control_pb2.py +62 -0
  57. flwr/proto/{exec_pb2_grpc.py → control_pb2_grpc.py} +54 -54
  58. flwr/proto/{exec_pb2_grpc.pyi → control_pb2_grpc.pyi} +28 -28
  59. flwr/proto/serverappio_pb2.py +2 -2
  60. flwr/proto/serverappio_pb2_grpc.py +68 -0
  61. flwr/proto/serverappio_pb2_grpc.pyi +26 -0
  62. flwr/proto/simulationio_pb2.py +4 -11
  63. flwr/proto/simulationio_pb2.pyi +0 -58
  64. flwr/proto/simulationio_pb2_grpc.py +129 -27
  65. flwr/proto/simulationio_pb2_grpc.pyi +52 -13
  66. flwr/server/app.py +129 -152
  67. flwr/server/grid/grpc_grid.py +3 -0
  68. flwr/server/grid/inmemory_grid.py +1 -0
  69. flwr/server/serverapp/app.py +157 -146
  70. flwr/server/superlink/fleet/vce/backend/raybackend.py +3 -1
  71. flwr/server/superlink/fleet/vce/vce_api.py +6 -6
  72. flwr/server/superlink/linkstate/in_memory_linkstate.py +34 -0
  73. flwr/server/superlink/linkstate/linkstate.py +2 -1
  74. flwr/server/superlink/linkstate/sqlite_linkstate.py +45 -0
  75. flwr/server/superlink/serverappio/serverappio_grpc.py +1 -1
  76. flwr/server/superlink/serverappio/serverappio_servicer.py +61 -6
  77. flwr/server/superlink/simulation/simulationio_servicer.py +97 -21
  78. flwr/serverapp/__init__.py +12 -0
  79. flwr/serverapp/dp_fixed_clipping.py +352 -0
  80. flwr/serverapp/exception.py +38 -0
  81. flwr/serverapp/strategy/__init__.py +38 -0
  82. flwr/serverapp/strategy/dp_fixed_clipping.py +352 -0
  83. flwr/serverapp/strategy/fedadagrad.py +162 -0
  84. flwr/serverapp/strategy/fedadam.py +181 -0
  85. flwr/serverapp/strategy/fedavg.py +295 -0
  86. flwr/serverapp/strategy/fedopt.py +218 -0
  87. flwr/serverapp/strategy/fedyogi.py +173 -0
  88. flwr/serverapp/strategy/result.py +105 -0
  89. flwr/serverapp/strategy/strategy.py +285 -0
  90. flwr/serverapp/strategy/strategy_utils.py +251 -0
  91. flwr/serverapp/strategy/strategy_utils_tests.py +304 -0
  92. flwr/simulation/app.py +161 -164
  93. flwr/supercore/app_utils.py +58 -0
  94. flwr/{supernode/scheduler → supercore/cli}/__init__.py +3 -3
  95. flwr/supercore/cli/flower_superexec.py +141 -0
  96. flwr/supercore/{scheduler → corestate}/__init__.py +3 -3
  97. flwr/supercore/corestate/corestate.py +81 -0
  98. flwr/supercore/grpc_health/__init__.py +3 -0
  99. flwr/supercore/grpc_health/health_server.py +53 -0
  100. flwr/supercore/grpc_health/simple_health_servicer.py +2 -2
  101. flwr/{superexec → supercore/superexec}/__init__.py +1 -1
  102. flwr/supercore/superexec/plugin/__init__.py +28 -0
  103. flwr/{supernode/scheduler/simple_clientapp_scheduler_plugin.py → supercore/superexec/plugin/base_exec_plugin.py} +10 -6
  104. flwr/supercore/superexec/plugin/clientapp_exec_plugin.py +28 -0
  105. flwr/supercore/{scheduler/plugin.py → superexec/plugin/exec_plugin.py} +4 -4
  106. flwr/supercore/superexec/plugin/serverapp_exec_plugin.py +28 -0
  107. flwr/supercore/superexec/plugin/simulation_exec_plugin.py +28 -0
  108. flwr/supercore/superexec/run_superexec.py +185 -0
  109. flwr/superlink/servicer/__init__.py +15 -0
  110. flwr/superlink/servicer/control/__init__.py +22 -0
  111. flwr/{superexec/exec_event_log_interceptor.py → superlink/servicer/control/control_event_log_interceptor.py} +7 -7
  112. flwr/{superexec/exec_grpc.py → superlink/servicer/control/control_grpc.py} +24 -29
  113. flwr/{superexec/exec_license_interceptor.py → superlink/servicer/control/control_license_interceptor.py} +6 -6
  114. flwr/{superexec/exec_servicer.py → superlink/servicer/control/control_servicer.py} +69 -30
  115. flwr/{superexec/exec_user_auth_interceptor.py → superlink/servicer/control/control_user_auth_interceptor.py} +10 -10
  116. flwr/supernode/cli/flower_supernode.py +3 -0
  117. flwr/supernode/cli/flwr_clientapp.py +18 -21
  118. flwr/supernode/nodestate/in_memory_nodestate.py +2 -2
  119. flwr/supernode/nodestate/nodestate.py +3 -59
  120. flwr/supernode/runtime/run_clientapp.py +39 -102
  121. flwr/supernode/servicer/clientappio/clientappio_servicer.py +10 -17
  122. flwr/supernode/start_client_internal.py +35 -76
  123. {flwr-1.20.0.dist-info → flwr-1.21.0.dist-info}/METADATA +4 -3
  124. {flwr-1.20.0.dist-info → flwr-1.21.0.dist-info}/RECORD +127 -98
  125. {flwr-1.20.0.dist-info → flwr-1.21.0.dist-info}/entry_points.txt +1 -0
  126. flwr/proto/exec_pb2.py +0 -62
  127. flwr/superexec/app.py +0 -45
  128. flwr/superexec/deployment.py +0 -191
  129. flwr/superexec/executor.py +0 -100
  130. flwr/superexec/simulation.py +0 -129
  131. flwr/proto/{exec_pb2.pyi → control_pb2.pyi} +0 -0
  132. {flwr-1.20.0.dist-info → flwr-1.21.0.dist-info}/WHEEL +0 -0
@@ -16,9 +16,6 @@
16
16
 
17
17
 
18
18
  import gc
19
- import os
20
- import threading
21
- import time
22
19
  from logging import DEBUG, ERROR, INFO
23
20
  from typing import Optional
24
21
 
@@ -63,23 +60,15 @@ from flwr.proto.appio_pb2 import ( # pylint: disable=E0611
63
60
  PushAppOutputsRequest,
64
61
  PushAppOutputsResponse,
65
62
  )
66
-
67
- # pylint: disable=E0611
68
- from flwr.proto.clientappio_pb2 import (
69
- GetRunIdsWithPendingMessagesRequest,
70
- GetRunIdsWithPendingMessagesResponse,
71
- RequestTokenRequest,
72
- RequestTokenResponse,
73
- )
74
63
  from flwr.proto.clientappio_pb2_grpc import ClientAppIoStub
75
64
  from flwr.proto.node_pb2 import Node # pylint: disable=E0611
65
+ from flwr.supercore.app_utils import start_parent_process_monitor
76
66
  from flwr.supercore.utils import mask_string
77
67
 
78
68
 
79
69
  def run_clientapp( # pylint: disable=R0913, R0914, R0917
80
70
  clientappio_api_address: str,
81
- run_once: bool,
82
- token: Optional[str] = None,
71
+ token: str,
83
72
  flwr_dir: Optional[str] = None,
84
73
  certificates: Optional[bytes] = None,
85
74
  parent_pid: Optional[int] = None,
@@ -102,113 +91,61 @@ def run_clientapp( # pylint: disable=R0913, R0914, R0917
102
91
  stub = ClientAppIoStub(channel)
103
92
  _wrap_stub(stub, _make_simple_grpc_retry_invoker())
104
93
 
105
- while True:
106
- # If token is not set, loop until token is received from SuperNode
107
- if token is None:
108
- token = get_token(stub)
94
+ # Pull Message, Context, Run and (optional) FAB from SuperNode
95
+ message, context, run, fab = pull_clientappinputs(stub=stub, token=token)
109
96
 
110
- # Pull Message, Context, Run and (optional) FAB from SuperNode
111
- message, context, run, fab = pull_clientappinputs(stub=stub, token=token)
97
+ # Install FAB, if provided
98
+ if fab:
99
+ log(DEBUG, "[flwr-clientapp] Start FAB installation.")
100
+ install_from_fab(fab.content, flwr_dir=flwr_dir_, skip_prompt=True)
112
101
 
113
- # Install FAB, if provided
114
- if fab:
115
- log(DEBUG, "[flwr-clientapp] Start FAB installation.")
116
- install_from_fab(fab.content, flwr_dir=flwr_dir_, skip_prompt=True)
102
+ load_client_app_fn = get_load_client_app_fn(
103
+ default_app_ref="",
104
+ app_path=None,
105
+ multi_app=True,
106
+ flwr_dir=str(flwr_dir_),
107
+ )
117
108
 
118
- load_client_app_fn = get_load_client_app_fn(
119
- default_app_ref="",
120
- app_path=None,
121
- multi_app=True,
122
- flwr_dir=str(flwr_dir_),
109
+ try:
110
+ # Load ClientApp
111
+ log(DEBUG, "[flwr-clientapp] Start `ClientApp` Loading.")
112
+ client_app: ClientApp = load_client_app_fn(
113
+ run.fab_id, run.fab_version, fab.hash_str if fab else ""
123
114
  )
124
115
 
125
- try:
126
- # Load ClientApp
127
- log(DEBUG, "[flwr-clientapp] Start `ClientApp` Loading.")
128
- client_app: ClientApp = load_client_app_fn(
129
- run.fab_id, run.fab_version, fab.hash_str if fab else ""
130
- )
131
-
132
- # Execute ClientApp
133
- reply_message = client_app(message=message, context=context)
116
+ # Execute ClientApp
117
+ reply_message = client_app(message=message, context=context)
134
118
 
135
- except Exception as ex: # pylint: disable=broad-exception-caught
136
- # Don't update/change NodeState
119
+ except Exception as ex: # pylint: disable=broad-exception-caught
120
+ # Don't update/change NodeState
137
121
 
138
- e_code = ErrorCode.CLIENT_APP_RAISED_EXCEPTION
139
- # Ex fmt: "<class 'ZeroDivisionError'>:<'division by zero'>"
140
- reason = str(type(ex)) + ":<'" + str(ex) + "'>"
141
- exc_entity = "ClientApp"
142
- if isinstance(ex, LoadClientAppError):
143
- reason = (
144
- "An exception was raised when attempting to load `ClientApp`"
145
- )
146
- e_code = ErrorCode.LOAD_CLIENT_APP_EXCEPTION
122
+ e_code = ErrorCode.CLIENT_APP_RAISED_EXCEPTION
123
+ # Ex fmt: "<class 'ZeroDivisionError'>:<'division by zero'>"
124
+ reason = str(type(ex)) + ":<'" + str(ex) + "'>"
125
+ exc_entity = "ClientApp"
126
+ if isinstance(ex, LoadClientAppError):
127
+ reason = "An exception was raised when attempting to load `ClientApp`"
128
+ e_code = ErrorCode.LOAD_CLIENT_APP_EXCEPTION
147
129
 
148
- log(ERROR, "%s raised an exception", exc_entity, exc_info=ex)
130
+ log(ERROR, "%s raised an exception", exc_entity, exc_info=ex)
149
131
 
150
- # Create error message
151
- reply_message = Message(
152
- Error(code=e_code, reason=reason), reply_to=message
153
- )
132
+ # Create error message
133
+ reply_message = Message(Error(code=e_code, reason=reason), reply_to=message)
154
134
 
155
- # Push Message and Context to SuperNode
156
- _ = push_clientappoutputs(
157
- stub=stub, token=token, message=reply_message, context=context
158
- )
159
-
160
- del client_app, message, context, run, fab, reply_message
161
- gc.collect()
162
-
163
- # Reset token to `None` to prevent flwr-clientapp from trying to pull the
164
- # same inputs again
165
- token = None
135
+ # Push Message and Context to SuperNode
136
+ _ = push_clientappoutputs(
137
+ stub=stub, token=token, message=reply_message, context=context
138
+ )
166
139
 
167
- # Stop the loop if `flwr-clientapp` is expected to process only a single
168
- # message
169
- if run_once:
170
- break
140
+ del client_app, message, context, run, fab, reply_message
141
+ gc.collect()
171
142
 
172
- except KeyboardInterrupt:
173
- log(INFO, "Closing connection")
174
143
  except grpc.RpcError as e:
175
144
  log(ERROR, "GRPC error occurred: %s", str(e))
176
145
  finally:
177
146
  channel.close()
178
147
 
179
148
 
180
- def start_parent_process_monitor(
181
- parent_pid: int,
182
- ) -> None:
183
- """Monitor the parent process and exit if it terminates."""
184
-
185
- def monitor() -> None:
186
- while True:
187
- time.sleep(0.2)
188
- if os.getppid() != parent_pid:
189
- os.kill(os.getpid(), 9)
190
-
191
- threading.Thread(target=monitor, daemon=True).start()
192
-
193
-
194
- def get_token(stub: ClientAppIoStub) -> str:
195
- """Get a token from SuperNode."""
196
- log(DEBUG, "[flwr-clientapp] Request token")
197
- while True:
198
- res: GetRunIdsWithPendingMessagesResponse = stub.GetRunIdsWithPendingMessages(
199
- GetRunIdsWithPendingMessagesRequest()
200
- )
201
-
202
- for run_id in res.run_ids:
203
- tk_res: RequestTokenResponse = stub.RequestToken(
204
- RequestTokenRequest(run_id=run_id)
205
- )
206
- if tk_res.token:
207
- return tk_res.token
208
-
209
- time.sleep(1) # Wait before retrying to get run IDs
210
-
211
-
212
149
  def pull_clientappinputs(
213
150
  stub: ClientAppIoStub, token: str
214
151
  ) -> tuple[Message, Context, Run, Optional[Fab]]:
@@ -36,6 +36,8 @@ from flwr.common.typing import Fab, Run
36
36
  # pylint: disable=E0611
37
37
  from flwr.proto import clientappio_pb2_grpc
38
38
  from flwr.proto.appio_pb2 import ( # pylint: disable=E0401
39
+ ListAppsToLaunchRequest,
40
+ ListAppsToLaunchResponse,
39
41
  PullAppInputsRequest,
40
42
  PullAppInputsResponse,
41
43
  PullAppMessagesRequest,
@@ -44,10 +46,6 @@ from flwr.proto.appio_pb2 import ( # pylint: disable=E0401
44
46
  PushAppMessagesResponse,
45
47
  PushAppOutputsRequest,
46
48
  PushAppOutputsResponse,
47
- )
48
- from flwr.proto.clientappio_pb2 import ( # pylint: disable=E0401
49
- GetRunIdsWithPendingMessagesRequest,
50
- GetRunIdsWithPendingMessagesResponse,
51
49
  RequestTokenRequest,
52
50
  RequestTokenResponse,
53
51
  )
@@ -82,13 +80,13 @@ class ClientAppIoServicer(clientappio_pb2_grpc.ClientAppIoServicer):
82
80
  self.ffs_factory = ffs_factory
83
81
  self.objectstore_factory = objectstore_factory
84
82
 
85
- def GetRunIdsWithPendingMessages(
83
+ def ListAppsToLaunch(
86
84
  self,
87
- request: GetRunIdsWithPendingMessagesRequest,
85
+ request: ListAppsToLaunchRequest,
88
86
  context: grpc.ServicerContext,
89
- ) -> GetRunIdsWithPendingMessagesResponse:
90
- """Get run IDs with pending messages."""
91
- log(DEBUG, "ClientAppIo.GetRunIdsWithPendingMessages")
87
+ ) -> ListAppsToLaunchResponse:
88
+ """Get run IDs with apps to launch."""
89
+ log(DEBUG, "ClientAppIo.ListAppsToLaunch")
92
90
 
93
91
  # Initialize state connection
94
92
  state = self.state_factory.state()
@@ -97,7 +95,7 @@ class ClientAppIoServicer(clientappio_pb2_grpc.ClientAppIoServicer):
97
95
  run_ids = state.get_run_ids_with_pending_messages()
98
96
 
99
97
  # Return run IDs
100
- return GetRunIdsWithPendingMessagesResponse(run_ids=run_ids)
98
+ return ListAppsToLaunchResponse(run_ids=run_ids)
101
99
 
102
100
  def RequestToken(
103
101
  self, request: RequestTokenRequest, context: grpc.ServicerContext
@@ -109,15 +107,10 @@ class ClientAppIoServicer(clientappio_pb2_grpc.ClientAppIoServicer):
109
107
  state = self.state_factory.state()
110
108
 
111
109
  # Attempt to create a token for the provided run ID
112
- try:
113
- token = state.create_token(request.run_id)
114
- except ValueError:
115
- # Return an empty token if A token already exists for this run ID,
116
- # indicating the run is in progress
117
- return RequestTokenResponse(token="")
110
+ token = state.create_token(request.run_id)
118
111
 
119
112
  # Return the token
120
- return RequestTokenResponse(token=token)
113
+ return RequestTokenResponse(token=token or "")
121
114
 
122
115
  def GetRun(
123
116
  self, request: GetRunRequest, context: grpc.ServicerContext
@@ -21,7 +21,7 @@ import time
21
21
  from collections.abc import Iterator
22
22
  from contextlib import contextmanager
23
23
  from functools import partial
24
- from logging import INFO, WARN
24
+ from logging import INFO
25
25
  from pathlib import Path
26
26
  from typing import Callable, Optional, Union, cast
27
27
 
@@ -35,18 +35,15 @@ from flwr.common import GRPC_MAX_MESSAGE_LENGTH, Context, Message, RecordDict
35
35
  from flwr.common.address import parse_address
36
36
  from flwr.common.config import get_flwr_dir, get_fused_config_from_fab
37
37
  from flwr.common.constant import (
38
- CLIENT_OCTET,
39
38
  CLIENTAPPIO_API_DEFAULT_SERVER_ADDRESS,
40
39
  ISOLATION_MODE_SUBPROCESS,
41
- MAX_RETRY_DELAY,
42
- SERVER_OCTET,
43
40
  TRANSPORT_TYPE_GRPC_ADAPTER,
44
41
  TRANSPORT_TYPE_GRPC_RERE,
45
42
  TRANSPORT_TYPE_REST,
46
43
  TRANSPORT_TYPES,
44
+ ExecPluginType,
47
45
  )
48
- from flwr.common.exit import ExitCode, flwr_exit
49
- from flwr.common.exit_handlers import register_exit_handlers
46
+ from flwr.common.exit import ExitCode, flwr_exit, register_signal_handlers
50
47
  from flwr.common.grpc import generic_create_grpc_server
51
48
  from flwr.common.inflatable import iterate_object_tree
52
49
  from flwr.common.inflatable_utils import (
@@ -54,12 +51,13 @@ from flwr.common.inflatable_utils import (
54
51
  push_object_contents_from_iterable,
55
52
  )
56
53
  from flwr.common.logger import log
57
- from flwr.common.retry_invoker import RetryInvoker, RetryState, exponential
54
+ from flwr.common.retry_invoker import RetryInvoker, _make_simple_grpc_retry_invoker
58
55
  from flwr.common.telemetry import EventType
59
56
  from flwr.common.typing import Fab, Run, RunNotRunningException, UserConfig
60
57
  from flwr.proto.clientappio_pb2_grpc import add_ClientAppIoServicer_to_server
61
58
  from flwr.proto.message_pb2 import ObjectTree # pylint: disable=E0611
62
59
  from flwr.supercore.ffs import Ffs, FfsFactory
60
+ from flwr.supercore.grpc_health import run_health_server_grpc_no_tls
63
61
  from flwr.supercore.object_store import ObjectStore, ObjectStoreFactory
64
62
  from flwr.supernode.nodestate import NodeState, NodeStateFactory
65
63
  from flwr.supernode.servicer.clientappio import ClientAppIoServicer
@@ -87,6 +85,7 @@ def start_client_internal(
87
85
  flwr_path: Optional[Path] = None,
88
86
  isolation: str = ISOLATION_MODE_SUBPROCESS,
89
87
  clientappio_api_address: str = CLIENTAPPIO_API_DEFAULT_SERVER_ADDRESS,
88
+ health_server_address: Optional[str] = None,
90
89
  ) -> None:
91
90
  """Start a Flower client node which connects to a Flower server.
92
91
 
@@ -135,6 +134,9 @@ def start_client_internal(
135
134
  clientappio_api_address : str
136
135
  (default: `CLIENTAPPIO_API_DEFAULT_SERVER_ADDRESS`)
137
136
  The SuperNode gRPC server address.
137
+ health_server_address : Optional[str] (default: None)
138
+ The address of the health server. If `None` is provided, the health server will
139
+ NOT be started.
138
140
  """
139
141
  if insecure is None:
140
142
  insecure = root_certificates is None
@@ -145,6 +147,7 @@ def start_client_internal(
145
147
  object_store_factory = ObjectStoreFactory()
146
148
 
147
149
  # Launch ClientAppIo API server
150
+ grpc_servers = []
148
151
  clientappio_server = run_clientappio_api_grpc(
149
152
  address=clientappio_api_address,
150
153
  state_factory=state_factory,
@@ -152,12 +155,18 @@ def start_client_internal(
152
155
  objectstore_factory=object_store_factory,
153
156
  certificates=None,
154
157
  )
158
+ grpc_servers.append(clientappio_server)
159
+
160
+ # Launch gRPC health server
161
+ if health_server_address is not None:
162
+ health_server = run_health_server_grpc_no_tls(health_server_address)
163
+ grpc_servers.append(health_server)
155
164
 
156
165
  # Register handlers for graceful shutdown
157
- register_exit_handlers(
166
+ register_signal_handlers(
158
167
  event_type=EventType.RUN_SUPERNODE_LEAVE,
159
168
  exit_message="SuperNode terminated gracefully.",
160
- grpc_servers=[clientappio_server],
169
+ grpc_servers=grpc_servers,
161
170
  )
162
171
 
163
172
  # Initialize NodeState, Ffs, and ObjectStore
@@ -165,6 +174,15 @@ def start_client_internal(
165
174
  ffs = ffs_factory.ffs()
166
175
  store = object_store_factory.store()
167
176
 
177
+ # Launch the SuperExec if the isolation mode is `subprocess`
178
+ if isolation == ISOLATION_MODE_SUBPROCESS:
179
+ command = ["flower-superexec", "--insecure"]
180
+ command += ["--appio-api-address", clientappio_api_address]
181
+ command += ["--plugin-type", ExecPluginType.CLIENT_APP]
182
+ command += ["--parent-pid", str(os.getpid())]
183
+ # pylint: disable-next=consider-using-with
184
+ subprocess.Popen(command)
185
+
168
186
  with _init_connection(
169
187
  transport=transport,
170
188
  server_address=server_address,
@@ -208,35 +226,6 @@ def start_client_internal(
208
226
  confirm_message_received=confirm_message_received,
209
227
  )
210
228
 
211
- # Two isolation modes:
212
- # 1. `subprocess`: SuperNode is starting the ClientApp
213
- # process as a subprocess.
214
- # 2. `process`: ClientApp process gets started separately
215
- # (via `flwr-clientapp`), for example, in a separate
216
- # Docker container.
217
-
218
- # Mode 1: SuperNode starts ClientApp as subprocess
219
- start_subprocess = isolation == ISOLATION_MODE_SUBPROCESS
220
-
221
- if start_subprocess and run_id is not None:
222
- _octet, _colon, _port = clientappio_api_address.rpartition(":")
223
- io_address = (
224
- f"{CLIENT_OCTET}:{_port}"
225
- if _octet == SERVER_OCTET
226
- else clientappio_api_address
227
- )
228
- # Start ClientApp subprocess
229
- command = [
230
- "flwr-clientapp",
231
- "--clientappio-api-address",
232
- io_address,
233
- "--parent-pid",
234
- str(os.getpid()),
235
- "--insecure",
236
- "--run-once",
237
- ]
238
- subprocess.run(command, check=False)
239
-
240
229
  # No message has been pulled therefore we can skip the push stage.
241
230
  if run_id is None:
242
231
  # If no message was received, wait for a while
@@ -521,44 +510,14 @@ def _make_fleet_connection_retry_invoker(
521
510
  connection_error_type: type[Exception] = RpcError,
522
511
  ) -> RetryInvoker:
523
512
  """Create a retry invoker for fleet connection."""
513
+ retry_invoker = _make_simple_grpc_retry_invoker()
514
+ retry_invoker.recoverable_exceptions = connection_error_type
515
+ if max_retries is not None:
516
+ retry_invoker.max_tries = max_retries + 1
517
+ if max_wait_time is not None:
518
+ retry_invoker.max_time = max_wait_time
524
519
 
525
- def _on_success(retry_state: RetryState) -> None:
526
- if retry_state.tries > 1:
527
- log(
528
- INFO,
529
- "Connection successful after %.2f seconds and %s tries.",
530
- retry_state.elapsed_time,
531
- retry_state.tries,
532
- )
533
-
534
- def _on_backoff(retry_state: RetryState) -> None:
535
- if retry_state.tries == 1:
536
- log(WARN, "Connection attempt failed, retrying...")
537
- else:
538
- log(
539
- WARN,
540
- "Connection attempt failed, retrying in %.2f seconds",
541
- retry_state.actual_wait,
542
- )
543
-
544
- return RetryInvoker(
545
- wait_gen_factory=lambda: exponential(max_delay=MAX_RETRY_DELAY),
546
- recoverable_exceptions=connection_error_type,
547
- max_tries=max_retries + 1 if max_retries is not None else None,
548
- max_time=max_wait_time,
549
- on_giveup=lambda retry_state: (
550
- log(
551
- WARN,
552
- "Giving up reconnection after %.2f seconds and %s tries.",
553
- retry_state.elapsed_time,
554
- retry_state.tries,
555
- )
556
- if retry_state.tries > 1
557
- else None
558
- ),
559
- on_success=_on_success,
560
- on_backoff=_on_backoff,
561
- )
520
+ return retry_invoker
562
521
 
563
522
 
564
523
  def run_clientappio_api_grpc(
@@ -584,6 +543,6 @@ def run_clientappio_api_grpc(
584
543
  max_message_length=GRPC_MAX_MESSAGE_LENGTH,
585
544
  certificates=certificates,
586
545
  )
587
- log(INFO, "Starting Flower ClientAppIo gRPC server on %s", address)
546
+ log(INFO, "Flower Deployment Runtime: Starting ClientAppIo API on %s", address)
588
547
  clientappio_grpc_server.start()
589
548
  return clientappio_grpc_server
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: flwr
3
- Version: 1.20.0
3
+ Version: 1.21.0
4
4
  Summary: Flower: A Friendly Federated AI Framework
5
5
  License: Apache-2.0
6
6
  Keywords: Artificial Intelligence,Federated AI,Federated Analytics,Federated Evaluation,Federated Learning,Flower,Machine Learning
@@ -172,8 +172,9 @@ Flower Baselines is a collection of community-contributed projects that reproduc
172
172
  - [FedOpt](https://github.com/adap/flower/tree/main/baselines/flwr_baselines/flwr_baselines/publications/adaptive_federated_optimization)
173
173
 
174
174
  Please refer to the [Flower Baselines Documentation](https://flower.ai/docs/baselines/) for a detailed categorization of baselines and for additional info including:
175
- * [How to use Flower Baselines](https://flower.ai/docs/baselines/how-to-use-baselines.html)
176
- * [How to contribute a new Flower Baseline](https://flower.ai/docs/baselines/how-to-contribute-baselines.html)
175
+
176
+ - [How to use Flower Baselines](https://flower.ai/docs/baselines/how-to-use-baselines.html)
177
+ - [How to contribute a new Flower Baseline](https://flower.ai/docs/baselines/how-to-contribute-baselines.html)
177
178
 
178
179
  ## Flower Usage Examples
179
180