flwr-1.20.0-py3-none-any.whl → flwr-1.21.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132)
  1. flwr/__init__.py +4 -1
  2. flwr/app/__init__.py +28 -0
  3. flwr/app/exception.py +31 -0
  4. flwr/cli/auth_plugin/oidc_cli_plugin.py +4 -4
  5. flwr/cli/cli_user_auth_interceptor.py +1 -1
  6. flwr/cli/config_utils.py +3 -3
  7. flwr/cli/constant.py +25 -8
  8. flwr/cli/log.py +9 -9
  9. flwr/cli/login/login.py +3 -3
  10. flwr/cli/ls.py +5 -5
  11. flwr/cli/new/new.py +11 -0
  12. flwr/cli/new/templates/app/code/__init__.pytorch_msg_api.py.tpl +1 -0
  13. flwr/cli/new/templates/app/code/client.pytorch_msg_api.py.tpl +80 -0
  14. flwr/cli/new/templates/app/code/server.pytorch_msg_api.py.tpl +41 -0
  15. flwr/cli/new/templates/app/code/task.pytorch_msg_api.py.tpl +98 -0
  16. flwr/cli/new/templates/app/pyproject.baseline.toml.tpl +1 -1
  17. flwr/cli/new/templates/app/pyproject.flowertune.toml.tpl +1 -1
  18. flwr/cli/new/templates/app/pyproject.huggingface.toml.tpl +1 -1
  19. flwr/cli/new/templates/app/pyproject.jax.toml.tpl +1 -1
  20. flwr/cli/new/templates/app/pyproject.mlx.toml.tpl +1 -1
  21. flwr/cli/new/templates/app/pyproject.numpy.toml.tpl +1 -1
  22. flwr/cli/new/templates/app/pyproject.pytorch.toml.tpl +1 -1
  23. flwr/cli/new/templates/app/pyproject.pytorch_msg_api.toml.tpl +53 -0
  24. flwr/cli/new/templates/app/pyproject.sklearn.toml.tpl +1 -1
  25. flwr/cli/new/templates/app/pyproject.tensorflow.toml.tpl +1 -1
  26. flwr/cli/run/run.py +9 -13
  27. flwr/cli/stop.py +7 -4
  28. flwr/cli/utils.py +19 -8
  29. flwr/client/grpc_rere_client/connection.py +1 -12
  30. flwr/client/rest_client/connection.py +3 -0
  31. flwr/clientapp/__init__.py +10 -0
  32. flwr/clientapp/mod/__init__.py +26 -0
  33. flwr/clientapp/mod/centraldp_mods.py +132 -0
  34. flwr/common/args.py +20 -6
  35. flwr/common/auth_plugin/__init__.py +4 -4
  36. flwr/common/auth_plugin/auth_plugin.py +7 -7
  37. flwr/common/constant.py +23 -4
  38. flwr/common/event_log_plugin/event_log_plugin.py +1 -1
  39. flwr/common/exit/__init__.py +4 -0
  40. flwr/common/exit/exit.py +8 -1
  41. flwr/common/exit/exit_code.py +26 -7
  42. flwr/common/exit/exit_handler.py +62 -0
  43. flwr/common/{exit_handlers.py → exit/signal_handler.py} +20 -37
  44. flwr/common/grpc.py +0 -11
  45. flwr/common/inflatable_utils.py +1 -1
  46. flwr/common/logger.py +1 -1
  47. flwr/common/retry_invoker.py +30 -11
  48. flwr/common/telemetry.py +4 -0
  49. flwr/compat/server/app.py +2 -2
  50. flwr/proto/appio_pb2.py +25 -17
  51. flwr/proto/appio_pb2.pyi +46 -2
  52. flwr/proto/clientappio_pb2.py +3 -11
  53. flwr/proto/clientappio_pb2.pyi +0 -47
  54. flwr/proto/clientappio_pb2_grpc.py +19 -20
  55. flwr/proto/clientappio_pb2_grpc.pyi +10 -11
  56. flwr/proto/control_pb2.py +62 -0
  57. flwr/proto/{exec_pb2_grpc.py → control_pb2_grpc.py} +54 -54
  58. flwr/proto/{exec_pb2_grpc.pyi → control_pb2_grpc.pyi} +28 -28
  59. flwr/proto/serverappio_pb2.py +2 -2
  60. flwr/proto/serverappio_pb2_grpc.py +68 -0
  61. flwr/proto/serverappio_pb2_grpc.pyi +26 -0
  62. flwr/proto/simulationio_pb2.py +4 -11
  63. flwr/proto/simulationio_pb2.pyi +0 -58
  64. flwr/proto/simulationio_pb2_grpc.py +129 -27
  65. flwr/proto/simulationio_pb2_grpc.pyi +52 -13
  66. flwr/server/app.py +129 -152
  67. flwr/server/grid/grpc_grid.py +3 -0
  68. flwr/server/grid/inmemory_grid.py +1 -0
  69. flwr/server/serverapp/app.py +157 -146
  70. flwr/server/superlink/fleet/vce/backend/raybackend.py +3 -1
  71. flwr/server/superlink/fleet/vce/vce_api.py +6 -6
  72. flwr/server/superlink/linkstate/in_memory_linkstate.py +34 -0
  73. flwr/server/superlink/linkstate/linkstate.py +2 -1
  74. flwr/server/superlink/linkstate/sqlite_linkstate.py +45 -0
  75. flwr/server/superlink/serverappio/serverappio_grpc.py +1 -1
  76. flwr/server/superlink/serverappio/serverappio_servicer.py +61 -6
  77. flwr/server/superlink/simulation/simulationio_servicer.py +97 -21
  78. flwr/serverapp/__init__.py +12 -0
  79. flwr/serverapp/dp_fixed_clipping.py +352 -0
  80. flwr/serverapp/exception.py +38 -0
  81. flwr/serverapp/strategy/__init__.py +38 -0
  82. flwr/serverapp/strategy/dp_fixed_clipping.py +352 -0
  83. flwr/serverapp/strategy/fedadagrad.py +162 -0
  84. flwr/serverapp/strategy/fedadam.py +181 -0
  85. flwr/serverapp/strategy/fedavg.py +295 -0
  86. flwr/serverapp/strategy/fedopt.py +218 -0
  87. flwr/serverapp/strategy/fedyogi.py +173 -0
  88. flwr/serverapp/strategy/result.py +105 -0
  89. flwr/serverapp/strategy/strategy.py +285 -0
  90. flwr/serverapp/strategy/strategy_utils.py +251 -0
  91. flwr/serverapp/strategy/strategy_utils_tests.py +304 -0
  92. flwr/simulation/app.py +161 -164
  93. flwr/supercore/app_utils.py +58 -0
  94. flwr/{supernode/scheduler → supercore/cli}/__init__.py +3 -3
  95. flwr/supercore/cli/flower_superexec.py +141 -0
  96. flwr/supercore/{scheduler → corestate}/__init__.py +3 -3
  97. flwr/supercore/corestate/corestate.py +81 -0
  98. flwr/supercore/grpc_health/__init__.py +3 -0
  99. flwr/supercore/grpc_health/health_server.py +53 -0
  100. flwr/supercore/grpc_health/simple_health_servicer.py +2 -2
  101. flwr/{superexec → supercore/superexec}/__init__.py +1 -1
  102. flwr/supercore/superexec/plugin/__init__.py +28 -0
  103. flwr/{supernode/scheduler/simple_clientapp_scheduler_plugin.py → supercore/superexec/plugin/base_exec_plugin.py} +10 -6
  104. flwr/supercore/superexec/plugin/clientapp_exec_plugin.py +28 -0
  105. flwr/supercore/{scheduler/plugin.py → superexec/plugin/exec_plugin.py} +4 -4
  106. flwr/supercore/superexec/plugin/serverapp_exec_plugin.py +28 -0
  107. flwr/supercore/superexec/plugin/simulation_exec_plugin.py +28 -0
  108. flwr/supercore/superexec/run_superexec.py +185 -0
  109. flwr/superlink/servicer/__init__.py +15 -0
  110. flwr/superlink/servicer/control/__init__.py +22 -0
  111. flwr/{superexec/exec_event_log_interceptor.py → superlink/servicer/control/control_event_log_interceptor.py} +7 -7
  112. flwr/{superexec/exec_grpc.py → superlink/servicer/control/control_grpc.py} +24 -29
  113. flwr/{superexec/exec_license_interceptor.py → superlink/servicer/control/control_license_interceptor.py} +6 -6
  114. flwr/{superexec/exec_servicer.py → superlink/servicer/control/control_servicer.py} +69 -30
  115. flwr/{superexec/exec_user_auth_interceptor.py → superlink/servicer/control/control_user_auth_interceptor.py} +10 -10
  116. flwr/supernode/cli/flower_supernode.py +3 -0
  117. flwr/supernode/cli/flwr_clientapp.py +18 -21
  118. flwr/supernode/nodestate/in_memory_nodestate.py +2 -2
  119. flwr/supernode/nodestate/nodestate.py +3 -59
  120. flwr/supernode/runtime/run_clientapp.py +39 -102
  121. flwr/supernode/servicer/clientappio/clientappio_servicer.py +10 -17
  122. flwr/supernode/start_client_internal.py +35 -76
  123. {flwr-1.20.0.dist-info → flwr-1.21.0.dist-info}/METADATA +4 -3
  124. {flwr-1.20.0.dist-info → flwr-1.21.0.dist-info}/RECORD +127 -98
  125. {flwr-1.20.0.dist-info → flwr-1.21.0.dist-info}/entry_points.txt +1 -0
  126. flwr/proto/exec_pb2.py +0 -62
  127. flwr/superexec/app.py +0 -45
  128. flwr/superexec/deployment.py +0 -191
  129. flwr/superexec/executor.py +0 -100
  130. flwr/superexec/simulation.py +0 -129
  131. /flwr/proto/{exec_pb2.pyi → control_pb2.pyi} +0 -0
  132. {flwr-1.20.0.dist-info → flwr-1.21.0.dist-info}/WHEEL +0 -0
@@ -16,13 +16,12 @@
16
16
 
17
17
 
18
18
  import argparse
19
- import gc
20
19
  from logging import DEBUG, ERROR, INFO
21
20
  from pathlib import Path
22
21
  from queue import Queue
23
- from time import sleep
24
22
  from typing import Optional
25
23
 
24
+ from flwr.app.exception import AppExitException
26
25
  from flwr.cli.config_utils import get_fab_metadata
27
26
  from flwr.cli.install import install_from_fab
28
27
  from flwr.cli.utils import get_sha256_hash
@@ -35,10 +34,11 @@ from flwr.common.config import (
35
34
  )
36
35
  from flwr.common.constant import (
37
36
  SERVERAPPIO_API_DEFAULT_CLIENT_ADDRESS,
37
+ ExecPluginType,
38
38
  Status,
39
39
  SubStatus,
40
40
  )
41
- from flwr.common.exit import ExitCode, flwr_exit
41
+ from flwr.common.exit import ExitCode, add_exit_handler, flwr_exit
42
42
  from flwr.common.heartbeat import HeartbeatSender, get_grpc_app_heartbeat_fn
43
43
  from flwr.common.logger import (
44
44
  log,
@@ -62,8 +62,12 @@ from flwr.proto.appio_pb2 import ( # pylint: disable=E0611
62
62
  PushAppOutputsRequest,
63
63
  )
64
64
  from flwr.proto.run_pb2 import UpdateRunStatusRequest # pylint: disable=E0611
65
+ from flwr.proto.serverappio_pb2_grpc import ServerAppIoStub
65
66
  from flwr.server.grid.grpc_grid import GrpcGrid
66
67
  from flwr.server.run_serverapp import run as run_
68
+ from flwr.supercore.app_utils import start_parent_process_monitor
69
+ from flwr.supercore.superexec.plugin import ServerAppExecPlugin
70
+ from flwr.supercore.superexec.run_superexec import run_with_deprecation_warning
67
71
 
68
72
 
69
73
  def flwr_serverapp() -> None:
@@ -74,14 +78,27 @@ def flwr_serverapp() -> None:
74
78
 
75
79
  args = _parse_args_run_flwr_serverapp().parse_args()
76
80
 
77
- log(INFO, "Start `flwr-serverapp` process")
78
-
79
81
  if not args.insecure:
80
82
  flwr_exit(
81
83
  ExitCode.COMMON_TLS_NOT_SUPPORTED,
82
84
  "`flwr-serverapp` does not support TLS yet.",
83
85
  )
84
86
 
87
+ # Disallow long-running `flwr-serverapp` processes
88
+ if args.token is None:
89
+ run_with_deprecation_warning(
90
+ cmd="flwr-serverapp",
91
+ plugin_type=ExecPluginType.SERVER_APP,
92
+ plugin_class=ServerAppExecPlugin,
93
+ stub_class=ServerAppIoStub,
94
+ appio_api_address=args.serverappio_api_address,
95
+ flwr_dir=args.flwr_dir,
96
+ parent_pid=args.parent_pid,
97
+ warn_run_once=args.run_once,
98
+ )
99
+ return
100
+
101
+ log(INFO, "Start `flwr-serverapp` process")
85
102
  log(
86
103
  DEBUG,
87
104
  "`flwr-serverapp` will attempt to connect to SuperLink's "
@@ -91,177 +108,177 @@ def flwr_serverapp() -> None:
91
108
  run_serverapp(
92
109
  serverappio_api_address=args.serverappio_api_address,
93
110
  log_queue=log_queue,
94
- run_once=args.run_once,
111
+ token=args.token,
95
112
  flwr_dir=args.flwr_dir,
96
113
  certificates=None,
114
+ parent_pid=args.parent_pid,
97
115
  )
98
116
 
99
117
  # Restore stdout/stderr
100
118
  restore_output()
101
119
 
102
120
 
103
- def run_serverapp( # pylint: disable=R0914, disable=W0212, disable=R0915
121
+ def run_serverapp( # pylint: disable=R0913, R0914, R0915, R0917, W0212
104
122
  serverappio_api_address: str,
105
123
  log_queue: Queue[Optional[str]],
106
- run_once: bool,
124
+ token: str,
107
125
  flwr_dir: Optional[str] = None,
108
126
  certificates: Optional[bytes] = None,
127
+ parent_pid: Optional[int] = None,
109
128
  ) -> None:
110
129
  """Run Flower ServerApp process."""
130
+ # Monitor the main process in case of SIGKILL
131
+ if parent_pid is not None:
132
+ start_parent_process_monitor(parent_pid)
133
+
111
134
  # Resolve directory where FABs are installed
112
135
  flwr_dir_ = get_flwr_dir(flwr_dir)
113
136
  log_uploader = None
114
- success = True
115
137
  hash_run_id = None
116
138
  run_status = None
117
139
  heartbeat_sender = None
118
140
  grid = None
119
141
  context = None
120
- while True:
142
+ exit_code = ExitCode.SUCCESS
121
143
 
122
- try:
123
- # Initialize the GrpcGrid
124
- grid = GrpcGrid(
125
- serverappio_service_address=serverappio_api_address,
126
- root_certificates=certificates,
127
- )
144
+ def on_exit() -> None:
145
+ # Stop heartbeat sender
146
+ if heartbeat_sender:
147
+ heartbeat_sender.stop()
148
+
149
+ # Stop log uploader for this run and upload final logs
150
+ if log_uploader:
151
+ stop_log_uploader(log_queue, log_uploader)
128
152
 
129
- # Pull ServerAppInputs from LinkState
130
- req = PullAppInputsRequest()
131
- log(DEBUG, "[flwr-serverapp] Pull ServerAppInputs")
132
- res: PullAppInputsResponse = grid._stub.PullAppInputs(req)
133
- if not res.HasField("run"):
134
- sleep(3)
135
- run_status = None
136
- continue
137
-
138
- context = context_from_proto(res.context)
139
- run = run_from_proto(res.run)
140
- fab = fab_from_proto(res.fab)
141
-
142
- hash_run_id = get_sha256_hash(run.run_id)
143
-
144
- grid.set_run(run.run_id)
145
-
146
- # Start log uploader for this run
147
- log_uploader = start_log_uploader(
148
- log_queue=log_queue,
149
- node_id=0,
150
- run_id=run.run_id,
151
- stub=grid._stub,
153
+ # Update run status
154
+ if run_status and grid:
155
+ run_status_proto = run_status_to_proto(run_status)
156
+ grid._stub.UpdateRunStatus(
157
+ UpdateRunStatusRequest(run_id=run.run_id, run_status=run_status_proto)
152
158
  )
153
159
 
154
- log(DEBUG, "[flwr-serverapp] Start FAB installation.")
155
- install_from_fab(fab.content, flwr_dir=flwr_dir_, skip_prompt=True)
160
+ # Close the Grpc connection
161
+ if grid:
162
+ grid.close()
156
163
 
157
- fab_id, fab_version = get_fab_metadata(fab.content)
164
+ add_exit_handler(on_exit)
158
165
 
159
- app_path = str(
160
- get_project_dir(fab_id, fab_version, fab.hash_str, flwr_dir_)
161
- )
162
- config = get_project_config(app_path)
166
+ try:
167
+ # Initialize the GrpcGrid
168
+ grid = GrpcGrid(
169
+ serverappio_service_address=serverappio_api_address,
170
+ root_certificates=certificates,
171
+ )
163
172
 
164
- # Obtain server app reference and the run config
165
- server_app_attr = config["tool"]["flwr"]["app"]["components"]["serverapp"]
166
- server_app_run_config = get_fused_config_from_dir(
167
- Path(app_path), run.override_config
168
- )
173
+ # Pull ServerAppInputs from LinkState
174
+ req = PullAppInputsRequest(token=token)
175
+ log(DEBUG, "[flwr-serverapp] Pull ServerAppInputs")
176
+ res: PullAppInputsResponse = grid._stub.PullAppInputs(req)
177
+ context = context_from_proto(res.context)
178
+ run = run_from_proto(res.run)
179
+ fab = fab_from_proto(res.fab)
169
180
 
170
- # Update run_config in context
171
- context.run_config = server_app_run_config
181
+ hash_run_id = get_sha256_hash(run.run_id)
172
182
 
173
- log(
174
- DEBUG,
175
- "[flwr-serverapp] Will load ServerApp `%s` in %s",
176
- server_app_attr,
177
- app_path,
178
- )
183
+ grid.set_run(run.run_id)
179
184
 
180
- # Change status to Running
181
- run_status_proto = run_status_to_proto(RunStatus(Status.RUNNING, "", ""))
182
- grid._stub.UpdateRunStatus(
183
- UpdateRunStatusRequest(run_id=run.run_id, run_status=run_status_proto)
184
- )
185
+ # Start log uploader for this run
186
+ log_uploader = start_log_uploader(
187
+ log_queue=log_queue,
188
+ node_id=0,
189
+ run_id=run.run_id,
190
+ stub=grid._stub,
191
+ )
185
192
 
186
- event(
187
- EventType.FLWR_SERVERAPP_RUN_ENTER,
188
- event_details={"run-id-hash": hash_run_id},
189
- )
193
+ log(DEBUG, "[flwr-serverapp] Start FAB installation.")
194
+ install_from_fab(fab.content, flwr_dir=flwr_dir_, skip_prompt=True)
190
195
 
191
- # Set up heartbeat sender
192
- heartbeat_fn = get_grpc_app_heartbeat_fn(
193
- grid._stub,
194
- run.run_id,
195
- failure_message="Heartbeat failed unexpectedly. The SuperLink could "
196
- "not find the provided run ID, or the run status is invalid.",
197
- )
198
- heartbeat_sender = HeartbeatSender(heartbeat_fn)
199
- heartbeat_sender.start()
200
-
201
- # Load and run the ServerApp with the Grid
202
- updated_context = run_(
203
- grid=grid,
204
- server_app_dir=app_path,
205
- server_app_attr=server_app_attr,
206
- context=context,
207
- )
196
+ fab_id, fab_version = get_fab_metadata(fab.content)
208
197
 
209
- # Send resulting context
210
- context_proto = context_to_proto(updated_context)
211
- log(DEBUG, "[flwr-serverapp] Will push ServerAppOutputs")
212
- out_req = PushAppOutputsRequest(run_id=run.run_id, context=context_proto)
213
- _ = grid._stub.PushAppOutputs(out_req)
214
-
215
- run_status = RunStatus(Status.FINISHED, SubStatus.COMPLETED, "")
216
- except RunNotRunningException:
217
- log(INFO, "")
218
- log(INFO, "Run ID %s stopped.", run.run_id)
219
- log(INFO, "")
220
- run_status = None
221
- success = False
222
-
223
- except Exception as ex: # pylint: disable=broad-exception-caught
224
- exc_entity = "ServerApp"
225
- log(ERROR, "%s raised an exception", exc_entity, exc_info=ex)
226
- run_status = RunStatus(Status.FINISHED, SubStatus.FAILED, str(ex))
227
- success = False
228
-
229
- finally:
230
- # Stop heartbeat sender
231
- if heartbeat_sender:
232
- heartbeat_sender.stop()
233
- heartbeat_sender = None
234
-
235
- # Stop log uploader for this run and upload final logs
236
- if log_uploader:
237
- stop_log_uploader(log_queue, log_uploader)
238
- log_uploader = None
239
-
240
- # Update run status
241
- if run_status and grid:
242
- run_status_proto = run_status_to_proto(run_status)
243
- grid._stub.UpdateRunStatus(
244
- UpdateRunStatusRequest(
245
- run_id=run.run_id, run_status=run_status_proto
246
- )
247
- )
248
-
249
- # Close the Grpc connection
250
- if grid:
251
- grid.close()
252
-
253
- # Clean up the Context
254
- context = None
255
- gc.collect()
256
-
257
- event(
258
- EventType.FLWR_SERVERAPP_RUN_LEAVE,
259
- event_details={"run-id-hash": hash_run_id, "success": success},
260
- )
198
+ app_path = str(get_project_dir(fab_id, fab_version, fab.hash_str, flwr_dir_))
199
+ config = get_project_config(app_path)
200
+
201
+ # Obtain server app reference and the run config
202
+ server_app_attr = config["tool"]["flwr"]["app"]["components"]["serverapp"]
203
+ server_app_run_config = get_fused_config_from_dir(
204
+ Path(app_path), run.override_config
205
+ )
261
206
 
262
- # Stop the loop if `flwr-serverapp` is expected to process a single run
263
- if run_once:
264
- break
207
+ # Update run_config in context
208
+ context.run_config = server_app_run_config
209
+
210
+ log(
211
+ DEBUG,
212
+ "[flwr-serverapp] Will load ServerApp `%s` in %s",
213
+ server_app_attr,
214
+ app_path,
215
+ )
216
+
217
+ # Change status to Running
218
+ run_status_proto = run_status_to_proto(RunStatus(Status.RUNNING, "", ""))
219
+ grid._stub.UpdateRunStatus(
220
+ UpdateRunStatusRequest(run_id=run.run_id, run_status=run_status_proto)
221
+ )
222
+
223
+ event(
224
+ EventType.FLWR_SERVERAPP_RUN_ENTER,
225
+ event_details={"run-id-hash": hash_run_id},
226
+ )
227
+
228
+ # Set up heartbeat sender
229
+ heartbeat_fn = get_grpc_app_heartbeat_fn(
230
+ grid._stub,
231
+ run.run_id,
232
+ failure_message="Heartbeat failed unexpectedly. The SuperLink could "
233
+ "not find the provided run ID, or the run status is invalid.",
234
+ )
235
+ heartbeat_sender = HeartbeatSender(heartbeat_fn)
236
+ heartbeat_sender.start()
237
+
238
+ # Load and run the ServerApp with the Grid
239
+ updated_context = run_(
240
+ grid=grid,
241
+ server_app_dir=app_path,
242
+ server_app_attr=server_app_attr,
243
+ context=context,
244
+ )
245
+
246
+ # Send resulting context
247
+ context_proto = context_to_proto(updated_context)
248
+ log(DEBUG, "[flwr-serverapp] Will push ServerAppOutputs")
249
+ out_req = PushAppOutputsRequest(
250
+ token=token, run_id=run.run_id, context=context_proto
251
+ )
252
+ _ = grid._stub.PushAppOutputs(out_req)
253
+
254
+ run_status = RunStatus(Status.FINISHED, SubStatus.COMPLETED, "")
255
+
256
+ # Raised when the run is already stopped by the user
257
+ except RunNotRunningException:
258
+ log(INFO, "")
259
+ log(INFO, "Run ID %s stopped.", run.run_id)
260
+ log(INFO, "")
261
+ run_status = None
262
+ # No need to update the exit code since this is expected behavior
263
+
264
+ except Exception as ex: # pylint: disable=broad-exception-caught
265
+ exc_entity = "ServerApp"
266
+ log(ERROR, "%s raised an exception", exc_entity, exc_info=ex)
267
+ run_status = RunStatus(Status.FINISHED, SubStatus.FAILED, str(ex))
268
+
269
+ # Set exit code
270
+ exit_code = ExitCode.SERVERAPP_EXCEPTION # General exit code
271
+ if isinstance(ex, AppExitException):
272
+ exit_code = ex.exit_code
273
+
274
+ flwr_exit(
275
+ code=exit_code,
276
+ event_type=EventType.FLWR_SERVERAPP_RUN_LEAVE,
277
+ event_details={
278
+ "run-id-hash": hash_run_id,
279
+ "success": exit_code == ExitCode.SUCCESS,
280
+ },
281
+ )
265
282
 
266
283
 
267
284
  def _parse_args_run_flwr_serverapp() -> argparse.ArgumentParser:
@@ -276,11 +293,5 @@ def _parse_args_run_flwr_serverapp() -> argparse.ArgumentParser:
276
293
  help="Address of SuperLink's ServerAppIo API (IPv4, IPv6, or a domain name)."
277
294
  f"By default, it is set to {SERVERAPPIO_API_DEFAULT_CLIENT_ADDRESS}.",
278
295
  )
279
- parser.add_argument(
280
- "--run-once",
281
- action="store_true",
282
- help="When set, this process will start a single ServerApp for a pending Run. "
283
- "If there is no pending Run, the process will exit.",
284
- )
285
296
  add_args_flwr_app_common(parser=parser)
286
297
  return parser
@@ -161,6 +161,7 @@ class RayBackend(Backend):
161
161
  "Call the backend's `build()` method before processing messages."
162
162
  )
163
163
 
164
+ future = None
164
165
  try:
165
166
  # Submit a task to the pool
166
167
  future = self.pool.submit(
@@ -183,7 +184,8 @@ class RayBackend(Backend):
183
184
  self.__class__.__name__,
184
185
  )
185
186
  # add actor back into pool
186
- self.pool.add_actor_back_to_pool(future)
187
+ if future is not None:
188
+ self.pool.add_actor_back_to_pool(future)
187
189
  raise ex
188
190
 
189
191
  def terminate(self) -> None:
@@ -23,7 +23,6 @@ from concurrent.futures import ThreadPoolExecutor
23
23
  from logging import DEBUG, ERROR, INFO, WARN
24
24
  from pathlib import Path
25
25
  from queue import Empty, Queue
26
- from time import sleep
27
26
  from typing import Callable, Optional
28
27
  from uuid import uuid4
29
28
 
@@ -153,7 +152,7 @@ def add_messages_to_queue(
153
152
  message_ins_list = state.get_message_ins(node_id=node_id, limit=1)
154
153
  for msg in message_ins_list:
155
154
  queue.put(msg)
156
- sleep(0.1)
155
+ f_stop.wait(0.1)
157
156
 
158
157
 
159
158
  def put_message_into_state(
@@ -182,6 +181,7 @@ def run_api(
182
181
  messageins_queue: Queue[Message] = Queue()
183
182
  messageres_queue: Queue[Message] = Queue()
184
183
 
184
+ backend = None
185
185
  try:
186
186
 
187
187
  # Instantiate backend
@@ -236,16 +236,16 @@ def run_api(
236
236
  log(ERROR, traceback.format_exc())
237
237
  log(WARN, "Stopping Simulation Engine.")
238
238
 
239
- # Manually trigger stopping event
240
- f_stop.set()
241
-
242
239
  # Raise exception
243
240
  raise RuntimeError("Simulation Engine crashed.") from ex
244
241
 
245
242
  finally:
243
+ # Manually trigger stopping event
244
+ f_stop.set()
246
245
 
247
246
  # Terminate backend
248
- backend.terminate()
247
+ if backend is not None:
248
+ backend.terminate()
249
249
 
250
250
 
251
251
  # pylint: disable=too-many-arguments,unused-argument,too-many-locals,too-many-branches
@@ -15,6 +15,7 @@
15
15
  """In-memory LinkState implementation."""
16
16
 
17
17
 
18
+ import secrets
18
19
  import threading
19
20
  import time
20
21
  from bisect import bisect_right
@@ -25,6 +26,7 @@ from typing import Optional
25
26
 
26
27
  from flwr.common import Context, Message, log, now
27
28
  from flwr.common.constant import (
29
+ FLWR_APP_TOKEN_LENGTH,
28
30
  HEARTBEAT_MAX_INTERVAL,
29
31
  HEARTBEAT_PATIENCE,
30
32
  MESSAGE_TTL_TOLERANCE,
@@ -80,6 +82,11 @@ class InMemoryLinkState(LinkState): # pylint: disable=R0902,R0904
80
82
  self.message_res_store: dict[str, Message] = {}
81
83
  self.message_ins_id_to_message_res_id: dict[str, str] = {}
82
84
 
85
+ # Store run ID to token mapping and token to run ID mapping
86
+ self.token_store: dict[int, str] = {}
87
+ self.token_to_run_id: dict[str, int] = {}
88
+ self.lock_token_store = threading.Lock()
89
+
83
90
  # Map flwr_aid to run_ids for O(1) reverse index lookup
84
91
  self.flwr_aid_to_run_ids: dict[str, set[int]] = defaultdict(set)
85
92
 
@@ -678,3 +685,30 @@ class InMemoryLinkState(LinkState): # pylint: disable=R0902,R0904
678
685
  index = bisect_right(run.logs, (after_timestamp, ""))
679
686
  latest_timestamp = run.logs[-1][0] if index < len(run.logs) else 0.0
680
687
  return "".join(log for _, log in run.logs[index:]), latest_timestamp
688
+
689
+ def create_token(self, run_id: int) -> Optional[str]:
690
+ """Create a token for the given run ID."""
691
+ token = secrets.token_hex(FLWR_APP_TOKEN_LENGTH) # Generate a random token
692
+ with self.lock_token_store:
693
+ if run_id in self.token_store:
694
+ return None # Token already created for this run ID
695
+ self.token_store[run_id] = token
696
+ self.token_to_run_id[token] = run_id
697
+ return token
698
+
699
+ def verify_token(self, run_id: int, token: str) -> bool:
700
+ """Verify a token for the given run ID."""
701
+ with self.lock_token_store:
702
+ return self.token_store.get(run_id) == token
703
+
704
+ def delete_token(self, run_id: int) -> None:
705
+ """Delete the token for the given run ID."""
706
+ with self.lock_token_store:
707
+ token = self.token_store.pop(run_id, None)
708
+ if token is not None:
709
+ self.token_to_run_id.pop(token, None)
710
+
711
+ def get_run_id_by_token(self, token: str) -> Optional[int]:
712
+ """Get the run ID associated with a given token."""
713
+ with self.lock_token_store:
714
+ return self.token_to_run_id.get(token)
@@ -21,9 +21,10 @@ from typing import Optional
21
21
  from flwr.common import Context, Message
22
22
  from flwr.common.record import ConfigRecord
23
23
  from flwr.common.typing import Run, RunStatus, UserConfig
24
+ from flwr.supercore.corestate import CoreState
24
25
 
25
26
 
26
- class LinkState(abc.ABC): # pylint: disable=R0904
27
+ class LinkState(CoreState): # pylint: disable=R0904
27
28
  """Abstract LinkState."""
28
29
 
29
30
  @abc.abstractmethod
@@ -19,6 +19,7 @@
19
19
 
20
20
  import json
21
21
  import re
22
+ import secrets
22
23
  import sqlite3
23
24
  import time
24
25
  from collections.abc import Sequence
@@ -27,6 +28,7 @@ from typing import Any, Optional, Union, cast
27
28
 
28
29
  from flwr.common import Context, Message, Metadata, log, now
29
30
  from flwr.common.constant import (
31
+ FLWR_APP_TOKEN_LENGTH,
30
32
  HEARTBEAT_MAX_INTERVAL,
31
33
  HEARTBEAT_PATIENCE,
32
34
  MESSAGE_TTL_TOLERANCE,
@@ -163,6 +165,13 @@ CREATE TABLE IF NOT EXISTS message_res(
163
165
  );
164
166
  """
165
167
 
168
+ SQL_CREATE_TABLE_TOKEN_STORE = """
169
+ CREATE TABLE IF NOT EXISTS token_store (
170
+ run_id INTEGER PRIMARY KEY,
171
+ token TEXT UNIQUE NOT NULL
172
+ );
173
+ """
174
+
166
175
  DictOrTuple = Union[tuple[Any, ...], dict[str, Any]]
167
176
 
168
177
 
@@ -212,6 +221,7 @@ class SqliteLinkState(LinkState): # pylint: disable=R0904
212
221
  cur.execute(SQL_CREATE_TABLE_MESSAGE_RES)
213
222
  cur.execute(SQL_CREATE_TABLE_NODE)
214
223
  cur.execute(SQL_CREATE_TABLE_PUBLIC_KEY)
224
+ cur.execute(SQL_CREATE_TABLE_TOKEN_STORE)
215
225
  cur.execute(SQL_CREATE_INDEX_ONLINE_UNTIL)
216
226
  res = cur.execute("SELECT name FROM sqlite_schema;")
217
227
  return res.fetchall()
@@ -1138,6 +1148,41 @@ class SqliteLinkState(LinkState): # pylint: disable=R0904
1138
1148
 
1139
1149
  return message_ins
1140
1150
 
1151
+ def create_token(self, run_id: int) -> Optional[str]:
1152
+ """Create a token for the given run ID."""
1153
+ token = secrets.token_hex(FLWR_APP_TOKEN_LENGTH) # Generate a random token
1154
+ query = "INSERT INTO token_store (run_id, token) VALUES (:run_id, :token);"
1155
+ data = {"run_id": convert_uint64_to_sint64(run_id), "token": token}
1156
+ try:
1157
+ self.query(query, data)
1158
+ except sqlite3.IntegrityError:
1159
+ return None # Token already created for this run ID
1160
+ return token
1161
+
1162
+ def verify_token(self, run_id: int, token: str) -> bool:
1163
+ """Verify a token for the given run ID."""
1164
+ query = "SELECT token FROM token_store WHERE run_id = :run_id;"
1165
+ data = {"run_id": convert_uint64_to_sint64(run_id)}
1166
+ rows = self.query(query, data)
1167
+ if not rows:
1168
+ return False
1169
+ return cast(str, rows[0]["token"]) == token
1170
+
1171
+ def delete_token(self, run_id: int) -> None:
1172
+ """Delete the token for the given run ID."""
1173
+ query = "DELETE FROM token_store WHERE run_id = :run_id;"
1174
+ data = {"run_id": convert_uint64_to_sint64(run_id)}
1175
+ self.query(query, data)
1176
+
1177
+ def get_run_id_by_token(self, token: str) -> Optional[int]:
1178
+ """Get the run ID associated with a given token."""
1179
+ query = "SELECT run_id FROM token_store WHERE token = :token;"
1180
+ data = {"token": token}
1181
+ rows = self.query(query, data)
1182
+ if not rows:
1183
+ return None
1184
+ return convert_sint64_to_uint64(rows[0]["run_id"])
1185
+
1141
1186
 
1142
1187
  def dict_factory(
1143
1188
  cursor: sqlite3.Cursor,
@@ -58,7 +58,7 @@ def run_serverappio_api_grpc(
58
58
  certificates=certificates,
59
59
  )
60
60
 
61
- log(INFO, "Flower ECE: Starting ServerAppIo API (gRPC-rere) on %s", address)
61
+ log(INFO, "Flower Deployment Runtime: Starting ServerAppIo API on %s", address)
62
62
  serverappio_grpc_server.start()
63
63
 
64
64
  return serverappio_grpc_server