flwr 1.20.0__py3-none-any.whl → 1.21.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132)
  1. flwr/__init__.py +4 -1
  2. flwr/app/__init__.py +28 -0
  3. flwr/app/exception.py +31 -0
  4. flwr/cli/auth_plugin/oidc_cli_plugin.py +4 -4
  5. flwr/cli/cli_user_auth_interceptor.py +1 -1
  6. flwr/cli/config_utils.py +3 -3
  7. flwr/cli/constant.py +25 -8
  8. flwr/cli/log.py +9 -9
  9. flwr/cli/login/login.py +3 -3
  10. flwr/cli/ls.py +5 -5
  11. flwr/cli/new/new.py +11 -0
  12. flwr/cli/new/templates/app/code/__init__.pytorch_msg_api.py.tpl +1 -0
  13. flwr/cli/new/templates/app/code/client.pytorch_msg_api.py.tpl +80 -0
  14. flwr/cli/new/templates/app/code/server.pytorch_msg_api.py.tpl +41 -0
  15. flwr/cli/new/templates/app/code/task.pytorch_msg_api.py.tpl +98 -0
  16. flwr/cli/new/templates/app/pyproject.baseline.toml.tpl +1 -1
  17. flwr/cli/new/templates/app/pyproject.flowertune.toml.tpl +1 -1
  18. flwr/cli/new/templates/app/pyproject.huggingface.toml.tpl +1 -1
  19. flwr/cli/new/templates/app/pyproject.jax.toml.tpl +1 -1
  20. flwr/cli/new/templates/app/pyproject.mlx.toml.tpl +1 -1
  21. flwr/cli/new/templates/app/pyproject.numpy.toml.tpl +1 -1
  22. flwr/cli/new/templates/app/pyproject.pytorch.toml.tpl +1 -1
  23. flwr/cli/new/templates/app/pyproject.pytorch_msg_api.toml.tpl +53 -0
  24. flwr/cli/new/templates/app/pyproject.sklearn.toml.tpl +1 -1
  25. flwr/cli/new/templates/app/pyproject.tensorflow.toml.tpl +1 -1
  26. flwr/cli/run/run.py +9 -13
  27. flwr/cli/stop.py +7 -4
  28. flwr/cli/utils.py +19 -8
  29. flwr/client/grpc_rere_client/connection.py +1 -12
  30. flwr/client/rest_client/connection.py +3 -0
  31. flwr/clientapp/__init__.py +10 -0
  32. flwr/clientapp/mod/__init__.py +26 -0
  33. flwr/clientapp/mod/centraldp_mods.py +132 -0
  34. flwr/common/args.py +20 -6
  35. flwr/common/auth_plugin/__init__.py +4 -4
  36. flwr/common/auth_plugin/auth_plugin.py +7 -7
  37. flwr/common/constant.py +23 -4
  38. flwr/common/event_log_plugin/event_log_plugin.py +1 -1
  39. flwr/common/exit/__init__.py +4 -0
  40. flwr/common/exit/exit.py +8 -1
  41. flwr/common/exit/exit_code.py +26 -7
  42. flwr/common/exit/exit_handler.py +62 -0
  43. flwr/common/{exit_handlers.py → exit/signal_handler.py} +20 -37
  44. flwr/common/grpc.py +0 -11
  45. flwr/common/inflatable_utils.py +1 -1
  46. flwr/common/logger.py +1 -1
  47. flwr/common/retry_invoker.py +30 -11
  48. flwr/common/telemetry.py +4 -0
  49. flwr/compat/server/app.py +2 -2
  50. flwr/proto/appio_pb2.py +25 -17
  51. flwr/proto/appio_pb2.pyi +46 -2
  52. flwr/proto/clientappio_pb2.py +3 -11
  53. flwr/proto/clientappio_pb2.pyi +0 -47
  54. flwr/proto/clientappio_pb2_grpc.py +19 -20
  55. flwr/proto/clientappio_pb2_grpc.pyi +10 -11
  56. flwr/proto/control_pb2.py +62 -0
  57. flwr/proto/{exec_pb2_grpc.py → control_pb2_grpc.py} +54 -54
  58. flwr/proto/{exec_pb2_grpc.pyi → control_pb2_grpc.pyi} +28 -28
  59. flwr/proto/serverappio_pb2.py +2 -2
  60. flwr/proto/serverappio_pb2_grpc.py +68 -0
  61. flwr/proto/serverappio_pb2_grpc.pyi +26 -0
  62. flwr/proto/simulationio_pb2.py +4 -11
  63. flwr/proto/simulationio_pb2.pyi +0 -58
  64. flwr/proto/simulationio_pb2_grpc.py +129 -27
  65. flwr/proto/simulationio_pb2_grpc.pyi +52 -13
  66. flwr/server/app.py +129 -152
  67. flwr/server/grid/grpc_grid.py +3 -0
  68. flwr/server/grid/inmemory_grid.py +1 -0
  69. flwr/server/serverapp/app.py +157 -146
  70. flwr/server/superlink/fleet/vce/backend/raybackend.py +3 -1
  71. flwr/server/superlink/fleet/vce/vce_api.py +6 -6
  72. flwr/server/superlink/linkstate/in_memory_linkstate.py +34 -0
  73. flwr/server/superlink/linkstate/linkstate.py +2 -1
  74. flwr/server/superlink/linkstate/sqlite_linkstate.py +45 -0
  75. flwr/server/superlink/serverappio/serverappio_grpc.py +1 -1
  76. flwr/server/superlink/serverappio/serverappio_servicer.py +61 -6
  77. flwr/server/superlink/simulation/simulationio_servicer.py +97 -21
  78. flwr/serverapp/__init__.py +12 -0
  79. flwr/serverapp/dp_fixed_clipping.py +352 -0
  80. flwr/serverapp/exception.py +38 -0
  81. flwr/serverapp/strategy/__init__.py +38 -0
  82. flwr/serverapp/strategy/dp_fixed_clipping.py +352 -0
  83. flwr/serverapp/strategy/fedadagrad.py +162 -0
  84. flwr/serverapp/strategy/fedadam.py +181 -0
  85. flwr/serverapp/strategy/fedavg.py +295 -0
  86. flwr/serverapp/strategy/fedopt.py +218 -0
  87. flwr/serverapp/strategy/fedyogi.py +173 -0
  88. flwr/serverapp/strategy/result.py +105 -0
  89. flwr/serverapp/strategy/strategy.py +285 -0
  90. flwr/serverapp/strategy/strategy_utils.py +251 -0
  91. flwr/serverapp/strategy/strategy_utils_tests.py +304 -0
  92. flwr/simulation/app.py +161 -164
  93. flwr/supercore/app_utils.py +58 -0
  94. flwr/{supernode/scheduler → supercore/cli}/__init__.py +3 -3
  95. flwr/supercore/cli/flower_superexec.py +141 -0
  96. flwr/supercore/{scheduler → corestate}/__init__.py +3 -3
  97. flwr/supercore/corestate/corestate.py +81 -0
  98. flwr/supercore/grpc_health/__init__.py +3 -0
  99. flwr/supercore/grpc_health/health_server.py +53 -0
  100. flwr/supercore/grpc_health/simple_health_servicer.py +2 -2
  101. flwr/{superexec → supercore/superexec}/__init__.py +1 -1
  102. flwr/supercore/superexec/plugin/__init__.py +28 -0
  103. flwr/{supernode/scheduler/simple_clientapp_scheduler_plugin.py → supercore/superexec/plugin/base_exec_plugin.py} +10 -6
  104. flwr/supercore/superexec/plugin/clientapp_exec_plugin.py +28 -0
  105. flwr/supercore/{scheduler/plugin.py → superexec/plugin/exec_plugin.py} +4 -4
  106. flwr/supercore/superexec/plugin/serverapp_exec_plugin.py +28 -0
  107. flwr/supercore/superexec/plugin/simulation_exec_plugin.py +28 -0
  108. flwr/supercore/superexec/run_superexec.py +185 -0
  109. flwr/superlink/servicer/__init__.py +15 -0
  110. flwr/superlink/servicer/control/__init__.py +22 -0
  111. flwr/{superexec/exec_event_log_interceptor.py → superlink/servicer/control/control_event_log_interceptor.py} +7 -7
  112. flwr/{superexec/exec_grpc.py → superlink/servicer/control/control_grpc.py} +24 -29
  113. flwr/{superexec/exec_license_interceptor.py → superlink/servicer/control/control_license_interceptor.py} +6 -6
  114. flwr/{superexec/exec_servicer.py → superlink/servicer/control/control_servicer.py} +69 -30
  115. flwr/{superexec/exec_user_auth_interceptor.py → superlink/servicer/control/control_user_auth_interceptor.py} +10 -10
  116. flwr/supernode/cli/flower_supernode.py +3 -0
  117. flwr/supernode/cli/flwr_clientapp.py +18 -21
  118. flwr/supernode/nodestate/in_memory_nodestate.py +2 -2
  119. flwr/supernode/nodestate/nodestate.py +3 -59
  120. flwr/supernode/runtime/run_clientapp.py +39 -102
  121. flwr/supernode/servicer/clientappio/clientappio_servicer.py +10 -17
  122. flwr/supernode/start_client_internal.py +35 -76
  123. {flwr-1.20.0.dist-info → flwr-1.21.0.dist-info}/METADATA +4 -3
  124. {flwr-1.20.0.dist-info → flwr-1.21.0.dist-info}/RECORD +127 -98
  125. {flwr-1.20.0.dist-info → flwr-1.21.0.dist-info}/entry_points.txt +1 -0
  126. flwr/proto/exec_pb2.py +0 -62
  127. flwr/superexec/app.py +0 -45
  128. flwr/superexec/deployment.py +0 -191
  129. flwr/superexec/executor.py +0 -100
  130. flwr/superexec/simulation.py +0 -129
  131. flwr/proto/{exec_pb2.pyi → control_pb2.pyi} +0 -0
  132. {flwr-1.20.0.dist-info → flwr-1.21.0.dist-info}/WHEEL +0 -0
flwr/simulation/app.py CHANGED
@@ -16,10 +16,8 @@
16
16
 
17
17
 
18
18
  import argparse
19
- import gc
20
19
  from logging import DEBUG, ERROR, INFO
21
20
  from queue import Queue
22
- from time import sleep
23
21
  from typing import Optional
24
22
 
25
23
  from flwr.cli.config_utils import get_fab_metadata
@@ -36,6 +34,7 @@ from flwr.common.config import (
36
34
  )
37
35
  from flwr.common.constant import (
38
36
  SIMULATIONIO_API_DEFAULT_CLIENT_ADDRESS,
37
+ ExecPluginType,
39
38
  Status,
40
39
  SubStatus,
41
40
  )
@@ -57,19 +56,23 @@ from flwr.common.serde import (
57
56
  run_status_to_proto,
58
57
  )
59
58
  from flwr.common.typing import RunStatus
59
+ from flwr.proto.appio_pb2 import ( # pylint: disable=E0611
60
+ PullAppInputsRequest,
61
+ PullAppInputsResponse,
62
+ PushAppOutputsRequest,
63
+ )
60
64
  from flwr.proto.run_pb2 import ( # pylint: disable=E0611
61
65
  GetFederationOptionsRequest,
62
66
  GetFederationOptionsResponse,
63
67
  UpdateRunStatusRequest,
64
68
  )
65
- from flwr.proto.simulationio_pb2 import ( # pylint: disable=E0611
66
- PullSimulationInputsRequest,
67
- PullSimulationInputsResponse,
68
- PushSimulationOutputsRequest,
69
- )
69
+ from flwr.proto.simulationio_pb2_grpc import SimulationIoStub
70
70
  from flwr.server.superlink.fleet.vce.backend.backend import BackendConfig
71
71
  from flwr.simulation.run_simulation import _run_simulation
72
72
  from flwr.simulation.simulationio_connection import SimulationIoConnection
73
+ from flwr.supercore.app_utils import start_parent_process_monitor
74
+ from flwr.supercore.superexec.plugin import SimulationExecPlugin
75
+ from flwr.supercore.superexec.run_superexec import run_with_deprecation_warning
73
76
 
74
77
 
75
78
  def flwr_simulation() -> None:
@@ -80,14 +83,27 @@ def flwr_simulation() -> None:
80
83
 
81
84
  args = _parse_args_run_flwr_simulation().parse_args()
82
85
 
83
- log(INFO, "Starting Flower Simulation")
84
-
85
86
  if not args.insecure:
86
87
  flwr_exit(
87
88
  ExitCode.COMMON_TLS_NOT_SUPPORTED,
88
- "`flwr-simulation` does not support TLS yet. ",
89
+ "`flwr-simulation` does not support TLS yet.",
90
+ )
91
+
92
+ # Disallow long-running `flwr-simulation` processes
93
+ if args.token is None:
94
+ run_with_deprecation_warning(
95
+ cmd="flwr-simulation",
96
+ plugin_type=ExecPluginType.SIMULATION,
97
+ plugin_class=SimulationExecPlugin,
98
+ stub_class=SimulationIoStub,
99
+ appio_api_address=args.simulationio_api_address,
100
+ flwr_dir=args.flwr_dir,
101
+ parent_pid=args.parent_pid,
102
+ warn_run_once=args.run_once,
89
103
  )
104
+ return
90
105
 
106
+ log(INFO, "Starting Flower Simulation")
91
107
  log(
92
108
  DEBUG,
93
109
  "Starting isolated `Simulation` connected to SuperLink SimulationAppIo API "
@@ -97,23 +113,29 @@ def flwr_simulation() -> None:
97
113
  run_simulation_process(
98
114
  simulationio_api_address=args.simulationio_api_address,
99
115
  log_queue=log_queue,
100
- run_once=args.run_once,
116
+ token=args.token,
101
117
  flwr_dir_=args.flwr_dir,
102
118
  certificates=None,
119
+ parent_pid=args.parent_pid,
103
120
  )
104
121
 
105
122
  # Restore stdout/stderr
106
123
  restore_output()
107
124
 
108
125
 
109
- def run_simulation_process( # pylint: disable=R0914, disable=W0212, disable=R0915
126
+ def run_simulation_process( # pylint: disable=R0913, R0914, R0915, R0917, W0212
110
127
  simulationio_api_address: str,
111
128
  log_queue: Queue[Optional[str]],
112
- run_once: bool,
129
+ token: str,
113
130
  flwr_dir_: Optional[str] = None,
114
131
  certificates: Optional[bytes] = None,
132
+ parent_pid: Optional[int] = None,
115
133
  ) -> None:
116
134
  """Run Flower Simulation process."""
135
+ # Start monitoring the parent process if a PID is provided
136
+ if parent_pid is not None:
137
+ start_parent_process_monitor(parent_pid)
138
+
117
139
  conn = SimulationIoConnection(
118
140
  simulationio_service_address=simulationio_api_address,
119
141
  root_certificates=certificates,
@@ -123,165 +145,146 @@ def run_simulation_process( # pylint: disable=R0914, disable=W0212, disable=R09
123
145
  flwr_dir = get_flwr_dir(flwr_dir_)
124
146
  log_uploader = None
125
147
  heartbeat_sender = None
148
+ run_status = None
149
+
150
+ try:
151
+ # Pull SimulationInputs from LinkState
152
+ req = PullAppInputsRequest(token=token)
153
+ res: PullAppInputsResponse = conn._stub.PullAppInputs(req)
154
+ context = context_from_proto(res.context)
155
+ run = run_from_proto(res.run)
156
+ fab = fab_from_proto(res.fab)
157
+
158
+ # Start log uploader for this run
159
+ log_uploader = start_log_uploader(
160
+ log_queue=log_queue,
161
+ node_id=context.node_id,
162
+ run_id=run.run_id,
163
+ stub=conn._stub,
164
+ )
126
165
 
127
- while True:
166
+ log(DEBUG, "Simulation process starts FAB installation.")
167
+ install_from_fab(fab.content, flwr_dir=flwr_dir, skip_prompt=True)
128
168
 
129
- try:
130
- # Pull SimulationInputs from LinkState
131
- req = PullSimulationInputsRequest()
132
- res: PullSimulationInputsResponse = conn._stub.PullSimulationInputs(req)
133
- if not res.HasField("run"):
134
- sleep(3)
135
- run_status = None
136
- continue
137
-
138
- context = context_from_proto(res.context)
139
- run = run_from_proto(res.run)
140
- fab = fab_from_proto(res.fab)
141
-
142
- # Start log uploader for this run
143
- log_uploader = start_log_uploader(
144
- log_queue=log_queue,
145
- node_id=context.node_id,
146
- run_id=run.run_id,
147
- stub=conn._stub,
148
- )
169
+ fab_id, fab_version = get_fab_metadata(fab.content)
149
170
 
150
- log(DEBUG, "Simulation process starts FAB installation.")
151
- install_from_fab(fab.content, flwr_dir=flwr_dir, skip_prompt=True)
171
+ app_path = get_project_dir(fab_id, fab_version, fab.hash_str, flwr_dir)
172
+ config = get_project_config(app_path)
152
173
 
153
- fab_id, fab_version = get_fab_metadata(fab.content)
174
+ # Get ClientApp and SeverApp components
175
+ app_components = config["tool"]["flwr"]["app"]["components"]
176
+ client_app_attr = app_components["clientapp"]
177
+ server_app_attr = app_components["serverapp"]
178
+ fused_config = get_fused_config_from_dir(app_path, run.override_config)
154
179
 
155
- app_path = get_project_dir(fab_id, fab_version, fab.hash_str, flwr_dir)
156
- config = get_project_config(app_path)
180
+ # Update run_config in context
181
+ context.run_config = fused_config
157
182
 
158
- # Get ClientApp and SeverApp components
159
- app_components = config["tool"]["flwr"]["app"]["components"]
160
- client_app_attr = app_components["clientapp"]
161
- server_app_attr = app_components["serverapp"]
162
- fused_config = get_fused_config_from_dir(app_path, run.override_config)
183
+ log(
184
+ DEBUG,
185
+ "Flower will load ServerApp `%s` in %s",
186
+ server_app_attr,
187
+ app_path,
188
+ )
189
+ log(
190
+ DEBUG,
191
+ "Flower will load ClientApp `%s` in %s",
192
+ client_app_attr,
193
+ app_path,
194
+ )
163
195
 
164
- # Update run_config in context
165
- context.run_config = fused_config
196
+ # Change status to Running
197
+ run_status_proto = run_status_to_proto(RunStatus(Status.RUNNING, "", ""))
198
+ conn._stub.UpdateRunStatus(
199
+ UpdateRunStatusRequest(run_id=run.run_id, run_status=run_status_proto)
200
+ )
166
201
 
167
- log(
168
- DEBUG,
169
- "Flower will load ServerApp `%s` in %s",
170
- server_app_attr,
171
- app_path,
172
- )
173
- log(
174
- DEBUG,
175
- "Flower will load ClientApp `%s` in %s",
176
- client_app_attr,
177
- app_path,
178
- )
202
+ # Pull Federation Options
203
+ fed_opt_res: GetFederationOptionsResponse = conn._stub.GetFederationOptions(
204
+ GetFederationOptionsRequest(run_id=run.run_id)
205
+ )
206
+ federation_options = config_record_from_proto(fed_opt_res.federation_options)
207
+
208
+ # Unflatten underlying dict
209
+ fed_opt = unflatten_dict({**federation_options})
210
+
211
+ # Extract configs values of interest
212
+ num_supernodes = fed_opt.get("num-supernodes")
213
+ if num_supernodes is None:
214
+ raise ValueError("Federation options expects `num-supernodes` to be set.")
215
+ backend_config: BackendConfig = fed_opt.get("backend", {})
216
+ verbose: bool = fed_opt.get("verbose", False)
217
+ enable_tf_gpu_growth: bool = fed_opt.get("enable_tf_gpu_growth", False)
218
+
219
+ event(
220
+ EventType.FLWR_SIMULATION_RUN_ENTER,
221
+ event_details={
222
+ "backend": "ray",
223
+ "num-supernodes": num_supernodes,
224
+ "run-id-hash": get_sha256_hash(run.run_id),
225
+ },
226
+ )
179
227
 
180
- # Change status to Running
181
- run_status_proto = run_status_to_proto(RunStatus(Status.RUNNING, "", ""))
182
- conn._stub.UpdateRunStatus(
183
- UpdateRunStatusRequest(run_id=run.run_id, run_status=run_status_proto)
184
- )
228
+ # Set up heartbeat sender
229
+ heartbeat_fn = get_grpc_app_heartbeat_fn(
230
+ conn._stub,
231
+ run.run_id,
232
+ failure_message="Heartbeat failed unexpectedly. The SuperLink could "
233
+ "not find the provided run ID, or the run status is invalid.",
234
+ )
235
+ heartbeat_sender = HeartbeatSender(heartbeat_fn)
236
+ heartbeat_sender.start()
237
+
238
+ # Launch the simulation
239
+ updated_context = _run_simulation(
240
+ server_app_attr=server_app_attr,
241
+ client_app_attr=client_app_attr,
242
+ num_supernodes=num_supernodes,
243
+ backend_config=backend_config,
244
+ app_dir=str(app_path),
245
+ run=run,
246
+ enable_tf_gpu_growth=enable_tf_gpu_growth,
247
+ verbose_logging=verbose,
248
+ server_app_run_config=fused_config,
249
+ is_app=True,
250
+ exit_event=EventType.FLWR_SIMULATION_RUN_LEAVE,
251
+ )
185
252
 
186
- # Pull Federation Options
187
- fed_opt_res: GetFederationOptionsResponse = conn._stub.GetFederationOptions(
188
- GetFederationOptionsRequest(run_id=run.run_id)
189
- )
190
- federation_options = config_record_from_proto(
191
- fed_opt_res.federation_options
192
- )
253
+ # Send resulting context
254
+ context_proto = context_to_proto(updated_context)
255
+ out_req = PushAppOutputsRequest(
256
+ token=token, run_id=run.run_id, context=context_proto
257
+ )
258
+ _ = conn._stub.PushAppOutputs(out_req)
193
259
 
194
- # Unflatten underlying dict
195
- fed_opt = unflatten_dict({**federation_options})
196
-
197
- # Extract configs values of interest
198
- num_supernodes = fed_opt.get("num-supernodes")
199
- if num_supernodes is None:
200
- raise ValueError(
201
- "Federation options expects `num-supernodes` to be set."
202
- )
203
- backend_config: BackendConfig = fed_opt.get("backend", {})
204
- verbose: bool = fed_opt.get("verbose", False)
205
- enable_tf_gpu_growth: bool = fed_opt.get("enable_tf_gpu_growth", False)
206
-
207
- event(
208
- EventType.FLWR_SIMULATION_RUN_ENTER,
209
- event_details={
210
- "backend": "ray",
211
- "num-supernodes": num_supernodes,
212
- "run-id-hash": get_sha256_hash(run.run_id),
213
- },
214
- )
260
+ run_status = RunStatus(Status.FINISHED, SubStatus.COMPLETED, "")
215
261
 
216
- # Set up heartbeat sender
217
- heartbeat_fn = get_grpc_app_heartbeat_fn(
218
- conn._stub,
219
- run.run_id,
220
- failure_message="Heartbeat failed unexpectedly. The SuperLink could "
221
- "not find the provided run ID, or the run status is invalid.",
222
- )
223
- heartbeat_sender = HeartbeatSender(heartbeat_fn)
224
- heartbeat_sender.start()
225
-
226
- # Launch the simulation
227
- updated_context = _run_simulation(
228
- server_app_attr=server_app_attr,
229
- client_app_attr=client_app_attr,
230
- num_supernodes=num_supernodes,
231
- backend_config=backend_config,
232
- app_dir=str(app_path),
233
- run=run,
234
- enable_tf_gpu_growth=enable_tf_gpu_growth,
235
- verbose_logging=verbose,
236
- server_app_run_config=fused_config,
237
- is_app=True,
238
- exit_event=EventType.FLWR_SIMULATION_RUN_LEAVE,
239
- )
262
+ except Exception as ex: # pylint: disable=broad-exception-caught
263
+ exc_entity = "Simulation"
264
+ log(ERROR, "%s raised an exception", exc_entity, exc_info=ex)
265
+ run_status = RunStatus(Status.FINISHED, SubStatus.FAILED, str(ex))
266
+
267
+ finally:
268
+ # Stop heartbeat sender
269
+ if heartbeat_sender:
270
+ heartbeat_sender.stop()
271
+
272
+ # Stop log uploader for this run and upload final logs
273
+ if log_uploader:
274
+ stop_log_uploader(log_queue, log_uploader)
240
275
 
241
- # Send resulting context
242
- context_proto = context_to_proto(updated_context)
243
- out_req = PushSimulationOutputsRequest(
244
- run_id=run.run_id, context=context_proto
276
+ # Update run status
277
+ if run_status:
278
+ run_status_proto = run_status_to_proto(run_status)
279
+ conn._stub.UpdateRunStatus(
280
+ UpdateRunStatusRequest(run_id=run.run_id, run_status=run_status_proto)
245
281
  )
246
- _ = conn._stub.PushSimulationOutputs(out_req)
247
-
248
- run_status = RunStatus(Status.FINISHED, SubStatus.COMPLETED, "")
249
-
250
- except Exception as ex: # pylint: disable=broad-exception-caught
251
- exc_entity = "Simulation"
252
- log(ERROR, "%s raised an exception", exc_entity, exc_info=ex)
253
- run_status = RunStatus(Status.FINISHED, SubStatus.FAILED, str(ex))
254
-
255
- finally:
256
- # Stop heartbeat sender
257
- if heartbeat_sender:
258
- heartbeat_sender.stop()
259
- heartbeat_sender = None
260
-
261
- # Stop log uploader for this run and upload final logs
262
- if log_uploader:
263
- stop_log_uploader(log_queue, log_uploader)
264
- log_uploader = None
265
-
266
- # Update run status
267
- if run_status:
268
- run_status_proto = run_status_to_proto(run_status)
269
- conn._stub.UpdateRunStatus(
270
- UpdateRunStatusRequest(
271
- run_id=run.run_id, run_status=run_status_proto
272
- )
273
- )
274
-
275
- # Clean up the Context if it exists
276
- try:
277
- del updated_context
278
- except NameError:
279
- pass
280
- gc.collect()
281
-
282
- # Stop the loop if `flwr-simulation` is expected to process a single run
283
- if run_once:
284
- break
282
+
283
+ # Clean up the Context if it exists
284
+ try:
285
+ del updated_context
286
+ except NameError:
287
+ pass
285
288
 
286
289
 
287
290
  def _parse_args_run_flwr_simulation() -> argparse.ArgumentParser:
@@ -296,11 +299,5 @@ def _parse_args_run_flwr_simulation() -> argparse.ArgumentParser:
296
299
  help="Address of SuperLink's SimulationIO API (IPv4, IPv6, or a domain name)."
297
300
  f"By default, it is set to {SIMULATIONIO_API_DEFAULT_CLIENT_ADDRESS}.",
298
301
  )
299
- parser.add_argument(
300
- "--run-once",
301
- action="store_true",
302
- help="When set, this process will start a single simulation "
303
- "for a pending Run. If no pending run the process will exit. ",
304
- )
305
302
  add_args_flwr_app_common(parser=parser)
306
303
  return parser
@@ -0,0 +1,58 @@
1
+ # Copyright 2025 Flower Labs GmbH. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+ """Utility functions for app processes."""
16
+
17
+
18
+ import os
19
+ import signal
20
+ import threading
21
+ import time
22
+
23
+ if os.name == "nt":
24
+ from ctypes import windll # type: ignore
25
+
26
+
27
+ def _pid_exists(pid: int) -> bool:
28
+ """Check if a process with the given PID exists.
29
+
30
+ This works on Unix-like systems and Windows.
31
+ """
32
+ # Use `ctypes` to check if the process exists on Windows
33
+ if os.name == "nt":
34
+ handle = windll.kernel32.OpenProcess(0x1000, False, pid)
35
+ if handle:
36
+ windll.kernel32.CloseHandle(handle)
37
+ return True
38
+ return False
39
+ # Use `os.kill` on Unix-like systems
40
+ try:
41
+ os.kill(pid, 0)
42
+ except OSError:
43
+ return False
44
+ return True
45
+
46
+
47
+ def start_parent_process_monitor(
48
+ parent_pid: int,
49
+ ) -> None:
50
+ """Monitor the parent process and exit if it terminates."""
51
+
52
+ def monitor() -> None:
53
+ while True:
54
+ time.sleep(0.2)
55
+ if not _pid_exists(parent_pid):
56
+ os.kill(os.getpid(), signal.SIGKILL)
57
+
58
+ threading.Thread(target=monitor, daemon=True).start()
@@ -12,11 +12,11 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
  # ==============================================================================
15
- """Flower ClientApp Scheduler."""
15
+ """Flower command line interface for shared infrastructure components."""
16
16
 
17
17
 
18
- from .simple_clientapp_scheduler_plugin import SimpleClientAppSchedulerPlugin
18
+ from .flower_superexec import flower_superexec
19
19
 
20
20
  __all__ = [
21
- "SimpleClientAppSchedulerPlugin",
21
+ "flower_superexec",
22
22
  ]
@@ -0,0 +1,141 @@
1
+ # Copyright 2025 Flower Labs GmbH. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+ """`flower-superexec` command."""
16
+
17
+
18
+ import argparse
19
+ from logging import INFO
20
+ from typing import Optional
21
+
22
+ from flwr.common import EventType, event
23
+ from flwr.common.constant import ExecPluginType
24
+ from flwr.common.exit import ExitCode, flwr_exit
25
+ from flwr.common.logger import log
26
+ from flwr.proto.clientappio_pb2_grpc import ClientAppIoStub
27
+ from flwr.proto.serverappio_pb2_grpc import ServerAppIoStub
28
+ from flwr.proto.simulationio_pb2_grpc import SimulationIoStub
29
+ from flwr.supercore.grpc_health import add_args_health
30
+ from flwr.supercore.superexec.plugin import (
31
+ ClientAppExecPlugin,
32
+ ExecPlugin,
33
+ ServerAppExecPlugin,
34
+ SimulationExecPlugin,
35
+ )
36
+ from flwr.supercore.superexec.run_superexec import run_superexec
37
+
38
+ try:
39
+ from flwr.ee.constant import ExecEePluginType
40
+ from flwr.ee.exec_plugin import get_ee_plugin_and_stub_class
41
+ except ImportError:
42
+
43
+ class ExecEePluginType: # type: ignore[no-redef]
44
+ """SuperExec EE plugin types."""
45
+
46
+ @staticmethod
47
+ def all() -> list[str]:
48
+ """Return all SuperExec EE plugin types."""
49
+ return []
50
+
51
+ def get_ee_plugin_and_stub_class( # pylint: disable=unused-argument
52
+ plugin_type: str,
53
+ ) -> Optional[tuple[type[ExecPlugin], type[object]]]:
54
+ """Get the EE plugin class and stub class based on the plugin type."""
55
+ return None
56
+
57
+
58
+ def flower_superexec() -> None:
59
+ """Run `flower-superexec` command."""
60
+ args = _parse_args().parse_args()
61
+ if not args.insecure:
62
+ flwr_exit(
63
+ ExitCode.COMMON_TLS_NOT_SUPPORTED,
64
+ "SuperExec does not support TLS yet.",
65
+ )
66
+
67
+ # Log the first message after parsing arguments in case of `--help`
68
+ log(INFO, "Starting Flower SuperExec")
69
+
70
+ # Trigger telemetry event
71
+ event(EventType.RUN_SUPEREXEC_ENTER, {"plugin_type": args.plugin_type})
72
+
73
+ # Get the plugin class and stub class based on the plugin type
74
+ plugin_class, stub_class = _get_plugin_and_stub_class(args.plugin_type)
75
+ run_superexec(
76
+ plugin_class=plugin_class,
77
+ stub_class=stub_class, # type: ignore
78
+ appio_api_address=args.appio_api_address,
79
+ flwr_dir=args.flwr_dir,
80
+ parent_pid=args.parent_pid,
81
+ health_server_address=args.health_server_address,
82
+ )
83
+
84
+
85
+ def _parse_args() -> argparse.ArgumentParser:
86
+ """Parse `flower-superexec` command line arguments."""
87
+ parser = argparse.ArgumentParser(
88
+ description="Run Flower SuperExec.",
89
+ )
90
+ parser.add_argument(
91
+ "--appio-api-address", type=str, required=True, help="Address of the AppIO API"
92
+ )
93
+ parser.add_argument(
94
+ "--plugin-type",
95
+ type=str,
96
+ choices=ExecPluginType.all() + ExecEePluginType.all(),
97
+ required=True,
98
+ help="The type of plugin to use.",
99
+ )
100
+ parser.add_argument(
101
+ "--insecure",
102
+ action="store_true",
103
+ help="Connect to the AppIO API without TLS. "
104
+ "Data transmitted between the client and server is not encrypted. "
105
+ "Use this flag only if you understand the risks.",
106
+ )
107
+ parser.add_argument(
108
+ "--flwr-dir",
109
+ default=None,
110
+ help="""The path containing installed Flower Apps.
111
+ By default, this value is equal to:
112
+
113
+ - `$FLWR_HOME/` if `$FLWR_HOME` is defined
114
+ - `$XDG_DATA_HOME/.flwr/` if `$XDG_DATA_HOME` is defined
115
+ - `$HOME/.flwr/` in all other cases
116
+ """,
117
+ )
118
+ parser.add_argument(
119
+ "--parent-pid",
120
+ type=int,
121
+ default=None,
122
+ help="The PID of the parent process. When set, the process will terminate "
123
+ "when the parent process exits.",
124
+ )
125
+ add_args_health(parser)
126
+ return parser
127
+
128
+
129
+ def _get_plugin_and_stub_class(
130
+ plugin_type: str,
131
+ ) -> tuple[type[ExecPlugin], type[object]]:
132
+ """Get the plugin class and stub class based on the plugin type."""
133
+ if plugin_type == ExecPluginType.CLIENT_APP:
134
+ return ClientAppExecPlugin, ClientAppIoStub
135
+ if plugin_type == ExecPluginType.SERVER_APP:
136
+ return ServerAppExecPlugin, ServerAppIoStub
137
+ if plugin_type == ExecPluginType.SIMULATION:
138
+ return SimulationExecPlugin, SimulationIoStub
139
+ if ret := get_ee_plugin_and_stub_class(plugin_type):
140
+ return ret # type: ignore[no-any-return]
141
+ raise ValueError(f"Unknown plugin type: {plugin_type}")
@@ -12,11 +12,11 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
  # ==============================================================================
15
- """Flower App Scheduler."""
15
+ """Flower CoreState."""
16
16
 
17
17
 
18
- from .plugin import SchedulerPlugin
18
+ from .corestate import CoreState
19
19
 
20
20
  __all__ = [
21
- "SchedulerPlugin",
21
+ "CoreState",
22
22
  ]