flwr 1.19.0__py3-none-any.whl → 1.21.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (167) hide show
  1. flwr/__init__.py +4 -1
  2. flwr/app/__init__.py +28 -0
  3. flwr/app/exception.py +31 -0
  4. flwr/cli/auth_plugin/oidc_cli_plugin.py +4 -4
  5. flwr/cli/build.py +15 -5
  6. flwr/cli/cli_user_auth_interceptor.py +1 -1
  7. flwr/cli/config_utils.py +3 -3
  8. flwr/cli/constant.py +25 -8
  9. flwr/cli/log.py +9 -9
  10. flwr/cli/login/login.py +3 -3
  11. flwr/cli/ls.py +5 -5
  12. flwr/cli/new/new.py +23 -4
  13. flwr/cli/new/templates/app/README.flowertune.md.tpl +2 -0
  14. flwr/cli/new/templates/app/README.md.tpl +5 -0
  15. flwr/cli/new/templates/app/code/__init__.pytorch_msg_api.py.tpl +1 -0
  16. flwr/cli/new/templates/app/code/client.pytorch_msg_api.py.tpl +80 -0
  17. flwr/cli/new/templates/app/code/server.pytorch_msg_api.py.tpl +41 -0
  18. flwr/cli/new/templates/app/code/task.pytorch_msg_api.py.tpl +98 -0
  19. flwr/cli/new/templates/app/pyproject.baseline.toml.tpl +14 -3
  20. flwr/cli/new/templates/app/pyproject.flowertune.toml.tpl +13 -1
  21. flwr/cli/new/templates/app/pyproject.huggingface.toml.tpl +21 -2
  22. flwr/cli/new/templates/app/pyproject.jax.toml.tpl +18 -1
  23. flwr/cli/new/templates/app/pyproject.mlx.toml.tpl +19 -2
  24. flwr/cli/new/templates/app/pyproject.numpy.toml.tpl +18 -1
  25. flwr/cli/new/templates/app/pyproject.pytorch.toml.tpl +20 -3
  26. flwr/cli/new/templates/app/pyproject.pytorch_msg_api.toml.tpl +53 -0
  27. flwr/cli/new/templates/app/pyproject.sklearn.toml.tpl +18 -1
  28. flwr/cli/new/templates/app/pyproject.tensorflow.toml.tpl +18 -1
  29. flwr/cli/run/run.py +53 -50
  30. flwr/cli/stop.py +7 -4
  31. flwr/cli/utils.py +29 -11
  32. flwr/client/grpc_adapter_client/connection.py +11 -4
  33. flwr/client/grpc_rere_client/connection.py +93 -129
  34. flwr/client/rest_client/connection.py +134 -164
  35. flwr/clientapp/__init__.py +10 -0
  36. flwr/clientapp/mod/__init__.py +26 -0
  37. flwr/clientapp/mod/centraldp_mods.py +132 -0
  38. flwr/common/args.py +20 -6
  39. flwr/common/auth_plugin/__init__.py +4 -4
  40. flwr/common/auth_plugin/auth_plugin.py +7 -7
  41. flwr/common/constant.py +26 -5
  42. flwr/common/event_log_plugin/event_log_plugin.py +1 -1
  43. flwr/common/exit/__init__.py +4 -0
  44. flwr/common/exit/exit.py +8 -1
  45. flwr/common/exit/exit_code.py +42 -8
  46. flwr/common/exit/exit_handler.py +62 -0
  47. flwr/common/{exit_handlers.py → exit/signal_handler.py} +20 -37
  48. flwr/common/grpc.py +1 -1
  49. flwr/common/{inflatable_grpc_utils.py → inflatable_protobuf_utils.py} +52 -10
  50. flwr/common/inflatable_utils.py +191 -24
  51. flwr/common/logger.py +1 -1
  52. flwr/common/record/array.py +101 -22
  53. flwr/common/record/arraychunk.py +59 -0
  54. flwr/common/retry_invoker.py +30 -11
  55. flwr/common/serde.py +0 -28
  56. flwr/common/telemetry.py +4 -0
  57. flwr/compat/client/app.py +14 -31
  58. flwr/compat/server/app.py +2 -2
  59. flwr/proto/appio_pb2.py +51 -0
  60. flwr/proto/appio_pb2.pyi +195 -0
  61. flwr/proto/appio_pb2_grpc.py +4 -0
  62. flwr/proto/appio_pb2_grpc.pyi +4 -0
  63. flwr/proto/clientappio_pb2.py +4 -19
  64. flwr/proto/clientappio_pb2.pyi +0 -125
  65. flwr/proto/clientappio_pb2_grpc.py +269 -29
  66. flwr/proto/clientappio_pb2_grpc.pyi +114 -21
  67. flwr/proto/control_pb2.py +62 -0
  68. flwr/proto/{exec_pb2_grpc.py → control_pb2_grpc.py} +54 -54
  69. flwr/proto/{exec_pb2_grpc.pyi → control_pb2_grpc.pyi} +28 -28
  70. flwr/proto/fleet_pb2.py +12 -20
  71. flwr/proto/fleet_pb2.pyi +6 -36
  72. flwr/proto/serverappio_pb2.py +8 -31
  73. flwr/proto/serverappio_pb2.pyi +0 -152
  74. flwr/proto/serverappio_pb2_grpc.py +107 -38
  75. flwr/proto/serverappio_pb2_grpc.pyi +47 -20
  76. flwr/proto/simulationio_pb2.py +4 -11
  77. flwr/proto/simulationio_pb2.pyi +0 -58
  78. flwr/proto/simulationio_pb2_grpc.py +129 -27
  79. flwr/proto/simulationio_pb2_grpc.pyi +52 -13
  80. flwr/server/app.py +130 -153
  81. flwr/server/fleet_event_log_interceptor.py +4 -0
  82. flwr/server/grid/grpc_grid.py +94 -54
  83. flwr/server/grid/inmemory_grid.py +1 -0
  84. flwr/server/serverapp/app.py +165 -144
  85. flwr/server/superlink/fleet/grpc_adapter/grpc_adapter_servicer.py +8 -0
  86. flwr/server/superlink/fleet/grpc_rere/fleet_servicer.py +1 -1
  87. flwr/server/superlink/fleet/grpc_rere/server_interceptor.py +2 -5
  88. flwr/server/superlink/fleet/message_handler/message_handler.py +10 -16
  89. flwr/server/superlink/fleet/rest_rere/rest_api.py +1 -2
  90. flwr/server/superlink/fleet/vce/backend/raybackend.py +3 -1
  91. flwr/server/superlink/fleet/vce/vce_api.py +6 -6
  92. flwr/server/superlink/linkstate/in_memory_linkstate.py +34 -0
  93. flwr/server/superlink/linkstate/linkstate.py +2 -1
  94. flwr/server/superlink/linkstate/sqlite_linkstate.py +45 -0
  95. flwr/server/superlink/serverappio/serverappio_grpc.py +2 -2
  96. flwr/server/superlink/serverappio/serverappio_servicer.py +95 -48
  97. flwr/server/superlink/simulation/simulationio_grpc.py +1 -1
  98. flwr/server/superlink/simulation/simulationio_servicer.py +98 -22
  99. flwr/server/superlink/utils.py +0 -35
  100. flwr/serverapp/__init__.py +12 -0
  101. flwr/serverapp/dp_fixed_clipping.py +352 -0
  102. flwr/serverapp/exception.py +38 -0
  103. flwr/serverapp/strategy/__init__.py +38 -0
  104. flwr/serverapp/strategy/dp_fixed_clipping.py +352 -0
  105. flwr/serverapp/strategy/fedadagrad.py +162 -0
  106. flwr/serverapp/strategy/fedadam.py +181 -0
  107. flwr/serverapp/strategy/fedavg.py +295 -0
  108. flwr/serverapp/strategy/fedopt.py +218 -0
  109. flwr/serverapp/strategy/fedyogi.py +173 -0
  110. flwr/serverapp/strategy/result.py +105 -0
  111. flwr/serverapp/strategy/strategy.py +285 -0
  112. flwr/serverapp/strategy/strategy_utils.py +251 -0
  113. flwr/serverapp/strategy/strategy_utils_tests.py +304 -0
  114. flwr/simulation/app.py +159 -154
  115. flwr/simulation/run_simulation.py +17 -0
  116. flwr/supercore/app_utils.py +58 -0
  117. flwr/supercore/cli/__init__.py +22 -0
  118. flwr/supercore/cli/flower_superexec.py +141 -0
  119. flwr/supercore/corestate/__init__.py +22 -0
  120. flwr/supercore/corestate/corestate.py +81 -0
  121. flwr/{server/superlink → supercore}/ffs/disk_ffs.py +1 -1
  122. flwr/supercore/grpc_health/__init__.py +25 -0
  123. flwr/supercore/grpc_health/health_server.py +53 -0
  124. flwr/supercore/grpc_health/simple_health_servicer.py +38 -0
  125. flwr/supercore/license_plugin/__init__.py +22 -0
  126. flwr/supercore/license_plugin/license_plugin.py +26 -0
  127. flwr/supercore/object_store/in_memory_object_store.py +31 -31
  128. flwr/supercore/object_store/object_store.py +20 -42
  129. flwr/supercore/object_store/utils.py +43 -0
  130. flwr/{superexec → supercore/superexec}/__init__.py +1 -1
  131. flwr/supercore/superexec/plugin/__init__.py +28 -0
  132. flwr/supercore/superexec/plugin/base_exec_plugin.py +53 -0
  133. flwr/supercore/superexec/plugin/clientapp_exec_plugin.py +28 -0
  134. flwr/supercore/superexec/plugin/exec_plugin.py +71 -0
  135. flwr/supercore/superexec/plugin/serverapp_exec_plugin.py +28 -0
  136. flwr/supercore/superexec/plugin/simulation_exec_plugin.py +28 -0
  137. flwr/supercore/superexec/run_superexec.py +185 -0
  138. flwr/supercore/utils.py +32 -0
  139. flwr/superlink/servicer/__init__.py +15 -0
  140. flwr/superlink/servicer/control/__init__.py +22 -0
  141. flwr/{superexec/exec_event_log_interceptor.py → superlink/servicer/control/control_event_log_interceptor.py} +9 -5
  142. flwr/{superexec/exec_grpc.py → superlink/servicer/control/control_grpc.py} +39 -28
  143. flwr/superlink/servicer/control/control_license_interceptor.py +82 -0
  144. flwr/{superexec/exec_servicer.py → superlink/servicer/control/control_servicer.py} +79 -31
  145. flwr/{superexec/exec_user_auth_interceptor.py → superlink/servicer/control/control_user_auth_interceptor.py} +18 -10
  146. flwr/supernode/cli/flower_supernode.py +3 -7
  147. flwr/supernode/cli/flwr_clientapp.py +20 -16
  148. flwr/supernode/nodestate/in_memory_nodestate.py +13 -4
  149. flwr/supernode/nodestate/nodestate.py +3 -44
  150. flwr/supernode/runtime/run_clientapp.py +129 -115
  151. flwr/supernode/servicer/clientappio/__init__.py +1 -3
  152. flwr/supernode/servicer/clientappio/clientappio_servicer.py +217 -165
  153. flwr/supernode/start_client_internal.py +205 -148
  154. {flwr-1.19.0.dist-info → flwr-1.21.0.dist-info}/METADATA +5 -3
  155. {flwr-1.19.0.dist-info → flwr-1.21.0.dist-info}/RECORD +161 -117
  156. {flwr-1.19.0.dist-info → flwr-1.21.0.dist-info}/entry_points.txt +1 -0
  157. flwr/common/inflatable_rest_utils.py +0 -99
  158. flwr/proto/exec_pb2.py +0 -62
  159. flwr/superexec/app.py +0 -45
  160. flwr/superexec/deployment.py +0 -192
  161. flwr/superexec/executor.py +0 -100
  162. flwr/superexec/simulation.py +0 -130
  163. /flwr/proto/{exec_pb2.pyi → control_pb2.pyi} +0 -0
  164. /flwr/{server/superlink → supercore}/ffs/__init__.py +0 -0
  165. /flwr/{server/superlink → supercore}/ffs/ffs.py +0 -0
  166. /flwr/{server/superlink → supercore}/ffs/ffs_factory.py +0 -0
  167. {flwr-1.19.0.dist-info → flwr-1.21.0.dist-info}/WHEEL +0 -0
flwr/simulation/app.py CHANGED
@@ -18,7 +18,6 @@
18
18
  import argparse
19
19
  from logging import DEBUG, ERROR, INFO
20
20
  from queue import Queue
21
- from time import sleep
22
21
  from typing import Optional
23
22
 
24
23
  from flwr.cli.config_utils import get_fab_metadata
@@ -35,6 +34,7 @@ from flwr.common.config import (
35
34
  )
36
35
  from flwr.common.constant import (
37
36
  SIMULATIONIO_API_DEFAULT_CLIENT_ADDRESS,
37
+ ExecPluginType,
38
38
  Status,
39
39
  SubStatus,
40
40
  )
@@ -56,19 +56,23 @@ from flwr.common.serde import (
56
56
  run_status_to_proto,
57
57
  )
58
58
  from flwr.common.typing import RunStatus
59
+ from flwr.proto.appio_pb2 import ( # pylint: disable=E0611
60
+ PullAppInputsRequest,
61
+ PullAppInputsResponse,
62
+ PushAppOutputsRequest,
63
+ )
59
64
  from flwr.proto.run_pb2 import ( # pylint: disable=E0611
60
65
  GetFederationOptionsRequest,
61
66
  GetFederationOptionsResponse,
62
67
  UpdateRunStatusRequest,
63
68
  )
64
- from flwr.proto.simulationio_pb2 import ( # pylint: disable=E0611
65
- PullSimulationInputsRequest,
66
- PullSimulationInputsResponse,
67
- PushSimulationOutputsRequest,
68
- )
69
+ from flwr.proto.simulationio_pb2_grpc import SimulationIoStub
69
70
  from flwr.server.superlink.fleet.vce.backend.backend import BackendConfig
70
71
  from flwr.simulation.run_simulation import _run_simulation
71
72
  from flwr.simulation.simulationio_connection import SimulationIoConnection
73
+ from flwr.supercore.app_utils import start_parent_process_monitor
74
+ from flwr.supercore.superexec.plugin import SimulationExecPlugin
75
+ from flwr.supercore.superexec.run_superexec import run_with_deprecation_warning
72
76
 
73
77
 
74
78
  def flwr_simulation() -> None:
@@ -79,14 +83,27 @@ def flwr_simulation() -> None:
79
83
 
80
84
  args = _parse_args_run_flwr_simulation().parse_args()
81
85
 
82
- log(INFO, "Starting Flower Simulation")
83
-
84
86
  if not args.insecure:
85
87
  flwr_exit(
86
88
  ExitCode.COMMON_TLS_NOT_SUPPORTED,
87
- "`flwr-simulation` does not support TLS yet. ",
89
+ "`flwr-simulation` does not support TLS yet.",
90
+ )
91
+
92
+ # Disallow long-running `flwr-simulation` processes
93
+ if args.token is None:
94
+ run_with_deprecation_warning(
95
+ cmd="flwr-simulation",
96
+ plugin_type=ExecPluginType.SIMULATION,
97
+ plugin_class=SimulationExecPlugin,
98
+ stub_class=SimulationIoStub,
99
+ appio_api_address=args.simulationio_api_address,
100
+ flwr_dir=args.flwr_dir,
101
+ parent_pid=args.parent_pid,
102
+ warn_run_once=args.run_once,
88
103
  )
104
+ return
89
105
 
106
+ log(INFO, "Starting Flower Simulation")
90
107
  log(
91
108
  DEBUG,
92
109
  "Starting isolated `Simulation` connected to SuperLink SimulationAppIo API "
@@ -96,23 +113,29 @@ def flwr_simulation() -> None:
96
113
  run_simulation_process(
97
114
  simulationio_api_address=args.simulationio_api_address,
98
115
  log_queue=log_queue,
99
- run_once=args.run_once,
116
+ token=args.token,
100
117
  flwr_dir_=args.flwr_dir,
101
118
  certificates=None,
119
+ parent_pid=args.parent_pid,
102
120
  )
103
121
 
104
122
  # Restore stdout/stderr
105
123
  restore_output()
106
124
 
107
125
 
108
- def run_simulation_process( # pylint: disable=R0914, disable=W0212, disable=R0915
126
+ def run_simulation_process( # pylint: disable=R0913, R0914, R0915, R0917, W0212
109
127
  simulationio_api_address: str,
110
128
  log_queue: Queue[Optional[str]],
111
- run_once: bool,
129
+ token: str,
112
130
  flwr_dir_: Optional[str] = None,
113
131
  certificates: Optional[bytes] = None,
132
+ parent_pid: Optional[int] = None,
114
133
  ) -> None:
115
134
  """Run Flower Simulation process."""
135
+ # Start monitoring the parent process if a PID is provided
136
+ if parent_pid is not None:
137
+ start_parent_process_monitor(parent_pid)
138
+
116
139
  conn = SimulationIoConnection(
117
140
  simulationio_service_address=simulationio_api_address,
118
141
  root_certificates=certificates,
@@ -122,158 +145,146 @@ def run_simulation_process( # pylint: disable=R0914, disable=W0212, disable=R09
122
145
  flwr_dir = get_flwr_dir(flwr_dir_)
123
146
  log_uploader = None
124
147
  heartbeat_sender = None
148
+ run_status = None
149
+
150
+ try:
151
+ # Pull SimulationInputs from LinkState
152
+ req = PullAppInputsRequest(token=token)
153
+ res: PullAppInputsResponse = conn._stub.PullAppInputs(req)
154
+ context = context_from_proto(res.context)
155
+ run = run_from_proto(res.run)
156
+ fab = fab_from_proto(res.fab)
157
+
158
+ # Start log uploader for this run
159
+ log_uploader = start_log_uploader(
160
+ log_queue=log_queue,
161
+ node_id=context.node_id,
162
+ run_id=run.run_id,
163
+ stub=conn._stub,
164
+ )
125
165
 
126
- while True:
127
-
128
- try:
129
- # Pull SimulationInputs from LinkState
130
- req = PullSimulationInputsRequest()
131
- res: PullSimulationInputsResponse = conn._stub.PullSimulationInputs(req)
132
- if not res.HasField("run"):
133
- sleep(3)
134
- run_status = None
135
- continue
136
-
137
- context = context_from_proto(res.context)
138
- run = run_from_proto(res.run)
139
- fab = fab_from_proto(res.fab)
140
-
141
- # Start log uploader for this run
142
- log_uploader = start_log_uploader(
143
- log_queue=log_queue,
144
- node_id=context.node_id,
145
- run_id=run.run_id,
146
- stub=conn._stub,
147
- )
148
-
149
- log(DEBUG, "Simulation process starts FAB installation.")
150
- install_from_fab(fab.content, flwr_dir=flwr_dir, skip_prompt=True)
151
-
152
- fab_id, fab_version = get_fab_metadata(fab.content)
166
+ log(DEBUG, "Simulation process starts FAB installation.")
167
+ install_from_fab(fab.content, flwr_dir=flwr_dir, skip_prompt=True)
153
168
 
154
- app_path = get_project_dir(fab_id, fab_version, fab.hash_str, flwr_dir)
155
- config = get_project_config(app_path)
169
+ fab_id, fab_version = get_fab_metadata(fab.content)
156
170
 
157
- # Get ClientApp and SeverApp components
158
- app_components = config["tool"]["flwr"]["app"]["components"]
159
- client_app_attr = app_components["clientapp"]
160
- server_app_attr = app_components["serverapp"]
161
- fused_config = get_fused_config_from_dir(app_path, run.override_config)
171
+ app_path = get_project_dir(fab_id, fab_version, fab.hash_str, flwr_dir)
172
+ config = get_project_config(app_path)
162
173
 
163
- # Update run_config in context
164
- context.run_config = fused_config
174
+ # Get ClientApp and SeverApp components
175
+ app_components = config["tool"]["flwr"]["app"]["components"]
176
+ client_app_attr = app_components["clientapp"]
177
+ server_app_attr = app_components["serverapp"]
178
+ fused_config = get_fused_config_from_dir(app_path, run.override_config)
165
179
 
166
- log(
167
- DEBUG,
168
- "Flower will load ServerApp `%s` in %s",
169
- server_app_attr,
170
- app_path,
171
- )
172
- log(
173
- DEBUG,
174
- "Flower will load ClientApp `%s` in %s",
175
- client_app_attr,
176
- app_path,
177
- )
180
+ # Update run_config in context
181
+ context.run_config = fused_config
178
182
 
179
- # Change status to Running
180
- run_status_proto = run_status_to_proto(RunStatus(Status.RUNNING, "", ""))
181
- conn._stub.UpdateRunStatus(
182
- UpdateRunStatusRequest(run_id=run.run_id, run_status=run_status_proto)
183
- )
183
+ log(
184
+ DEBUG,
185
+ "Flower will load ServerApp `%s` in %s",
186
+ server_app_attr,
187
+ app_path,
188
+ )
189
+ log(
190
+ DEBUG,
191
+ "Flower will load ClientApp `%s` in %s",
192
+ client_app_attr,
193
+ app_path,
194
+ )
184
195
 
185
- # Pull Federation Options
186
- fed_opt_res: GetFederationOptionsResponse = conn._stub.GetFederationOptions(
187
- GetFederationOptionsRequest(run_id=run.run_id)
188
- )
189
- federation_options = config_record_from_proto(
190
- fed_opt_res.federation_options
191
- )
196
+ # Change status to Running
197
+ run_status_proto = run_status_to_proto(RunStatus(Status.RUNNING, "", ""))
198
+ conn._stub.UpdateRunStatus(
199
+ UpdateRunStatusRequest(run_id=run.run_id, run_status=run_status_proto)
200
+ )
192
201
 
193
- # Unflatten underlying dict
194
- fed_opt = unflatten_dict({**federation_options})
195
-
196
- # Extract configs values of interest
197
- num_supernodes = fed_opt.get("num-supernodes")
198
- if num_supernodes is None:
199
- raise ValueError(
200
- "Federation options expects `num-supernodes` to be set."
201
- )
202
- backend_config: BackendConfig = fed_opt.get("backend", {})
203
- verbose: bool = fed_opt.get("verbose", False)
204
- enable_tf_gpu_growth: bool = fed_opt.get("enable_tf_gpu_growth", False)
205
-
206
- event(
207
- EventType.FLWR_SIMULATION_RUN_ENTER,
208
- event_details={
209
- "backend": "ray",
210
- "num-supernodes": num_supernodes,
211
- "run-id-hash": get_sha256_hash(run.run_id),
212
- },
213
- )
202
+ # Pull Federation Options
203
+ fed_opt_res: GetFederationOptionsResponse = conn._stub.GetFederationOptions(
204
+ GetFederationOptionsRequest(run_id=run.run_id)
205
+ )
206
+ federation_options = config_record_from_proto(fed_opt_res.federation_options)
207
+
208
+ # Unflatten underlying dict
209
+ fed_opt = unflatten_dict({**federation_options})
210
+
211
+ # Extract configs values of interest
212
+ num_supernodes = fed_opt.get("num-supernodes")
213
+ if num_supernodes is None:
214
+ raise ValueError("Federation options expects `num-supernodes` to be set.")
215
+ backend_config: BackendConfig = fed_opt.get("backend", {})
216
+ verbose: bool = fed_opt.get("verbose", False)
217
+ enable_tf_gpu_growth: bool = fed_opt.get("enable_tf_gpu_growth", False)
218
+
219
+ event(
220
+ EventType.FLWR_SIMULATION_RUN_ENTER,
221
+ event_details={
222
+ "backend": "ray",
223
+ "num-supernodes": num_supernodes,
224
+ "run-id-hash": get_sha256_hash(run.run_id),
225
+ },
226
+ )
214
227
 
215
- # Set up heartbeat sender
216
- heartbeat_fn = get_grpc_app_heartbeat_fn(
217
- conn._stub,
218
- run.run_id,
219
- failure_message="Heartbeat failed unexpectedly. The SuperLink could "
220
- "not find the provided run ID, or the run status is invalid.",
221
- )
222
- heartbeat_sender = HeartbeatSender(heartbeat_fn)
223
- heartbeat_sender.start()
224
-
225
- # Launch the simulation
226
- updated_context = _run_simulation(
227
- server_app_attr=server_app_attr,
228
- client_app_attr=client_app_attr,
229
- num_supernodes=num_supernodes,
230
- backend_config=backend_config,
231
- app_dir=str(app_path),
232
- run=run,
233
- enable_tf_gpu_growth=enable_tf_gpu_growth,
234
- verbose_logging=verbose,
235
- server_app_run_config=fused_config,
236
- is_app=True,
237
- exit_event=EventType.FLWR_SIMULATION_RUN_LEAVE,
238
- )
228
+ # Set up heartbeat sender
229
+ heartbeat_fn = get_grpc_app_heartbeat_fn(
230
+ conn._stub,
231
+ run.run_id,
232
+ failure_message="Heartbeat failed unexpectedly. The SuperLink could "
233
+ "not find the provided run ID, or the run status is invalid.",
234
+ )
235
+ heartbeat_sender = HeartbeatSender(heartbeat_fn)
236
+ heartbeat_sender.start()
237
+
238
+ # Launch the simulation
239
+ updated_context = _run_simulation(
240
+ server_app_attr=server_app_attr,
241
+ client_app_attr=client_app_attr,
242
+ num_supernodes=num_supernodes,
243
+ backend_config=backend_config,
244
+ app_dir=str(app_path),
245
+ run=run,
246
+ enable_tf_gpu_growth=enable_tf_gpu_growth,
247
+ verbose_logging=verbose,
248
+ server_app_run_config=fused_config,
249
+ is_app=True,
250
+ exit_event=EventType.FLWR_SIMULATION_RUN_LEAVE,
251
+ )
239
252
 
240
- # Send resulting context
241
- context_proto = context_to_proto(updated_context)
242
- out_req = PushSimulationOutputsRequest(
243
- run_id=run.run_id, context=context_proto
244
- )
245
- _ = conn._stub.PushSimulationOutputs(out_req)
253
+ # Send resulting context
254
+ context_proto = context_to_proto(updated_context)
255
+ out_req = PushAppOutputsRequest(
256
+ token=token, run_id=run.run_id, context=context_proto
257
+ )
258
+ _ = conn._stub.PushAppOutputs(out_req)
246
259
 
247
- run_status = RunStatus(Status.FINISHED, SubStatus.COMPLETED, "")
260
+ run_status = RunStatus(Status.FINISHED, SubStatus.COMPLETED, "")
248
261
 
249
- except Exception as ex: # pylint: disable=broad-exception-caught
250
- exc_entity = "Simulation"
251
- log(ERROR, "%s raised an exception", exc_entity, exc_info=ex)
252
- run_status = RunStatus(Status.FINISHED, SubStatus.FAILED, str(ex))
262
+ except Exception as ex: # pylint: disable=broad-exception-caught
263
+ exc_entity = "Simulation"
264
+ log(ERROR, "%s raised an exception", exc_entity, exc_info=ex)
265
+ run_status = RunStatus(Status.FINISHED, SubStatus.FAILED, str(ex))
253
266
 
254
- finally:
255
- # Stop heartbeat sender
256
- if heartbeat_sender:
257
- heartbeat_sender.stop()
258
- heartbeat_sender = None
267
+ finally:
268
+ # Stop heartbeat sender
269
+ if heartbeat_sender:
270
+ heartbeat_sender.stop()
259
271
 
260
- # Stop log uploader for this run and upload final logs
261
- if log_uploader:
262
- stop_log_uploader(log_queue, log_uploader)
263
- log_uploader = None
272
+ # Stop log uploader for this run and upload final logs
273
+ if log_uploader:
274
+ stop_log_uploader(log_queue, log_uploader)
264
275
 
265
- # Update run status
266
- if run_status:
267
- run_status_proto = run_status_to_proto(run_status)
268
- conn._stub.UpdateRunStatus(
269
- UpdateRunStatusRequest(
270
- run_id=run.run_id, run_status=run_status_proto
271
- )
272
- )
276
+ # Update run status
277
+ if run_status:
278
+ run_status_proto = run_status_to_proto(run_status)
279
+ conn._stub.UpdateRunStatus(
280
+ UpdateRunStatusRequest(run_id=run.run_id, run_status=run_status_proto)
281
+ )
273
282
 
274
- # Stop the loop if `flwr-simulation` is expected to process a single run
275
- if run_once:
276
- break
283
+ # Clean up the Context if it exists
284
+ try:
285
+ del updated_context
286
+ except NameError:
287
+ pass
277
288
 
278
289
 
279
290
  def _parse_args_run_flwr_simulation() -> argparse.ArgumentParser:
@@ -288,11 +299,5 @@ def _parse_args_run_flwr_simulation() -> argparse.ArgumentParser:
288
299
  help="Address of SuperLink's SimulationIO API (IPv4, IPv6, or a domain name)."
289
300
  f"By default, it is set to {SIMULATIONIO_API_DEFAULT_CLIENT_ADDRESS}.",
290
301
  )
291
- parser.add_argument(
292
- "--run-once",
293
- action="store_true",
294
- help="When set, this process will start a single simulation "
295
- "for a pending Run. If no pending run the process will exit. ",
296
- )
297
302
  add_args_flwr_app_common(parser=parser)
298
303
  return parser
@@ -19,6 +19,7 @@ import argparse
19
19
  import asyncio
20
20
  import json
21
21
  import logging
22
+ import platform
22
23
  import sys
23
24
  import threading
24
25
  import traceback
@@ -63,6 +64,18 @@ def _replace_keys(d: Any, match: str, target: str) -> Any:
63
64
  return d
64
65
 
65
66
 
67
def _check_ray_support(backend_name: str) -> None:
    """Log a warning when the Ray backend is requested on a Windows host.

    The backend name is compared case-insensitively. Nothing is logged for
    any other backend name or any other operating system.
    """
    # Only the Ray backend is of interest here
    if backend_name.lower() != "ray":
        return
    # Only Windows hosts get the warning
    if platform.system() != "Windows":
        return
    log(
        WARNING,
        "Ray support on Windows is experimental "
        "and may not work as expected. "
        "On Windows, Flower Simulations run best in WSL2: "
        "https://learn.microsoft.com/en-us/windows/wsl/about",
    )
77
+
78
+
66
79
  # Entry point from CLI
67
80
  # pylint: disable=too-many-locals
68
81
  def run_simulation_from_cli() -> None:
@@ -82,6 +95,8 @@ def run_simulation_from_cli() -> None:
82
95
  code_example='TF_FORCE_GPU_ALLOW_GROWTH="true" flower-simulation <...>',
83
96
  )
84
97
 
98
+ _check_ray_support(args.backend)
99
+
85
100
  # Load JSON config
86
101
  backend_config_dict = json.loads(args.backend_config)
87
102
 
@@ -208,6 +223,8 @@ def run_simulation(
208
223
  "\n\tflwr.simulation.run_simulationt(...)",
209
224
  )
210
225
 
226
+ _check_ray_support(backend_name)
227
+
211
228
  _ = _run_simulation(
212
229
  num_supernodes=num_supernodes,
213
230
  client_app=client_app,
@@ -0,0 +1,58 @@
1
+ # Copyright 2025 Flower Labs GmbH. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+ """Utility functions for app processes."""
16
+
17
+
18
+ import os
19
+ import signal
20
+ import threading
21
+ import time
22
+
23
+ if os.name == "nt":
24
+ from ctypes import windll # type: ignore
25
+
26
+
27
def _pid_exists(pid: int) -> bool:
    """Check if a process with the given PID exists.

    This works on Unix-like systems and Windows.

    Parameters
    ----------
    pid : int
        The process ID to probe.

    Returns
    -------
    bool
        `True` if a process with this PID could be found, `False` otherwise.
    """
    # Use `ctypes` to check if the process exists on Windows
    if os.name == "nt":
        # 0x1000 is the PROCESS_QUERY_LIMITED_INFORMATION access right
        handle = windll.kernel32.OpenProcess(0x1000, False, pid)
        if handle:
            windll.kernel32.CloseHandle(handle)
            return True
        return False
    # Use `os.kill` on Unix-like systems: signal 0 delivers nothing and only
    # performs the existence/permission checks
    try:
        os.kill(pid, 0)
    except PermissionError:
        # The process exists but belongs to another user, so it must not be
        # reported as missing
        return True
    except OSError:
        return False
    return True
45
+
46
+
47
def start_parent_process_monitor(
    parent_pid: int,
) -> None:
    """Monitor the parent process and exit if it terminates.

    A daemon thread checks every 0.2 seconds whether `parent_pid` still
    exists and kills the current process as soon as it does not.

    Parameters
    ----------
    parent_pid : int
        The PID of the process whose lifetime bounds the current process.
    """
    # `signal.SIGKILL` is not defined on Windows. There, `os.kill` with any
    # signal other than CTRL_C_EVENT/CTRL_BREAK_EVENT terminates the target
    # unconditionally, so SIGTERM serves as the equivalent hard kill.
    kill_signal = getattr(signal, "SIGKILL", signal.SIGTERM)

    def monitor() -> None:
        while True:
            time.sleep(0.2)
            if not _pid_exists(parent_pid):
                os.kill(os.getpid(), kill_signal)

    # Daemon thread: it must not keep the interpreter alive on normal exit
    threading.Thread(target=monitor, daemon=True).start()
@@ -0,0 +1,22 @@
1
+ # Copyright 2025 Flower Labs GmbH. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+ """Flower command line interface for shared infrastructure components."""
16
+
17
+
18
+ from .flower_superexec import flower_superexec
19
+
20
+ __all__ = [
21
+ "flower_superexec",
22
+ ]
@@ -0,0 +1,141 @@
1
+ # Copyright 2025 Flower Labs GmbH. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+ """`flower-superexec` command."""
16
+
17
+
18
+ import argparse
19
+ from logging import INFO
20
+ from typing import Optional
21
+
22
+ from flwr.common import EventType, event
23
+ from flwr.common.constant import ExecPluginType
24
+ from flwr.common.exit import ExitCode, flwr_exit
25
+ from flwr.common.logger import log
26
+ from flwr.proto.clientappio_pb2_grpc import ClientAppIoStub
27
+ from flwr.proto.serverappio_pb2_grpc import ServerAppIoStub
28
+ from flwr.proto.simulationio_pb2_grpc import SimulationIoStub
29
+ from flwr.supercore.grpc_health import add_args_health
30
+ from flwr.supercore.superexec.plugin import (
31
+ ClientAppExecPlugin,
32
+ ExecPlugin,
33
+ ServerAppExecPlugin,
34
+ SimulationExecPlugin,
35
+ )
36
+ from flwr.supercore.superexec.run_superexec import run_superexec
37
+
38
try:
    # Optional imports: when `flwr.ee` cannot be imported, the stand-ins
    # defined in the `except` branch take over under the same names.
    from flwr.ee.constant import ExecEePluginType
    from flwr.ee.exec_plugin import get_ee_plugin_and_stub_class
except ImportError:

    class ExecEePluginType:  # type: ignore[no-redef]
        """SuperExec EE plugin types.

        Fallback used when `flwr.ee` is not importable; it offers no types.
        """

        @staticmethod
        def all() -> list[str]:
            """Return all SuperExec EE plugin types.

            The fallback always returns an empty list, so no additional
            `--plugin-type` choices are contributed.
            """
            return []

    def get_ee_plugin_and_stub_class(  # pylint: disable=unused-argument
        plugin_type: str,
    ) -> Optional[tuple[type[ExecPlugin], type[object]]]:
        """Get the EE plugin class and stub class based on the plugin type.

        The fallback ignores `plugin_type` and always returns `None`.
        """
        return None
56
+
57
+
58
def flower_superexec() -> None:
    """Run `flower-superexec` command.

    Parses the command line, rejects non-insecure (TLS) invocations, emits
    the telemetry event, resolves the plugin and stub classes for the
    requested plugin type and hands over to `run_superexec`.
    """
    cli_args = _parse_args().parse_args()

    # TLS is not available yet: anything but `--insecure` is rejected
    if not cli_args.insecure:
        flwr_exit(
            ExitCode.COMMON_TLS_NOT_SUPPORTED,
            "SuperExec does not support TLS yet.",
        )

    # Log the first message after parsing arguments in case of `--help`
    log(INFO, "Starting Flower SuperExec")

    # Trigger telemetry event
    event(EventType.RUN_SUPEREXEC_ENTER, {"plugin_type": cli_args.plugin_type})

    # Resolve the plugin/stub pair for the requested plugin type
    plugin_cls, stub_cls = _get_plugin_and_stub_class(cli_args.plugin_type)

    run_superexec(
        plugin_class=plugin_cls,
        stub_class=stub_cls,  # type: ignore
        appio_api_address=cli_args.appio_api_address,
        flwr_dir=cli_args.flwr_dir,
        parent_pid=cli_args.parent_pid,
        health_server_address=cli_args.health_server_address,
    )
83
+
84
+
85
def _parse_args() -> argparse.ArgumentParser:
    """Build the argument parser for the `flower-superexec` command.

    The parser is returned without parsing anything; the caller invokes
    `.parse_args()` on it.
    """
    parser = argparse.ArgumentParser(
        description="Run Flower SuperExec.",
    )
    # Required: address of the AppIO API to connect to
    parser.add_argument(
        "--appio-api-address", type=str, required=True, help="Address of the AppIO API"
    )
    # Required: built-in plugin types plus any EE plugin types
    parser.add_argument(
        "--plugin-type",
        type=str,
        choices=ExecPluginType.all() + ExecEePluginType.all(),
        required=True,
        help="The type of plugin to use.",
    )
    parser.add_argument(
        "--insecure",
        action="store_true",
        help="Connect to the AppIO API without TLS. "
        "Data transmitted between the client and server is not encrypted. "
        "Use this flag only if you understand the risks.",
    )
    parser.add_argument(
        "--flwr-dir",
        default=None,
        help="""The path containing installed Flower Apps.
    By default, this value is equal to:

        - `$FLWR_HOME/` if `$FLWR_HOME` is defined
        - `$XDG_DATA_HOME/.flwr/` if `$XDG_DATA_HOME` is defined
        - `$HOME/.flwr/` in all other cases
    """,
    )
    parser.add_argument(
        "--parent-pid",
        type=int,
        default=None,
        help="The PID of the parent process. When set, the process will terminate "
        "when the parent process exits.",
    )
    # Health-server options are registered by the shared helper
    add_args_health(parser)
    return parser
127
+
128
+
129
def _get_plugin_and_stub_class(
    plugin_type: str,
) -> tuple[type[ExecPlugin], type[object]]:
    """Resolve the plugin class and stub class for a plugin type.

    The built-in plugin types are checked first, in a fixed order; if none
    matches, the EE lookup is consulted.

    Raises
    ------
    ValueError
        If neither the built-in types nor the EE lookup know `plugin_type`.
    """
    builtin_pairs: tuple[tuple[str, type[ExecPlugin], type[object]], ...] = (
        (ExecPluginType.CLIENT_APP, ClientAppExecPlugin, ClientAppIoStub),
        (ExecPluginType.SERVER_APP, ServerAppExecPlugin, ServerAppIoStub),
        (ExecPluginType.SIMULATION, SimulationExecPlugin, SimulationIoStub),
    )
    for known_type, plugin_cls, stub_cls in builtin_pairs:
        if plugin_type == known_type:
            return plugin_cls, stub_cls

    # Not a built-in type: fall back to the EE lookup
    ee_pair = get_ee_plugin_and_stub_class(plugin_type)
    if ee_pair:
        return ee_pair  # type: ignore[no-any-return]
    raise ValueError(f"Unknown plugin type: {plugin_type}")