flwr 1.19.0__py3-none-any.whl → 1.21.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flwr/__init__.py +4 -1
- flwr/app/__init__.py +28 -0
- flwr/app/exception.py +31 -0
- flwr/cli/auth_plugin/oidc_cli_plugin.py +4 -4
- flwr/cli/build.py +15 -5
- flwr/cli/cli_user_auth_interceptor.py +1 -1
- flwr/cli/config_utils.py +3 -3
- flwr/cli/constant.py +25 -8
- flwr/cli/log.py +9 -9
- flwr/cli/login/login.py +3 -3
- flwr/cli/ls.py +5 -5
- flwr/cli/new/new.py +23 -4
- flwr/cli/new/templates/app/README.flowertune.md.tpl +2 -0
- flwr/cli/new/templates/app/README.md.tpl +5 -0
- flwr/cli/new/templates/app/code/__init__.pytorch_msg_api.py.tpl +1 -0
- flwr/cli/new/templates/app/code/client.pytorch_msg_api.py.tpl +80 -0
- flwr/cli/new/templates/app/code/server.pytorch_msg_api.py.tpl +41 -0
- flwr/cli/new/templates/app/code/task.pytorch_msg_api.py.tpl +98 -0
- flwr/cli/new/templates/app/pyproject.baseline.toml.tpl +14 -3
- flwr/cli/new/templates/app/pyproject.flowertune.toml.tpl +13 -1
- flwr/cli/new/templates/app/pyproject.huggingface.toml.tpl +21 -2
- flwr/cli/new/templates/app/pyproject.jax.toml.tpl +18 -1
- flwr/cli/new/templates/app/pyproject.mlx.toml.tpl +19 -2
- flwr/cli/new/templates/app/pyproject.numpy.toml.tpl +18 -1
- flwr/cli/new/templates/app/pyproject.pytorch.toml.tpl +20 -3
- flwr/cli/new/templates/app/pyproject.pytorch_msg_api.toml.tpl +53 -0
- flwr/cli/new/templates/app/pyproject.sklearn.toml.tpl +18 -1
- flwr/cli/new/templates/app/pyproject.tensorflow.toml.tpl +18 -1
- flwr/cli/run/run.py +53 -50
- flwr/cli/stop.py +7 -4
- flwr/cli/utils.py +29 -11
- flwr/client/grpc_adapter_client/connection.py +11 -4
- flwr/client/grpc_rere_client/connection.py +93 -129
- flwr/client/rest_client/connection.py +134 -164
- flwr/clientapp/__init__.py +10 -0
- flwr/clientapp/mod/__init__.py +26 -0
- flwr/clientapp/mod/centraldp_mods.py +132 -0
- flwr/common/args.py +20 -6
- flwr/common/auth_plugin/__init__.py +4 -4
- flwr/common/auth_plugin/auth_plugin.py +7 -7
- flwr/common/constant.py +26 -5
- flwr/common/event_log_plugin/event_log_plugin.py +1 -1
- flwr/common/exit/__init__.py +4 -0
- flwr/common/exit/exit.py +8 -1
- flwr/common/exit/exit_code.py +42 -8
- flwr/common/exit/exit_handler.py +62 -0
- flwr/common/{exit_handlers.py → exit/signal_handler.py} +20 -37
- flwr/common/grpc.py +1 -1
- flwr/common/{inflatable_grpc_utils.py → inflatable_protobuf_utils.py} +52 -10
- flwr/common/inflatable_utils.py +191 -24
- flwr/common/logger.py +1 -1
- flwr/common/record/array.py +101 -22
- flwr/common/record/arraychunk.py +59 -0
- flwr/common/retry_invoker.py +30 -11
- flwr/common/serde.py +0 -28
- flwr/common/telemetry.py +4 -0
- flwr/compat/client/app.py +14 -31
- flwr/compat/server/app.py +2 -2
- flwr/proto/appio_pb2.py +51 -0
- flwr/proto/appio_pb2.pyi +195 -0
- flwr/proto/appio_pb2_grpc.py +4 -0
- flwr/proto/appio_pb2_grpc.pyi +4 -0
- flwr/proto/clientappio_pb2.py +4 -19
- flwr/proto/clientappio_pb2.pyi +0 -125
- flwr/proto/clientappio_pb2_grpc.py +269 -29
- flwr/proto/clientappio_pb2_grpc.pyi +114 -21
- flwr/proto/control_pb2.py +62 -0
- flwr/proto/{exec_pb2_grpc.py → control_pb2_grpc.py} +54 -54
- flwr/proto/{exec_pb2_grpc.pyi → control_pb2_grpc.pyi} +28 -28
- flwr/proto/fleet_pb2.py +12 -20
- flwr/proto/fleet_pb2.pyi +6 -36
- flwr/proto/serverappio_pb2.py +8 -31
- flwr/proto/serverappio_pb2.pyi +0 -152
- flwr/proto/serverappio_pb2_grpc.py +107 -38
- flwr/proto/serverappio_pb2_grpc.pyi +47 -20
- flwr/proto/simulationio_pb2.py +4 -11
- flwr/proto/simulationio_pb2.pyi +0 -58
- flwr/proto/simulationio_pb2_grpc.py +129 -27
- flwr/proto/simulationio_pb2_grpc.pyi +52 -13
- flwr/server/app.py +130 -153
- flwr/server/fleet_event_log_interceptor.py +4 -0
- flwr/server/grid/grpc_grid.py +94 -54
- flwr/server/grid/inmemory_grid.py +1 -0
- flwr/server/serverapp/app.py +165 -144
- flwr/server/superlink/fleet/grpc_adapter/grpc_adapter_servicer.py +8 -0
- flwr/server/superlink/fleet/grpc_rere/fleet_servicer.py +1 -1
- flwr/server/superlink/fleet/grpc_rere/server_interceptor.py +2 -5
- flwr/server/superlink/fleet/message_handler/message_handler.py +10 -16
- flwr/server/superlink/fleet/rest_rere/rest_api.py +1 -2
- flwr/server/superlink/fleet/vce/backend/raybackend.py +3 -1
- flwr/server/superlink/fleet/vce/vce_api.py +6 -6
- flwr/server/superlink/linkstate/in_memory_linkstate.py +34 -0
- flwr/server/superlink/linkstate/linkstate.py +2 -1
- flwr/server/superlink/linkstate/sqlite_linkstate.py +45 -0
- flwr/server/superlink/serverappio/serverappio_grpc.py +2 -2
- flwr/server/superlink/serverappio/serverappio_servicer.py +95 -48
- flwr/server/superlink/simulation/simulationio_grpc.py +1 -1
- flwr/server/superlink/simulation/simulationio_servicer.py +98 -22
- flwr/server/superlink/utils.py +0 -35
- flwr/serverapp/__init__.py +12 -0
- flwr/serverapp/dp_fixed_clipping.py +352 -0
- flwr/serverapp/exception.py +38 -0
- flwr/serverapp/strategy/__init__.py +38 -0
- flwr/serverapp/strategy/dp_fixed_clipping.py +352 -0
- flwr/serverapp/strategy/fedadagrad.py +162 -0
- flwr/serverapp/strategy/fedadam.py +181 -0
- flwr/serverapp/strategy/fedavg.py +295 -0
- flwr/serverapp/strategy/fedopt.py +218 -0
- flwr/serverapp/strategy/fedyogi.py +173 -0
- flwr/serverapp/strategy/result.py +105 -0
- flwr/serverapp/strategy/strategy.py +285 -0
- flwr/serverapp/strategy/strategy_utils.py +251 -0
- flwr/serverapp/strategy/strategy_utils_tests.py +304 -0
- flwr/simulation/app.py +159 -154
- flwr/simulation/run_simulation.py +17 -0
- flwr/supercore/app_utils.py +58 -0
- flwr/supercore/cli/__init__.py +22 -0
- flwr/supercore/cli/flower_superexec.py +141 -0
- flwr/supercore/corestate/__init__.py +22 -0
- flwr/supercore/corestate/corestate.py +81 -0
- flwr/{server/superlink → supercore}/ffs/disk_ffs.py +1 -1
- flwr/supercore/grpc_health/__init__.py +25 -0
- flwr/supercore/grpc_health/health_server.py +53 -0
- flwr/supercore/grpc_health/simple_health_servicer.py +38 -0
- flwr/supercore/license_plugin/__init__.py +22 -0
- flwr/supercore/license_plugin/license_plugin.py +26 -0
- flwr/supercore/object_store/in_memory_object_store.py +31 -31
- flwr/supercore/object_store/object_store.py +20 -42
- flwr/supercore/object_store/utils.py +43 -0
- flwr/{superexec → supercore/superexec}/__init__.py +1 -1
- flwr/supercore/superexec/plugin/__init__.py +28 -0
- flwr/supercore/superexec/plugin/base_exec_plugin.py +53 -0
- flwr/supercore/superexec/plugin/clientapp_exec_plugin.py +28 -0
- flwr/supercore/superexec/plugin/exec_plugin.py +71 -0
- flwr/supercore/superexec/plugin/serverapp_exec_plugin.py +28 -0
- flwr/supercore/superexec/plugin/simulation_exec_plugin.py +28 -0
- flwr/supercore/superexec/run_superexec.py +185 -0
- flwr/supercore/utils.py +32 -0
- flwr/superlink/servicer/__init__.py +15 -0
- flwr/superlink/servicer/control/__init__.py +22 -0
- flwr/{superexec/exec_event_log_interceptor.py → superlink/servicer/control/control_event_log_interceptor.py} +9 -5
- flwr/{superexec/exec_grpc.py → superlink/servicer/control/control_grpc.py} +39 -28
- flwr/superlink/servicer/control/control_license_interceptor.py +82 -0
- flwr/{superexec/exec_servicer.py → superlink/servicer/control/control_servicer.py} +79 -31
- flwr/{superexec/exec_user_auth_interceptor.py → superlink/servicer/control/control_user_auth_interceptor.py} +18 -10
- flwr/supernode/cli/flower_supernode.py +3 -7
- flwr/supernode/cli/flwr_clientapp.py +20 -16
- flwr/supernode/nodestate/in_memory_nodestate.py +13 -4
- flwr/supernode/nodestate/nodestate.py +3 -44
- flwr/supernode/runtime/run_clientapp.py +129 -115
- flwr/supernode/servicer/clientappio/__init__.py +1 -3
- flwr/supernode/servicer/clientappio/clientappio_servicer.py +217 -165
- flwr/supernode/start_client_internal.py +205 -148
- {flwr-1.19.0.dist-info → flwr-1.21.0.dist-info}/METADATA +5 -3
- {flwr-1.19.0.dist-info → flwr-1.21.0.dist-info}/RECORD +161 -117
- {flwr-1.19.0.dist-info → flwr-1.21.0.dist-info}/entry_points.txt +1 -0
- flwr/common/inflatable_rest_utils.py +0 -99
- flwr/proto/exec_pb2.py +0 -62
- flwr/superexec/app.py +0 -45
- flwr/superexec/deployment.py +0 -192
- flwr/superexec/executor.py +0 -100
- flwr/superexec/simulation.py +0 -130
- /flwr/proto/{exec_pb2.pyi → control_pb2.pyi} +0 -0
- /flwr/{server/superlink → supercore}/ffs/__init__.py +0 -0
- /flwr/{server/superlink → supercore}/ffs/ffs.py +0 -0
- /flwr/{server/superlink → supercore}/ffs/ffs_factory.py +0 -0
- {flwr-1.19.0.dist-info → flwr-1.21.0.dist-info}/WHEEL +0 -0
flwr/simulation/app.py
CHANGED
|
@@ -18,7 +18,6 @@
|
|
|
18
18
|
import argparse
|
|
19
19
|
from logging import DEBUG, ERROR, INFO
|
|
20
20
|
from queue import Queue
|
|
21
|
-
from time import sleep
|
|
22
21
|
from typing import Optional
|
|
23
22
|
|
|
24
23
|
from flwr.cli.config_utils import get_fab_metadata
|
|
@@ -35,6 +34,7 @@ from flwr.common.config import (
|
|
|
35
34
|
)
|
|
36
35
|
from flwr.common.constant import (
|
|
37
36
|
SIMULATIONIO_API_DEFAULT_CLIENT_ADDRESS,
|
|
37
|
+
ExecPluginType,
|
|
38
38
|
Status,
|
|
39
39
|
SubStatus,
|
|
40
40
|
)
|
|
@@ -56,19 +56,23 @@ from flwr.common.serde import (
|
|
|
56
56
|
run_status_to_proto,
|
|
57
57
|
)
|
|
58
58
|
from flwr.common.typing import RunStatus
|
|
59
|
+
from flwr.proto.appio_pb2 import ( # pylint: disable=E0611
|
|
60
|
+
PullAppInputsRequest,
|
|
61
|
+
PullAppInputsResponse,
|
|
62
|
+
PushAppOutputsRequest,
|
|
63
|
+
)
|
|
59
64
|
from flwr.proto.run_pb2 import ( # pylint: disable=E0611
|
|
60
65
|
GetFederationOptionsRequest,
|
|
61
66
|
GetFederationOptionsResponse,
|
|
62
67
|
UpdateRunStatusRequest,
|
|
63
68
|
)
|
|
64
|
-
from flwr.proto.simulationio_pb2 import ( # pylint: disable=E0611
|
|
65
|
-
PullSimulationInputsRequest,
|
|
66
|
-
PullSimulationInputsResponse,
|
|
67
|
-
PushSimulationOutputsRequest,
|
|
68
|
-
)
|
|
69
|
+
from flwr.proto.simulationio_pb2_grpc import SimulationIoStub
|
|
69
70
|
from flwr.server.superlink.fleet.vce.backend.backend import BackendConfig
|
|
70
71
|
from flwr.simulation.run_simulation import _run_simulation
|
|
71
72
|
from flwr.simulation.simulationio_connection import SimulationIoConnection
|
|
73
|
+
from flwr.supercore.app_utils import start_parent_process_monitor
|
|
74
|
+
from flwr.supercore.superexec.plugin import SimulationExecPlugin
|
|
75
|
+
from flwr.supercore.superexec.run_superexec import run_with_deprecation_warning
|
|
72
76
|
|
|
73
77
|
|
|
74
78
|
def flwr_simulation() -> None:
|
|
@@ -79,14 +83,27 @@ def flwr_simulation() -> None:
|
|
|
79
83
|
|
|
80
84
|
args = _parse_args_run_flwr_simulation().parse_args()
|
|
81
85
|
|
|
82
|
-
log(INFO, "Starting Flower Simulation")
|
|
83
|
-
|
|
84
86
|
if not args.insecure:
|
|
85
87
|
flwr_exit(
|
|
86
88
|
ExitCode.COMMON_TLS_NOT_SUPPORTED,
|
|
87
|
-
"`flwr-simulation` does not support TLS yet.
|
|
89
|
+
"`flwr-simulation` does not support TLS yet.",
|
|
90
|
+
)
|
|
91
|
+
|
|
92
|
+
# Disallow long-running `flwr-simulation` processes
|
|
93
|
+
if args.token is None:
|
|
94
|
+
run_with_deprecation_warning(
|
|
95
|
+
cmd="flwr-simulation",
|
|
96
|
+
plugin_type=ExecPluginType.SIMULATION,
|
|
97
|
+
plugin_class=SimulationExecPlugin,
|
|
98
|
+
stub_class=SimulationIoStub,
|
|
99
|
+
appio_api_address=args.simulationio_api_address,
|
|
100
|
+
flwr_dir=args.flwr_dir,
|
|
101
|
+
parent_pid=args.parent_pid,
|
|
102
|
+
warn_run_once=args.run_once,
|
|
88
103
|
)
|
|
104
|
+
return
|
|
89
105
|
|
|
106
|
+
log(INFO, "Starting Flower Simulation")
|
|
90
107
|
log(
|
|
91
108
|
DEBUG,
|
|
92
109
|
"Starting isolated `Simulation` connected to SuperLink SimulationAppIo API "
|
|
@@ -96,23 +113,29 @@ def flwr_simulation() -> None:
|
|
|
96
113
|
run_simulation_process(
|
|
97
114
|
simulationio_api_address=args.simulationio_api_address,
|
|
98
115
|
log_queue=log_queue,
|
|
99
|
-
|
|
116
|
+
token=args.token,
|
|
100
117
|
flwr_dir_=args.flwr_dir,
|
|
101
118
|
certificates=None,
|
|
119
|
+
parent_pid=args.parent_pid,
|
|
102
120
|
)
|
|
103
121
|
|
|
104
122
|
# Restore stdout/stderr
|
|
105
123
|
restore_output()
|
|
106
124
|
|
|
107
125
|
|
|
108
|
-
def run_simulation_process( # pylint: disable=R0914,
|
|
126
|
+
def run_simulation_process( # pylint: disable=R0913, R0914, R0915, R0917, W0212
|
|
109
127
|
simulationio_api_address: str,
|
|
110
128
|
log_queue: Queue[Optional[str]],
|
|
111
|
-
|
|
129
|
+
token: str,
|
|
112
130
|
flwr_dir_: Optional[str] = None,
|
|
113
131
|
certificates: Optional[bytes] = None,
|
|
132
|
+
parent_pid: Optional[int] = None,
|
|
114
133
|
) -> None:
|
|
115
134
|
"""Run Flower Simulation process."""
|
|
135
|
+
# Start monitoring the parent process if a PID is provided
|
|
136
|
+
if parent_pid is not None:
|
|
137
|
+
start_parent_process_monitor(parent_pid)
|
|
138
|
+
|
|
116
139
|
conn = SimulationIoConnection(
|
|
117
140
|
simulationio_service_address=simulationio_api_address,
|
|
118
141
|
root_certificates=certificates,
|
|
@@ -122,158 +145,146 @@ def run_simulation_process( # pylint: disable=R0914, disable=W0212, disable=R09
|
|
|
122
145
|
flwr_dir = get_flwr_dir(flwr_dir_)
|
|
123
146
|
log_uploader = None
|
|
124
147
|
heartbeat_sender = None
|
|
148
|
+
run_status = None
|
|
149
|
+
|
|
150
|
+
try:
|
|
151
|
+
# Pull SimulationInputs from LinkState
|
|
152
|
+
req = PullAppInputsRequest(token=token)
|
|
153
|
+
res: PullAppInputsResponse = conn._stub.PullAppInputs(req)
|
|
154
|
+
context = context_from_proto(res.context)
|
|
155
|
+
run = run_from_proto(res.run)
|
|
156
|
+
fab = fab_from_proto(res.fab)
|
|
157
|
+
|
|
158
|
+
# Start log uploader for this run
|
|
159
|
+
log_uploader = start_log_uploader(
|
|
160
|
+
log_queue=log_queue,
|
|
161
|
+
node_id=context.node_id,
|
|
162
|
+
run_id=run.run_id,
|
|
163
|
+
stub=conn._stub,
|
|
164
|
+
)
|
|
125
165
|
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
try:
|
|
129
|
-
# Pull SimulationInputs from LinkState
|
|
130
|
-
req = PullSimulationInputsRequest()
|
|
131
|
-
res: PullSimulationInputsResponse = conn._stub.PullSimulationInputs(req)
|
|
132
|
-
if not res.HasField("run"):
|
|
133
|
-
sleep(3)
|
|
134
|
-
run_status = None
|
|
135
|
-
continue
|
|
136
|
-
|
|
137
|
-
context = context_from_proto(res.context)
|
|
138
|
-
run = run_from_proto(res.run)
|
|
139
|
-
fab = fab_from_proto(res.fab)
|
|
140
|
-
|
|
141
|
-
# Start log uploader for this run
|
|
142
|
-
log_uploader = start_log_uploader(
|
|
143
|
-
log_queue=log_queue,
|
|
144
|
-
node_id=context.node_id,
|
|
145
|
-
run_id=run.run_id,
|
|
146
|
-
stub=conn._stub,
|
|
147
|
-
)
|
|
148
|
-
|
|
149
|
-
log(DEBUG, "Simulation process starts FAB installation.")
|
|
150
|
-
install_from_fab(fab.content, flwr_dir=flwr_dir, skip_prompt=True)
|
|
151
|
-
|
|
152
|
-
fab_id, fab_version = get_fab_metadata(fab.content)
|
|
166
|
+
log(DEBUG, "Simulation process starts FAB installation.")
|
|
167
|
+
install_from_fab(fab.content, flwr_dir=flwr_dir, skip_prompt=True)
|
|
153
168
|
|
|
154
|
-
|
|
155
|
-
config = get_project_config(app_path)
|
|
169
|
+
fab_id, fab_version = get_fab_metadata(fab.content)
|
|
156
170
|
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
client_app_attr = app_components["clientapp"]
|
|
160
|
-
server_app_attr = app_components["serverapp"]
|
|
161
|
-
fused_config = get_fused_config_from_dir(app_path, run.override_config)
|
|
171
|
+
app_path = get_project_dir(fab_id, fab_version, fab.hash_str, flwr_dir)
|
|
172
|
+
config = get_project_config(app_path)
|
|
162
173
|
|
|
163
|
-
|
|
164
|
-
|
|
174
|
+
# Get ClientApp and SeverApp components
|
|
175
|
+
app_components = config["tool"]["flwr"]["app"]["components"]
|
|
176
|
+
client_app_attr = app_components["clientapp"]
|
|
177
|
+
server_app_attr = app_components["serverapp"]
|
|
178
|
+
fused_config = get_fused_config_from_dir(app_path, run.override_config)
|
|
165
179
|
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
"Flower will load ServerApp `%s` in %s",
|
|
169
|
-
server_app_attr,
|
|
170
|
-
app_path,
|
|
171
|
-
)
|
|
172
|
-
log(
|
|
173
|
-
DEBUG,
|
|
174
|
-
"Flower will load ClientApp `%s` in %s",
|
|
175
|
-
client_app_attr,
|
|
176
|
-
app_path,
|
|
177
|
-
)
|
|
180
|
+
# Update run_config in context
|
|
181
|
+
context.run_config = fused_config
|
|
178
182
|
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
183
|
+
log(
|
|
184
|
+
DEBUG,
|
|
185
|
+
"Flower will load ServerApp `%s` in %s",
|
|
186
|
+
server_app_attr,
|
|
187
|
+
app_path,
|
|
188
|
+
)
|
|
189
|
+
log(
|
|
190
|
+
DEBUG,
|
|
191
|
+
"Flower will load ClientApp `%s` in %s",
|
|
192
|
+
client_app_attr,
|
|
193
|
+
app_path,
|
|
194
|
+
)
|
|
184
195
|
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
)
|
|
189
|
-
|
|
190
|
-
fed_opt_res.federation_options
|
|
191
|
-
)
|
|
196
|
+
# Change status to Running
|
|
197
|
+
run_status_proto = run_status_to_proto(RunStatus(Status.RUNNING, "", ""))
|
|
198
|
+
conn._stub.UpdateRunStatus(
|
|
199
|
+
UpdateRunStatusRequest(run_id=run.run_id, run_status=run_status_proto)
|
|
200
|
+
)
|
|
192
201
|
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
202
|
+
# Pull Federation Options
|
|
203
|
+
fed_opt_res: GetFederationOptionsResponse = conn._stub.GetFederationOptions(
|
|
204
|
+
GetFederationOptionsRequest(run_id=run.run_id)
|
|
205
|
+
)
|
|
206
|
+
federation_options = config_record_from_proto(fed_opt_res.federation_options)
|
|
207
|
+
|
|
208
|
+
# Unflatten underlying dict
|
|
209
|
+
fed_opt = unflatten_dict({**federation_options})
|
|
210
|
+
|
|
211
|
+
# Extract configs values of interest
|
|
212
|
+
num_supernodes = fed_opt.get("num-supernodes")
|
|
213
|
+
if num_supernodes is None:
|
|
214
|
+
raise ValueError("Federation options expects `num-supernodes` to be set.")
|
|
215
|
+
backend_config: BackendConfig = fed_opt.get("backend", {})
|
|
216
|
+
verbose: bool = fed_opt.get("verbose", False)
|
|
217
|
+
enable_tf_gpu_growth: bool = fed_opt.get("enable_tf_gpu_growth", False)
|
|
218
|
+
|
|
219
|
+
event(
|
|
220
|
+
EventType.FLWR_SIMULATION_RUN_ENTER,
|
|
221
|
+
event_details={
|
|
222
|
+
"backend": "ray",
|
|
223
|
+
"num-supernodes": num_supernodes,
|
|
224
|
+
"run-id-hash": get_sha256_hash(run.run_id),
|
|
225
|
+
},
|
|
226
|
+
)
|
|
214
227
|
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
228
|
+
# Set up heartbeat sender
|
|
229
|
+
heartbeat_fn = get_grpc_app_heartbeat_fn(
|
|
230
|
+
conn._stub,
|
|
231
|
+
run.run_id,
|
|
232
|
+
failure_message="Heartbeat failed unexpectedly. The SuperLink could "
|
|
233
|
+
"not find the provided run ID, or the run status is invalid.",
|
|
234
|
+
)
|
|
235
|
+
heartbeat_sender = HeartbeatSender(heartbeat_fn)
|
|
236
|
+
heartbeat_sender.start()
|
|
237
|
+
|
|
238
|
+
# Launch the simulation
|
|
239
|
+
updated_context = _run_simulation(
|
|
240
|
+
server_app_attr=server_app_attr,
|
|
241
|
+
client_app_attr=client_app_attr,
|
|
242
|
+
num_supernodes=num_supernodes,
|
|
243
|
+
backend_config=backend_config,
|
|
244
|
+
app_dir=str(app_path),
|
|
245
|
+
run=run,
|
|
246
|
+
enable_tf_gpu_growth=enable_tf_gpu_growth,
|
|
247
|
+
verbose_logging=verbose,
|
|
248
|
+
server_app_run_config=fused_config,
|
|
249
|
+
is_app=True,
|
|
250
|
+
exit_event=EventType.FLWR_SIMULATION_RUN_LEAVE,
|
|
251
|
+
)
|
|
239
252
|
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
253
|
+
# Send resulting context
|
|
254
|
+
context_proto = context_to_proto(updated_context)
|
|
255
|
+
out_req = PushAppOutputsRequest(
|
|
256
|
+
token=token, run_id=run.run_id, context=context_proto
|
|
257
|
+
)
|
|
258
|
+
_ = conn._stub.PushAppOutputs(out_req)
|
|
246
259
|
|
|
247
|
-
|
|
260
|
+
run_status = RunStatus(Status.FINISHED, SubStatus.COMPLETED, "")
|
|
248
261
|
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
262
|
+
except Exception as ex: # pylint: disable=broad-exception-caught
|
|
263
|
+
exc_entity = "Simulation"
|
|
264
|
+
log(ERROR, "%s raised an exception", exc_entity, exc_info=ex)
|
|
265
|
+
run_status = RunStatus(Status.FINISHED, SubStatus.FAILED, str(ex))
|
|
253
266
|
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
heartbeat_sender = None
|
|
267
|
+
finally:
|
|
268
|
+
# Stop heartbeat sender
|
|
269
|
+
if heartbeat_sender:
|
|
270
|
+
heartbeat_sender.stop()
|
|
259
271
|
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
log_uploader = None
|
|
272
|
+
# Stop log uploader for this run and upload final logs
|
|
273
|
+
if log_uploader:
|
|
274
|
+
stop_log_uploader(log_queue, log_uploader)
|
|
264
275
|
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
)
|
|
272
|
-
)
|
|
276
|
+
# Update run status
|
|
277
|
+
if run_status:
|
|
278
|
+
run_status_proto = run_status_to_proto(run_status)
|
|
279
|
+
conn._stub.UpdateRunStatus(
|
|
280
|
+
UpdateRunStatusRequest(run_id=run.run_id, run_status=run_status_proto)
|
|
281
|
+
)
|
|
273
282
|
|
|
274
|
-
#
|
|
275
|
-
|
|
276
|
-
|
|
283
|
+
# Clean up the Context if it exists
|
|
284
|
+
try:
|
|
285
|
+
del updated_context
|
|
286
|
+
except NameError:
|
|
287
|
+
pass
|
|
277
288
|
|
|
278
289
|
|
|
279
290
|
def _parse_args_run_flwr_simulation() -> argparse.ArgumentParser:
|
|
@@ -288,11 +299,5 @@ def _parse_args_run_flwr_simulation() -> argparse.ArgumentParser:
|
|
|
288
299
|
help="Address of SuperLink's SimulationIO API (IPv4, IPv6, or a domain name)."
|
|
289
300
|
f"By default, it is set to {SIMULATIONIO_API_DEFAULT_CLIENT_ADDRESS}.",
|
|
290
301
|
)
|
|
291
|
-
parser.add_argument(
|
|
292
|
-
"--run-once",
|
|
293
|
-
action="store_true",
|
|
294
|
-
help="When set, this process will start a single simulation "
|
|
295
|
-
"for a pending Run. If no pending run the process will exit. ",
|
|
296
|
-
)
|
|
297
302
|
add_args_flwr_app_common(parser=parser)
|
|
298
303
|
return parser
|
|
flwr/simulation/run_simulation.py
CHANGED
|
@@ -19,6 +19,7 @@ import argparse
|
|
|
19
19
|
import asyncio
|
|
20
20
|
import json
|
|
21
21
|
import logging
|
|
22
|
+
import platform
|
|
22
23
|
import sys
|
|
23
24
|
import threading
|
|
24
25
|
import traceback
|
|
@@ -63,6 +64,18 @@ def _replace_keys(d: Any, match: str, target: str) -> Any:
|
|
|
63
64
|
return d
|
|
64
65
|
|
|
65
66
|
|
|
67
|
+
def _check_ray_support(backend_name: str) -> None:
|
|
68
|
+
if backend_name.lower() == "ray":
|
|
69
|
+
if platform.system() == "Windows":
|
|
70
|
+
log(
|
|
71
|
+
WARNING,
|
|
72
|
+
"Ray support on Windows is experimental "
|
|
73
|
+
"and may not work as expected. "
|
|
74
|
+
"On Windows, Flower Simulations run best in WSL2: "
|
|
75
|
+
"https://learn.microsoft.com/en-us/windows/wsl/about",
|
|
76
|
+
)
|
|
77
|
+
|
|
78
|
+
|
|
66
79
|
# Entry point from CLI
|
|
67
80
|
# pylint: disable=too-many-locals
|
|
68
81
|
def run_simulation_from_cli() -> None:
|
|
@@ -82,6 +95,8 @@ def run_simulation_from_cli() -> None:
|
|
|
82
95
|
code_example='TF_FORCE_GPU_ALLOW_GROWTH="true" flower-simulation <...>',
|
|
83
96
|
)
|
|
84
97
|
|
|
98
|
+
_check_ray_support(args.backend)
|
|
99
|
+
|
|
85
100
|
# Load JSON config
|
|
86
101
|
backend_config_dict = json.loads(args.backend_config)
|
|
87
102
|
|
|
@@ -208,6 +223,8 @@ def run_simulation(
|
|
|
208
223
|
"\n\tflwr.simulation.run_simulationt(...)",
|
|
209
224
|
)
|
|
210
225
|
|
|
226
|
+
_check_ray_support(backend_name)
|
|
227
|
+
|
|
211
228
|
_ = _run_simulation(
|
|
212
229
|
num_supernodes=num_supernodes,
|
|
213
230
|
client_app=client_app,
|
|
flwr/supercore/app_utils.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
# Copyright 2025 Flower Labs GmbH. All Rights Reserved.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
# ==============================================================================
|
|
15
|
+
"""Utility functions for app processes."""
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
import os
|
|
19
|
+
import signal
|
|
20
|
+
import threading
|
|
21
|
+
import time
|
|
22
|
+
|
|
23
|
+
if os.name == "nt":
|
|
24
|
+
from ctypes import windll # type: ignore
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _pid_exists(pid: int) -> bool:
|
|
28
|
+
"""Check if a process with the given PID exists.
|
|
29
|
+
|
|
30
|
+
This works on Unix-like systems and Windows.
|
|
31
|
+
"""
|
|
32
|
+
# Use `ctypes` to check if the process exists on Windows
|
|
33
|
+
if os.name == "nt":
|
|
34
|
+
handle = windll.kernel32.OpenProcess(0x1000, False, pid)
|
|
35
|
+
if handle:
|
|
36
|
+
windll.kernel32.CloseHandle(handle)
|
|
37
|
+
return True
|
|
38
|
+
return False
|
|
39
|
+
# Use `os.kill` on Unix-like systems
|
|
40
|
+
try:
|
|
41
|
+
os.kill(pid, 0)
|
|
42
|
+
except OSError:
|
|
43
|
+
return False
|
|
44
|
+
return True
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def start_parent_process_monitor(
|
|
48
|
+
parent_pid: int,
|
|
49
|
+
) -> None:
|
|
50
|
+
"""Monitor the parent process and exit if it terminates."""
|
|
51
|
+
|
|
52
|
+
def monitor() -> None:
|
|
53
|
+
while True:
|
|
54
|
+
time.sleep(0.2)
|
|
55
|
+
if not _pid_exists(parent_pid):
|
|
56
|
+
os.kill(os.getpid(), signal.SIGKILL)
|
|
57
|
+
|
|
58
|
+
threading.Thread(target=monitor, daemon=True).start()
|
|
flwr/supercore/cli/__init__.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# Copyright 2025 Flower Labs GmbH. All Rights Reserved.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
# ==============================================================================
|
|
15
|
+
"""Flower command line interface for shared infrastructure components."""
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
from .flower_superexec import flower_superexec
|
|
19
|
+
|
|
20
|
+
__all__ = [
|
|
21
|
+
"flower_superexec",
|
|
22
|
+
]
|
|
flwr/supercore/cli/flower_superexec.py
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
# Copyright 2025 Flower Labs GmbH. All Rights Reserved.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
# ==============================================================================
|
|
15
|
+
"""`flower-superexec` command."""
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
import argparse
|
|
19
|
+
from logging import INFO
|
|
20
|
+
from typing import Optional
|
|
21
|
+
|
|
22
|
+
from flwr.common import EventType, event
|
|
23
|
+
from flwr.common.constant import ExecPluginType
|
|
24
|
+
from flwr.common.exit import ExitCode, flwr_exit
|
|
25
|
+
from flwr.common.logger import log
|
|
26
|
+
from flwr.proto.clientappio_pb2_grpc import ClientAppIoStub
|
|
27
|
+
from flwr.proto.serverappio_pb2_grpc import ServerAppIoStub
|
|
28
|
+
from flwr.proto.simulationio_pb2_grpc import SimulationIoStub
|
|
29
|
+
from flwr.supercore.grpc_health import add_args_health
|
|
30
|
+
from flwr.supercore.superexec.plugin import (
|
|
31
|
+
ClientAppExecPlugin,
|
|
32
|
+
ExecPlugin,
|
|
33
|
+
ServerAppExecPlugin,
|
|
34
|
+
SimulationExecPlugin,
|
|
35
|
+
)
|
|
36
|
+
from flwr.supercore.superexec.run_superexec import run_superexec
|
|
37
|
+
|
|
38
|
+
try:
|
|
39
|
+
from flwr.ee.constant import ExecEePluginType
|
|
40
|
+
from flwr.ee.exec_plugin import get_ee_plugin_and_stub_class
|
|
41
|
+
except ImportError:
|
|
42
|
+
|
|
43
|
+
class ExecEePluginType: # type: ignore[no-redef]
|
|
44
|
+
"""SuperExec EE plugin types."""
|
|
45
|
+
|
|
46
|
+
@staticmethod
|
|
47
|
+
def all() -> list[str]:
|
|
48
|
+
"""Return all SuperExec EE plugin types."""
|
|
49
|
+
return []
|
|
50
|
+
|
|
51
|
+
def get_ee_plugin_and_stub_class( # pylint: disable=unused-argument
|
|
52
|
+
plugin_type: str,
|
|
53
|
+
) -> Optional[tuple[type[ExecPlugin], type[object]]]:
|
|
54
|
+
"""Get the EE plugin class and stub class based on the plugin type."""
|
|
55
|
+
return None
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def flower_superexec() -> None:
|
|
59
|
+
"""Run `flower-superexec` command."""
|
|
60
|
+
args = _parse_args().parse_args()
|
|
61
|
+
if not args.insecure:
|
|
62
|
+
flwr_exit(
|
|
63
|
+
ExitCode.COMMON_TLS_NOT_SUPPORTED,
|
|
64
|
+
"SuperExec does not support TLS yet.",
|
|
65
|
+
)
|
|
66
|
+
|
|
67
|
+
# Log the first message after parsing arguments in case of `--help`
|
|
68
|
+
log(INFO, "Starting Flower SuperExec")
|
|
69
|
+
|
|
70
|
+
# Trigger telemetry event
|
|
71
|
+
event(EventType.RUN_SUPEREXEC_ENTER, {"plugin_type": args.plugin_type})
|
|
72
|
+
|
|
73
|
+
# Get the plugin class and stub class based on the plugin type
|
|
74
|
+
plugin_class, stub_class = _get_plugin_and_stub_class(args.plugin_type)
|
|
75
|
+
run_superexec(
|
|
76
|
+
plugin_class=plugin_class,
|
|
77
|
+
stub_class=stub_class, # type: ignore
|
|
78
|
+
appio_api_address=args.appio_api_address,
|
|
79
|
+
flwr_dir=args.flwr_dir,
|
|
80
|
+
parent_pid=args.parent_pid,
|
|
81
|
+
health_server_address=args.health_server_address,
|
|
82
|
+
)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _parse_args() -> argparse.ArgumentParser:
|
|
86
|
+
"""Parse `flower-superexec` command line arguments."""
|
|
87
|
+
parser = argparse.ArgumentParser(
|
|
88
|
+
description="Run Flower SuperExec.",
|
|
89
|
+
)
|
|
90
|
+
parser.add_argument(
|
|
91
|
+
"--appio-api-address", type=str, required=True, help="Address of the AppIO API"
|
|
92
|
+
)
|
|
93
|
+
parser.add_argument(
|
|
94
|
+
"--plugin-type",
|
|
95
|
+
type=str,
|
|
96
|
+
choices=ExecPluginType.all() + ExecEePluginType.all(),
|
|
97
|
+
required=True,
|
|
98
|
+
help="The type of plugin to use.",
|
|
99
|
+
)
|
|
100
|
+
parser.add_argument(
|
|
101
|
+
"--insecure",
|
|
102
|
+
action="store_true",
|
|
103
|
+
help="Connect to the AppIO API without TLS. "
|
|
104
|
+
"Data transmitted between the client and server is not encrypted. "
|
|
105
|
+
"Use this flag only if you understand the risks.",
|
|
106
|
+
)
|
|
107
|
+
parser.add_argument(
|
|
108
|
+
"--flwr-dir",
|
|
109
|
+
default=None,
|
|
110
|
+
help="""The path containing installed Flower Apps.
|
|
111
|
+
By default, this value is equal to:
|
|
112
|
+
|
|
113
|
+
- `$FLWR_HOME/` if `$FLWR_HOME` is defined
|
|
114
|
+
- `$XDG_DATA_HOME/.flwr/` if `$XDG_DATA_HOME` is defined
|
|
115
|
+
- `$HOME/.flwr/` in all other cases
|
|
116
|
+
""",
|
|
117
|
+
)
|
|
118
|
+
parser.add_argument(
|
|
119
|
+
"--parent-pid",
|
|
120
|
+
type=int,
|
|
121
|
+
default=None,
|
|
122
|
+
help="The PID of the parent process. When set, the process will terminate "
|
|
123
|
+
"when the parent process exits.",
|
|
124
|
+
)
|
|
125
|
+
add_args_health(parser)
|
|
126
|
+
return parser
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def _get_plugin_and_stub_class(
|
|
130
|
+
plugin_type: str,
|
|
131
|
+
) -> tuple[type[ExecPlugin], type[object]]:
|
|
132
|
+
"""Get the plugin class and stub class based on the plugin type."""
|
|
133
|
+
if plugin_type == ExecPluginType.CLIENT_APP:
|
|
134
|
+
return ClientAppExecPlugin, ClientAppIoStub
|
|
135
|
+
if plugin_type == ExecPluginType.SERVER_APP:
|
|
136
|
+
return ServerAppExecPlugin, ServerAppIoStub
|
|
137
|
+
if plugin_type == ExecPluginType.SIMULATION:
|
|
138
|
+
return SimulationExecPlugin, SimulationIoStub
|
|
139
|
+
if ret := get_ee_plugin_and_stub_class(plugin_type):
|
|
140
|
+
return ret # type: ignore[no-any-return]
|
|
141
|
+
raise ValueError(f"Unknown plugin type: {plugin_type}")
|