flwr 1.20.0__py3-none-any.whl → 1.22.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flwr/__init__.py +4 -1
- flwr/app/__init__.py +28 -0
- flwr/app/exception.py +31 -0
- flwr/cli/app.py +2 -0
- flwr/cli/auth_plugin/oidc_cli_plugin.py +4 -4
- flwr/cli/cli_user_auth_interceptor.py +1 -1
- flwr/cli/config_utils.py +3 -3
- flwr/cli/constant.py +25 -8
- flwr/cli/log.py +9 -9
- flwr/cli/login/login.py +3 -3
- flwr/cli/ls.py +5 -5
- flwr/cli/new/new.py +15 -2
- flwr/cli/new/templates/app/README.flowertune.md.tpl +1 -1
- flwr/cli/new/templates/app/code/__init__.pytorch_legacy_api.py.tpl +1 -0
- flwr/cli/new/templates/app/code/client.baseline.py.tpl +64 -47
- flwr/cli/new/templates/app/code/client.huggingface.py.tpl +68 -30
- flwr/cli/new/templates/app/code/client.jax.py.tpl +63 -42
- flwr/cli/new/templates/app/code/client.mlx.py.tpl +80 -51
- flwr/cli/new/templates/app/code/client.numpy.py.tpl +36 -13
- flwr/cli/new/templates/app/code/client.pytorch.py.tpl +71 -46
- flwr/cli/new/templates/app/code/client.pytorch_legacy_api.py.tpl +55 -0
- flwr/cli/new/templates/app/code/client.sklearn.py.tpl +75 -30
- flwr/cli/new/templates/app/code/client.tensorflow.py.tpl +69 -44
- flwr/cli/new/templates/app/code/client.xgboost.py.tpl +110 -0
- flwr/cli/new/templates/app/code/flwr_tune/client_app.py.tpl +56 -90
- flwr/cli/new/templates/app/code/flwr_tune/models.py.tpl +1 -23
- flwr/cli/new/templates/app/code/flwr_tune/server_app.py.tpl +37 -58
- flwr/cli/new/templates/app/code/flwr_tune/strategy.py.tpl +39 -44
- flwr/cli/new/templates/app/code/model.baseline.py.tpl +0 -14
- flwr/cli/new/templates/app/code/server.baseline.py.tpl +27 -29
- flwr/cli/new/templates/app/code/server.huggingface.py.tpl +23 -19
- flwr/cli/new/templates/app/code/server.jax.py.tpl +27 -14
- flwr/cli/new/templates/app/code/server.mlx.py.tpl +29 -19
- flwr/cli/new/templates/app/code/server.numpy.py.tpl +30 -17
- flwr/cli/new/templates/app/code/server.pytorch.py.tpl +36 -26
- flwr/cli/new/templates/app/code/server.pytorch_legacy_api.py.tpl +31 -0
- flwr/cli/new/templates/app/code/server.sklearn.py.tpl +29 -21
- flwr/cli/new/templates/app/code/server.tensorflow.py.tpl +28 -19
- flwr/cli/new/templates/app/code/server.xgboost.py.tpl +56 -0
- flwr/cli/new/templates/app/code/task.huggingface.py.tpl +16 -20
- flwr/cli/new/templates/app/code/task.jax.py.tpl +1 -1
- flwr/cli/new/templates/app/code/task.numpy.py.tpl +1 -1
- flwr/cli/new/templates/app/code/task.pytorch.py.tpl +14 -27
- flwr/cli/new/templates/app/code/task.pytorch_legacy_api.py.tpl +111 -0
- flwr/cli/new/templates/app/code/task.tensorflow.py.tpl +1 -2
- flwr/cli/new/templates/app/code/task.xgboost.py.tpl +67 -0
- flwr/cli/new/templates/app/pyproject.baseline.toml.tpl +4 -4
- flwr/cli/new/templates/app/pyproject.flowertune.toml.tpl +2 -2
- flwr/cli/new/templates/app/pyproject.huggingface.toml.tpl +4 -4
- flwr/cli/new/templates/app/pyproject.jax.toml.tpl +1 -1
- flwr/cli/new/templates/app/pyproject.mlx.toml.tpl +2 -2
- flwr/cli/new/templates/app/pyproject.numpy.toml.tpl +1 -1
- flwr/cli/new/templates/app/pyproject.pytorch.toml.tpl +3 -3
- flwr/cli/new/templates/app/pyproject.pytorch_legacy_api.toml.tpl +53 -0
- flwr/cli/new/templates/app/pyproject.sklearn.toml.tpl +1 -1
- flwr/cli/new/templates/app/pyproject.tensorflow.toml.tpl +1 -1
- flwr/cli/new/templates/app/pyproject.xgboost.toml.tpl +61 -0
- flwr/cli/pull.py +100 -0
- flwr/cli/run/run.py +9 -13
- flwr/cli/stop.py +7 -4
- flwr/cli/utils.py +36 -8
- flwr/client/grpc_rere_client/connection.py +1 -12
- flwr/client/rest_client/connection.py +3 -0
- flwr/clientapp/__init__.py +10 -0
- flwr/clientapp/mod/__init__.py +29 -0
- flwr/clientapp/mod/centraldp_mods.py +248 -0
- flwr/clientapp/mod/localdp_mod.py +169 -0
- flwr/clientapp/typing.py +22 -0
- flwr/common/args.py +20 -6
- flwr/common/auth_plugin/__init__.py +4 -4
- flwr/common/auth_plugin/auth_plugin.py +7 -7
- flwr/common/constant.py +26 -4
- flwr/common/event_log_plugin/event_log_plugin.py +1 -1
- flwr/common/exit/__init__.py +4 -0
- flwr/common/exit/exit.py +8 -1
- flwr/common/exit/exit_code.py +30 -7
- flwr/common/exit/exit_handler.py +62 -0
- flwr/common/{exit_handlers.py → exit/signal_handler.py} +20 -37
- flwr/common/grpc.py +0 -11
- flwr/common/inflatable_utils.py +1 -1
- flwr/common/logger.py +1 -1
- flwr/common/record/typeddict.py +12 -0
- flwr/common/retry_invoker.py +30 -11
- flwr/common/telemetry.py +4 -0
- flwr/compat/server/app.py +2 -2
- flwr/proto/appio_pb2.py +25 -17
- flwr/proto/appio_pb2.pyi +46 -2
- flwr/proto/clientappio_pb2.py +3 -11
- flwr/proto/clientappio_pb2.pyi +0 -47
- flwr/proto/clientappio_pb2_grpc.py +19 -20
- flwr/proto/clientappio_pb2_grpc.pyi +10 -11
- flwr/proto/control_pb2.py +66 -0
- flwr/proto/{exec_pb2.pyi → control_pb2.pyi} +24 -0
- flwr/proto/{exec_pb2_grpc.py → control_pb2_grpc.py} +88 -54
- flwr/proto/control_pb2_grpc.pyi +106 -0
- flwr/proto/serverappio_pb2.py +2 -2
- flwr/proto/serverappio_pb2_grpc.py +68 -0
- flwr/proto/serverappio_pb2_grpc.pyi +26 -0
- flwr/proto/simulationio_pb2.py +4 -11
- flwr/proto/simulationio_pb2.pyi +0 -58
- flwr/proto/simulationio_pb2_grpc.py +129 -27
- flwr/proto/simulationio_pb2_grpc.pyi +52 -13
- flwr/server/app.py +142 -152
- flwr/server/grid/grpc_grid.py +3 -0
- flwr/server/grid/inmemory_grid.py +1 -0
- flwr/server/serverapp/app.py +157 -146
- flwr/server/superlink/fleet/vce/backend/raybackend.py +3 -1
- flwr/server/superlink/fleet/vce/vce_api.py +6 -6
- flwr/server/superlink/linkstate/in_memory_linkstate.py +34 -0
- flwr/server/superlink/linkstate/linkstate.py +2 -1
- flwr/server/superlink/linkstate/sqlite_linkstate.py +45 -0
- flwr/server/superlink/serverappio/serverappio_grpc.py +1 -1
- flwr/server/superlink/serverappio/serverappio_servicer.py +61 -6
- flwr/server/superlink/simulation/simulationio_servicer.py +97 -21
- flwr/serverapp/__init__.py +12 -0
- flwr/serverapp/exception.py +38 -0
- flwr/serverapp/strategy/__init__.py +64 -0
- flwr/serverapp/strategy/bulyan.py +238 -0
- flwr/serverapp/strategy/dp_adaptive_clipping.py +335 -0
- flwr/serverapp/strategy/dp_fixed_clipping.py +374 -0
- flwr/serverapp/strategy/fedadagrad.py +159 -0
- flwr/serverapp/strategy/fedadam.py +178 -0
- flwr/serverapp/strategy/fedavg.py +320 -0
- flwr/serverapp/strategy/fedavgm.py +198 -0
- flwr/serverapp/strategy/fedmedian.py +105 -0
- flwr/serverapp/strategy/fedopt.py +218 -0
- flwr/serverapp/strategy/fedprox.py +174 -0
- flwr/serverapp/strategy/fedtrimmedavg.py +176 -0
- flwr/serverapp/strategy/fedxgb_bagging.py +117 -0
- flwr/serverapp/strategy/fedxgb_cyclic.py +220 -0
- flwr/serverapp/strategy/fedyogi.py +170 -0
- flwr/serverapp/strategy/krum.py +112 -0
- flwr/serverapp/strategy/multikrum.py +247 -0
- flwr/serverapp/strategy/qfedavg.py +252 -0
- flwr/serverapp/strategy/result.py +105 -0
- flwr/serverapp/strategy/strategy.py +285 -0
- flwr/serverapp/strategy/strategy_utils.py +299 -0
- flwr/simulation/app.py +161 -164
- flwr/simulation/run_simulation.py +25 -30
- flwr/supercore/app_utils.py +58 -0
- flwr/{supernode/scheduler → supercore/cli}/__init__.py +3 -3
- flwr/supercore/cli/flower_superexec.py +166 -0
- flwr/supercore/constant.py +19 -0
- flwr/supercore/{scheduler → corestate}/__init__.py +3 -3
- flwr/supercore/corestate/corestate.py +81 -0
- flwr/supercore/grpc_health/__init__.py +3 -0
- flwr/supercore/grpc_health/health_server.py +53 -0
- flwr/supercore/grpc_health/simple_health_servicer.py +2 -2
- flwr/{superexec → supercore/superexec}/__init__.py +1 -1
- flwr/supercore/superexec/plugin/__init__.py +28 -0
- flwr/{supernode/scheduler/simple_clientapp_scheduler_plugin.py → supercore/superexec/plugin/base_exec_plugin.py} +10 -6
- flwr/supercore/superexec/plugin/clientapp_exec_plugin.py +28 -0
- flwr/supercore/{scheduler/plugin.py → superexec/plugin/exec_plugin.py} +15 -5
- flwr/supercore/superexec/plugin/serverapp_exec_plugin.py +28 -0
- flwr/supercore/superexec/plugin/simulation_exec_plugin.py +28 -0
- flwr/supercore/superexec/run_superexec.py +199 -0
- flwr/superlink/artifact_provider/__init__.py +22 -0
- flwr/superlink/artifact_provider/artifact_provider.py +37 -0
- flwr/superlink/servicer/__init__.py +15 -0
- flwr/superlink/servicer/control/__init__.py +22 -0
- flwr/{superexec/exec_event_log_interceptor.py → superlink/servicer/control/control_event_log_interceptor.py} +7 -7
- flwr/{superexec/exec_grpc.py → superlink/servicer/control/control_grpc.py} +27 -29
- flwr/{superexec/exec_license_interceptor.py → superlink/servicer/control/control_license_interceptor.py} +6 -6
- flwr/{superexec/exec_servicer.py → superlink/servicer/control/control_servicer.py} +127 -31
- flwr/{superexec/exec_user_auth_interceptor.py → superlink/servicer/control/control_user_auth_interceptor.py} +10 -10
- flwr/supernode/cli/flower_supernode.py +3 -0
- flwr/supernode/cli/flwr_clientapp.py +18 -21
- flwr/supernode/nodestate/in_memory_nodestate.py +2 -2
- flwr/supernode/nodestate/nodestate.py +3 -59
- flwr/supernode/runtime/run_clientapp.py +39 -102
- flwr/supernode/servicer/clientappio/clientappio_servicer.py +10 -17
- flwr/supernode/start_client_internal.py +35 -76
- {flwr-1.20.0.dist-info → flwr-1.22.0.dist-info}/METADATA +9 -18
- {flwr-1.20.0.dist-info → flwr-1.22.0.dist-info}/RECORD +176 -128
- {flwr-1.20.0.dist-info → flwr-1.22.0.dist-info}/entry_points.txt +1 -0
- flwr/proto/exec_pb2.py +0 -62
- flwr/proto/exec_pb2_grpc.pyi +0 -93
- flwr/superexec/app.py +0 -45
- flwr/superexec/deployment.py +0 -191
- flwr/superexec/executor.py +0 -100
- flwr/superexec/simulation.py +0 -129
- {flwr-1.20.0.dist-info → flwr-1.22.0.dist-info}/WHEEL +0 -0
flwr/server/serverapp/app.py
CHANGED
|
@@ -16,13 +16,12 @@
|
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
import argparse
|
|
19
|
-
import gc
|
|
20
19
|
from logging import DEBUG, ERROR, INFO
|
|
21
20
|
from pathlib import Path
|
|
22
21
|
from queue import Queue
|
|
23
|
-
from time import sleep
|
|
24
22
|
from typing import Optional
|
|
25
23
|
|
|
24
|
+
from flwr.app.exception import AppExitException
|
|
26
25
|
from flwr.cli.config_utils import get_fab_metadata
|
|
27
26
|
from flwr.cli.install import install_from_fab
|
|
28
27
|
from flwr.cli.utils import get_sha256_hash
|
|
@@ -35,10 +34,11 @@ from flwr.common.config import (
|
|
|
35
34
|
)
|
|
36
35
|
from flwr.common.constant import (
|
|
37
36
|
SERVERAPPIO_API_DEFAULT_CLIENT_ADDRESS,
|
|
37
|
+
ExecPluginType,
|
|
38
38
|
Status,
|
|
39
39
|
SubStatus,
|
|
40
40
|
)
|
|
41
|
-
from flwr.common.exit import ExitCode, flwr_exit
|
|
41
|
+
from flwr.common.exit import ExitCode, add_exit_handler, flwr_exit
|
|
42
42
|
from flwr.common.heartbeat import HeartbeatSender, get_grpc_app_heartbeat_fn
|
|
43
43
|
from flwr.common.logger import (
|
|
44
44
|
log,
|
|
@@ -62,8 +62,12 @@ from flwr.proto.appio_pb2 import ( # pylint: disable=E0611
|
|
|
62
62
|
PushAppOutputsRequest,
|
|
63
63
|
)
|
|
64
64
|
from flwr.proto.run_pb2 import UpdateRunStatusRequest # pylint: disable=E0611
|
|
65
|
+
from flwr.proto.serverappio_pb2_grpc import ServerAppIoStub
|
|
65
66
|
from flwr.server.grid.grpc_grid import GrpcGrid
|
|
66
67
|
from flwr.server.run_serverapp import run as run_
|
|
68
|
+
from flwr.supercore.app_utils import start_parent_process_monitor
|
|
69
|
+
from flwr.supercore.superexec.plugin import ServerAppExecPlugin
|
|
70
|
+
from flwr.supercore.superexec.run_superexec import run_with_deprecation_warning
|
|
67
71
|
|
|
68
72
|
|
|
69
73
|
def flwr_serverapp() -> None:
|
|
@@ -74,14 +78,27 @@ def flwr_serverapp() -> None:
|
|
|
74
78
|
|
|
75
79
|
args = _parse_args_run_flwr_serverapp().parse_args()
|
|
76
80
|
|
|
77
|
-
log(INFO, "Start `flwr-serverapp` process")
|
|
78
|
-
|
|
79
81
|
if not args.insecure:
|
|
80
82
|
flwr_exit(
|
|
81
83
|
ExitCode.COMMON_TLS_NOT_SUPPORTED,
|
|
82
84
|
"`flwr-serverapp` does not support TLS yet.",
|
|
83
85
|
)
|
|
84
86
|
|
|
87
|
+
# Disallow long-running `flwr-serverapp` processes
|
|
88
|
+
if args.token is None:
|
|
89
|
+
run_with_deprecation_warning(
|
|
90
|
+
cmd="flwr-serverapp",
|
|
91
|
+
plugin_type=ExecPluginType.SERVER_APP,
|
|
92
|
+
plugin_class=ServerAppExecPlugin,
|
|
93
|
+
stub_class=ServerAppIoStub,
|
|
94
|
+
appio_api_address=args.serverappio_api_address,
|
|
95
|
+
flwr_dir=args.flwr_dir,
|
|
96
|
+
parent_pid=args.parent_pid,
|
|
97
|
+
warn_run_once=args.run_once,
|
|
98
|
+
)
|
|
99
|
+
return
|
|
100
|
+
|
|
101
|
+
log(INFO, "Start `flwr-serverapp` process")
|
|
85
102
|
log(
|
|
86
103
|
DEBUG,
|
|
87
104
|
"`flwr-serverapp` will attempt to connect to SuperLink's "
|
|
@@ -91,177 +108,177 @@ def flwr_serverapp() -> None:
|
|
|
91
108
|
run_serverapp(
|
|
92
109
|
serverappio_api_address=args.serverappio_api_address,
|
|
93
110
|
log_queue=log_queue,
|
|
94
|
-
|
|
111
|
+
token=args.token,
|
|
95
112
|
flwr_dir=args.flwr_dir,
|
|
96
113
|
certificates=None,
|
|
114
|
+
parent_pid=args.parent_pid,
|
|
97
115
|
)
|
|
98
116
|
|
|
99
117
|
# Restore stdout/stderr
|
|
100
118
|
restore_output()
|
|
101
119
|
|
|
102
120
|
|
|
103
|
-
def run_serverapp( # pylint: disable=R0914,
|
|
121
|
+
def run_serverapp( # pylint: disable=R0913, R0914, R0915, R0917, W0212
|
|
104
122
|
serverappio_api_address: str,
|
|
105
123
|
log_queue: Queue[Optional[str]],
|
|
106
|
-
|
|
124
|
+
token: str,
|
|
107
125
|
flwr_dir: Optional[str] = None,
|
|
108
126
|
certificates: Optional[bytes] = None,
|
|
127
|
+
parent_pid: Optional[int] = None,
|
|
109
128
|
) -> None:
|
|
110
129
|
"""Run Flower ServerApp process."""
|
|
130
|
+
# Monitor the main process in case of SIGKILL
|
|
131
|
+
if parent_pid is not None:
|
|
132
|
+
start_parent_process_monitor(parent_pid)
|
|
133
|
+
|
|
111
134
|
# Resolve directory where FABs are installed
|
|
112
135
|
flwr_dir_ = get_flwr_dir(flwr_dir)
|
|
113
136
|
log_uploader = None
|
|
114
|
-
success = True
|
|
115
137
|
hash_run_id = None
|
|
116
138
|
run_status = None
|
|
117
139
|
heartbeat_sender = None
|
|
118
140
|
grid = None
|
|
119
141
|
context = None
|
|
120
|
-
|
|
142
|
+
exit_code = ExitCode.SUCCESS
|
|
121
143
|
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
144
|
+
def on_exit() -> None:
|
|
145
|
+
# Stop heartbeat sender
|
|
146
|
+
if heartbeat_sender:
|
|
147
|
+
heartbeat_sender.stop()
|
|
148
|
+
|
|
149
|
+
# Stop log uploader for this run and upload final logs
|
|
150
|
+
if log_uploader:
|
|
151
|
+
stop_log_uploader(log_queue, log_uploader)
|
|
128
152
|
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
sleep(3)
|
|
135
|
-
run_status = None
|
|
136
|
-
continue
|
|
137
|
-
|
|
138
|
-
context = context_from_proto(res.context)
|
|
139
|
-
run = run_from_proto(res.run)
|
|
140
|
-
fab = fab_from_proto(res.fab)
|
|
141
|
-
|
|
142
|
-
hash_run_id = get_sha256_hash(run.run_id)
|
|
143
|
-
|
|
144
|
-
grid.set_run(run.run_id)
|
|
145
|
-
|
|
146
|
-
# Start log uploader for this run
|
|
147
|
-
log_uploader = start_log_uploader(
|
|
148
|
-
log_queue=log_queue,
|
|
149
|
-
node_id=0,
|
|
150
|
-
run_id=run.run_id,
|
|
151
|
-
stub=grid._stub,
|
|
153
|
+
# Update run status
|
|
154
|
+
if run_status and grid:
|
|
155
|
+
run_status_proto = run_status_to_proto(run_status)
|
|
156
|
+
grid._stub.UpdateRunStatus(
|
|
157
|
+
UpdateRunStatusRequest(run_id=run.run_id, run_status=run_status_proto)
|
|
152
158
|
)
|
|
153
159
|
|
|
154
|
-
|
|
155
|
-
|
|
160
|
+
# Close the Grpc connection
|
|
161
|
+
if grid:
|
|
162
|
+
grid.close()
|
|
156
163
|
|
|
157
|
-
|
|
164
|
+
add_exit_handler(on_exit)
|
|
158
165
|
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
166
|
+
try:
|
|
167
|
+
# Initialize the GrpcGrid
|
|
168
|
+
grid = GrpcGrid(
|
|
169
|
+
serverappio_service_address=serverappio_api_address,
|
|
170
|
+
root_certificates=certificates,
|
|
171
|
+
)
|
|
163
172
|
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
173
|
+
# Pull ServerAppInputs from LinkState
|
|
174
|
+
req = PullAppInputsRequest(token=token)
|
|
175
|
+
log(DEBUG, "[flwr-serverapp] Pull ServerAppInputs")
|
|
176
|
+
res: PullAppInputsResponse = grid._stub.PullAppInputs(req)
|
|
177
|
+
context = context_from_proto(res.context)
|
|
178
|
+
run = run_from_proto(res.run)
|
|
179
|
+
fab = fab_from_proto(res.fab)
|
|
169
180
|
|
|
170
|
-
|
|
171
|
-
context.run_config = server_app_run_config
|
|
181
|
+
hash_run_id = get_sha256_hash(run.run_id)
|
|
172
182
|
|
|
173
|
-
|
|
174
|
-
DEBUG,
|
|
175
|
-
"[flwr-serverapp] Will load ServerApp `%s` in %s",
|
|
176
|
-
server_app_attr,
|
|
177
|
-
app_path,
|
|
178
|
-
)
|
|
183
|
+
grid.set_run(run.run_id)
|
|
179
184
|
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
+
# Start log uploader for this run
|
|
186
|
+
log_uploader = start_log_uploader(
|
|
187
|
+
log_queue=log_queue,
|
|
188
|
+
node_id=0,
|
|
189
|
+
run_id=run.run_id,
|
|
190
|
+
stub=grid._stub,
|
|
191
|
+
)
|
|
185
192
|
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
event_details={"run-id-hash": hash_run_id},
|
|
189
|
-
)
|
|
193
|
+
log(DEBUG, "[flwr-serverapp] Start FAB installation.")
|
|
194
|
+
install_from_fab(fab.content, flwr_dir=flwr_dir_, skip_prompt=True)
|
|
190
195
|
|
|
191
|
-
|
|
192
|
-
heartbeat_fn = get_grpc_app_heartbeat_fn(
|
|
193
|
-
grid._stub,
|
|
194
|
-
run.run_id,
|
|
195
|
-
failure_message="Heartbeat failed unexpectedly. The SuperLink could "
|
|
196
|
-
"not find the provided run ID, or the run status is invalid.",
|
|
197
|
-
)
|
|
198
|
-
heartbeat_sender = HeartbeatSender(heartbeat_fn)
|
|
199
|
-
heartbeat_sender.start()
|
|
200
|
-
|
|
201
|
-
# Load and run the ServerApp with the Grid
|
|
202
|
-
updated_context = run_(
|
|
203
|
-
grid=grid,
|
|
204
|
-
server_app_dir=app_path,
|
|
205
|
-
server_app_attr=server_app_attr,
|
|
206
|
-
context=context,
|
|
207
|
-
)
|
|
196
|
+
fab_id, fab_version = get_fab_metadata(fab.content)
|
|
208
197
|
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
log(INFO, "")
|
|
218
|
-
log(INFO, "Run ID %s stopped.", run.run_id)
|
|
219
|
-
log(INFO, "")
|
|
220
|
-
run_status = None
|
|
221
|
-
success = False
|
|
222
|
-
|
|
223
|
-
except Exception as ex: # pylint: disable=broad-exception-caught
|
|
224
|
-
exc_entity = "ServerApp"
|
|
225
|
-
log(ERROR, "%s raised an exception", exc_entity, exc_info=ex)
|
|
226
|
-
run_status = RunStatus(Status.FINISHED, SubStatus.FAILED, str(ex))
|
|
227
|
-
success = False
|
|
228
|
-
|
|
229
|
-
finally:
|
|
230
|
-
# Stop heartbeat sender
|
|
231
|
-
if heartbeat_sender:
|
|
232
|
-
heartbeat_sender.stop()
|
|
233
|
-
heartbeat_sender = None
|
|
234
|
-
|
|
235
|
-
# Stop log uploader for this run and upload final logs
|
|
236
|
-
if log_uploader:
|
|
237
|
-
stop_log_uploader(log_queue, log_uploader)
|
|
238
|
-
log_uploader = None
|
|
239
|
-
|
|
240
|
-
# Update run status
|
|
241
|
-
if run_status and grid:
|
|
242
|
-
run_status_proto = run_status_to_proto(run_status)
|
|
243
|
-
grid._stub.UpdateRunStatus(
|
|
244
|
-
UpdateRunStatusRequest(
|
|
245
|
-
run_id=run.run_id, run_status=run_status_proto
|
|
246
|
-
)
|
|
247
|
-
)
|
|
248
|
-
|
|
249
|
-
# Close the Grpc connection
|
|
250
|
-
if grid:
|
|
251
|
-
grid.close()
|
|
252
|
-
|
|
253
|
-
# Clean up the Context
|
|
254
|
-
context = None
|
|
255
|
-
gc.collect()
|
|
256
|
-
|
|
257
|
-
event(
|
|
258
|
-
EventType.FLWR_SERVERAPP_RUN_LEAVE,
|
|
259
|
-
event_details={"run-id-hash": hash_run_id, "success": success},
|
|
260
|
-
)
|
|
198
|
+
app_path = str(get_project_dir(fab_id, fab_version, fab.hash_str, flwr_dir_))
|
|
199
|
+
config = get_project_config(app_path)
|
|
200
|
+
|
|
201
|
+
# Obtain server app reference and the run config
|
|
202
|
+
server_app_attr = config["tool"]["flwr"]["app"]["components"]["serverapp"]
|
|
203
|
+
server_app_run_config = get_fused_config_from_dir(
|
|
204
|
+
Path(app_path), run.override_config
|
|
205
|
+
)
|
|
261
206
|
|
|
262
|
-
#
|
|
263
|
-
|
|
264
|
-
|
|
207
|
+
# Update run_config in context
|
|
208
|
+
context.run_config = server_app_run_config
|
|
209
|
+
|
|
210
|
+
log(
|
|
211
|
+
DEBUG,
|
|
212
|
+
"[flwr-serverapp] Will load ServerApp `%s` in %s",
|
|
213
|
+
server_app_attr,
|
|
214
|
+
app_path,
|
|
215
|
+
)
|
|
216
|
+
|
|
217
|
+
# Change status to Running
|
|
218
|
+
run_status_proto = run_status_to_proto(RunStatus(Status.RUNNING, "", ""))
|
|
219
|
+
grid._stub.UpdateRunStatus(
|
|
220
|
+
UpdateRunStatusRequest(run_id=run.run_id, run_status=run_status_proto)
|
|
221
|
+
)
|
|
222
|
+
|
|
223
|
+
event(
|
|
224
|
+
EventType.FLWR_SERVERAPP_RUN_ENTER,
|
|
225
|
+
event_details={"run-id-hash": hash_run_id},
|
|
226
|
+
)
|
|
227
|
+
|
|
228
|
+
# Set up heartbeat sender
|
|
229
|
+
heartbeat_fn = get_grpc_app_heartbeat_fn(
|
|
230
|
+
grid._stub,
|
|
231
|
+
run.run_id,
|
|
232
|
+
failure_message="Heartbeat failed unexpectedly. The SuperLink could "
|
|
233
|
+
"not find the provided run ID, or the run status is invalid.",
|
|
234
|
+
)
|
|
235
|
+
heartbeat_sender = HeartbeatSender(heartbeat_fn)
|
|
236
|
+
heartbeat_sender.start()
|
|
237
|
+
|
|
238
|
+
# Load and run the ServerApp with the Grid
|
|
239
|
+
updated_context = run_(
|
|
240
|
+
grid=grid,
|
|
241
|
+
server_app_dir=app_path,
|
|
242
|
+
server_app_attr=server_app_attr,
|
|
243
|
+
context=context,
|
|
244
|
+
)
|
|
245
|
+
|
|
246
|
+
# Send resulting context
|
|
247
|
+
context_proto = context_to_proto(updated_context)
|
|
248
|
+
log(DEBUG, "[flwr-serverapp] Will push ServerAppOutputs")
|
|
249
|
+
out_req = PushAppOutputsRequest(
|
|
250
|
+
token=token, run_id=run.run_id, context=context_proto
|
|
251
|
+
)
|
|
252
|
+
_ = grid._stub.PushAppOutputs(out_req)
|
|
253
|
+
|
|
254
|
+
run_status = RunStatus(Status.FINISHED, SubStatus.COMPLETED, "")
|
|
255
|
+
|
|
256
|
+
# Raised when the run is already stopped by the user
|
|
257
|
+
except RunNotRunningException:
|
|
258
|
+
log(INFO, "")
|
|
259
|
+
log(INFO, "Run ID %s stopped.", run.run_id)
|
|
260
|
+
log(INFO, "")
|
|
261
|
+
run_status = None
|
|
262
|
+
# No need to update the exit code since this is expected behavior
|
|
263
|
+
|
|
264
|
+
except Exception as ex: # pylint: disable=broad-exception-caught
|
|
265
|
+
exc_entity = "ServerApp"
|
|
266
|
+
log(ERROR, "%s raised an exception", exc_entity, exc_info=ex)
|
|
267
|
+
run_status = RunStatus(Status.FINISHED, SubStatus.FAILED, str(ex))
|
|
268
|
+
|
|
269
|
+
# Set exit code
|
|
270
|
+
exit_code = ExitCode.SERVERAPP_EXCEPTION # General exit code
|
|
271
|
+
if isinstance(ex, AppExitException):
|
|
272
|
+
exit_code = ex.exit_code
|
|
273
|
+
|
|
274
|
+
flwr_exit(
|
|
275
|
+
code=exit_code,
|
|
276
|
+
event_type=EventType.FLWR_SERVERAPP_RUN_LEAVE,
|
|
277
|
+
event_details={
|
|
278
|
+
"run-id-hash": hash_run_id,
|
|
279
|
+
"success": exit_code == ExitCode.SUCCESS,
|
|
280
|
+
},
|
|
281
|
+
)
|
|
265
282
|
|
|
266
283
|
|
|
267
284
|
def _parse_args_run_flwr_serverapp() -> argparse.ArgumentParser:
|
|
@@ -276,11 +293,5 @@ def _parse_args_run_flwr_serverapp() -> argparse.ArgumentParser:
|
|
|
276
293
|
help="Address of SuperLink's ServerAppIo API (IPv4, IPv6, or a domain name)."
|
|
277
294
|
f"By default, it is set to {SERVERAPPIO_API_DEFAULT_CLIENT_ADDRESS}.",
|
|
278
295
|
)
|
|
279
|
-
parser.add_argument(
|
|
280
|
-
"--run-once",
|
|
281
|
-
action="store_true",
|
|
282
|
-
help="When set, this process will start a single ServerApp for a pending Run. "
|
|
283
|
-
"If there is no pending Run, the process will exit.",
|
|
284
|
-
)
|
|
285
296
|
add_args_flwr_app_common(parser=parser)
|
|
286
297
|
return parser
|
|
@@ -161,6 +161,7 @@ class RayBackend(Backend):
|
|
|
161
161
|
"Call the backend's `build()` method before processing messages."
|
|
162
162
|
)
|
|
163
163
|
|
|
164
|
+
future = None
|
|
164
165
|
try:
|
|
165
166
|
# Submit a task to the pool
|
|
166
167
|
future = self.pool.submit(
|
|
@@ -183,7 +184,8 @@ class RayBackend(Backend):
|
|
|
183
184
|
self.__class__.__name__,
|
|
184
185
|
)
|
|
185
186
|
# add actor back into pool
|
|
186
|
-
|
|
187
|
+
if future is not None:
|
|
188
|
+
self.pool.add_actor_back_to_pool(future)
|
|
187
189
|
raise ex
|
|
188
190
|
|
|
189
191
|
def terminate(self) -> None:
|
|
@@ -23,7 +23,6 @@ from concurrent.futures import ThreadPoolExecutor
|
|
|
23
23
|
from logging import DEBUG, ERROR, INFO, WARN
|
|
24
24
|
from pathlib import Path
|
|
25
25
|
from queue import Empty, Queue
|
|
26
|
-
from time import sleep
|
|
27
26
|
from typing import Callable, Optional
|
|
28
27
|
from uuid import uuid4
|
|
29
28
|
|
|
@@ -153,7 +152,7 @@ def add_messages_to_queue(
|
|
|
153
152
|
message_ins_list = state.get_message_ins(node_id=node_id, limit=1)
|
|
154
153
|
for msg in message_ins_list:
|
|
155
154
|
queue.put(msg)
|
|
156
|
-
|
|
155
|
+
f_stop.wait(0.1)
|
|
157
156
|
|
|
158
157
|
|
|
159
158
|
def put_message_into_state(
|
|
@@ -182,6 +181,7 @@ def run_api(
|
|
|
182
181
|
messageins_queue: Queue[Message] = Queue()
|
|
183
182
|
messageres_queue: Queue[Message] = Queue()
|
|
184
183
|
|
|
184
|
+
backend = None
|
|
185
185
|
try:
|
|
186
186
|
|
|
187
187
|
# Instantiate backend
|
|
@@ -236,16 +236,16 @@ def run_api(
|
|
|
236
236
|
log(ERROR, traceback.format_exc())
|
|
237
237
|
log(WARN, "Stopping Simulation Engine.")
|
|
238
238
|
|
|
239
|
-
# Manually trigger stopping event
|
|
240
|
-
f_stop.set()
|
|
241
|
-
|
|
242
239
|
# Raise exception
|
|
243
240
|
raise RuntimeError("Simulation Engine crashed.") from ex
|
|
244
241
|
|
|
245
242
|
finally:
|
|
243
|
+
# Manually trigger stopping event
|
|
244
|
+
f_stop.set()
|
|
246
245
|
|
|
247
246
|
# Terminate backend
|
|
248
|
-
backend
|
|
247
|
+
if backend is not None:
|
|
248
|
+
backend.terminate()
|
|
249
249
|
|
|
250
250
|
|
|
251
251
|
# pylint: disable=too-many-arguments,unused-argument,too-many-locals,too-many-branches
|
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
"""In-memory LinkState implementation."""
|
|
16
16
|
|
|
17
17
|
|
|
18
|
+
import secrets
|
|
18
19
|
import threading
|
|
19
20
|
import time
|
|
20
21
|
from bisect import bisect_right
|
|
@@ -25,6 +26,7 @@ from typing import Optional
|
|
|
25
26
|
|
|
26
27
|
from flwr.common import Context, Message, log, now
|
|
27
28
|
from flwr.common.constant import (
|
|
29
|
+
FLWR_APP_TOKEN_LENGTH,
|
|
28
30
|
HEARTBEAT_MAX_INTERVAL,
|
|
29
31
|
HEARTBEAT_PATIENCE,
|
|
30
32
|
MESSAGE_TTL_TOLERANCE,
|
|
@@ -80,6 +82,11 @@ class InMemoryLinkState(LinkState): # pylint: disable=R0902,R0904
|
|
|
80
82
|
self.message_res_store: dict[str, Message] = {}
|
|
81
83
|
self.message_ins_id_to_message_res_id: dict[str, str] = {}
|
|
82
84
|
|
|
85
|
+
# Store run ID to token mapping and token to run ID mapping
|
|
86
|
+
self.token_store: dict[int, str] = {}
|
|
87
|
+
self.token_to_run_id: dict[str, int] = {}
|
|
88
|
+
self.lock_token_store = threading.Lock()
|
|
89
|
+
|
|
83
90
|
# Map flwr_aid to run_ids for O(1) reverse index lookup
|
|
84
91
|
self.flwr_aid_to_run_ids: dict[str, set[int]] = defaultdict(set)
|
|
85
92
|
|
|
@@ -678,3 +685,30 @@ class InMemoryLinkState(LinkState): # pylint: disable=R0902,R0904
|
|
|
678
685
|
index = bisect_right(run.logs, (after_timestamp, ""))
|
|
679
686
|
latest_timestamp = run.logs[-1][0] if index < len(run.logs) else 0.0
|
|
680
687
|
return "".join(log for _, log in run.logs[index:]), latest_timestamp
|
|
688
|
+
|
|
689
|
+
def create_token(self, run_id: int) -> Optional[str]:
|
|
690
|
+
"""Create a token for the given run ID."""
|
|
691
|
+
token = secrets.token_hex(FLWR_APP_TOKEN_LENGTH) # Generate a random token
|
|
692
|
+
with self.lock_token_store:
|
|
693
|
+
if run_id in self.token_store:
|
|
694
|
+
return None # Token already created for this run ID
|
|
695
|
+
self.token_store[run_id] = token
|
|
696
|
+
self.token_to_run_id[token] = run_id
|
|
697
|
+
return token
|
|
698
|
+
|
|
699
|
+
def verify_token(self, run_id: int, token: str) -> bool:
|
|
700
|
+
"""Verify a token for the given run ID."""
|
|
701
|
+
with self.lock_token_store:
|
|
702
|
+
return self.token_store.get(run_id) == token
|
|
703
|
+
|
|
704
|
+
def delete_token(self, run_id: int) -> None:
|
|
705
|
+
"""Delete the token for the given run ID."""
|
|
706
|
+
with self.lock_token_store:
|
|
707
|
+
token = self.token_store.pop(run_id, None)
|
|
708
|
+
if token is not None:
|
|
709
|
+
self.token_to_run_id.pop(token, None)
|
|
710
|
+
|
|
711
|
+
def get_run_id_by_token(self, token: str) -> Optional[int]:
|
|
712
|
+
"""Get the run ID associated with a given token."""
|
|
713
|
+
with self.lock_token_store:
|
|
714
|
+
return self.token_to_run_id.get(token)
|
|
@@ -21,9 +21,10 @@ from typing import Optional
|
|
|
21
21
|
from flwr.common import Context, Message
|
|
22
22
|
from flwr.common.record import ConfigRecord
|
|
23
23
|
from flwr.common.typing import Run, RunStatus, UserConfig
|
|
24
|
+
from flwr.supercore.corestate import CoreState
|
|
24
25
|
|
|
25
26
|
|
|
26
|
-
class LinkState(
|
|
27
|
+
class LinkState(CoreState): # pylint: disable=R0904
|
|
27
28
|
"""Abstract LinkState."""
|
|
28
29
|
|
|
29
30
|
@abc.abstractmethod
|
|
@@ -19,6 +19,7 @@
|
|
|
19
19
|
|
|
20
20
|
import json
|
|
21
21
|
import re
|
|
22
|
+
import secrets
|
|
22
23
|
import sqlite3
|
|
23
24
|
import time
|
|
24
25
|
from collections.abc import Sequence
|
|
@@ -27,6 +28,7 @@ from typing import Any, Optional, Union, cast
|
|
|
27
28
|
|
|
28
29
|
from flwr.common import Context, Message, Metadata, log, now
|
|
29
30
|
from flwr.common.constant import (
|
|
31
|
+
FLWR_APP_TOKEN_LENGTH,
|
|
30
32
|
HEARTBEAT_MAX_INTERVAL,
|
|
31
33
|
HEARTBEAT_PATIENCE,
|
|
32
34
|
MESSAGE_TTL_TOLERANCE,
|
|
@@ -163,6 +165,13 @@ CREATE TABLE IF NOT EXISTS message_res(
|
|
|
163
165
|
);
|
|
164
166
|
"""
|
|
165
167
|
|
|
168
|
+
SQL_CREATE_TABLE_TOKEN_STORE = """
|
|
169
|
+
CREATE TABLE IF NOT EXISTS token_store (
|
|
170
|
+
run_id INTEGER PRIMARY KEY,
|
|
171
|
+
token TEXT UNIQUE NOT NULL
|
|
172
|
+
);
|
|
173
|
+
"""
|
|
174
|
+
|
|
166
175
|
DictOrTuple = Union[tuple[Any, ...], dict[str, Any]]
|
|
167
176
|
|
|
168
177
|
|
|
@@ -212,6 +221,7 @@ class SqliteLinkState(LinkState): # pylint: disable=R0904
|
|
|
212
221
|
cur.execute(SQL_CREATE_TABLE_MESSAGE_RES)
|
|
213
222
|
cur.execute(SQL_CREATE_TABLE_NODE)
|
|
214
223
|
cur.execute(SQL_CREATE_TABLE_PUBLIC_KEY)
|
|
224
|
+
cur.execute(SQL_CREATE_TABLE_TOKEN_STORE)
|
|
215
225
|
cur.execute(SQL_CREATE_INDEX_ONLINE_UNTIL)
|
|
216
226
|
res = cur.execute("SELECT name FROM sqlite_schema;")
|
|
217
227
|
return res.fetchall()
|
|
@@ -1138,6 +1148,41 @@ class SqliteLinkState(LinkState): # pylint: disable=R0904
|
|
|
1138
1148
|
|
|
1139
1149
|
return message_ins
|
|
1140
1150
|
|
|
1151
|
+
def create_token(self, run_id: int) -> Optional[str]:
|
|
1152
|
+
"""Create a token for the given run ID."""
|
|
1153
|
+
token = secrets.token_hex(FLWR_APP_TOKEN_LENGTH) # Generate a random token
|
|
1154
|
+
query = "INSERT INTO token_store (run_id, token) VALUES (:run_id, :token);"
|
|
1155
|
+
data = {"run_id": convert_uint64_to_sint64(run_id), "token": token}
|
|
1156
|
+
try:
|
|
1157
|
+
self.query(query, data)
|
|
1158
|
+
except sqlite3.IntegrityError:
|
|
1159
|
+
return None # Token already created for this run ID
|
|
1160
|
+
return token
|
|
1161
|
+
|
|
1162
|
+
def verify_token(self, run_id: int, token: str) -> bool:
|
|
1163
|
+
"""Verify a token for the given run ID."""
|
|
1164
|
+
query = "SELECT token FROM token_store WHERE run_id = :run_id;"
|
|
1165
|
+
data = {"run_id": convert_uint64_to_sint64(run_id)}
|
|
1166
|
+
rows = self.query(query, data)
|
|
1167
|
+
if not rows:
|
|
1168
|
+
return False
|
|
1169
|
+
return cast(str, rows[0]["token"]) == token
|
|
1170
|
+
|
|
1171
|
+
def delete_token(self, run_id: int) -> None:
|
|
1172
|
+
"""Delete the token for the given run ID."""
|
|
1173
|
+
query = "DELETE FROM token_store WHERE run_id = :run_id;"
|
|
1174
|
+
data = {"run_id": convert_uint64_to_sint64(run_id)}
|
|
1175
|
+
self.query(query, data)
|
|
1176
|
+
|
|
1177
|
+
def get_run_id_by_token(self, token: str) -> Optional[int]:
|
|
1178
|
+
"""Get the run ID associated with a given token."""
|
|
1179
|
+
query = "SELECT run_id FROM token_store WHERE token = :token;"
|
|
1180
|
+
data = {"token": token}
|
|
1181
|
+
rows = self.query(query, data)
|
|
1182
|
+
if not rows:
|
|
1183
|
+
return None
|
|
1184
|
+
return convert_sint64_to_uint64(rows[0]["run_id"])
|
|
1185
|
+
|
|
1141
1186
|
|
|
1142
1187
|
def dict_factory(
|
|
1143
1188
|
cursor: sqlite3.Cursor,
|
|
@@ -58,7 +58,7 @@ def run_serverappio_api_grpc(
|
|
|
58
58
|
certificates=certificates,
|
|
59
59
|
)
|
|
60
60
|
|
|
61
|
-
log(INFO, "Flower
|
|
61
|
+
log(INFO, "Flower Deployment Runtime: Starting ServerAppIo API on %s", address)
|
|
62
62
|
serverappio_grpc_server.start()
|
|
63
63
|
|
|
64
64
|
return serverappio_grpc_server
|