flwr 1.19.0__py3-none-any.whl → 1.21.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flwr/__init__.py +4 -1
- flwr/app/__init__.py +28 -0
- flwr/app/exception.py +31 -0
- flwr/cli/auth_plugin/oidc_cli_plugin.py +4 -4
- flwr/cli/build.py +15 -5
- flwr/cli/cli_user_auth_interceptor.py +1 -1
- flwr/cli/config_utils.py +3 -3
- flwr/cli/constant.py +25 -8
- flwr/cli/log.py +9 -9
- flwr/cli/login/login.py +3 -3
- flwr/cli/ls.py +5 -5
- flwr/cli/new/new.py +23 -4
- flwr/cli/new/templates/app/README.flowertune.md.tpl +2 -0
- flwr/cli/new/templates/app/README.md.tpl +5 -0
- flwr/cli/new/templates/app/code/__init__.pytorch_msg_api.py.tpl +1 -0
- flwr/cli/new/templates/app/code/client.pytorch_msg_api.py.tpl +80 -0
- flwr/cli/new/templates/app/code/server.pytorch_msg_api.py.tpl +41 -0
- flwr/cli/new/templates/app/code/task.pytorch_msg_api.py.tpl +98 -0
- flwr/cli/new/templates/app/pyproject.baseline.toml.tpl +14 -3
- flwr/cli/new/templates/app/pyproject.flowertune.toml.tpl +13 -1
- flwr/cli/new/templates/app/pyproject.huggingface.toml.tpl +21 -2
- flwr/cli/new/templates/app/pyproject.jax.toml.tpl +18 -1
- flwr/cli/new/templates/app/pyproject.mlx.toml.tpl +19 -2
- flwr/cli/new/templates/app/pyproject.numpy.toml.tpl +18 -1
- flwr/cli/new/templates/app/pyproject.pytorch.toml.tpl +20 -3
- flwr/cli/new/templates/app/pyproject.pytorch_msg_api.toml.tpl +53 -0
- flwr/cli/new/templates/app/pyproject.sklearn.toml.tpl +18 -1
- flwr/cli/new/templates/app/pyproject.tensorflow.toml.tpl +18 -1
- flwr/cli/run/run.py +53 -50
- flwr/cli/stop.py +7 -4
- flwr/cli/utils.py +29 -11
- flwr/client/grpc_adapter_client/connection.py +11 -4
- flwr/client/grpc_rere_client/connection.py +93 -129
- flwr/client/rest_client/connection.py +134 -164
- flwr/clientapp/__init__.py +10 -0
- flwr/clientapp/mod/__init__.py +26 -0
- flwr/clientapp/mod/centraldp_mods.py +132 -0
- flwr/common/args.py +20 -6
- flwr/common/auth_plugin/__init__.py +4 -4
- flwr/common/auth_plugin/auth_plugin.py +7 -7
- flwr/common/constant.py +26 -5
- flwr/common/event_log_plugin/event_log_plugin.py +1 -1
- flwr/common/exit/__init__.py +4 -0
- flwr/common/exit/exit.py +8 -1
- flwr/common/exit/exit_code.py +42 -8
- flwr/common/exit/exit_handler.py +62 -0
- flwr/common/{exit_handlers.py → exit/signal_handler.py} +20 -37
- flwr/common/grpc.py +1 -1
- flwr/common/{inflatable_grpc_utils.py → inflatable_protobuf_utils.py} +52 -10
- flwr/common/inflatable_utils.py +191 -24
- flwr/common/logger.py +1 -1
- flwr/common/record/array.py +101 -22
- flwr/common/record/arraychunk.py +59 -0
- flwr/common/retry_invoker.py +30 -11
- flwr/common/serde.py +0 -28
- flwr/common/telemetry.py +4 -0
- flwr/compat/client/app.py +14 -31
- flwr/compat/server/app.py +2 -2
- flwr/proto/appio_pb2.py +51 -0
- flwr/proto/appio_pb2.pyi +195 -0
- flwr/proto/appio_pb2_grpc.py +4 -0
- flwr/proto/appio_pb2_grpc.pyi +4 -0
- flwr/proto/clientappio_pb2.py +4 -19
- flwr/proto/clientappio_pb2.pyi +0 -125
- flwr/proto/clientappio_pb2_grpc.py +269 -29
- flwr/proto/clientappio_pb2_grpc.pyi +114 -21
- flwr/proto/control_pb2.py +62 -0
- flwr/proto/{exec_pb2_grpc.py → control_pb2_grpc.py} +54 -54
- flwr/proto/{exec_pb2_grpc.pyi → control_pb2_grpc.pyi} +28 -28
- flwr/proto/fleet_pb2.py +12 -20
- flwr/proto/fleet_pb2.pyi +6 -36
- flwr/proto/serverappio_pb2.py +8 -31
- flwr/proto/serverappio_pb2.pyi +0 -152
- flwr/proto/serverappio_pb2_grpc.py +107 -38
- flwr/proto/serverappio_pb2_grpc.pyi +47 -20
- flwr/proto/simulationio_pb2.py +4 -11
- flwr/proto/simulationio_pb2.pyi +0 -58
- flwr/proto/simulationio_pb2_grpc.py +129 -27
- flwr/proto/simulationio_pb2_grpc.pyi +52 -13
- flwr/server/app.py +130 -153
- flwr/server/fleet_event_log_interceptor.py +4 -0
- flwr/server/grid/grpc_grid.py +94 -54
- flwr/server/grid/inmemory_grid.py +1 -0
- flwr/server/serverapp/app.py +165 -144
- flwr/server/superlink/fleet/grpc_adapter/grpc_adapter_servicer.py +8 -0
- flwr/server/superlink/fleet/grpc_rere/fleet_servicer.py +1 -1
- flwr/server/superlink/fleet/grpc_rere/server_interceptor.py +2 -5
- flwr/server/superlink/fleet/message_handler/message_handler.py +10 -16
- flwr/server/superlink/fleet/rest_rere/rest_api.py +1 -2
- flwr/server/superlink/fleet/vce/backend/raybackend.py +3 -1
- flwr/server/superlink/fleet/vce/vce_api.py +6 -6
- flwr/server/superlink/linkstate/in_memory_linkstate.py +34 -0
- flwr/server/superlink/linkstate/linkstate.py +2 -1
- flwr/server/superlink/linkstate/sqlite_linkstate.py +45 -0
- flwr/server/superlink/serverappio/serverappio_grpc.py +2 -2
- flwr/server/superlink/serverappio/serverappio_servicer.py +95 -48
- flwr/server/superlink/simulation/simulationio_grpc.py +1 -1
- flwr/server/superlink/simulation/simulationio_servicer.py +98 -22
- flwr/server/superlink/utils.py +0 -35
- flwr/serverapp/__init__.py +12 -0
- flwr/serverapp/dp_fixed_clipping.py +352 -0
- flwr/serverapp/exception.py +38 -0
- flwr/serverapp/strategy/__init__.py +38 -0
- flwr/serverapp/strategy/dp_fixed_clipping.py +352 -0
- flwr/serverapp/strategy/fedadagrad.py +162 -0
- flwr/serverapp/strategy/fedadam.py +181 -0
- flwr/serverapp/strategy/fedavg.py +295 -0
- flwr/serverapp/strategy/fedopt.py +218 -0
- flwr/serverapp/strategy/fedyogi.py +173 -0
- flwr/serverapp/strategy/result.py +105 -0
- flwr/serverapp/strategy/strategy.py +285 -0
- flwr/serverapp/strategy/strategy_utils.py +251 -0
- flwr/serverapp/strategy/strategy_utils_tests.py +304 -0
- flwr/simulation/app.py +159 -154
- flwr/simulation/run_simulation.py +17 -0
- flwr/supercore/app_utils.py +58 -0
- flwr/supercore/cli/__init__.py +22 -0
- flwr/supercore/cli/flower_superexec.py +141 -0
- flwr/supercore/corestate/__init__.py +22 -0
- flwr/supercore/corestate/corestate.py +81 -0
- flwr/{server/superlink → supercore}/ffs/disk_ffs.py +1 -1
- flwr/supercore/grpc_health/__init__.py +25 -0
- flwr/supercore/grpc_health/health_server.py +53 -0
- flwr/supercore/grpc_health/simple_health_servicer.py +38 -0
- flwr/supercore/license_plugin/__init__.py +22 -0
- flwr/supercore/license_plugin/license_plugin.py +26 -0
- flwr/supercore/object_store/in_memory_object_store.py +31 -31
- flwr/supercore/object_store/object_store.py +20 -42
- flwr/supercore/object_store/utils.py +43 -0
- flwr/{superexec → supercore/superexec}/__init__.py +1 -1
- flwr/supercore/superexec/plugin/__init__.py +28 -0
- flwr/supercore/superexec/plugin/base_exec_plugin.py +53 -0
- flwr/supercore/superexec/plugin/clientapp_exec_plugin.py +28 -0
- flwr/supercore/superexec/plugin/exec_plugin.py +71 -0
- flwr/supercore/superexec/plugin/serverapp_exec_plugin.py +28 -0
- flwr/supercore/superexec/plugin/simulation_exec_plugin.py +28 -0
- flwr/supercore/superexec/run_superexec.py +185 -0
- flwr/supercore/utils.py +32 -0
- flwr/superlink/servicer/__init__.py +15 -0
- flwr/superlink/servicer/control/__init__.py +22 -0
- flwr/{superexec/exec_event_log_interceptor.py → superlink/servicer/control/control_event_log_interceptor.py} +9 -5
- flwr/{superexec/exec_grpc.py → superlink/servicer/control/control_grpc.py} +39 -28
- flwr/superlink/servicer/control/control_license_interceptor.py +82 -0
- flwr/{superexec/exec_servicer.py → superlink/servicer/control/control_servicer.py} +79 -31
- flwr/{superexec/exec_user_auth_interceptor.py → superlink/servicer/control/control_user_auth_interceptor.py} +18 -10
- flwr/supernode/cli/flower_supernode.py +3 -7
- flwr/supernode/cli/flwr_clientapp.py +20 -16
- flwr/supernode/nodestate/in_memory_nodestate.py +13 -4
- flwr/supernode/nodestate/nodestate.py +3 -44
- flwr/supernode/runtime/run_clientapp.py +129 -115
- flwr/supernode/servicer/clientappio/__init__.py +1 -3
- flwr/supernode/servicer/clientappio/clientappio_servicer.py +217 -165
- flwr/supernode/start_client_internal.py +205 -148
- {flwr-1.19.0.dist-info → flwr-1.21.0.dist-info}/METADATA +5 -3
- {flwr-1.19.0.dist-info → flwr-1.21.0.dist-info}/RECORD +161 -117
- {flwr-1.19.0.dist-info → flwr-1.21.0.dist-info}/entry_points.txt +1 -0
- flwr/common/inflatable_rest_utils.py +0 -99
- flwr/proto/exec_pb2.py +0 -62
- flwr/superexec/app.py +0 -45
- flwr/superexec/deployment.py +0 -192
- flwr/superexec/executor.py +0 -100
- flwr/superexec/simulation.py +0 -130
- /flwr/proto/{exec_pb2.pyi → control_pb2.pyi} +0 -0
- /flwr/{server/superlink → supercore}/ffs/__init__.py +0 -0
- /flwr/{server/superlink → supercore}/ffs/ffs.py +0 -0
- /flwr/{server/superlink → supercore}/ffs/ffs_factory.py +0 -0
- {flwr-1.19.0.dist-info → flwr-1.21.0.dist-info}/WHEEL +0 -0
flwr/server/grid/grpc_grid.py
CHANGED
|
@@ -22,31 +22,43 @@ from typing import Optional, cast
|
|
|
22
22
|
|
|
23
23
|
import grpc
|
|
24
24
|
|
|
25
|
-
from flwr.
|
|
25
|
+
from flwr.app.error import Error
|
|
26
|
+
from flwr.common import Message, Metadata, RecordDict, now
|
|
26
27
|
from flwr.common.constant import (
|
|
27
28
|
SERVERAPPIO_API_DEFAULT_CLIENT_ADDRESS,
|
|
28
29
|
SUPERLINK_NODE_ID,
|
|
30
|
+
ErrorCode,
|
|
31
|
+
MessageType,
|
|
29
32
|
)
|
|
30
33
|
from flwr.common.grpc import create_channel, on_channel_state_change
|
|
31
34
|
from flwr.common.inflatable import (
|
|
35
|
+
InflatableObject,
|
|
32
36
|
get_all_nested_objects,
|
|
33
37
|
get_object_tree,
|
|
38
|
+
iterate_object_tree,
|
|
34
39
|
no_object_id_recompute,
|
|
35
40
|
)
|
|
36
|
-
from flwr.common.
|
|
37
|
-
|
|
38
|
-
|
|
41
|
+
from flwr.common.inflatable_protobuf_utils import (
|
|
42
|
+
make_pull_object_fn_protobuf,
|
|
43
|
+
make_push_object_fn_protobuf,
|
|
39
44
|
)
|
|
40
45
|
from flwr.common.inflatable_utils import (
|
|
46
|
+
ObjectUnavailableError,
|
|
41
47
|
inflate_object_from_contents,
|
|
42
48
|
pull_objects,
|
|
43
49
|
push_objects,
|
|
44
50
|
)
|
|
45
51
|
from flwr.common.logger import log, warn_deprecated_feature
|
|
46
|
-
from flwr.common.message import remove_content_from_message
|
|
52
|
+
from flwr.common.message import make_message, remove_content_from_message
|
|
47
53
|
from flwr.common.retry_invoker import _make_simple_grpc_retry_invoker, _wrap_stub
|
|
48
54
|
from flwr.common.serde import message_to_proto, run_from_proto
|
|
49
55
|
from flwr.common.typing import Run
|
|
56
|
+
from flwr.proto.appio_pb2 import ( # pylint: disable=E0611
|
|
57
|
+
PullAppMessagesRequest,
|
|
58
|
+
PullAppMessagesResponse,
|
|
59
|
+
PushAppMessagesRequest,
|
|
60
|
+
PushAppMessagesResponse,
|
|
61
|
+
)
|
|
50
62
|
from flwr.proto.message_pb2 import ( # pylint: disable=E0611
|
|
51
63
|
ConfirmMessageReceivedRequest,
|
|
52
64
|
)
|
|
@@ -55,10 +67,6 @@ from flwr.proto.run_pb2 import GetRunRequest, GetRunResponse # pylint: disable=
|
|
|
55
67
|
from flwr.proto.serverappio_pb2 import ( # pylint: disable=E0611
|
|
56
68
|
GetNodesRequest,
|
|
57
69
|
GetNodesResponse,
|
|
58
|
-
PullResMessagesRequest,
|
|
59
|
-
PullResMessagesResponse,
|
|
60
|
-
PushInsMessagesRequest,
|
|
61
|
-
PushInsMessagesResponse,
|
|
62
70
|
)
|
|
63
71
|
from flwr.proto.serverappio_pb2_grpc import ServerAppIoStub # pylint: disable=E0611
|
|
64
72
|
|
|
@@ -215,37 +223,38 @@ class GrpcGrid(Grid):
|
|
|
215
223
|
)
|
|
216
224
|
return [node.node_id for node in res.nodes]
|
|
217
225
|
|
|
218
|
-
def
|
|
219
|
-
"""Push
|
|
220
|
-
#
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
226
|
+
def _try_push_messages(self, run_id: int, messages: Iterable[Message]) -> list[str]:
|
|
227
|
+
"""Push all messages and its associated objects."""
|
|
228
|
+
# Prepare all Messages to be sent in a single request
|
|
229
|
+
proto_messages = []
|
|
230
|
+
object_trees = []
|
|
231
|
+
all_objects: dict[str, InflatableObject] = {}
|
|
232
|
+
for msg in messages:
|
|
233
|
+
proto_messages.append(message_to_proto(remove_content_from_message(msg)))
|
|
234
|
+
all_objects.update(get_all_nested_objects(msg))
|
|
235
|
+
object_trees.append(get_object_tree(msg))
|
|
236
|
+
del msg
|
|
224
237
|
|
|
225
238
|
# Call GrpcServerAppIoStub method
|
|
226
|
-
res:
|
|
227
|
-
|
|
228
|
-
messages_list=
|
|
239
|
+
res: PushAppMessagesResponse = self._stub.PushMessages(
|
|
240
|
+
PushAppMessagesRequest(
|
|
241
|
+
messages_list=proto_messages,
|
|
229
242
|
run_id=run_id,
|
|
230
|
-
message_object_trees=
|
|
243
|
+
message_object_trees=object_trees,
|
|
231
244
|
)
|
|
232
245
|
)
|
|
233
246
|
|
|
234
247
|
# Push objects
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
),
|
|
246
|
-
object_ids_to_push=obj_ids_to_push,
|
|
247
|
-
)
|
|
248
|
-
return msg_id
|
|
248
|
+
push_objects(
|
|
249
|
+
all_objects,
|
|
250
|
+
push_object_fn=make_push_object_fn_protobuf(
|
|
251
|
+
push_object_protobuf=self._stub.PushObject,
|
|
252
|
+
node=self.node,
|
|
253
|
+
run_id=run_id,
|
|
254
|
+
),
|
|
255
|
+
object_ids_to_push=set(res.objects_to_push),
|
|
256
|
+
)
|
|
257
|
+
return cast(list[str], res.message_ids)
|
|
249
258
|
|
|
250
259
|
def push_messages(self, messages: Iterable[Message]) -> Iterable[str]:
|
|
251
260
|
"""Push messages to specified node IDs.
|
|
@@ -256,17 +265,19 @@ class GrpcGrid(Grid):
|
|
|
256
265
|
# Construct Messages
|
|
257
266
|
run_id = cast(Run, self._run).run_id
|
|
258
267
|
message_ids: list[str] = []
|
|
268
|
+
if not messages:
|
|
269
|
+
return message_ids
|
|
259
270
|
try:
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
271
|
+
with no_object_id_recompute():
|
|
272
|
+
for msg in messages:
|
|
273
|
+
# Populate metadata
|
|
274
|
+
msg.metadata.__dict__["_run_id"] = run_id
|
|
275
|
+
msg.metadata.__dict__["_src_node_id"] = self.node.node_id
|
|
276
|
+
msg.metadata.__dict__["_message_id"] = msg.object_id
|
|
277
|
+
# Check message
|
|
278
|
+
self._check_message(msg)
|
|
279
|
+
# Try pushing messages and their objects
|
|
280
|
+
message_ids = self._try_push_messages(run_id, messages)
|
|
270
281
|
|
|
271
282
|
except grpc.RpcError as e:
|
|
272
283
|
if e.code() == grpc.StatusCode.RESOURCE_EXHAUSTED: # pylint: disable=E1101
|
|
@@ -294,24 +305,52 @@ class GrpcGrid(Grid):
|
|
|
294
305
|
run_id = cast(Run, self._run).run_id
|
|
295
306
|
try:
|
|
296
307
|
# Pull Messages
|
|
297
|
-
res:
|
|
298
|
-
|
|
308
|
+
res: PullAppMessagesResponse = self._stub.PullMessages(
|
|
309
|
+
PullAppMessagesRequest(
|
|
299
310
|
message_ids=message_ids,
|
|
300
311
|
run_id=run_id,
|
|
301
312
|
)
|
|
302
313
|
)
|
|
303
314
|
# Pull Messages from store
|
|
304
315
|
inflated_msgs: list[Message] = []
|
|
305
|
-
for msg_proto in res.messages_list:
|
|
316
|
+
for msg_proto, msg_tree in zip(res.messages_list, res.message_object_trees):
|
|
306
317
|
msg_id = msg_proto.metadata.message_id
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
318
|
+
try:
|
|
319
|
+
all_object_contents = pull_objects(
|
|
320
|
+
object_ids=[
|
|
321
|
+
tree.object_id for tree in iterate_object_tree(msg_tree)
|
|
322
|
+
],
|
|
323
|
+
pull_object_fn=make_pull_object_fn_protobuf(
|
|
324
|
+
pull_object_protobuf=self._stub.PullObject,
|
|
325
|
+
node=self.node,
|
|
326
|
+
run_id=run_id,
|
|
327
|
+
),
|
|
328
|
+
)
|
|
329
|
+
except ObjectUnavailableError as e:
|
|
330
|
+
# An ObjectUnavailableError indicates that the object is not yet
|
|
331
|
+
# available. If this point has been reached, it means that the
|
|
332
|
+
# Grid has tried to pull the object for the maximum number of times
|
|
333
|
+
# or for the maximum time allowed, so we return an inflated message
|
|
334
|
+
# with an error
|
|
335
|
+
inflated_msgs.append(
|
|
336
|
+
make_message(
|
|
337
|
+
metadata=Metadata(
|
|
338
|
+
run_id=run_id,
|
|
339
|
+
message_id="",
|
|
340
|
+
src_node_id=self.node.node_id,
|
|
341
|
+
dst_node_id=self.node.node_id,
|
|
342
|
+
message_type=MessageType.SYSTEM,
|
|
343
|
+
group_id="",
|
|
344
|
+
ttl=0,
|
|
345
|
+
reply_to_message_id=msg_proto.metadata.reply_to_message_id,
|
|
346
|
+
created_at=now().timestamp(),
|
|
347
|
+
),
|
|
348
|
+
error=Error(
|
|
349
|
+
code=ErrorCode.MESSAGE_UNAVAILABLE, reason=(str(e))
|
|
350
|
+
),
|
|
351
|
+
)
|
|
352
|
+
)
|
|
353
|
+
continue
|
|
315
354
|
|
|
316
355
|
# Confirm that the message has been received
|
|
317
356
|
self._stub.ConfirmMessageReceived(
|
|
@@ -347,6 +386,7 @@ class GrpcGrid(Grid):
|
|
|
347
386
|
"""
|
|
348
387
|
# Push messages
|
|
349
388
|
msg_ids = set(self.push_messages(messages))
|
|
389
|
+
del messages
|
|
350
390
|
|
|
351
391
|
# Pull messages
|
|
352
392
|
end_time = time.time() + (timeout if timeout is not None else 0.0)
|
flwr/server/serverapp/app.py
CHANGED
|
@@ -19,9 +19,9 @@ import argparse
|
|
|
19
19
|
from logging import DEBUG, ERROR, INFO
|
|
20
20
|
from pathlib import Path
|
|
21
21
|
from queue import Queue
|
|
22
|
-
from time import sleep
|
|
23
22
|
from typing import Optional
|
|
24
23
|
|
|
24
|
+
from flwr.app.exception import AppExitException
|
|
25
25
|
from flwr.cli.config_utils import get_fab_metadata
|
|
26
26
|
from flwr.cli.install import install_from_fab
|
|
27
27
|
from flwr.cli.utils import get_sha256_hash
|
|
@@ -34,10 +34,11 @@ from flwr.common.config import (
|
|
|
34
34
|
)
|
|
35
35
|
from flwr.common.constant import (
|
|
36
36
|
SERVERAPPIO_API_DEFAULT_CLIENT_ADDRESS,
|
|
37
|
+
ExecPluginType,
|
|
37
38
|
Status,
|
|
38
39
|
SubStatus,
|
|
39
40
|
)
|
|
40
|
-
from flwr.common.exit import ExitCode, flwr_exit
|
|
41
|
+
from flwr.common.exit import ExitCode, add_exit_handler, flwr_exit
|
|
41
42
|
from flwr.common.heartbeat import HeartbeatSender, get_grpc_app_heartbeat_fn
|
|
42
43
|
from flwr.common.logger import (
|
|
43
44
|
log,
|
|
@@ -55,14 +56,18 @@ from flwr.common.serde import (
|
|
|
55
56
|
)
|
|
56
57
|
from flwr.common.telemetry import EventType, event
|
|
57
58
|
from flwr.common.typing import RunNotRunningException, RunStatus
|
|
58
|
-
from flwr.proto.
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
PushServerAppOutputsRequest,
|
|
59
|
+
from flwr.proto.appio_pb2 import ( # pylint: disable=E0611
|
|
60
|
+
PullAppInputsRequest,
|
|
61
|
+
PullAppInputsResponse,
|
|
62
|
+
PushAppOutputsRequest,
|
|
63
63
|
)
|
|
64
|
+
from flwr.proto.run_pb2 import UpdateRunStatusRequest # pylint: disable=E0611
|
|
65
|
+
from flwr.proto.serverappio_pb2_grpc import ServerAppIoStub
|
|
64
66
|
from flwr.server.grid.grpc_grid import GrpcGrid
|
|
65
67
|
from flwr.server.run_serverapp import run as run_
|
|
68
|
+
from flwr.supercore.app_utils import start_parent_process_monitor
|
|
69
|
+
from flwr.supercore.superexec.plugin import ServerAppExecPlugin
|
|
70
|
+
from flwr.supercore.superexec.run_superexec import run_with_deprecation_warning
|
|
66
71
|
|
|
67
72
|
|
|
68
73
|
def flwr_serverapp() -> None:
|
|
@@ -73,14 +78,27 @@ def flwr_serverapp() -> None:
|
|
|
73
78
|
|
|
74
79
|
args = _parse_args_run_flwr_serverapp().parse_args()
|
|
75
80
|
|
|
76
|
-
log(INFO, "Start `flwr-serverapp` process")
|
|
77
|
-
|
|
78
81
|
if not args.insecure:
|
|
79
82
|
flwr_exit(
|
|
80
83
|
ExitCode.COMMON_TLS_NOT_SUPPORTED,
|
|
81
84
|
"`flwr-serverapp` does not support TLS yet.",
|
|
82
85
|
)
|
|
83
86
|
|
|
87
|
+
# Disallow long-running `flwr-serverapp` processes
|
|
88
|
+
if args.token is None:
|
|
89
|
+
run_with_deprecation_warning(
|
|
90
|
+
cmd="flwr-serverapp",
|
|
91
|
+
plugin_type=ExecPluginType.SERVER_APP,
|
|
92
|
+
plugin_class=ServerAppExecPlugin,
|
|
93
|
+
stub_class=ServerAppIoStub,
|
|
94
|
+
appio_api_address=args.serverappio_api_address,
|
|
95
|
+
flwr_dir=args.flwr_dir,
|
|
96
|
+
parent_pid=args.parent_pid,
|
|
97
|
+
warn_run_once=args.run_once,
|
|
98
|
+
)
|
|
99
|
+
return
|
|
100
|
+
|
|
101
|
+
log(INFO, "Start `flwr-serverapp` process")
|
|
84
102
|
log(
|
|
85
103
|
DEBUG,
|
|
86
104
|
"`flwr-serverapp` will attempt to connect to SuperLink's "
|
|
@@ -90,168 +108,177 @@ def flwr_serverapp() -> None:
|
|
|
90
108
|
run_serverapp(
|
|
91
109
|
serverappio_api_address=args.serverappio_api_address,
|
|
92
110
|
log_queue=log_queue,
|
|
93
|
-
|
|
111
|
+
token=args.token,
|
|
94
112
|
flwr_dir=args.flwr_dir,
|
|
95
113
|
certificates=None,
|
|
114
|
+
parent_pid=args.parent_pid,
|
|
96
115
|
)
|
|
97
116
|
|
|
98
117
|
# Restore stdout/stderr
|
|
99
118
|
restore_output()
|
|
100
119
|
|
|
101
120
|
|
|
102
|
-
def run_serverapp( # pylint: disable=R0914,
|
|
121
|
+
def run_serverapp( # pylint: disable=R0913, R0914, R0915, R0917, W0212
|
|
103
122
|
serverappio_api_address: str,
|
|
104
123
|
log_queue: Queue[Optional[str]],
|
|
105
|
-
|
|
124
|
+
token: str,
|
|
106
125
|
flwr_dir: Optional[str] = None,
|
|
107
126
|
certificates: Optional[bytes] = None,
|
|
127
|
+
parent_pid: Optional[int] = None,
|
|
108
128
|
) -> None:
|
|
109
129
|
"""Run Flower ServerApp process."""
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
)
|
|
130
|
+
# Monitor the main process in case of SIGKILL
|
|
131
|
+
if parent_pid is not None:
|
|
132
|
+
start_parent_process_monitor(parent_pid)
|
|
114
133
|
|
|
115
134
|
# Resolve directory where FABs are installed
|
|
116
135
|
flwr_dir_ = get_flwr_dir(flwr_dir)
|
|
117
136
|
log_uploader = None
|
|
118
|
-
success = True
|
|
119
137
|
hash_run_id = None
|
|
120
138
|
run_status = None
|
|
121
139
|
heartbeat_sender = None
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
grid.set_run(run.run_id)
|
|
141
|
-
|
|
142
|
-
# Start log uploader for this run
|
|
143
|
-
log_uploader = start_log_uploader(
|
|
144
|
-
log_queue=log_queue,
|
|
145
|
-
node_id=0,
|
|
146
|
-
run_id=run.run_id,
|
|
147
|
-
stub=grid._stub,
|
|
140
|
+
grid = None
|
|
141
|
+
context = None
|
|
142
|
+
exit_code = ExitCode.SUCCESS
|
|
143
|
+
|
|
144
|
+
def on_exit() -> None:
|
|
145
|
+
# Stop heartbeat sender
|
|
146
|
+
if heartbeat_sender:
|
|
147
|
+
heartbeat_sender.stop()
|
|
148
|
+
|
|
149
|
+
# Stop log uploader for this run and upload final logs
|
|
150
|
+
if log_uploader:
|
|
151
|
+
stop_log_uploader(log_queue, log_uploader)
|
|
152
|
+
|
|
153
|
+
# Update run status
|
|
154
|
+
if run_status and grid:
|
|
155
|
+
run_status_proto = run_status_to_proto(run_status)
|
|
156
|
+
grid._stub.UpdateRunStatus(
|
|
157
|
+
UpdateRunStatusRequest(run_id=run.run_id, run_status=run_status_proto)
|
|
148
158
|
)
|
|
149
159
|
|
|
150
|
-
|
|
151
|
-
|
|
160
|
+
# Close the Grpc connection
|
|
161
|
+
if grid:
|
|
162
|
+
grid.close()
|
|
152
163
|
|
|
153
|
-
|
|
164
|
+
add_exit_handler(on_exit)
|
|
154
165
|
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
166
|
+
try:
|
|
167
|
+
# Initialize the GrpcGrid
|
|
168
|
+
grid = GrpcGrid(
|
|
169
|
+
serverappio_service_address=serverappio_api_address,
|
|
170
|
+
root_certificates=certificates,
|
|
171
|
+
)
|
|
159
172
|
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
173
|
+
# Pull ServerAppInputs from LinkState
|
|
174
|
+
req = PullAppInputsRequest(token=token)
|
|
175
|
+
log(DEBUG, "[flwr-serverapp] Pull ServerAppInputs")
|
|
176
|
+
res: PullAppInputsResponse = grid._stub.PullAppInputs(req)
|
|
177
|
+
context = context_from_proto(res.context)
|
|
178
|
+
run = run_from_proto(res.run)
|
|
179
|
+
fab = fab_from_proto(res.fab)
|
|
165
180
|
|
|
166
|
-
|
|
167
|
-
context.run_config = server_app_run_config
|
|
181
|
+
hash_run_id = get_sha256_hash(run.run_id)
|
|
168
182
|
|
|
169
|
-
|
|
170
|
-
DEBUG,
|
|
171
|
-
"[flwr-serverapp] Will load ServerApp `%s` in %s",
|
|
172
|
-
server_app_attr,
|
|
173
|
-
app_path,
|
|
174
|
-
)
|
|
183
|
+
grid.set_run(run.run_id)
|
|
175
184
|
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
185
|
+
# Start log uploader for this run
|
|
186
|
+
log_uploader = start_log_uploader(
|
|
187
|
+
log_queue=log_queue,
|
|
188
|
+
node_id=0,
|
|
189
|
+
run_id=run.run_id,
|
|
190
|
+
stub=grid._stub,
|
|
191
|
+
)
|
|
181
192
|
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
event_details={"run-id-hash": hash_run_id},
|
|
185
|
-
)
|
|
193
|
+
log(DEBUG, "[flwr-serverapp] Start FAB installation.")
|
|
194
|
+
install_from_fab(fab.content, flwr_dir=flwr_dir_, skip_prompt=True)
|
|
186
195
|
|
|
187
|
-
|
|
188
|
-
heartbeat_fn = get_grpc_app_heartbeat_fn(
|
|
189
|
-
grid._stub,
|
|
190
|
-
run.run_id,
|
|
191
|
-
failure_message="Heartbeat failed unexpectedly. The SuperLink could "
|
|
192
|
-
"not find the provided run ID, or the run status is invalid.",
|
|
193
|
-
)
|
|
194
|
-
heartbeat_sender = HeartbeatSender(heartbeat_fn)
|
|
195
|
-
heartbeat_sender.start()
|
|
196
|
-
|
|
197
|
-
# Load and run the ServerApp with the Grid
|
|
198
|
-
updated_context = run_(
|
|
199
|
-
grid=grid,
|
|
200
|
-
server_app_dir=app_path,
|
|
201
|
-
server_app_attr=server_app_attr,
|
|
202
|
-
context=context,
|
|
203
|
-
)
|
|
196
|
+
fab_id, fab_version = get_fab_metadata(fab.content)
|
|
204
197
|
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
run_status = RunStatus(Status.FINISHED, SubStatus.COMPLETED, "")
|
|
214
|
-
except RunNotRunningException:
|
|
215
|
-
log(INFO, "")
|
|
216
|
-
log(INFO, "Run ID %s stopped.", run.run_id)
|
|
217
|
-
log(INFO, "")
|
|
218
|
-
run_status = None
|
|
219
|
-
success = False
|
|
220
|
-
|
|
221
|
-
except Exception as ex: # pylint: disable=broad-exception-caught
|
|
222
|
-
exc_entity = "ServerApp"
|
|
223
|
-
log(ERROR, "%s raised an exception", exc_entity, exc_info=ex)
|
|
224
|
-
run_status = RunStatus(Status.FINISHED, SubStatus.FAILED, str(ex))
|
|
225
|
-
success = False
|
|
226
|
-
|
|
227
|
-
finally:
|
|
228
|
-
# Stop heartbeat sender
|
|
229
|
-
if heartbeat_sender:
|
|
230
|
-
heartbeat_sender.stop()
|
|
231
|
-
heartbeat_sender = None
|
|
232
|
-
|
|
233
|
-
# Stop log uploader for this run and upload final logs
|
|
234
|
-
if log_uploader:
|
|
235
|
-
stop_log_uploader(log_queue, log_uploader)
|
|
236
|
-
log_uploader = None
|
|
237
|
-
|
|
238
|
-
# Update run status
|
|
239
|
-
if run_status:
|
|
240
|
-
run_status_proto = run_status_to_proto(run_status)
|
|
241
|
-
grid._stub.UpdateRunStatus(
|
|
242
|
-
UpdateRunStatusRequest(
|
|
243
|
-
run_id=run.run_id, run_status=run_status_proto
|
|
244
|
-
)
|
|
245
|
-
)
|
|
246
|
-
|
|
247
|
-
event(
|
|
248
|
-
EventType.FLWR_SERVERAPP_RUN_LEAVE,
|
|
249
|
-
event_details={"run-id-hash": hash_run_id, "success": success},
|
|
250
|
-
)
|
|
198
|
+
app_path = str(get_project_dir(fab_id, fab_version, fab.hash_str, flwr_dir_))
|
|
199
|
+
config = get_project_config(app_path)
|
|
200
|
+
|
|
201
|
+
# Obtain server app reference and the run config
|
|
202
|
+
server_app_attr = config["tool"]["flwr"]["app"]["components"]["serverapp"]
|
|
203
|
+
server_app_run_config = get_fused_config_from_dir(
|
|
204
|
+
Path(app_path), run.override_config
|
|
205
|
+
)
|
|
251
206
|
|
|
252
|
-
#
|
|
253
|
-
|
|
254
|
-
|
|
207
|
+
# Update run_config in context
|
|
208
|
+
context.run_config = server_app_run_config
|
|
209
|
+
|
|
210
|
+
log(
|
|
211
|
+
DEBUG,
|
|
212
|
+
"[flwr-serverapp] Will load ServerApp `%s` in %s",
|
|
213
|
+
server_app_attr,
|
|
214
|
+
app_path,
|
|
215
|
+
)
|
|
216
|
+
|
|
217
|
+
# Change status to Running
|
|
218
|
+
run_status_proto = run_status_to_proto(RunStatus(Status.RUNNING, "", ""))
|
|
219
|
+
grid._stub.UpdateRunStatus(
|
|
220
|
+
UpdateRunStatusRequest(run_id=run.run_id, run_status=run_status_proto)
|
|
221
|
+
)
|
|
222
|
+
|
|
223
|
+
event(
|
|
224
|
+
EventType.FLWR_SERVERAPP_RUN_ENTER,
|
|
225
|
+
event_details={"run-id-hash": hash_run_id},
|
|
226
|
+
)
|
|
227
|
+
|
|
228
|
+
# Set up heartbeat sender
|
|
229
|
+
heartbeat_fn = get_grpc_app_heartbeat_fn(
|
|
230
|
+
grid._stub,
|
|
231
|
+
run.run_id,
|
|
232
|
+
failure_message="Heartbeat failed unexpectedly. The SuperLink could "
|
|
233
|
+
"not find the provided run ID, or the run status is invalid.",
|
|
234
|
+
)
|
|
235
|
+
heartbeat_sender = HeartbeatSender(heartbeat_fn)
|
|
236
|
+
heartbeat_sender.start()
|
|
237
|
+
|
|
238
|
+
# Load and run the ServerApp with the Grid
|
|
239
|
+
updated_context = run_(
|
|
240
|
+
grid=grid,
|
|
241
|
+
server_app_dir=app_path,
|
|
242
|
+
server_app_attr=server_app_attr,
|
|
243
|
+
context=context,
|
|
244
|
+
)
|
|
245
|
+
|
|
246
|
+
# Send resulting context
|
|
247
|
+
context_proto = context_to_proto(updated_context)
|
|
248
|
+
log(DEBUG, "[flwr-serverapp] Will push ServerAppOutputs")
|
|
249
|
+
out_req = PushAppOutputsRequest(
|
|
250
|
+
token=token, run_id=run.run_id, context=context_proto
|
|
251
|
+
)
|
|
252
|
+
_ = grid._stub.PushAppOutputs(out_req)
|
|
253
|
+
|
|
254
|
+
run_status = RunStatus(Status.FINISHED, SubStatus.COMPLETED, "")
|
|
255
|
+
|
|
256
|
+
# Raised when the run is already stopped by the user
|
|
257
|
+
except RunNotRunningException:
|
|
258
|
+
log(INFO, "")
|
|
259
|
+
log(INFO, "Run ID %s stopped.", run.run_id)
|
|
260
|
+
log(INFO, "")
|
|
261
|
+
run_status = None
|
|
262
|
+
# No need to update the exit code since this is expected behavior
|
|
263
|
+
|
|
264
|
+
except Exception as ex: # pylint: disable=broad-exception-caught
|
|
265
|
+
exc_entity = "ServerApp"
|
|
266
|
+
log(ERROR, "%s raised an exception", exc_entity, exc_info=ex)
|
|
267
|
+
run_status = RunStatus(Status.FINISHED, SubStatus.FAILED, str(ex))
|
|
268
|
+
|
|
269
|
+
# Set exit code
|
|
270
|
+
exit_code = ExitCode.SERVERAPP_EXCEPTION # General exit code
|
|
271
|
+
if isinstance(ex, AppExitException):
|
|
272
|
+
exit_code = ex.exit_code
|
|
273
|
+
|
|
274
|
+
flwr_exit(
|
|
275
|
+
code=exit_code,
|
|
276
|
+
event_type=EventType.FLWR_SERVERAPP_RUN_LEAVE,
|
|
277
|
+
event_details={
|
|
278
|
+
"run-id-hash": hash_run_id,
|
|
279
|
+
"success": exit_code == ExitCode.SUCCESS,
|
|
280
|
+
},
|
|
281
|
+
)
|
|
255
282
|
|
|
256
283
|
|
|
257
284
|
def _parse_args_run_flwr_serverapp() -> argparse.ArgumentParser:
|
|
@@ -266,11 +293,5 @@ def _parse_args_run_flwr_serverapp() -> argparse.ArgumentParser:
|
|
|
266
293
|
help="Address of SuperLink's ServerAppIo API (IPv4, IPv6, or a domain name)."
|
|
267
294
|
f"By default, it is set to {SERVERAPPIO_API_DEFAULT_CLIENT_ADDRESS}.",
|
|
268
295
|
)
|
|
269
|
-
parser.add_argument(
|
|
270
|
-
"--run-once",
|
|
271
|
-
action="store_true",
|
|
272
|
-
help="When set, this process will start a single ServerApp for a pending Run. "
|
|
273
|
-
"If there is no pending Run, the process will exit.",
|
|
274
|
-
)
|
|
275
296
|
add_args_flwr_app_common(parser=parser)
|
|
276
297
|
return parser
|
|
@@ -41,6 +41,7 @@ from flwr.proto.fleet_pb2 import ( # pylint: disable=E0611
|
|
|
41
41
|
from flwr.proto.grpcadapter_pb2 import MessageContainer # pylint: disable=E0611
|
|
42
42
|
from flwr.proto.heartbeat_pb2 import SendNodeHeartbeatRequest # pylint: disable=E0611
|
|
43
43
|
from flwr.proto.message_pb2 import ( # pylint: disable=E0611
|
|
44
|
+
ConfirmMessageReceivedRequest,
|
|
44
45
|
PullObjectRequest,
|
|
45
46
|
PushObjectRequest,
|
|
46
47
|
)
|
|
@@ -101,4 +102,11 @@ class GrpcAdapterServicer(grpcadapter_pb2_grpc.GrpcAdapterServicer, FleetService
|
|
|
101
102
|
return _handle(request, context, PushObjectRequest, self.PushObject)
|
|
102
103
|
if request.grpc_message_name == PullObjectRequest.__qualname__:
|
|
103
104
|
return _handle(request, context, PullObjectRequest, self.PullObject)
|
|
105
|
+
if request.grpc_message_name == ConfirmMessageReceivedRequest.__qualname__:
|
|
106
|
+
return _handle(
|
|
107
|
+
request,
|
|
108
|
+
context,
|
|
109
|
+
ConfirmMessageReceivedRequest,
|
|
110
|
+
self.ConfirmMessageReceived,
|
|
111
|
+
)
|
|
104
112
|
raise ValueError(f"Invalid grpc_message_name: {request.grpc_message_name}")
|
|
@@ -48,10 +48,10 @@ from flwr.proto.message_pb2 import ( # pylint: disable=E0611
|
|
|
48
48
|
PushObjectResponse,
|
|
49
49
|
)
|
|
50
50
|
from flwr.proto.run_pb2 import GetRunRequest, GetRunResponse # pylint: disable=E0611
|
|
51
|
-
from flwr.server.superlink.ffs.ffs_factory import FfsFactory
|
|
52
51
|
from flwr.server.superlink.fleet.message_handler import message_handler
|
|
53
52
|
from flwr.server.superlink.linkstate import LinkStateFactory
|
|
54
53
|
from flwr.server.superlink.utils import abort_grpc_context
|
|
54
|
+
from flwr.supercore.ffs import FfsFactory
|
|
55
55
|
from flwr.supercore.object_store import ObjectStoreFactory
|
|
56
56
|
|
|
57
57
|
|