flwr 1.23.0__py3-none-any.whl → 1.24.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flwr/__init__.py +16 -5
- flwr/app/error.py +2 -2
- flwr/app/exception.py +3 -3
- flwr/cli/app.py +19 -0
- flwr/cli/app_cmd/__init__.py +23 -0
- flwr/cli/app_cmd/publish.py +285 -0
- flwr/cli/app_cmd/review.py +252 -0
- flwr/cli/auth_plugin/auth_plugin.py +4 -5
- flwr/cli/auth_plugin/noop_auth_plugin.py +54 -11
- flwr/cli/auth_plugin/oidc_cli_plugin.py +32 -9
- flwr/cli/build.py +60 -18
- flwr/cli/cli_account_auth_interceptor.py +24 -7
- flwr/cli/config_utils.py +101 -13
- flwr/cli/federation/__init__.py +24 -0
- flwr/cli/federation/ls.py +140 -0
- flwr/cli/federation/show.py +317 -0
- flwr/cli/install.py +91 -13
- flwr/cli/log.py +52 -9
- flwr/cli/login/login.py +7 -4
- flwr/cli/ls.py +170 -130
- flwr/cli/new/new.py +33 -50
- flwr/cli/new/templates/app/code/task.pytorch.py.tpl +1 -0
- flwr/cli/new/templates/app/pyproject.baseline.toml.tpl +1 -1
- flwr/cli/new/templates/app/pyproject.flowertune.toml.tpl +1 -1
- flwr/cli/new/templates/app/pyproject.huggingface.toml.tpl +1 -1
- flwr/cli/new/templates/app/pyproject.jax.toml.tpl +1 -1
- flwr/cli/new/templates/app/pyproject.mlx.toml.tpl +1 -1
- flwr/cli/new/templates/app/pyproject.numpy.toml.tpl +1 -1
- flwr/cli/new/templates/app/pyproject.pytorch.toml.tpl +3 -3
- flwr/cli/new/templates/app/pyproject.pytorch_legacy_api.toml.tpl +1 -1
- flwr/cli/new/templates/app/pyproject.sklearn.toml.tpl +1 -1
- flwr/cli/new/templates/app/pyproject.tensorflow.toml.tpl +2 -2
- flwr/cli/new/templates/app/pyproject.xgboost.toml.tpl +1 -1
- flwr/cli/pull.py +10 -5
- flwr/cli/run/run.py +77 -30
- flwr/cli/run_utils.py +130 -0
- flwr/cli/stop.py +25 -7
- flwr/cli/supernode/ls.py +16 -8
- flwr/cli/supernode/register.py +9 -4
- flwr/cli/supernode/unregister.py +5 -3
- flwr/cli/utils.py +376 -16
- flwr/client/__init__.py +1 -1
- flwr/client/dpfedavg_numpy_client.py +4 -1
- flwr/client/grpc_adapter_client/connection.py +6 -7
- flwr/client/grpc_rere_client/connection.py +10 -11
- flwr/client/grpc_rere_client/grpc_adapter.py +6 -2
- flwr/client/grpc_rere_client/node_auth_client_interceptor.py +2 -1
- flwr/client/message_handler/message_handler.py +2 -2
- flwr/client/mod/secure_aggregation/secaggplus_mod.py +3 -3
- flwr/client/numpy_client.py +1 -1
- flwr/client/rest_client/connection.py +12 -14
- flwr/client/run_info_store.py +4 -5
- flwr/client/typing.py +1 -1
- flwr/clientapp/client_app.py +9 -10
- flwr/clientapp/mod/centraldp_mods.py +16 -17
- flwr/clientapp/mod/localdp_mod.py +8 -9
- flwr/clientapp/typing.py +1 -1
- flwr/clientapp/utils.py +3 -3
- flwr/common/address.py +1 -2
- flwr/common/args.py +3 -4
- flwr/common/config.py +13 -16
- flwr/common/constant.py +5 -2
- flwr/common/differential_privacy.py +3 -4
- flwr/common/event_log_plugin/event_log_plugin.py +3 -4
- flwr/common/exit/exit.py +15 -2
- flwr/common/exit/exit_code.py +19 -0
- flwr/common/exit/exit_handler.py +6 -2
- flwr/common/exit/signal_handler.py +5 -5
- flwr/common/grpc.py +6 -6
- flwr/common/inflatable_protobuf_utils.py +1 -1
- flwr/common/inflatable_utils.py +38 -21
- flwr/common/logger.py +19 -19
- flwr/common/message.py +4 -4
- flwr/common/object_ref.py +7 -7
- flwr/common/record/array.py +3 -3
- flwr/common/record/arrayrecord.py +18 -30
- flwr/common/record/configrecord.py +3 -3
- flwr/common/record/recorddict.py +5 -5
- flwr/common/record/typeddict.py +9 -2
- flwr/common/recorddict_compat.py +7 -10
- flwr/common/retry_invoker.py +20 -20
- flwr/common/secure_aggregation/ndarrays_arithmetic.py +3 -3
- flwr/common/serde.py +5 -4
- flwr/common/serde_utils.py +2 -2
- flwr/common/telemetry.py +9 -5
- flwr/common/typing.py +52 -37
- flwr/compat/client/app.py +38 -37
- flwr/compat/client/grpc_client/connection.py +11 -11
- flwr/compat/server/app.py +5 -6
- flwr/proto/appio_pb2.py +13 -3
- flwr/proto/appio_pb2.pyi +134 -65
- flwr/proto/appio_pb2_grpc.py +20 -0
- flwr/proto/appio_pb2_grpc.pyi +27 -0
- flwr/proto/clientappio_pb2.py +17 -7
- flwr/proto/clientappio_pb2.pyi +15 -0
- flwr/proto/clientappio_pb2_grpc.py +206 -40
- flwr/proto/clientappio_pb2_grpc.pyi +168 -53
- flwr/proto/control_pb2.py +71 -52
- flwr/proto/control_pb2.pyi +277 -111
- flwr/proto/control_pb2_grpc.py +249 -40
- flwr/proto/control_pb2_grpc.pyi +185 -52
- flwr/proto/error_pb2.py +13 -3
- flwr/proto/error_pb2.pyi +24 -6
- flwr/proto/error_pb2_grpc.py +20 -0
- flwr/proto/error_pb2_grpc.pyi +27 -0
- flwr/proto/fab_pb2.py +14 -4
- flwr/proto/fab_pb2.pyi +59 -31
- flwr/proto/fab_pb2_grpc.py +20 -0
- flwr/proto/fab_pb2_grpc.pyi +27 -0
- flwr/proto/federation_pb2.py +38 -0
- flwr/proto/federation_pb2.pyi +56 -0
- flwr/proto/federation_pb2_grpc.py +24 -0
- flwr/proto/federation_pb2_grpc.pyi +31 -0
- flwr/proto/fleet_pb2.py +14 -4
- flwr/proto/fleet_pb2.pyi +137 -61
- flwr/proto/fleet_pb2_grpc.py +189 -48
- flwr/proto/fleet_pb2_grpc.pyi +175 -61
- flwr/proto/grpcadapter_pb2.py +14 -4
- flwr/proto/grpcadapter_pb2.pyi +38 -16
- flwr/proto/grpcadapter_pb2_grpc.py +35 -4
- flwr/proto/grpcadapter_pb2_grpc.pyi +38 -7
- flwr/proto/heartbeat_pb2.py +17 -7
- flwr/proto/heartbeat_pb2.pyi +51 -22
- flwr/proto/heartbeat_pb2_grpc.py +20 -0
- flwr/proto/heartbeat_pb2_grpc.pyi +27 -0
- flwr/proto/log_pb2.py +13 -3
- flwr/proto/log_pb2.pyi +34 -11
- flwr/proto/log_pb2_grpc.py +20 -0
- flwr/proto/log_pb2_grpc.pyi +27 -0
- flwr/proto/message_pb2.py +15 -5
- flwr/proto/message_pb2.pyi +154 -86
- flwr/proto/message_pb2_grpc.py +20 -0
- flwr/proto/message_pb2_grpc.pyi +27 -0
- flwr/proto/node_pb2.py +15 -5
- flwr/proto/node_pb2.pyi +50 -25
- flwr/proto/node_pb2_grpc.py +20 -0
- flwr/proto/node_pb2_grpc.pyi +27 -0
- flwr/proto/recorddict_pb2.py +13 -3
- flwr/proto/recorddict_pb2.pyi +184 -107
- flwr/proto/recorddict_pb2_grpc.py +20 -0
- flwr/proto/recorddict_pb2_grpc.pyi +27 -0
- flwr/proto/run_pb2.py +40 -31
- flwr/proto/run_pb2.pyi +149 -84
- flwr/proto/run_pb2_grpc.py +20 -0
- flwr/proto/run_pb2_grpc.pyi +27 -0
- flwr/proto/serverappio_pb2.py +13 -3
- flwr/proto/serverappio_pb2.pyi +32 -8
- flwr/proto/serverappio_pb2_grpc.py +246 -65
- flwr/proto/serverappio_pb2_grpc.pyi +221 -85
- flwr/proto/simulationio_pb2.py +16 -8
- flwr/proto/simulationio_pb2.pyi +15 -0
- flwr/proto/simulationio_pb2_grpc.py +162 -41
- flwr/proto/simulationio_pb2_grpc.pyi +149 -55
- flwr/proto/transport_pb2.py +20 -10
- flwr/proto/transport_pb2.pyi +249 -160
- flwr/proto/transport_pb2_grpc.py +35 -4
- flwr/proto/transport_pb2_grpc.pyi +38 -8
- flwr/server/app.py +38 -17
- flwr/server/client_manager.py +4 -5
- flwr/server/client_proxy.py +10 -11
- flwr/server/compat/app.py +4 -5
- flwr/server/compat/app_utils.py +2 -1
- flwr/server/compat/grid_client_proxy.py +10 -12
- flwr/server/compat/legacy_context.py +3 -4
- flwr/server/fleet_event_log_interceptor.py +2 -1
- flwr/server/grid/grid.py +2 -3
- flwr/server/grid/grpc_grid.py +10 -8
- flwr/server/grid/inmemory_grid.py +4 -4
- flwr/server/run_serverapp.py +2 -3
- flwr/server/server.py +34 -39
- flwr/server/server_app.py +7 -8
- flwr/server/server_config.py +1 -2
- flwr/server/serverapp/app.py +34 -28
- flwr/server/serverapp_components.py +4 -5
- flwr/server/strategy/aggregate.py +9 -8
- flwr/server/strategy/bulyan.py +13 -11
- flwr/server/strategy/dp_adaptive_clipping.py +16 -20
- flwr/server/strategy/dp_fixed_clipping.py +12 -17
- flwr/server/strategy/dpfedavg_adaptive.py +3 -4
- flwr/server/strategy/dpfedavg_fixed.py +6 -10
- flwr/server/strategy/fault_tolerant_fedavg.py +14 -13
- flwr/server/strategy/fedadagrad.py +18 -14
- flwr/server/strategy/fedadam.py +16 -14
- flwr/server/strategy/fedavg.py +16 -17
- flwr/server/strategy/fedavg_android.py +15 -15
- flwr/server/strategy/fedavgm.py +21 -18
- flwr/server/strategy/fedmedian.py +2 -3
- flwr/server/strategy/fedopt.py +11 -10
- flwr/server/strategy/fedprox.py +10 -9
- flwr/server/strategy/fedtrimmedavg.py +12 -11
- flwr/server/strategy/fedxgb_bagging.py +13 -11
- flwr/server/strategy/fedxgb_cyclic.py +6 -6
- flwr/server/strategy/fedxgb_nn_avg.py +4 -4
- flwr/server/strategy/fedyogi.py +16 -14
- flwr/server/strategy/krum.py +12 -11
- flwr/server/strategy/qfedavg.py +16 -15
- flwr/server/strategy/strategy.py +6 -9
- flwr/server/superlink/fleet/grpc_adapter/grpc_adapter_servicer.py +2 -1
- flwr/server/superlink/fleet/grpc_bidi/flower_service_servicer.py +1 -2
- flwr/server/superlink/fleet/grpc_bidi/grpc_bridge.py +3 -4
- flwr/server/superlink/fleet/grpc_bidi/grpc_client_proxy.py +10 -12
- flwr/server/superlink/fleet/grpc_bidi/grpc_server.py +1 -3
- flwr/server/superlink/fleet/grpc_rere/fleet_servicer.py +4 -4
- flwr/server/superlink/fleet/grpc_rere/node_auth_server_interceptor.py +3 -2
- flwr/server/superlink/fleet/message_handler/message_handler.py +34 -28
- flwr/server/superlink/fleet/rest_rere/rest_api.py +2 -2
- flwr/server/superlink/fleet/vce/backend/backend.py +1 -1
- flwr/server/superlink/fleet/vce/backend/raybackend.py +5 -5
- flwr/server/superlink/fleet/vce/vce_api.py +15 -9
- flwr/server/superlink/linkstate/in_memory_linkstate.py +115 -150
- flwr/server/superlink/linkstate/linkstate.py +59 -43
- flwr/server/superlink/linkstate/linkstate_factory.py +22 -5
- flwr/server/superlink/linkstate/sqlite_linkstate.py +447 -438
- flwr/server/superlink/linkstate/utils.py +6 -6
- flwr/server/superlink/serverappio/serverappio_grpc.py +1 -2
- flwr/server/superlink/serverappio/serverappio_servicer.py +26 -21
- flwr/server/superlink/simulation/simulationio_grpc.py +1 -2
- flwr/server/superlink/simulation/simulationio_servicer.py +18 -13
- flwr/server/superlink/utils.py +4 -6
- flwr/server/typing.py +1 -1
- flwr/server/utils/tensorboard.py +15 -8
- flwr/server/workflow/default_workflows.py +5 -5
- flwr/server/workflow/secure_aggregation/secagg_workflow.py +2 -4
- flwr/server/workflow/secure_aggregation/secaggplus_workflow.py +8 -8
- flwr/serverapp/strategy/bulyan.py +16 -15
- flwr/serverapp/strategy/dp_adaptive_clipping.py +12 -11
- flwr/serverapp/strategy/dp_fixed_clipping.py +11 -14
- flwr/serverapp/strategy/fedadagrad.py +10 -11
- flwr/serverapp/strategy/fedadam.py +10 -11
- flwr/serverapp/strategy/fedavg.py +9 -10
- flwr/serverapp/strategy/fedavgm.py +17 -16
- flwr/serverapp/strategy/fedmedian.py +2 -2
- flwr/serverapp/strategy/fedopt.py +10 -11
- flwr/serverapp/strategy/fedprox.py +7 -8
- flwr/serverapp/strategy/fedtrimmedavg.py +9 -9
- flwr/serverapp/strategy/fedxgb_bagging.py +3 -3
- flwr/serverapp/strategy/fedxgb_cyclic.py +9 -9
- flwr/serverapp/strategy/fedyogi.py +9 -11
- flwr/serverapp/strategy/krum.py +7 -7
- flwr/serverapp/strategy/multikrum.py +9 -9
- flwr/serverapp/strategy/qfedavg.py +17 -16
- flwr/serverapp/strategy/strategy.py +6 -9
- flwr/serverapp/strategy/strategy_utils.py +7 -8
- flwr/simulation/app.py +46 -42
- flwr/simulation/legacy_app.py +12 -12
- flwr/simulation/ray_transport/ray_actor.py +10 -11
- flwr/simulation/ray_transport/ray_client_proxy.py +11 -12
- flwr/simulation/run_simulation.py +43 -43
- flwr/simulation/simulationio_connection.py +4 -4
- flwr/supercore/cli/flower_superexec.py +3 -4
- flwr/supercore/constant.py +31 -1
- flwr/supercore/corestate/corestate.py +24 -3
- flwr/supercore/corestate/in_memory_corestate.py +138 -0
- flwr/supercore/corestate/sqlite_corestate.py +157 -0
- flwr/supercore/ffs/disk_ffs.py +1 -2
- flwr/supercore/ffs/ffs.py +1 -2
- flwr/supercore/ffs/ffs_factory.py +1 -2
- flwr/{common → supercore}/heartbeat.py +20 -25
- flwr/supercore/object_store/in_memory_object_store.py +1 -2
- flwr/supercore/object_store/object_store.py +1 -2
- flwr/supercore/object_store/object_store_factory.py +1 -2
- flwr/supercore/object_store/sqlite_object_store.py +8 -7
- flwr/supercore/primitives/asymmetric.py +1 -1
- flwr/supercore/primitives/asymmetric_ed25519.py +11 -1
- flwr/supercore/sqlite_mixin.py +37 -34
- flwr/supercore/superexec/plugin/base_exec_plugin.py +1 -2
- flwr/supercore/superexec/plugin/exec_plugin.py +3 -3
- flwr/supercore/superexec/run_superexec.py +9 -13
- flwr/superlink/artifact_provider/artifact_provider.py +1 -2
- flwr/superlink/auth_plugin/auth_plugin.py +6 -9
- flwr/superlink/auth_plugin/noop_auth_plugin.py +6 -9
- flwr/superlink/federation/__init__.py +24 -0
- flwr/superlink/federation/federation_manager.py +64 -0
- flwr/superlink/federation/noop_federation_manager.py +71 -0
- flwr/superlink/servicer/control/control_account_auth_interceptor.py +22 -13
- flwr/superlink/servicer/control/control_event_log_interceptor.py +7 -7
- flwr/superlink/servicer/control/control_grpc.py +5 -6
- flwr/superlink/servicer/control/control_license_interceptor.py +3 -3
- flwr/superlink/servicer/control/control_servicer.py +102 -18
- flwr/supernode/cli/flower_supernode.py +58 -3
- flwr/supernode/nodestate/in_memory_nodestate.py +60 -49
- flwr/supernode/nodestate/nodestate.py +7 -8
- flwr/supernode/nodestate/nodestate_factory.py +7 -4
- flwr/supernode/runtime/run_clientapp.py +41 -22
- flwr/supernode/servicer/clientappio/clientappio_servicer.py +40 -10
- flwr/supernode/start_client_internal.py +158 -42
- {flwr-1.23.0.dist-info → flwr-1.24.0.dist-info}/METADATA +8 -8
- flwr-1.24.0.dist-info/RECORD +454 -0
- flwr/supercore/object_store/utils.py +0 -43
- flwr-1.23.0.dist-info/RECORD +0 -439
- {flwr-1.23.0.dist-info → flwr-1.24.0.dist-info}/WHEEL +0 -0
- {flwr-1.23.0.dist-info → flwr-1.24.0.dist-info}/entry_points.txt +0 -0
|
@@ -15,18 +15,27 @@
|
|
|
15
15
|
"""In-memory NodeState implementation."""
|
|
16
16
|
|
|
17
17
|
|
|
18
|
-
import secrets
|
|
19
18
|
from collections.abc import Sequence
|
|
20
19
|
from dataclasses import dataclass
|
|
21
|
-
from threading import Lock
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
from flwr.common import
|
|
25
|
-
from flwr.common.
|
|
20
|
+
from threading import Lock, RLock
|
|
21
|
+
|
|
22
|
+
from flwr.common import Context, Error, Message
|
|
23
|
+
from flwr.common.constant import ErrorCode
|
|
24
|
+
from flwr.common.inflatable import (
|
|
25
|
+
get_all_nested_objects,
|
|
26
|
+
get_object_tree,
|
|
27
|
+
no_object_id_recompute,
|
|
28
|
+
)
|
|
26
29
|
from flwr.common.typing import Run
|
|
30
|
+
from flwr.supercore.corestate.in_memory_corestate import InMemoryCoreState
|
|
31
|
+
from flwr.supercore.object_store import ObjectStore
|
|
27
32
|
|
|
28
33
|
from .nodestate import NodeState
|
|
29
34
|
|
|
35
|
+
CLIENT_APP_CRASHED_ERROR = Error(
|
|
36
|
+
ErrorCode.CLIENT_APP_CRASHED, "ClientApp stopped responding."
|
|
37
|
+
)
|
|
38
|
+
|
|
30
39
|
|
|
31
40
|
@dataclass
|
|
32
41
|
class MessageEntry:
|
|
@@ -36,27 +45,26 @@ class MessageEntry:
|
|
|
36
45
|
is_retrieved: bool = False
|
|
37
46
|
|
|
38
47
|
|
|
39
|
-
class InMemoryNodeState(
|
|
48
|
+
class InMemoryNodeState(
|
|
49
|
+
NodeState, InMemoryCoreState
|
|
50
|
+
): # pylint: disable=too-many-instance-attributes
|
|
40
51
|
"""In-memory NodeState implementation."""
|
|
41
52
|
|
|
42
|
-
def __init__(self) -> None:
|
|
53
|
+
def __init__(self, object_store: ObjectStore) -> None:
|
|
54
|
+
super().__init__(object_store)
|
|
43
55
|
# Store node_id
|
|
44
|
-
self.node_id:
|
|
56
|
+
self.node_id: int | None = None
|
|
45
57
|
# Store Object ID to MessageEntry mapping
|
|
46
58
|
self.msg_store: dict[str, MessageEntry] = {}
|
|
47
|
-
self.lock_msg_store =
|
|
59
|
+
self.lock_msg_store = RLock()
|
|
48
60
|
# Store run ID to Run mapping
|
|
49
61
|
self.run_store: dict[int, Run] = {}
|
|
50
62
|
self.lock_run_store = Lock()
|
|
51
63
|
# Store run ID to Context mapping
|
|
52
64
|
self.ctx_store: dict[int, Context] = {}
|
|
53
65
|
self.lock_ctx_store = Lock()
|
|
54
|
-
# Store run ID to token mapping and token to run ID mapping
|
|
55
|
-
self.token_store: dict[int, str] = {}
|
|
56
|
-
self.token_to_run_id: dict[str, int] = {}
|
|
57
|
-
self.lock_token_store = Lock()
|
|
58
66
|
|
|
59
|
-
def set_node_id(self, node_id:
|
|
67
|
+
def set_node_id(self, node_id: int | None) -> None:
|
|
60
68
|
"""Set the node ID."""
|
|
61
69
|
self.node_id = node_id
|
|
62
70
|
|
|
@@ -66,8 +74,10 @@ class InMemoryNodeState(NodeState): # pylint: disable=too-many-instance-attribu
|
|
|
66
74
|
raise ValueError("Node ID not set")
|
|
67
75
|
return self.node_id
|
|
68
76
|
|
|
69
|
-
def store_message(self, message: Message) ->
|
|
77
|
+
def store_message(self, message: Message) -> str | None:
|
|
70
78
|
"""Store a message."""
|
|
79
|
+
# No need to check for expired tokens here
|
|
80
|
+
# The ClientAppIo servicer will first verify the token before storing messages
|
|
71
81
|
with self.lock_msg_store:
|
|
72
82
|
msg_id = message.metadata.message_id
|
|
73
83
|
if msg_id == "" or msg_id in self.msg_store:
|
|
@@ -78,13 +88,14 @@ class InMemoryNodeState(NodeState): # pylint: disable=too-many-instance-attribu
|
|
|
78
88
|
def get_messages(
|
|
79
89
|
self,
|
|
80
90
|
*,
|
|
81
|
-
run_ids:
|
|
82
|
-
is_reply:
|
|
83
|
-
limit:
|
|
91
|
+
run_ids: Sequence[int] | None = None,
|
|
92
|
+
is_reply: bool | None = None,
|
|
93
|
+
limit: int | None = None,
|
|
84
94
|
) -> Sequence[Message]:
|
|
85
95
|
"""Retrieve messages based on the specified filters."""
|
|
86
|
-
|
|
96
|
+
self._cleanup_expired_tokens()
|
|
87
97
|
|
|
98
|
+
selected_messages: list[Message] = []
|
|
88
99
|
with self.lock_msg_store:
|
|
89
100
|
# Iterate through all messages in the store
|
|
90
101
|
for object_id in list(self.msg_store.keys()):
|
|
@@ -122,7 +133,7 @@ class InMemoryNodeState(NodeState): # pylint: disable=too-many-instance-attribu
|
|
|
122
133
|
def delete_messages(
|
|
123
134
|
self,
|
|
124
135
|
*,
|
|
125
|
-
message_ids:
|
|
136
|
+
message_ids: Sequence[str] | None = None,
|
|
126
137
|
) -> None:
|
|
127
138
|
"""Delete messages based on the specified filters."""
|
|
128
139
|
with self.lock_msg_store:
|
|
@@ -140,7 +151,7 @@ class InMemoryNodeState(NodeState): # pylint: disable=too-many-instance-attribu
|
|
|
140
151
|
with self.lock_run_store:
|
|
141
152
|
self.run_store[run.run_id] = run
|
|
142
153
|
|
|
143
|
-
def get_run(self, run_id: int) ->
|
|
154
|
+
def get_run(self, run_id: int) -> Run | None:
|
|
144
155
|
"""Retrieve a run by its ID."""
|
|
145
156
|
with self.lock_run_store:
|
|
146
157
|
return self.run_store.get(run_id)
|
|
@@ -150,7 +161,7 @@ class InMemoryNodeState(NodeState): # pylint: disable=too-many-instance-attribu
|
|
|
150
161
|
with self.lock_ctx_store:
|
|
151
162
|
self.ctx_store[context.run_id] = context
|
|
152
163
|
|
|
153
|
-
def get_context(self, run_id: int) ->
|
|
164
|
+
def get_context(self, run_id: int) -> Context | None:
|
|
154
165
|
"""Retrieve a context by its run ID."""
|
|
155
166
|
with self.lock_ctx_store:
|
|
156
167
|
return self.ctx_store.get(run_id)
|
|
@@ -171,29 +182,29 @@ class InMemoryNodeState(NodeState): # pylint: disable=too-many-instance-attribu
|
|
|
171
182
|
ret -= set(self.token_store.keys())
|
|
172
183
|
return list(ret)
|
|
173
184
|
|
|
174
|
-
def
|
|
175
|
-
"""
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
self.
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
185
|
+
def _on_tokens_expired(self, expired_records: list[tuple[int, float]]) -> None:
|
|
186
|
+
"""Insert error replies for messages associated with expired tokens."""
|
|
187
|
+
with self.lock_msg_store:
|
|
188
|
+
# Find all retrieved messages associated with expired run IDs
|
|
189
|
+
expired_run_ids = {run_id for run_id, _ in expired_records}
|
|
190
|
+
messages_to_reply: list[Message] = []
|
|
191
|
+
for entry in self.msg_store.values():
|
|
192
|
+
msg = entry.message
|
|
193
|
+
if msg.metadata.run_id in expired_run_ids and entry.is_retrieved:
|
|
194
|
+
messages_to_reply.append(msg)
|
|
195
|
+
|
|
196
|
+
# Create and store error replies for each message
|
|
197
|
+
for msg in messages_to_reply:
|
|
198
|
+
error_reply = Message(CLIENT_APP_CRASHED_ERROR, reply_to=msg)
|
|
199
|
+
|
|
200
|
+
# Insert objects of the error reply into the object store
|
|
201
|
+
with no_object_id_recompute():
|
|
202
|
+
# pylint: disable-next=W0212
|
|
203
|
+
error_reply.metadata._message_id = error_reply.object_id # type: ignore
|
|
204
|
+
object_tree = get_object_tree(error_reply)
|
|
205
|
+
self.object_store.preregister(msg.metadata.run_id, object_tree)
|
|
206
|
+
for obj_id, obj in get_all_nested_objects(error_reply).items():
|
|
207
|
+
self.object_store.put(obj_id, obj.deflate())
|
|
208
|
+
|
|
209
|
+
# Store the error reply message
|
|
210
|
+
self.store_message(error_reply)
|
|
@@ -17,7 +17,6 @@
|
|
|
17
17
|
|
|
18
18
|
from abc import abstractmethod
|
|
19
19
|
from collections.abc import Sequence
|
|
20
|
-
from typing import Optional
|
|
21
20
|
|
|
22
21
|
from flwr.common import Context, Message
|
|
23
22
|
from flwr.common.typing import Run
|
|
@@ -36,7 +35,7 @@ class NodeState(CoreState):
|
|
|
36
35
|
"""Get the node ID."""
|
|
37
36
|
|
|
38
37
|
@abstractmethod
|
|
39
|
-
def store_message(self, message: Message) ->
|
|
38
|
+
def store_message(self, message: Message) -> str | None:
|
|
40
39
|
"""Store a message.
|
|
41
40
|
|
|
42
41
|
Parameters
|
|
@@ -54,9 +53,9 @@ class NodeState(CoreState):
|
|
|
54
53
|
def get_messages(
|
|
55
54
|
self,
|
|
56
55
|
*,
|
|
57
|
-
run_ids:
|
|
58
|
-
is_reply:
|
|
59
|
-
limit:
|
|
56
|
+
run_ids: Sequence[int] | None = None,
|
|
57
|
+
is_reply: bool | None = None,
|
|
58
|
+
limit: int | None = None,
|
|
60
59
|
) -> Sequence[Message]:
|
|
61
60
|
"""Retrieve messages based on the specified filters.
|
|
62
61
|
|
|
@@ -89,7 +88,7 @@ class NodeState(CoreState):
|
|
|
89
88
|
def delete_messages(
|
|
90
89
|
self,
|
|
91
90
|
*,
|
|
92
|
-
message_ids:
|
|
91
|
+
message_ids: Sequence[str] | None = None,
|
|
93
92
|
) -> None:
|
|
94
93
|
"""Delete messages based on the specified filters.
|
|
95
94
|
|
|
@@ -118,7 +117,7 @@ class NodeState(CoreState):
|
|
|
118
117
|
"""
|
|
119
118
|
|
|
120
119
|
@abstractmethod
|
|
121
|
-
def get_run(self, run_id: int) ->
|
|
120
|
+
def get_run(self, run_id: int) -> Run | None:
|
|
122
121
|
"""Retrieve a run by its ID.
|
|
123
122
|
|
|
124
123
|
Parameters
|
|
@@ -143,7 +142,7 @@ class NodeState(CoreState):
|
|
|
143
142
|
"""
|
|
144
143
|
|
|
145
144
|
@abstractmethod
|
|
146
|
-
def get_context(self, run_id: int) ->
|
|
145
|
+
def get_context(self, run_id: int) -> Context | None:
|
|
147
146
|
"""Retrieve a context by its run ID.
|
|
148
147
|
|
|
149
148
|
Parameters
|
|
@@ -16,7 +16,8 @@
|
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
import threading
|
|
19
|
-
|
|
19
|
+
|
|
20
|
+
from flwr.supercore.object_store import ObjectStoreFactory
|
|
20
21
|
|
|
21
22
|
from .in_memory_nodestate import InMemoryNodeState
|
|
22
23
|
from .nodestate import NodeState
|
|
@@ -25,8 +26,9 @@ from .nodestate import NodeState
|
|
|
25
26
|
class NodeStateFactory:
|
|
26
27
|
"""Factory class that creates NodeState instances."""
|
|
27
28
|
|
|
28
|
-
def __init__(self) -> None:
|
|
29
|
-
self.
|
|
29
|
+
def __init__(self, objectstore_factory: ObjectStoreFactory) -> None:
|
|
30
|
+
self.objectstore_factory = objectstore_factory
|
|
31
|
+
self.state_instance: NodeState | None = None
|
|
30
32
|
self.lock = threading.RLock()
|
|
31
33
|
|
|
32
34
|
def state(self) -> NodeState:
|
|
@@ -34,5 +36,6 @@ class NodeStateFactory:
|
|
|
34
36
|
# Lock access to NodeStateFactory to prevent returning different instances
|
|
35
37
|
with self.lock:
|
|
36
38
|
if self.state_instance is None:
|
|
37
|
-
|
|
39
|
+
object_store = self.objectstore_factory.store()
|
|
40
|
+
self.state_instance = InMemoryNodeState(object_store)
|
|
38
41
|
return self.state_instance
|
|
@@ -15,9 +15,7 @@
|
|
|
15
15
|
"""Flower ClientApp process."""
|
|
16
16
|
|
|
17
17
|
|
|
18
|
-
import gc
|
|
19
18
|
from logging import DEBUG, ERROR, INFO
|
|
20
|
-
from typing import Optional
|
|
21
19
|
|
|
22
20
|
import grpc
|
|
23
21
|
|
|
@@ -28,6 +26,7 @@ from flwr.clientapp.utils import get_load_client_app_fn
|
|
|
28
26
|
from flwr.common import Context, Message
|
|
29
27
|
from flwr.common.config import get_flwr_dir
|
|
30
28
|
from flwr.common.constant import ErrorCode
|
|
29
|
+
from flwr.common.exit import ExitCode, flwr_exit, register_signal_handlers
|
|
31
30
|
from flwr.common.grpc import create_channel, on_channel_state_change
|
|
32
31
|
from flwr.common.inflatable import (
|
|
33
32
|
get_all_nested_objects,
|
|
@@ -50,6 +49,7 @@ from flwr.common.serde import (
|
|
|
50
49
|
message_to_proto,
|
|
51
50
|
run_from_proto,
|
|
52
51
|
)
|
|
52
|
+
from flwr.common.telemetry import EventType, event
|
|
53
53
|
from flwr.common.typing import Fab, Run
|
|
54
54
|
from flwr.proto.appio_pb2 import ( # pylint: disable=E0611
|
|
55
55
|
PullAppInputsRequest,
|
|
@@ -63,27 +63,41 @@ from flwr.proto.appio_pb2 import ( # pylint: disable=E0611
|
|
|
63
63
|
from flwr.proto.clientappio_pb2_grpc import ClientAppIoStub
|
|
64
64
|
from flwr.proto.node_pb2 import Node # pylint: disable=E0611
|
|
65
65
|
from flwr.supercore.app_utils import start_parent_process_monitor
|
|
66
|
+
from flwr.supercore.heartbeat import HeartbeatSender, make_app_heartbeat_fn_grpc
|
|
66
67
|
from flwr.supercore.utils import mask_string
|
|
67
68
|
|
|
68
69
|
|
|
69
70
|
def run_clientapp( # pylint: disable=R0913, R0914, R0917
|
|
70
71
|
clientappio_api_address: str,
|
|
71
72
|
token: str,
|
|
72
|
-
flwr_dir:
|
|
73
|
-
certificates:
|
|
74
|
-
parent_pid:
|
|
73
|
+
flwr_dir: str | None = None,
|
|
74
|
+
certificates: bytes | None = None,
|
|
75
|
+
parent_pid: int | None = None,
|
|
75
76
|
) -> None:
|
|
76
77
|
"""Run Flower ClientApp process."""
|
|
77
78
|
# Monitor the main process in case of SIGKILL
|
|
78
79
|
if parent_pid is not None:
|
|
79
80
|
start_parent_process_monitor(parent_pid)
|
|
80
81
|
|
|
82
|
+
event(EventType.FLWR_CLIENTAPP_RUN_ENTER)
|
|
83
|
+
|
|
81
84
|
channel = create_channel(
|
|
82
85
|
server_address=clientappio_api_address,
|
|
83
86
|
insecure=(certificates is None),
|
|
84
87
|
root_certificates=certificates,
|
|
85
88
|
)
|
|
86
89
|
channel.subscribe(on_channel_state_change)
|
|
90
|
+
heartbeat_sender = None
|
|
91
|
+
|
|
92
|
+
def on_exit() -> None:
|
|
93
|
+
if heartbeat_sender is not None and heartbeat_sender.is_running:
|
|
94
|
+
heartbeat_sender.stop()
|
|
95
|
+
channel.close()
|
|
96
|
+
|
|
97
|
+
register_signal_handlers(
|
|
98
|
+
event_type=EventType.FLWR_CLIENTAPP_RUN_LEAVE,
|
|
99
|
+
exit_handlers=[on_exit],
|
|
100
|
+
)
|
|
87
101
|
|
|
88
102
|
# Resolve directory where FABs are installed
|
|
89
103
|
flwr_dir_ = get_flwr_dir(flwr_dir)
|
|
@@ -91,22 +105,27 @@ def run_clientapp( # pylint: disable=R0913, R0914, R0917
|
|
|
91
105
|
stub = ClientAppIoStub(channel)
|
|
92
106
|
_wrap_stub(stub, _make_simple_grpc_retry_invoker())
|
|
93
107
|
|
|
108
|
+
# Start app heartbeat
|
|
109
|
+
heartbeat_sender = HeartbeatSender(make_app_heartbeat_fn_grpc(stub, token))
|
|
110
|
+
heartbeat_sender.start()
|
|
111
|
+
|
|
94
112
|
# Pull Message, Context, Run and (optional) FAB from SuperNode
|
|
95
113
|
message, context, run, fab = pull_clientappinputs(stub=stub, token=token)
|
|
96
114
|
|
|
97
|
-
|
|
98
|
-
if fab:
|
|
99
|
-
log(DEBUG, "[flwr-clientapp] Start FAB installation.")
|
|
100
|
-
install_from_fab(fab.content, flwr_dir=flwr_dir_, skip_prompt=True)
|
|
115
|
+
try:
|
|
101
116
|
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
117
|
+
# Install FAB, if provided
|
|
118
|
+
if fab:
|
|
119
|
+
log(DEBUG, "[flwr-clientapp] Start FAB installation.")
|
|
120
|
+
install_from_fab(fab.content, flwr_dir=flwr_dir_, skip_prompt=True)
|
|
121
|
+
|
|
122
|
+
load_client_app_fn = get_load_client_app_fn(
|
|
123
|
+
default_app_ref="",
|
|
124
|
+
app_path=None,
|
|
125
|
+
multi_app=True,
|
|
126
|
+
flwr_dir=str(flwr_dir_),
|
|
127
|
+
)
|
|
108
128
|
|
|
109
|
-
try:
|
|
110
129
|
# Load ClientApp
|
|
111
130
|
log(DEBUG, "[flwr-clientapp] Start `ClientApp` Loading.")
|
|
112
131
|
client_app: ClientApp = load_client_app_fn(
|
|
@@ -137,18 +156,18 @@ def run_clientapp( # pylint: disable=R0913, R0914, R0917
|
|
|
137
156
|
stub=stub, token=token, message=reply_message, context=context
|
|
138
157
|
)
|
|
139
158
|
|
|
140
|
-
del client_app, message, context, run, fab, reply_message
|
|
141
|
-
gc.collect()
|
|
142
|
-
|
|
143
159
|
except grpc.RpcError as e:
|
|
144
160
|
log(ERROR, "GRPC error occurred: %s", str(e))
|
|
145
|
-
|
|
146
|
-
|
|
161
|
+
|
|
162
|
+
flwr_exit(
|
|
163
|
+
code=ExitCode.SUCCESS,
|
|
164
|
+
event_type=EventType.FLWR_CLIENTAPP_RUN_LEAVE,
|
|
165
|
+
)
|
|
147
166
|
|
|
148
167
|
|
|
149
168
|
def pull_clientappinputs(
|
|
150
169
|
stub: ClientAppIoStub, token: str
|
|
151
|
-
) -> tuple[Message, Context, Run,
|
|
170
|
+
) -> tuple[Message, Context, Run, Fab | None]:
|
|
152
171
|
"""Pull ClientAppInputs from SuperNode."""
|
|
153
172
|
masked_token = mask_string(token)
|
|
154
173
|
log(INFO, "[flwr-clientapp] Pull `ClientAppInputs` for token %s", masked_token)
|
|
@@ -35,7 +35,7 @@ from flwr.common.typing import Fab, Run
|
|
|
35
35
|
|
|
36
36
|
# pylint: disable=E0611
|
|
37
37
|
from flwr.proto import clientappio_pb2_grpc
|
|
38
|
-
from flwr.proto.appio_pb2 import (
|
|
38
|
+
from flwr.proto.appio_pb2 import (
|
|
39
39
|
ListAppsToLaunchRequest,
|
|
40
40
|
ListAppsToLaunchResponse,
|
|
41
41
|
PullAppInputsRequest,
|
|
@@ -49,6 +49,7 @@ from flwr.proto.appio_pb2 import ( # pylint: disable=E0401
|
|
|
49
49
|
RequestTokenRequest,
|
|
50
50
|
RequestTokenResponse,
|
|
51
51
|
)
|
|
52
|
+
from flwr.proto.heartbeat_pb2 import SendAppHeartbeatRequest, SendAppHeartbeatResponse
|
|
52
53
|
from flwr.proto.message_pb2 import (
|
|
53
54
|
ConfirmMessageReceivedRequest,
|
|
54
55
|
ConfirmMessageReceivedResponse,
|
|
@@ -57,12 +58,11 @@ from flwr.proto.message_pb2 import (
|
|
|
57
58
|
PushObjectRequest,
|
|
58
59
|
PushObjectResponse,
|
|
59
60
|
)
|
|
60
|
-
from flwr.proto.run_pb2 import GetRunRequest, GetRunResponse
|
|
61
|
+
from flwr.proto.run_pb2 import GetRunRequest, GetRunResponse
|
|
61
62
|
|
|
62
63
|
# pylint: disable=E0601
|
|
63
64
|
from flwr.supercore.ffs import FfsFactory
|
|
64
65
|
from flwr.supercore.object_store import NoObjectInStoreError, ObjectStoreFactory
|
|
65
|
-
from flwr.supercore.object_store.utils import store_mapping_and_register_objects
|
|
66
66
|
from flwr.supernode.nodestate import NodeStateFactory
|
|
67
67
|
|
|
68
68
|
|
|
@@ -151,7 +151,24 @@ class ClientAppIoServicer(clientappio_pb2_grpc.ClientAppIoServicer):
|
|
|
151
151
|
# Retrieve context, run and fab for this run
|
|
152
152
|
context = cast(Context, state.get_context(run_id))
|
|
153
153
|
run = cast(Run, state.get_run(run_id))
|
|
154
|
-
|
|
154
|
+
|
|
155
|
+
# Retrieve FAB from FFS
|
|
156
|
+
if result := ffs.get(run.fab_hash):
|
|
157
|
+
content, verifications = result
|
|
158
|
+
log(
|
|
159
|
+
DEBUG,
|
|
160
|
+
"Retrieved FAB: hash=%s, content_len=%d, verifications=%s",
|
|
161
|
+
run.fab_hash,
|
|
162
|
+
len(content),
|
|
163
|
+
verifications,
|
|
164
|
+
)
|
|
165
|
+
fab = Fab(run.fab_hash, content, verifications)
|
|
166
|
+
else:
|
|
167
|
+
context.abort(
|
|
168
|
+
grpc.StatusCode.NOT_FOUND,
|
|
169
|
+
f"FAB with hash {run.fab_hash} not found in FFS.",
|
|
170
|
+
)
|
|
171
|
+
raise RuntimeError("This line should never be reached.")
|
|
155
172
|
|
|
156
173
|
return PullAppInputsResponse(
|
|
157
174
|
context=context_to_proto(context),
|
|
@@ -231,19 +248,32 @@ class ClientAppIoServicer(clientappio_pb2_grpc.ClientAppIoServicer):
|
|
|
231
248
|
)
|
|
232
249
|
raise RuntimeError("This line should never be reached.")
|
|
233
250
|
|
|
251
|
+
# Store Message object to descendants mapping and preregister objects
|
|
252
|
+
objects_to_push: set[str] = set()
|
|
253
|
+
for object_tree in request.message_object_trees:
|
|
254
|
+
objects_to_push |= set(store.preregister(run_id, object_tree))
|
|
234
255
|
# Save the message to the state
|
|
235
256
|
state.store_message(message_from_proto(request.messages_list[0]))
|
|
236
257
|
|
|
237
|
-
# Store Message object to descendants mapping and preregister objects
|
|
238
|
-
objects_to_push = store_mapping_and_register_objects(store, request=request)
|
|
239
|
-
|
|
240
258
|
return PushAppMessagesResponse(objects_to_push=objects_to_push)
|
|
241
259
|
|
|
260
|
+
def SendAppHeartbeat(
|
|
261
|
+
self, request: SendAppHeartbeatRequest, context: grpc.ServicerContext
|
|
262
|
+
) -> SendAppHeartbeatResponse:
|
|
263
|
+
"""Handle a heartbeat from an app process."""
|
|
264
|
+
log(DEBUG, "ClientAppIoServicer.SendAppHeartbeat")
|
|
265
|
+
# Initialize state
|
|
266
|
+
state = self.state_factory.state()
|
|
267
|
+
|
|
268
|
+
# Acknowledge the heartbeat
|
|
269
|
+
success = state.acknowledge_app_heartbeat(request.token)
|
|
270
|
+
return SendAppHeartbeatResponse(success=success)
|
|
271
|
+
|
|
242
272
|
def PushObject(
|
|
243
273
|
self, request: PushObjectRequest, context: grpc.ServicerContext
|
|
244
274
|
) -> PushObjectResponse:
|
|
245
275
|
"""Push an object to the ObjectStore."""
|
|
246
|
-
log(DEBUG, "
|
|
276
|
+
log(DEBUG, "ClientAppIoServicer.PushObject")
|
|
247
277
|
|
|
248
278
|
# Init state and store
|
|
249
279
|
store = self.objectstore_factory.store()
|
|
@@ -265,7 +295,7 @@ class ClientAppIoServicer(clientappio_pb2_grpc.ClientAppIoServicer):
|
|
|
265
295
|
self, request: PullObjectRequest, context: grpc.ServicerContext
|
|
266
296
|
) -> PullObjectResponse:
|
|
267
297
|
"""Pull an object from the ObjectStore."""
|
|
268
|
-
log(DEBUG, "
|
|
298
|
+
log(DEBUG, "ClientAppIoServicer.PullObject")
|
|
269
299
|
|
|
270
300
|
# Init state and store
|
|
271
301
|
store = self.objectstore_factory.store()
|
|
@@ -285,7 +315,7 @@ class ClientAppIoServicer(clientappio_pb2_grpc.ClientAppIoServicer):
|
|
|
285
315
|
self, request: ConfirmMessageReceivedRequest, context: grpc.ServicerContext
|
|
286
316
|
) -> ConfirmMessageReceivedResponse:
|
|
287
317
|
"""Confirm message received."""
|
|
288
|
-
log(DEBUG, "
|
|
318
|
+
log(DEBUG, "ClientAppIoServicer.ConfirmMessageReceived")
|
|
289
319
|
|
|
290
320
|
# Init state and store
|
|
291
321
|
store = self.objectstore_factory.store()
|