flwr 1.19.0__py3-none-any.whl → 1.21.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flwr/__init__.py +4 -1
- flwr/app/__init__.py +28 -0
- flwr/app/exception.py +31 -0
- flwr/cli/auth_plugin/oidc_cli_plugin.py +4 -4
- flwr/cli/build.py +15 -5
- flwr/cli/cli_user_auth_interceptor.py +1 -1
- flwr/cli/config_utils.py +3 -3
- flwr/cli/constant.py +25 -8
- flwr/cli/log.py +9 -9
- flwr/cli/login/login.py +3 -3
- flwr/cli/ls.py +5 -5
- flwr/cli/new/new.py +23 -4
- flwr/cli/new/templates/app/README.flowertune.md.tpl +2 -0
- flwr/cli/new/templates/app/README.md.tpl +5 -0
- flwr/cli/new/templates/app/code/__init__.pytorch_msg_api.py.tpl +1 -0
- flwr/cli/new/templates/app/code/client.pytorch_msg_api.py.tpl +80 -0
- flwr/cli/new/templates/app/code/server.pytorch_msg_api.py.tpl +41 -0
- flwr/cli/new/templates/app/code/task.pytorch_msg_api.py.tpl +98 -0
- flwr/cli/new/templates/app/pyproject.baseline.toml.tpl +14 -3
- flwr/cli/new/templates/app/pyproject.flowertune.toml.tpl +13 -1
- flwr/cli/new/templates/app/pyproject.huggingface.toml.tpl +21 -2
- flwr/cli/new/templates/app/pyproject.jax.toml.tpl +18 -1
- flwr/cli/new/templates/app/pyproject.mlx.toml.tpl +19 -2
- flwr/cli/new/templates/app/pyproject.numpy.toml.tpl +18 -1
- flwr/cli/new/templates/app/pyproject.pytorch.toml.tpl +20 -3
- flwr/cli/new/templates/app/pyproject.pytorch_msg_api.toml.tpl +53 -0
- flwr/cli/new/templates/app/pyproject.sklearn.toml.tpl +18 -1
- flwr/cli/new/templates/app/pyproject.tensorflow.toml.tpl +18 -1
- flwr/cli/run/run.py +53 -50
- flwr/cli/stop.py +7 -4
- flwr/cli/utils.py +29 -11
- flwr/client/grpc_adapter_client/connection.py +11 -4
- flwr/client/grpc_rere_client/connection.py +93 -129
- flwr/client/rest_client/connection.py +134 -164
- flwr/clientapp/__init__.py +10 -0
- flwr/clientapp/mod/__init__.py +26 -0
- flwr/clientapp/mod/centraldp_mods.py +132 -0
- flwr/common/args.py +20 -6
- flwr/common/auth_plugin/__init__.py +4 -4
- flwr/common/auth_plugin/auth_plugin.py +7 -7
- flwr/common/constant.py +26 -5
- flwr/common/event_log_plugin/event_log_plugin.py +1 -1
- flwr/common/exit/__init__.py +4 -0
- flwr/common/exit/exit.py +8 -1
- flwr/common/exit/exit_code.py +42 -8
- flwr/common/exit/exit_handler.py +62 -0
- flwr/common/{exit_handlers.py → exit/signal_handler.py} +20 -37
- flwr/common/grpc.py +1 -1
- flwr/common/{inflatable_grpc_utils.py → inflatable_protobuf_utils.py} +52 -10
- flwr/common/inflatable_utils.py +191 -24
- flwr/common/logger.py +1 -1
- flwr/common/record/array.py +101 -22
- flwr/common/record/arraychunk.py +59 -0
- flwr/common/retry_invoker.py +30 -11
- flwr/common/serde.py +0 -28
- flwr/common/telemetry.py +4 -0
- flwr/compat/client/app.py +14 -31
- flwr/compat/server/app.py +2 -2
- flwr/proto/appio_pb2.py +51 -0
- flwr/proto/appio_pb2.pyi +195 -0
- flwr/proto/appio_pb2_grpc.py +4 -0
- flwr/proto/appio_pb2_grpc.pyi +4 -0
- flwr/proto/clientappio_pb2.py +4 -19
- flwr/proto/clientappio_pb2.pyi +0 -125
- flwr/proto/clientappio_pb2_grpc.py +269 -29
- flwr/proto/clientappio_pb2_grpc.pyi +114 -21
- flwr/proto/control_pb2.py +62 -0
- flwr/proto/{exec_pb2_grpc.py → control_pb2_grpc.py} +54 -54
- flwr/proto/{exec_pb2_grpc.pyi → control_pb2_grpc.pyi} +28 -28
- flwr/proto/fleet_pb2.py +12 -20
- flwr/proto/fleet_pb2.pyi +6 -36
- flwr/proto/serverappio_pb2.py +8 -31
- flwr/proto/serverappio_pb2.pyi +0 -152
- flwr/proto/serverappio_pb2_grpc.py +107 -38
- flwr/proto/serverappio_pb2_grpc.pyi +47 -20
- flwr/proto/simulationio_pb2.py +4 -11
- flwr/proto/simulationio_pb2.pyi +0 -58
- flwr/proto/simulationio_pb2_grpc.py +129 -27
- flwr/proto/simulationio_pb2_grpc.pyi +52 -13
- flwr/server/app.py +130 -153
- flwr/server/fleet_event_log_interceptor.py +4 -0
- flwr/server/grid/grpc_grid.py +94 -54
- flwr/server/grid/inmemory_grid.py +1 -0
- flwr/server/serverapp/app.py +165 -144
- flwr/server/superlink/fleet/grpc_adapter/grpc_adapter_servicer.py +8 -0
- flwr/server/superlink/fleet/grpc_rere/fleet_servicer.py +1 -1
- flwr/server/superlink/fleet/grpc_rere/server_interceptor.py +2 -5
- flwr/server/superlink/fleet/message_handler/message_handler.py +10 -16
- flwr/server/superlink/fleet/rest_rere/rest_api.py +1 -2
- flwr/server/superlink/fleet/vce/backend/raybackend.py +3 -1
- flwr/server/superlink/fleet/vce/vce_api.py +6 -6
- flwr/server/superlink/linkstate/in_memory_linkstate.py +34 -0
- flwr/server/superlink/linkstate/linkstate.py +2 -1
- flwr/server/superlink/linkstate/sqlite_linkstate.py +45 -0
- flwr/server/superlink/serverappio/serverappio_grpc.py +2 -2
- flwr/server/superlink/serverappio/serverappio_servicer.py +95 -48
- flwr/server/superlink/simulation/simulationio_grpc.py +1 -1
- flwr/server/superlink/simulation/simulationio_servicer.py +98 -22
- flwr/server/superlink/utils.py +0 -35
- flwr/serverapp/__init__.py +12 -0
- flwr/serverapp/dp_fixed_clipping.py +352 -0
- flwr/serverapp/exception.py +38 -0
- flwr/serverapp/strategy/__init__.py +38 -0
- flwr/serverapp/strategy/dp_fixed_clipping.py +352 -0
- flwr/serverapp/strategy/fedadagrad.py +162 -0
- flwr/serverapp/strategy/fedadam.py +181 -0
- flwr/serverapp/strategy/fedavg.py +295 -0
- flwr/serverapp/strategy/fedopt.py +218 -0
- flwr/serverapp/strategy/fedyogi.py +173 -0
- flwr/serverapp/strategy/result.py +105 -0
- flwr/serverapp/strategy/strategy.py +285 -0
- flwr/serverapp/strategy/strategy_utils.py +251 -0
- flwr/serverapp/strategy/strategy_utils_tests.py +304 -0
- flwr/simulation/app.py +159 -154
- flwr/simulation/run_simulation.py +17 -0
- flwr/supercore/app_utils.py +58 -0
- flwr/supercore/cli/__init__.py +22 -0
- flwr/supercore/cli/flower_superexec.py +141 -0
- flwr/supercore/corestate/__init__.py +22 -0
- flwr/supercore/corestate/corestate.py +81 -0
- flwr/{server/superlink → supercore}/ffs/disk_ffs.py +1 -1
- flwr/supercore/grpc_health/__init__.py +25 -0
- flwr/supercore/grpc_health/health_server.py +53 -0
- flwr/supercore/grpc_health/simple_health_servicer.py +38 -0
- flwr/supercore/license_plugin/__init__.py +22 -0
- flwr/supercore/license_plugin/license_plugin.py +26 -0
- flwr/supercore/object_store/in_memory_object_store.py +31 -31
- flwr/supercore/object_store/object_store.py +20 -42
- flwr/supercore/object_store/utils.py +43 -0
- flwr/{superexec → supercore/superexec}/__init__.py +1 -1
- flwr/supercore/superexec/plugin/__init__.py +28 -0
- flwr/supercore/superexec/plugin/base_exec_plugin.py +53 -0
- flwr/supercore/superexec/plugin/clientapp_exec_plugin.py +28 -0
- flwr/supercore/superexec/plugin/exec_plugin.py +71 -0
- flwr/supercore/superexec/plugin/serverapp_exec_plugin.py +28 -0
- flwr/supercore/superexec/plugin/simulation_exec_plugin.py +28 -0
- flwr/supercore/superexec/run_superexec.py +185 -0
- flwr/supercore/utils.py +32 -0
- flwr/superlink/servicer/__init__.py +15 -0
- flwr/superlink/servicer/control/__init__.py +22 -0
- flwr/{superexec/exec_event_log_interceptor.py → superlink/servicer/control/control_event_log_interceptor.py} +9 -5
- flwr/{superexec/exec_grpc.py → superlink/servicer/control/control_grpc.py} +39 -28
- flwr/superlink/servicer/control/control_license_interceptor.py +82 -0
- flwr/{superexec/exec_servicer.py → superlink/servicer/control/control_servicer.py} +79 -31
- flwr/{superexec/exec_user_auth_interceptor.py → superlink/servicer/control/control_user_auth_interceptor.py} +18 -10
- flwr/supernode/cli/flower_supernode.py +3 -7
- flwr/supernode/cli/flwr_clientapp.py +20 -16
- flwr/supernode/nodestate/in_memory_nodestate.py +13 -4
- flwr/supernode/nodestate/nodestate.py +3 -44
- flwr/supernode/runtime/run_clientapp.py +129 -115
- flwr/supernode/servicer/clientappio/__init__.py +1 -3
- flwr/supernode/servicer/clientappio/clientappio_servicer.py +217 -165
- flwr/supernode/start_client_internal.py +205 -148
- {flwr-1.19.0.dist-info → flwr-1.21.0.dist-info}/METADATA +5 -3
- {flwr-1.19.0.dist-info → flwr-1.21.0.dist-info}/RECORD +161 -117
- {flwr-1.19.0.dist-info → flwr-1.21.0.dist-info}/entry_points.txt +1 -0
- flwr/common/inflatable_rest_utils.py +0 -99
- flwr/proto/exec_pb2.py +0 -62
- flwr/superexec/app.py +0 -45
- flwr/superexec/deployment.py +0 -192
- flwr/superexec/executor.py +0 -100
- flwr/superexec/simulation.py +0 -130
- /flwr/proto/{exec_pb2.pyi → control_pb2.pyi} +0 -0
- /flwr/{server/superlink → supercore}/ffs/__init__.py +0 -0
- /flwr/{server/superlink → supercore}/ffs/ffs.py +0 -0
- /flwr/{server/superlink → supercore}/ffs/ffs_factory.py +0 -0
- {flwr-1.19.0.dist-info → flwr-1.21.0.dist-info}/WHEEL +0 -0
|
@@ -20,8 +20,8 @@ import subprocess
|
|
|
20
20
|
import time
|
|
21
21
|
from collections.abc import Iterator
|
|
22
22
|
from contextlib import contextmanager
|
|
23
|
-
from
|
|
24
|
-
from
|
|
23
|
+
from functools import partial
|
|
24
|
+
from logging import INFO
|
|
25
25
|
from pathlib import Path
|
|
26
26
|
from typing import Callable, Optional, Union, cast
|
|
27
27
|
|
|
@@ -35,27 +35,32 @@ from flwr.common import GRPC_MAX_MESSAGE_LENGTH, Context, Message, RecordDict
|
|
|
35
35
|
from flwr.common.address import parse_address
|
|
36
36
|
from flwr.common.config import get_flwr_dir, get_fused_config_from_fab
|
|
37
37
|
from flwr.common.constant import (
|
|
38
|
-
CLIENT_OCTET,
|
|
39
38
|
CLIENTAPPIO_API_DEFAULT_SERVER_ADDRESS,
|
|
40
39
|
ISOLATION_MODE_SUBPROCESS,
|
|
41
|
-
MAX_RETRY_DELAY,
|
|
42
|
-
RUN_ID_NUM_BYTES,
|
|
43
|
-
SERVER_OCTET,
|
|
44
40
|
TRANSPORT_TYPE_GRPC_ADAPTER,
|
|
45
41
|
TRANSPORT_TYPE_GRPC_RERE,
|
|
46
42
|
TRANSPORT_TYPE_REST,
|
|
47
43
|
TRANSPORT_TYPES,
|
|
44
|
+
ExecPluginType,
|
|
48
45
|
)
|
|
49
|
-
from flwr.common.exit import ExitCode, flwr_exit
|
|
46
|
+
from flwr.common.exit import ExitCode, flwr_exit, register_signal_handlers
|
|
50
47
|
from flwr.common.grpc import generic_create_grpc_server
|
|
48
|
+
from flwr.common.inflatable import iterate_object_tree
|
|
49
|
+
from flwr.common.inflatable_utils import (
|
|
50
|
+
pull_objects,
|
|
51
|
+
push_object_contents_from_iterable,
|
|
52
|
+
)
|
|
51
53
|
from flwr.common.logger import log
|
|
52
|
-
from flwr.common.retry_invoker import RetryInvoker,
|
|
54
|
+
from flwr.common.retry_invoker import RetryInvoker, _make_simple_grpc_retry_invoker
|
|
55
|
+
from flwr.common.telemetry import EventType
|
|
53
56
|
from flwr.common.typing import Fab, Run, RunNotRunningException, UserConfig
|
|
54
57
|
from flwr.proto.clientappio_pb2_grpc import add_ClientAppIoServicer_to_server
|
|
55
|
-
from flwr.
|
|
58
|
+
from flwr.proto.message_pb2 import ObjectTree # pylint: disable=E0611
|
|
59
|
+
from flwr.supercore.ffs import Ffs, FfsFactory
|
|
60
|
+
from flwr.supercore.grpc_health import run_health_server_grpc_no_tls
|
|
56
61
|
from flwr.supercore.object_store import ObjectStore, ObjectStoreFactory
|
|
57
62
|
from flwr.supernode.nodestate import NodeState, NodeStateFactory
|
|
58
|
-
from flwr.supernode.servicer.clientappio import
|
|
63
|
+
from flwr.supernode.servicer.clientappio import ClientAppIoServicer
|
|
59
64
|
|
|
60
65
|
DEFAULT_FFS_DIR = get_flwr_dir() / "supernode" / "ffs"
|
|
61
66
|
|
|
@@ -80,6 +85,7 @@ def start_client_internal(
|
|
|
80
85
|
flwr_path: Optional[Path] = None,
|
|
81
86
|
isolation: str = ISOLATION_MODE_SUBPROCESS,
|
|
82
87
|
clientappio_api_address: str = CLIENTAPPIO_API_DEFAULT_SERVER_ADDRESS,
|
|
88
|
+
health_server_address: Optional[str] = None,
|
|
83
89
|
) -> None:
|
|
84
90
|
"""Start a Flower client node which connects to a Flower server.
|
|
85
91
|
|
|
@@ -128,25 +134,55 @@ def start_client_internal(
|
|
|
128
134
|
clientappio_api_address : str
|
|
129
135
|
(default: `CLIENTAPPIO_API_DEFAULT_SERVER_ADDRESS`)
|
|
130
136
|
The SuperNode gRPC server address.
|
|
137
|
+
health_server_address : Optional[str] (default: None)
|
|
138
|
+
The address of the health server. If `None` is provided, the health server will
|
|
139
|
+
NOT be started.
|
|
131
140
|
"""
|
|
132
141
|
if insecure is None:
|
|
133
142
|
insecure = root_certificates is None
|
|
134
143
|
|
|
135
|
-
_clientappio_grpc_server, clientappio_servicer = run_clientappio_api_grpc(
|
|
136
|
-
address=clientappio_api_address,
|
|
137
|
-
certificates=None,
|
|
138
|
-
)
|
|
139
|
-
|
|
140
144
|
# Initialize factories
|
|
141
145
|
state_factory = NodeStateFactory()
|
|
142
146
|
ffs_factory = FfsFactory(get_flwr_dir(flwr_path) / "supernode" / "ffs") # type: ignore
|
|
143
147
|
object_store_factory = ObjectStoreFactory()
|
|
144
148
|
|
|
149
|
+
# Launch ClientAppIo API server
|
|
150
|
+
grpc_servers = []
|
|
151
|
+
clientappio_server = run_clientappio_api_grpc(
|
|
152
|
+
address=clientappio_api_address,
|
|
153
|
+
state_factory=state_factory,
|
|
154
|
+
ffs_factory=ffs_factory,
|
|
155
|
+
objectstore_factory=object_store_factory,
|
|
156
|
+
certificates=None,
|
|
157
|
+
)
|
|
158
|
+
grpc_servers.append(clientappio_server)
|
|
159
|
+
|
|
160
|
+
# Launch gRPC health server
|
|
161
|
+
if health_server_address is not None:
|
|
162
|
+
health_server = run_health_server_grpc_no_tls(health_server_address)
|
|
163
|
+
grpc_servers.append(health_server)
|
|
164
|
+
|
|
165
|
+
# Register handlers for graceful shutdown
|
|
166
|
+
register_signal_handlers(
|
|
167
|
+
event_type=EventType.RUN_SUPERNODE_LEAVE,
|
|
168
|
+
exit_message="SuperNode terminated gracefully.",
|
|
169
|
+
grpc_servers=grpc_servers,
|
|
170
|
+
)
|
|
171
|
+
|
|
145
172
|
# Initialize NodeState, Ffs, and ObjectStore
|
|
146
173
|
state = state_factory.state()
|
|
147
174
|
ffs = ffs_factory.ffs()
|
|
148
175
|
store = object_store_factory.store()
|
|
149
176
|
|
|
177
|
+
# Launch the SuperExec if the isolation mode is `subprocess`
|
|
178
|
+
if isolation == ISOLATION_MODE_SUBPROCESS:
|
|
179
|
+
command = ["flower-superexec", "--insecure"]
|
|
180
|
+
command += ["--appio-api-address", clientappio_api_address]
|
|
181
|
+
command += ["--plugin-type", ExecPluginType.CLIENT_APP]
|
|
182
|
+
command += ["--parent-pid", str(os.getpid())]
|
|
183
|
+
# pylint: disable-next=consider-using-with
|
|
184
|
+
subprocess.Popen(command)
|
|
185
|
+
|
|
150
186
|
with _init_connection(
|
|
151
187
|
transport=transport,
|
|
152
188
|
server_address=server_address,
|
|
@@ -156,7 +192,17 @@ def start_client_internal(
|
|
|
156
192
|
max_retries=max_retries,
|
|
157
193
|
max_wait_time=max_wait_time,
|
|
158
194
|
) as conn:
|
|
159
|
-
|
|
195
|
+
(
|
|
196
|
+
receive,
|
|
197
|
+
send,
|
|
198
|
+
create_node,
|
|
199
|
+
_,
|
|
200
|
+
get_run,
|
|
201
|
+
get_fab,
|
|
202
|
+
pull_object,
|
|
203
|
+
push_object,
|
|
204
|
+
confirm_message_received,
|
|
205
|
+
) = conn
|
|
160
206
|
|
|
161
207
|
# Call create_node fn to register node
|
|
162
208
|
# and store node_id in state
|
|
@@ -176,106 +222,34 @@ def start_client_internal(
|
|
|
176
222
|
receive=receive,
|
|
177
223
|
get_run=get_run,
|
|
178
224
|
get_fab=get_fab,
|
|
225
|
+
pull_object=pull_object,
|
|
226
|
+
confirm_message_received=confirm_message_received,
|
|
179
227
|
)
|
|
180
228
|
|
|
229
|
+
# No message has been pulled therefore we can skip the push stage.
|
|
181
230
|
if run_id is None:
|
|
182
|
-
|
|
231
|
+
# If no message was received, wait for a while
|
|
232
|
+
time.sleep(3)
|
|
183
233
|
continue
|
|
184
234
|
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
# Two isolation modes:
|
|
193
|
-
# 1. `subprocess`: SuperNode is starting the ClientApp
|
|
194
|
-
# process as a subprocess.
|
|
195
|
-
# 2. `process`: ClientApp process gets started separately
|
|
196
|
-
# (via `flwr-clientapp`), for example, in a separate
|
|
197
|
-
# Docker container.
|
|
198
|
-
|
|
199
|
-
# Generate SuperNode token
|
|
200
|
-
token = int.from_bytes(urandom(RUN_ID_NUM_BYTES), "little")
|
|
201
|
-
|
|
202
|
-
# Mode 1: SuperNode starts ClientApp as subprocess
|
|
203
|
-
start_subprocess = isolation == ISOLATION_MODE_SUBPROCESS
|
|
204
|
-
|
|
205
|
-
# Share Message and Context with servicer
|
|
206
|
-
clientappio_servicer.set_inputs(
|
|
207
|
-
clientapp_input=ClientAppInputs(
|
|
208
|
-
message=message,
|
|
209
|
-
context=context,
|
|
210
|
-
run=run,
|
|
211
|
-
fab=fab,
|
|
212
|
-
token=token,
|
|
213
|
-
),
|
|
214
|
-
token_returned=start_subprocess,
|
|
215
|
-
)
|
|
216
|
-
|
|
217
|
-
if start_subprocess:
|
|
218
|
-
_octet, _colon, _port = clientappio_api_address.rpartition(":")
|
|
219
|
-
io_address = (
|
|
220
|
-
f"{CLIENT_OCTET}:{_port}"
|
|
221
|
-
if _octet == SERVER_OCTET
|
|
222
|
-
else clientappio_api_address
|
|
223
|
-
)
|
|
224
|
-
# Start ClientApp subprocess
|
|
225
|
-
command = [
|
|
226
|
-
"flwr-clientapp",
|
|
227
|
-
"--clientappio-api-address",
|
|
228
|
-
io_address,
|
|
229
|
-
"--token",
|
|
230
|
-
str(token),
|
|
231
|
-
"--parent-pid",
|
|
232
|
-
str(os.getpid()),
|
|
233
|
-
"--insecure",
|
|
234
|
-
]
|
|
235
|
-
subprocess.run(command, check=False)
|
|
236
|
-
else:
|
|
237
|
-
# Wait for output to become available
|
|
238
|
-
while not clientappio_servicer.has_outputs():
|
|
239
|
-
time.sleep(0.1)
|
|
240
|
-
|
|
241
|
-
outputs = clientappio_servicer.get_outputs()
|
|
242
|
-
reply_message, context = outputs.message, outputs.context
|
|
243
|
-
|
|
244
|
-
# Update context in the state
|
|
245
|
-
state.store_context(context)
|
|
246
|
-
|
|
247
|
-
# Send
|
|
248
|
-
send(reply_message)
|
|
249
|
-
|
|
250
|
-
# Delete messages from the state
|
|
251
|
-
state.delete_messages(
|
|
252
|
-
message_ids=[
|
|
253
|
-
message.metadata.message_id,
|
|
254
|
-
message.metadata.reply_to_message_id,
|
|
255
|
-
]
|
|
256
|
-
)
|
|
257
|
-
|
|
258
|
-
log(INFO, "Sent reply")
|
|
259
|
-
|
|
260
|
-
except RunNotRunningException:
|
|
261
|
-
log(INFO, "")
|
|
262
|
-
log(
|
|
263
|
-
INFO,
|
|
264
|
-
"SuperNode aborted sending the reply message. "
|
|
265
|
-
"Run ID %s is not in `RUNNING` status.",
|
|
266
|
-
run_id,
|
|
267
|
-
)
|
|
268
|
-
log(INFO, "")
|
|
235
|
+
_push_messages(
|
|
236
|
+
state=state,
|
|
237
|
+
object_store=store,
|
|
238
|
+
send=send,
|
|
239
|
+
push_object=push_object,
|
|
240
|
+
)
|
|
269
241
|
|
|
270
242
|
|
|
271
243
|
def _pull_and_store_message( # pylint: disable=too-many-positional-arguments
|
|
272
244
|
state: NodeState,
|
|
273
245
|
ffs: Ffs,
|
|
274
|
-
object_store: ObjectStore,
|
|
246
|
+
object_store: ObjectStore,
|
|
275
247
|
node_config: UserConfig,
|
|
276
|
-
receive: Callable[[], Optional[Message]],
|
|
248
|
+
receive: Callable[[], Optional[tuple[Message, ObjectTree]]],
|
|
277
249
|
get_run: Callable[[int], Run],
|
|
278
250
|
get_fab: Callable[[str, int], Fab],
|
|
251
|
+
pull_object: Callable[[int, str], bytes],
|
|
252
|
+
confirm_message_received: Callable[[int, str], None],
|
|
279
253
|
) -> Optional[int]:
|
|
280
254
|
"""Pull a message from the SuperLink and store it in the state.
|
|
281
255
|
|
|
@@ -287,8 +261,9 @@ def _pull_and_store_message( # pylint: disable=too-many-positional-arguments
|
|
|
287
261
|
message = None
|
|
288
262
|
try:
|
|
289
263
|
# Pull message
|
|
290
|
-
if (
|
|
264
|
+
if (recv := receive()) is None:
|
|
291
265
|
return None
|
|
266
|
+
message, object_tree = recv
|
|
292
267
|
|
|
293
268
|
# Log message reception
|
|
294
269
|
log(INFO, "")
|
|
@@ -332,8 +307,23 @@ def _pull_and_store_message( # pylint: disable=too-many-positional-arguments
|
|
|
332
307
|
)
|
|
333
308
|
state.store_context(run_ctx)
|
|
334
309
|
|
|
335
|
-
#
|
|
310
|
+
# Preregister the object tree of the message
|
|
311
|
+
obj_ids_to_pull = object_store.preregister(run_id, object_tree)
|
|
312
|
+
|
|
313
|
+
# Store the message in the state (note this message has no content)
|
|
336
314
|
state.store_message(message)
|
|
315
|
+
|
|
316
|
+
# Pull and store objects of the message in the ObjectStore
|
|
317
|
+
obj_contents = pull_objects(
|
|
318
|
+
obj_ids_to_pull,
|
|
319
|
+
pull_object_fn=lambda obj_id: pull_object(run_id, obj_id),
|
|
320
|
+
)
|
|
321
|
+
for obj_id in list(obj_contents.keys()):
|
|
322
|
+
object_store.put(obj_id, obj_contents.pop(obj_id))
|
|
323
|
+
|
|
324
|
+
# Confirm that the message was received
|
|
325
|
+
confirm_message_received(run_id, message.metadata.message_id)
|
|
326
|
+
|
|
337
327
|
except RunNotRunningException:
|
|
338
328
|
if message is None:
|
|
339
329
|
log(
|
|
@@ -353,6 +343,93 @@ def _pull_and_store_message( # pylint: disable=too-many-positional-arguments
|
|
|
353
343
|
return run_id
|
|
354
344
|
|
|
355
345
|
|
|
346
|
+
def _push_messages(
|
|
347
|
+
state: NodeState,
|
|
348
|
+
object_store: ObjectStore,
|
|
349
|
+
send: Callable[[Message, ObjectTree], set[str]],
|
|
350
|
+
push_object: Callable[[int, str, bytes], None],
|
|
351
|
+
) -> None:
|
|
352
|
+
"""Push reply messages to the SuperLink."""
|
|
353
|
+
# This is to ensure that only one message is processed at a time
|
|
354
|
+
# Wait until a reply message is available
|
|
355
|
+
while not (reply_messages := state.get_messages(is_reply=True)):
|
|
356
|
+
time.sleep(0.5)
|
|
357
|
+
|
|
358
|
+
for message in reply_messages:
|
|
359
|
+
# Log message sending
|
|
360
|
+
log(INFO, "")
|
|
361
|
+
if message.metadata.group_id:
|
|
362
|
+
log(
|
|
363
|
+
INFO,
|
|
364
|
+
"[RUN %s, ROUND %s]",
|
|
365
|
+
message.metadata.run_id,
|
|
366
|
+
message.metadata.group_id,
|
|
367
|
+
)
|
|
368
|
+
else:
|
|
369
|
+
log(INFO, "[RUN %s]", message.metadata.run_id)
|
|
370
|
+
log(
|
|
371
|
+
INFO,
|
|
372
|
+
"Sending: %s message",
|
|
373
|
+
message.metadata.message_type,
|
|
374
|
+
)
|
|
375
|
+
|
|
376
|
+
# Get the object tree for the message
|
|
377
|
+
object_tree = object_store.get_object_tree(message.metadata.message_id)
|
|
378
|
+
|
|
379
|
+
# Define the iterator for yielding object contents
|
|
380
|
+
# This will yield (object_id, content) pairs
|
|
381
|
+
def yield_object_contents(
|
|
382
|
+
_obj_tree: ObjectTree, obj_id_set: set[str]
|
|
383
|
+
) -> Iterator[tuple[str, bytes]]:
|
|
384
|
+
for tree in iterate_object_tree(_obj_tree):
|
|
385
|
+
if tree.object_id not in obj_id_set:
|
|
386
|
+
continue
|
|
387
|
+
while (content := object_store.get(tree.object_id)) == b"":
|
|
388
|
+
# Wait for the content to be available
|
|
389
|
+
time.sleep(0.5)
|
|
390
|
+
# At this point, content is guaranteed to be available
|
|
391
|
+
# therefore we can yield it after casting it to bytes
|
|
392
|
+
yield tree.object_id, cast(bytes, content)
|
|
393
|
+
|
|
394
|
+
# Send the message
|
|
395
|
+
try:
|
|
396
|
+
# Send the reply message with its ObjectTree
|
|
397
|
+
# Get the IDs of objects to send
|
|
398
|
+
ids_obj_to_send = send(message, object_tree)
|
|
399
|
+
|
|
400
|
+
# Push object contents from the ObjectStore
|
|
401
|
+
run_id = message.metadata.run_id
|
|
402
|
+
push_object_contents_from_iterable(
|
|
403
|
+
yield_object_contents(object_tree, ids_obj_to_send),
|
|
404
|
+
# Use functools.partial to bind run_id explicitly,
|
|
405
|
+
# avoiding late binding issues and satisfying flake8 (B023)
|
|
406
|
+
# Equivalent to:
|
|
407
|
+
# lambda object_id, content: push_object(run_id, object_id, content)
|
|
408
|
+
push_object_fn=partial(push_object, run_id),
|
|
409
|
+
)
|
|
410
|
+
log(INFO, "Sent successfully")
|
|
411
|
+
except RunNotRunningException:
|
|
412
|
+
log(
|
|
413
|
+
INFO,
|
|
414
|
+
"Run ID %s is not in `RUNNING` status. Ignoring reply message %s.",
|
|
415
|
+
message.metadata.run_id,
|
|
416
|
+
message.metadata.message_id,
|
|
417
|
+
)
|
|
418
|
+
finally:
|
|
419
|
+
# Delete the message from the state
|
|
420
|
+
state.delete_messages(
|
|
421
|
+
message_ids=[
|
|
422
|
+
message.metadata.message_id,
|
|
423
|
+
message.metadata.reply_to_message_id,
|
|
424
|
+
]
|
|
425
|
+
)
|
|
426
|
+
|
|
427
|
+
# Delete all its objects from the ObjectStore
|
|
428
|
+
# No need to delete objects of the message it replies to, as it is
|
|
429
|
+
# already deleted when the ClientApp calls `ConfirmMessageReceived`
|
|
430
|
+
object_store.delete(message.metadata.message_id)
|
|
431
|
+
|
|
432
|
+
|
|
356
433
|
@contextmanager
|
|
357
434
|
def _init_connection( # pylint: disable=too-many-positional-arguments
|
|
358
435
|
transport: str,
|
|
@@ -366,12 +443,15 @@ def _init_connection( # pylint: disable=too-many-positional-arguments
|
|
|
366
443
|
max_wait_time: Optional[float] = None,
|
|
367
444
|
) -> Iterator[
|
|
368
445
|
tuple[
|
|
369
|
-
Callable[[], Optional[Message]],
|
|
370
|
-
Callable[[Message],
|
|
446
|
+
Callable[[], Optional[tuple[Message, ObjectTree]]],
|
|
447
|
+
Callable[[Message, ObjectTree], set[str]],
|
|
371
448
|
Callable[[], Optional[int]],
|
|
372
449
|
Callable[[], None],
|
|
373
450
|
Callable[[int], Run],
|
|
374
451
|
Callable[[str, int], Fab],
|
|
452
|
+
Callable[[int, str], bytes],
|
|
453
|
+
Callable[[int, str, bytes], None],
|
|
454
|
+
Callable[[int, str], None],
|
|
375
455
|
]
|
|
376
456
|
]:
|
|
377
457
|
"""Establish a connection to the Fleet API server at SuperLink."""
|
|
@@ -430,52 +510,29 @@ def _make_fleet_connection_retry_invoker(
|
|
|
430
510
|
connection_error_type: type[Exception] = RpcError,
|
|
431
511
|
) -> RetryInvoker:
|
|
432
512
|
"""Create a retry invoker for fleet connection."""
|
|
513
|
+
retry_invoker = _make_simple_grpc_retry_invoker()
|
|
514
|
+
retry_invoker.recoverable_exceptions = connection_error_type
|
|
515
|
+
if max_retries is not None:
|
|
516
|
+
retry_invoker.max_tries = max_retries + 1
|
|
517
|
+
if max_wait_time is not None:
|
|
518
|
+
retry_invoker.max_time = max_wait_time
|
|
433
519
|
|
|
434
|
-
|
|
435
|
-
if retry_state.tries > 1:
|
|
436
|
-
log(
|
|
437
|
-
INFO,
|
|
438
|
-
"Connection successful after %.2f seconds and %s tries.",
|
|
439
|
-
retry_state.elapsed_time,
|
|
440
|
-
retry_state.tries,
|
|
441
|
-
)
|
|
442
|
-
|
|
443
|
-
def _on_backoff(retry_state: RetryState) -> None:
|
|
444
|
-
if retry_state.tries == 1:
|
|
445
|
-
log(WARN, "Connection attempt failed, retrying...")
|
|
446
|
-
else:
|
|
447
|
-
log(
|
|
448
|
-
WARN,
|
|
449
|
-
"Connection attempt failed, retrying in %.2f seconds",
|
|
450
|
-
retry_state.actual_wait,
|
|
451
|
-
)
|
|
452
|
-
|
|
453
|
-
return RetryInvoker(
|
|
454
|
-
wait_gen_factory=lambda: exponential(max_delay=MAX_RETRY_DELAY),
|
|
455
|
-
recoverable_exceptions=connection_error_type,
|
|
456
|
-
max_tries=max_retries + 1 if max_retries is not None else None,
|
|
457
|
-
max_time=max_wait_time,
|
|
458
|
-
on_giveup=lambda retry_state: (
|
|
459
|
-
log(
|
|
460
|
-
WARN,
|
|
461
|
-
"Giving up reconnection after %.2f seconds and %s tries.",
|
|
462
|
-
retry_state.elapsed_time,
|
|
463
|
-
retry_state.tries,
|
|
464
|
-
)
|
|
465
|
-
if retry_state.tries > 1
|
|
466
|
-
else None
|
|
467
|
-
),
|
|
468
|
-
on_success=_on_success,
|
|
469
|
-
on_backoff=_on_backoff,
|
|
470
|
-
)
|
|
520
|
+
return retry_invoker
|
|
471
521
|
|
|
472
522
|
|
|
473
523
|
def run_clientappio_api_grpc(
|
|
474
524
|
address: str,
|
|
525
|
+
state_factory: NodeStateFactory,
|
|
526
|
+
ffs_factory: FfsFactory,
|
|
527
|
+
objectstore_factory: ObjectStoreFactory,
|
|
475
528
|
certificates: Optional[tuple[bytes, bytes, bytes]],
|
|
476
|
-
) ->
|
|
529
|
+
) -> grpc.Server:
|
|
477
530
|
"""Run ClientAppIo API gRPC server."""
|
|
478
|
-
clientappio_servicer: grpc.Server = ClientAppIoServicer(
|
|
531
|
+
clientappio_servicer: grpc.Server = ClientAppIoServicer(
|
|
532
|
+
state_factory=state_factory,
|
|
533
|
+
ffs_factory=ffs_factory,
|
|
534
|
+
objectstore_factory=objectstore_factory,
|
|
535
|
+
)
|
|
479
536
|
clientappio_add_servicer_to_server_fn = add_ClientAppIoServicer_to_server
|
|
480
537
|
clientappio_grpc_server = generic_create_grpc_server(
|
|
481
538
|
servicer_and_add_fn=(
|
|
@@ -486,6 +543,6 @@ def run_clientappio_api_grpc(
|
|
|
486
543
|
max_message_length=GRPC_MAX_MESSAGE_LENGTH,
|
|
487
544
|
certificates=certificates,
|
|
488
545
|
)
|
|
489
|
-
log(INFO, "Starting
|
|
546
|
+
log(INFO, "Flower Deployment Runtime: Starting ClientAppIo API on %s", address)
|
|
490
547
|
clientappio_grpc_server.start()
|
|
491
|
-
return clientappio_grpc_server
|
|
548
|
+
return clientappio_grpc_server
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: flwr
|
|
3
|
-
Version: 1.
|
|
3
|
+
Version: 1.21.0
|
|
4
4
|
Summary: Flower: A Friendly Federated AI Framework
|
|
5
5
|
License: Apache-2.0
|
|
6
6
|
Keywords: Artificial Intelligence,Federated AI,Federated Analytics,Federated Evaluation,Federated Learning,Flower,Machine Learning
|
|
@@ -34,6 +34,7 @@ Provides-Extra: simulation
|
|
|
34
34
|
Requires-Dist: click (<8.2.0)
|
|
35
35
|
Requires-Dist: cryptography (>=44.0.1,<45.0.0)
|
|
36
36
|
Requires-Dist: grpcio (>=1.62.3,<2.0.0,!=1.65.0)
|
|
37
|
+
Requires-Dist: grpcio-health-checking (>=1.62.3,<2.0.0)
|
|
37
38
|
Requires-Dist: iterators (>=0.0.2,<0.0.3)
|
|
38
39
|
Requires-Dist: numpy (>=1.26.0,<3.0.0)
|
|
39
40
|
Requires-Dist: pathspec (>=0.12.1,<0.13.0)
|
|
@@ -171,8 +172,9 @@ Flower Baselines is a collection of community-contributed projects that reproduc
|
|
|
171
172
|
- [FedOpt](https://github.com/adap/flower/tree/main/baselines/flwr_baselines/flwr_baselines/publications/adaptive_federated_optimization)
|
|
172
173
|
|
|
173
174
|
Please refer to the [Flower Baselines Documentation](https://flower.ai/docs/baselines/) for a detailed categorization of baselines and for additional info including:
|
|
174
|
-
|
|
175
|
-
|
|
175
|
+
|
|
176
|
+
- [How to use Flower Baselines](https://flower.ai/docs/baselines/how-to-use-baselines.html)
|
|
177
|
+
- [How to contribute a new Flower Baseline](https://flower.ai/docs/baselines/how-to-contribute-baselines.html)
|
|
176
178
|
|
|
177
179
|
## Flower Usage Examples
|
|
178
180
|
|