flwr-nightly 1.19.0.dev20250516__py3-none-any.whl → 1.19.0.dev20250521__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flwr/app/__init__.py +15 -0
- flwr/app/error.py +68 -0
- flwr/app/metadata.py +223 -0
- flwr/client/__init__.py +2 -2
- flwr/client/client_app.py +1 -1
- flwr/client/clientapp/app.py +1 -1
- flwr/client/grpc_rere_client/connection.py +2 -1
- flwr/client/rest_client/connection.py +2 -1
- flwr/client/start_client_internal.py +608 -0
- flwr/client/supernode/app.py +1 -1
- flwr/clientapp/__init__.py +15 -0
- flwr/common/__init__.py +2 -2
- flwr/common/inflatable_grpc_utils.py +97 -0
- flwr/common/message.py +87 -245
- flwr/common/record/array.py +1 -1
- flwr/common/record/configrecord.py +1 -1
- flwr/common/serde.py +9 -54
- flwr/common/serde_utils.py +50 -0
- flwr/compat/__init__.py +15 -0
- flwr/compat/client/__init__.py +15 -0
- flwr/{client → compat/client}/app.py +13 -11
- flwr/compat/common/__init__.py +15 -0
- flwr/compat/server/__init__.py +15 -0
- flwr/compat/simulation/__init__.py +15 -0
- flwr/server/superlink/fleet/vce/vce_api.py +1 -1
- flwr/server/superlink/linkstate/sqlite_linkstate.py +2 -6
- flwr/serverapp/__init__.py +15 -0
- flwr/supercore/__init__.py +15 -0
- flwr/superlink/__init__.py +15 -0
- flwr/supernode/__init__.py +15 -0
- flwr/{client → supernode}/nodestate/in_memory_nodestate.py +1 -1
- {flwr_nightly-1.19.0.dev20250516.dist-info → flwr_nightly-1.19.0.dev20250521.dist-info}/METADATA +1 -1
- {flwr_nightly-1.19.0.dev20250516.dist-info → flwr_nightly-1.19.0.dev20250521.dist-info}/RECORD +38 -23
- /flwr/{client → supernode}/nodestate/__init__.py +0 -0
- /flwr/{client → supernode}/nodestate/nodestate.py +0 -0
- /flwr/{client → supernode}/nodestate/nodestate_factory.py +0 -0
- {flwr_nightly-1.19.0.dev20250516.dist-info → flwr_nightly-1.19.0.dev20250521.dist-info}/WHEEL +0 -0
- {flwr_nightly-1.19.0.dev20250516.dist-info → flwr_nightly-1.19.0.dev20250521.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,608 @@
|
|
1
|
+
# Copyright 2025 Flower Labs GmbH. All Rights Reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
# ==============================================================================
|
15
|
+
"""Main loop for Flower SuperNode."""
|
16
|
+
|
17
|
+
|
18
|
+
import multiprocessing
|
19
|
+
import os
|
20
|
+
import sys
|
21
|
+
import threading
|
22
|
+
import time
|
23
|
+
from contextlib import AbstractContextManager
|
24
|
+
from logging import ERROR, INFO, WARN
|
25
|
+
from os import urandom
|
26
|
+
from pathlib import Path
|
27
|
+
from typing import Callable, Optional, Union, cast
|
28
|
+
|
29
|
+
import grpc
|
30
|
+
from cryptography.hazmat.primitives.asymmetric import ec
|
31
|
+
from grpc import RpcError
|
32
|
+
|
33
|
+
from flwr.app.error import Error
|
34
|
+
from flwr.cli.config_utils import get_fab_metadata
|
35
|
+
from flwr.cli.install import install_from_fab
|
36
|
+
from flwr.client.client import Client
|
37
|
+
from flwr.client.client_app import ClientApp, LoadClientAppError
|
38
|
+
from flwr.client.clientapp.app import flwr_clientapp
|
39
|
+
from flwr.client.clientapp.clientappio_servicer import (
|
40
|
+
ClientAppInputs,
|
41
|
+
ClientAppIoServicer,
|
42
|
+
)
|
43
|
+
from flwr.client.grpc_adapter_client.connection import grpc_adapter
|
44
|
+
from flwr.client.grpc_client.connection import grpc_connection
|
45
|
+
from flwr.client.grpc_rere_client.connection import grpc_request_response
|
46
|
+
from flwr.client.message_handler.message_handler import handle_control_message
|
47
|
+
from flwr.client.run_info_store import DeprecatedRunInfoStore
|
48
|
+
from flwr.client.typing import ClientFnExt
|
49
|
+
from flwr.common import GRPC_MAX_MESSAGE_LENGTH, Context, Message
|
50
|
+
from flwr.common.address import parse_address
|
51
|
+
from flwr.common.constant import (
|
52
|
+
CLIENT_OCTET,
|
53
|
+
CLIENTAPPIO_API_DEFAULT_SERVER_ADDRESS,
|
54
|
+
ISOLATION_MODE_PROCESS,
|
55
|
+
ISOLATION_MODE_SUBPROCESS,
|
56
|
+
MAX_RETRY_DELAY,
|
57
|
+
RUN_ID_NUM_BYTES,
|
58
|
+
SERVER_OCTET,
|
59
|
+
TRANSPORT_TYPE_GRPC_ADAPTER,
|
60
|
+
TRANSPORT_TYPE_GRPC_BIDI,
|
61
|
+
TRANSPORT_TYPE_GRPC_RERE,
|
62
|
+
TRANSPORT_TYPE_REST,
|
63
|
+
TRANSPORT_TYPES,
|
64
|
+
ErrorCode,
|
65
|
+
)
|
66
|
+
from flwr.common.exit import ExitCode, flwr_exit
|
67
|
+
from flwr.common.grpc import generic_create_grpc_server
|
68
|
+
from flwr.common.logger import log
|
69
|
+
from flwr.common.retry_invoker import RetryInvoker, RetryState, exponential
|
70
|
+
from flwr.common.typing import Fab, Run, RunNotRunningException, UserConfig
|
71
|
+
from flwr.proto.clientappio_pb2_grpc import add_ClientAppIoServicer_to_server
|
72
|
+
from flwr.supernode.nodestate import NodeStateFactory
|
73
|
+
|
74
|
+
|
75
|
+
def _check_actionable_client(
    client: Optional[Client], client_fn: Optional[ClientFnExt]
) -> None:
    """Ensure that exactly one of `client` and `client_fn` is provided.

    Parameters
    ----------
    client : Optional[Client]
        A ready-made client instance, or `None`.
    client_fn : Optional[ClientFnExt]
        A callable that creates a client, or `None`.

    Raises
    ------
    ValueError
        If both arguments are `None`, or if both are set.
    """
    has_client = client is not None
    has_client_fn = client_fn is not None

    # Neither source of a client was supplied
    if not (has_client or has_client_fn):
        raise ValueError(
            "Both `client_fn` and `client` are `None`, but one is required"
        )

    # Both were supplied, so it is ambiguous which one to use
    if has_client and has_client_fn:
        raise ValueError(
            "Both `client_fn` and `client` are provided, but only one is allowed"
        )
|
87
|
+
|
88
|
+
|
89
|
+
# pylint: disable=import-outside-toplevel
|
90
|
+
# pylint: disable=too-many-branches
|
91
|
+
# pylint: disable=too-many-locals
|
92
|
+
# pylint: disable=too-many-statements
|
93
|
+
# pylint: disable=too-many-arguments
|
94
|
+
def start_client_internal(
    *,
    server_address: str,
    node_config: UserConfig,
    load_client_app_fn: Optional[Callable[[str, str, str], ClientApp]] = None,
    client_fn: Optional[ClientFnExt] = None,
    client: Optional[Client] = None,
    grpc_max_message_length: int = GRPC_MAX_MESSAGE_LENGTH,
    root_certificates: Optional[Union[bytes, str]] = None,
    insecure: Optional[bool] = None,
    transport: Optional[str] = None,
    authentication_keys: Optional[
        tuple[ec.EllipticCurvePrivateKey, ec.EllipticCurvePublicKey]
    ] = None,
    max_retries: Optional[int] = None,
    max_wait_time: Optional[float] = None,
    flwr_path: Optional[Path] = None,
    isolation: Optional[str] = None,
    clientappio_api_address: Optional[str] = CLIENTAPPIO_API_DEFAULT_SERVER_ADDRESS,
) -> None:
    """Start a Flower client node which connects to a Flower server.

    Parameters
    ----------
    server_address : str
        The IPv4 or IPv6 address of the server. If the Flower
        server runs on the same machine on port 8080, then `server_address`
        would be `"[::]:8080"`.
    node_config: UserConfig
        The configuration of the node. It is not used when the connection
        provides no `create_node()` (legacy `grpc-bidi`); an empty config is
        stored in that case.
    load_client_app_fn : Optional[Callable[[str, str, str], ClientApp]] (default: None)
        A function that can be used to load a `ClientApp` instance. It is
        called with the FAB ID, the FAB version and the FAB hash of the run.
        If `None`, exactly one of `client_fn` and `client` must be provided.
    client_fn : Optional[ClientFnExt]
        A callable that instantiates a Client. (default: None)
    client : Optional[flwr.client.Client]
        An implementation of the abstract base
        class `flwr.client.Client` (default: None)
    grpc_max_message_length : int (default: 536_870_912, this equals 512MB)
        The maximum length of gRPC messages that can be exchanged with the
        Flower server. The default should be sufficient for most models.
        Users who train very large models might need to increase this
        value. Note that the Flower server needs to be started with the
        same value (see `flwr.server.start_server`), otherwise it will not
        know about the increased limit and block larger messages.
    root_certificates : Optional[Union[bytes, str]] (default: None)
        The PEM-encoded root certificates as a byte string or a path string.
        If provided, a secure connection using the certificates will be
        established to an SSL-enabled Flower server.
    insecure : Optional[bool] (default: None)
        Starts an insecure gRPC connection when True. Enables HTTPS connection
        when False, using system certificates if `root_certificates` is None.
        If `None`, the connection is insecure exactly when `root_certificates`
        is `None`.
    transport : Optional[str] (default: None)
        Configure the transport layer. Allowed values:
        - 'grpc-bidi': gRPC, bidirectional streaming
        - 'grpc-rere': gRPC, request-response (experimental)
        - 'rest': HTTP (experimental)
        `None` selects gRPC bidirectional streaming. The gRPC adapter transport
        (`TRANSPORT_TYPE_GRPC_ADAPTER`) is also accepted by `_init_connection`.
    authentication_keys : Optional[Tuple[PrivateKey, PublicKey]] (default: None)
        Tuple containing the elliptic curve private key and public key for
        authentication from the cryptography library.
        Source: https://cryptography.io/en/latest/hazmat/primitives/asymmetric/ec/
        Used to establish an authenticated connection with the server.
    max_retries: Optional[int] (default: None)
        The maximum number of times the client will try to connect to the
        server before giving up in case of a connection error. If set to None,
        there is no limit to the number of tries.
    max_wait_time: Optional[float] (default: None)
        The maximum duration before the client stops trying to
        connect to the server in case of connection error.
        If set to None, there is no limit to the total time.
    flwr_path: Optional[Path] (default: None)
        The fully resolved path containing installed Flower Apps.
    isolation : Optional[str] (default: None)
        Isolation mode for `ClientApp`. Possible values are `subprocess` and
        `process`. Defaults to `None`, which runs the `ClientApp` in the same process
        as the SuperNode. If `subprocess`, the `ClientApp` runs in a subprocess started
        by the SuperNode and communicates using gRPC at the address
        `clientappio_api_address`. If `process`, the `ClientApp` runs in a separate
        isolated process and communicates using gRPC at the address
        `clientappio_api_address`.
    clientappio_api_address : Optional[str]
        (default: `CLIENTAPPIO_API_DEFAULT_SERVER_ADDRESS`)
        The SuperNode gRPC server address.

    Raises
    ------
    ValueError
        If neither or both of `client` and `client_fn` are given while
        `load_client_app_fn` is `None`, if `isolation` is set but
        `clientappio_api_address` is `None`, or if node registration fails.
    NotImplementedError
        If a transport other than `grpc-bidi` provides no `create_node()`.
    """
    if insecure is None:
        insecure = root_certificates is None

    if load_client_app_fn is None:
        _check_actionable_client(client, client_fn)

        if client_fn is None:
            # Wrap `Client` instance in `client_fn`
            def single_client_factory(
                context: Context,  # pylint: disable=unused-argument
            ) -> Client:
                if client is None:  # Added this to keep mypy happy
                    raise ValueError(
                        "Both `client_fn` and `client` are `None`, but one is required"
                    )
                return client  # Always return the same instance

            client_fn = single_client_factory

        def _load_client_app(_1: str, _2: str, _3: str) -> ClientApp:
            return ClientApp(client_fn=client_fn)

        load_client_app_fn = _load_client_app

    if isolation:
        if clientappio_api_address is None:
            raise ValueError(
                f"`clientappio_api_address` required when `isolation` is "
                f"{ISOLATION_MODE_SUBPROCESS} or {ISOLATION_MODE_PROCESS}",
            )
        _clientappio_grpc_server, clientappio_servicer = run_clientappio_api_grpc(
            address=clientappio_api_address,
            certificates=None,
        )
    # Type-narrowing only: the address is dereferenced solely in isolation mode,
    # where it was checked against `None` above
    clientappio_api_address = cast(str, clientappio_api_address)

    # At this point, only `load_client_app_fn` should be used
    # Both `client` and `client_fn` must not be used directly

    # Initialize connection context manager
    connection, address, connection_error_type = _init_connection(
        transport, server_address
    )

    # Legacy grpc-bidi (also the default when no transport is given) has no
    # node registration and lets ClientApp exceptions crash the process
    legacy_grpc_bidi = transport in ("grpc-bidi", None)

    def _on_success(retry_state: RetryState) -> None:
        if retry_state.tries > 1:
            log(
                INFO,
                "Connection successful after %.2f seconds and %s tries.",
                retry_state.elapsed_time,
                retry_state.tries,
            )

    def _on_backoff(retry_state: RetryState) -> None:
        if retry_state.tries == 1:
            log(WARN, "Connection attempt failed, retrying...")
        else:
            log(
                WARN,
                "Connection attempt failed, retrying in %.2f seconds",
                retry_state.actual_wait,
            )

    def _on_giveup(retry_state: RetryState) -> None:
        if retry_state.tries > 1:
            log(
                WARN,
                "Giving up reconnection after %.2f seconds and %s tries.",
                retry_state.elapsed_time,
                retry_state.tries,
            )

    retry_invoker = RetryInvoker(
        wait_gen_factory=lambda: exponential(max_delay=MAX_RETRY_DELAY),
        recoverable_exceptions=connection_error_type,
        # `max_retries` counts retries, `max_tries` counts attempts
        max_tries=max_retries + 1 if max_retries is not None else None,
        max_time=max_wait_time,
        on_giveup=_on_giveup,
        on_success=_on_success,
        on_backoff=_on_backoff,
    )

    # DeprecatedRunInfoStore gets initialized when the first connection is established
    run_info_store: Optional[DeprecatedRunInfoStore] = None
    state_factory = NodeStateFactory()
    state = state_factory.state()
    mp_spawn_context = multiprocessing.get_context("spawn")

    # Cache of run information, keyed by run ID
    runs: dict[int, Run] = {}

    while True:
        sleep_duration: int = 0
        with connection(
            address,
            insecure,
            retry_invoker,
            grpc_max_message_length,
            root_certificates,
            authentication_keys,
        ) as conn:
            receive, send, create_node, delete_node, get_run, get_fab = conn

            # Register node when connecting the first time
            if run_info_store is None:
                if create_node is None:
                    if not legacy_grpc_bidi:
                        raise NotImplementedError(
                            "All transports except `grpc-bidi` require "
                            "an implementation for `create_node()`."
                        )
                    # gRPC-bidi doesn't have the concept of node_id,
                    # so we set it to -1
                    run_info_store = DeprecatedRunInfoStore(
                        node_id=-1,
                        node_config={},
                    )
                else:
                    # Call create_node fn to register node
                    # and store node_id in state
                    if (node_id := create_node()) is None:
                        raise ValueError(
                            "Failed to register SuperNode with the SuperLink"
                        )
                    state.set_node_id(node_id)
                    run_info_store = DeprecatedRunInfoStore(
                        node_id=state.get_node_id(),
                        node_config=node_config,
                    )

            # pylint: disable=too-many-nested-blocks
            while True:
                try:
                    # Receive
                    message = receive()
                    if message is None:
                        time.sleep(3)  # Wait for 3s before asking again
                        continue

                    log(INFO, "")
                    if len(message.metadata.group_id) > 0:
                        log(
                            INFO,
                            "[RUN %s, ROUND %s]",
                            message.metadata.run_id,
                            message.metadata.group_id,
                        )
                    log(
                        INFO,
                        "Received: %s message %s",
                        message.metadata.message_type,
                        message.metadata.message_id,
                    )

                    # Handle control message
                    out_message, sleep_duration = handle_control_message(message)
                    if out_message:
                        send(out_message)
                        break

                    # Get run info
                    run_id = message.metadata.run_id
                    if run_id not in runs:
                        if get_run is not None:
                            runs[run_id] = get_run(run_id)
                        # If get_run is None, i.e., in grpc-bidi mode
                        else:
                            runs[run_id] = Run.create_empty(run_id=run_id)

                    run: Run = runs[run_id]
                    if get_fab is not None and run.fab_hash:
                        fab = get_fab(run.fab_hash, run_id)
                        if not isolation:
                            # If `ClientApp` runs in the same process, install the FAB
                            install_from_fab(fab.content, flwr_path, True)
                        fab_id, fab_version = get_fab_metadata(fab.content)
                    else:
                        fab = None
                        fab_id, fab_version = run.fab_id, run.fab_version

                    run.fab_id, run.fab_version = fab_id, fab_version

                    # Register context for this run
                    run_info_store.register_context(
                        run_id=run_id,
                        run=run,
                        flwr_path=flwr_path,
                        fab=fab,
                    )

                    # Retrieve context for this run
                    context = run_info_store.retrieve_context(run_id=run_id)
                    # Create an error reply message that will never be used to prevent
                    # the used-before-assignment linting error
                    reply_message = Message(
                        Error(code=ErrorCode.UNKNOWN, reason="Unknown"),
                        reply_to=message,
                    )

                    # Handle app loading and task message
                    try:
                        if isolation:
                            # Two isolation modes:
                            # 1. `subprocess`: SuperNode is starting the ClientApp
                            #    process as a subprocess.
                            # 2. `process`: ClientApp process gets started separately
                            #    (via `flwr-clientapp`), for example, in a separate
                            #    Docker container.

                            # Generate SuperNode token
                            token = int.from_bytes(urandom(RUN_ID_NUM_BYTES), "little")

                            # Mode 1: SuperNode starts ClientApp as subprocess
                            start_subprocess = isolation == ISOLATION_MODE_SUBPROCESS

                            # Share Message and Context with servicer
                            clientappio_servicer.set_inputs(
                                clientapp_input=ClientAppInputs(
                                    message=message,
                                    context=context,
                                    run=run,
                                    fab=fab,
                                    token=token,
                                ),
                                token_returned=start_subprocess,
                            )

                            if start_subprocess:
                                _octet, _colon, _port = (
                                    clientappio_api_address.rpartition(":")
                                )
                                # If the server host equals `SERVER_OCTET`, the
                                # subprocess connects via `CLIENT_OCTET` instead
                                io_address = (
                                    f"{CLIENT_OCTET}:{_port}"
                                    if _octet == SERVER_OCTET
                                    else clientappio_api_address
                                )
                                # Start ClientApp subprocess
                                command = [
                                    "flwr-clientapp",
                                    "--clientappio-api-address",
                                    io_address,
                                    "--token",
                                    str(token),
                                    "--insecure",
                                ]

                                proc = mp_spawn_context.Process(
                                    target=_run_flwr_clientapp,
                                    args=(command, os.getpid()),
                                    daemon=True,
                                )
                                proc.start()
                                proc.join()
                            else:
                                # Wait for output to become available
                                while not clientappio_servicer.has_outputs():
                                    time.sleep(0.1)

                            outputs = clientappio_servicer.get_outputs()
                            reply_message, context = outputs.message, outputs.context
                        else:
                            # Load ClientApp instance
                            client_app: ClientApp = load_client_app_fn(
                                fab_id, fab_version, run.fab_hash
                            )

                            # Execute ClientApp
                            reply_message = client_app(message=message, context=context)
                    except Exception as ex:  # pylint: disable=broad-exception-caught

                        # Legacy grpc-bidi
                        if legacy_grpc_bidi:
                            log(ERROR, "Client raised an exception.", exc_info=ex)
                            # Raise exception, crash process
                            raise

                        # Don't update/change DeprecatedRunInfoStore

                        e_code = ErrorCode.CLIENT_APP_RAISED_EXCEPTION
                        # Ex fmt: "<class 'ZeroDivisionError'>:<'division by zero'>"
                        reason = str(type(ex)) + ":<'" + str(ex) + "'>"
                        exc_entity = "ClientApp"
                        if isinstance(ex, LoadClientAppError):
                            reason = (
                                "An exception was raised when attempting to load "
                                "`ClientApp`"
                            )
                            e_code = ErrorCode.LOAD_CLIENT_APP_EXCEPTION
                            exc_entity = "SuperNode"

                        log(ERROR, "%s raised an exception", exc_entity, exc_info=ex)

                        # Create error message
                        reply_message = Message(
                            Error(code=e_code, reason=reason),
                            reply_to=message,
                        )
                    else:
                        # No exception, update node state
                        run_info_store.update_context(
                            run_id=run_id,
                            context=context,
                        )

                    # Send
                    send(reply_message)
                    log(INFO, "Sent reply")

                except RunNotRunningException:
                    log(INFO, "")
                    log(
                        INFO,
                        "SuperNode aborted sending the reply message. "
                        "Run ID %s is not in `RUNNING` status.",
                        run_id,
                    )
                    log(INFO, "")
            # pylint: enable=too-many-nested-blocks

            # Unregister node
            if delete_node is not None:
                delete_node()  # pylint: disable=not-callable

        # A control message without a sleep duration means: shut down for good
        if sleep_duration == 0:
            log(INFO, "Disconnect and shut down")
            break

        # Sleep and reconnect afterwards
        log(
            INFO,
            "Disconnect, then re-establish connection after %s second(s)",
            sleep_duration,
        )
        time.sleep(sleep_duration)
|
510
|
+
|
511
|
+
|
512
|
+
def _init_connection(transport: Optional[str], server_address: str) -> tuple[
    Callable[
        [
            str,
            bool,
            RetryInvoker,
            int,
            Union[bytes, str, None],
            Optional[tuple[ec.EllipticCurvePrivateKey, ec.EllipticCurvePublicKey]],
        ],
        AbstractContextManager[
            tuple[
                Callable[[], Optional[Message]],
                Callable[[Message], None],
                Optional[Callable[[], Optional[int]]],
                Optional[Callable[[], None]],
                Optional[Callable[[int], Run]],
                Optional[Callable[[str, int], Fab]],
            ]
        ],
    ],
    str,
    type[Exception],
]:
    """Select the connection factory matching `transport`.

    Parameters
    ----------
    transport : Optional[str]
        The transport type. `None` selects `TRANSPORT_TYPE_GRPC_BIDI`.
    server_address : str
        The SuperLink address as provided by the caller.

    Returns
    -------
    tuple
        The connection context manager factory, the address rebuilt from the
        parsed host and port (IPv6 hosts are wrapped in brackets), and the
        exception type that signals a connection error for this transport.

    Raises
    ------
    ValueError
        If `transport` is not one of the known transport types.
    """
    # Validate the address first; `flwr_exit` is called if it cannot be parsed
    parsed = parse_address(server_address)
    if not parsed:
        flwr_exit(
            ExitCode.COMMON_ADDRESS_INVALID,
            f"SuperLink address ({server_address}) cannot be parsed.",
        )
    host, port, is_v6 = parsed
    host_part = f"[{host}]" if is_v6 else f"{host}"
    address = f"{host_part}:{port}"

    # Fall back to the default transport layer when none was requested
    selected = TRANSPORT_TYPE_GRPC_BIDI if transport is None else transport

    # REST needs optional dependencies, hence the lazy imports
    if selected == TRANSPORT_TYPE_REST:
        try:
            from requests.exceptions import ConnectionError as RequestsConnectionError

            from flwr.client.rest_client.connection import http_request_response
        except ModuleNotFoundError:
            flwr_exit(ExitCode.COMMON_MISSING_EXTRA_REST)
        if not server_address.startswith("http"):
            flwr_exit(ExitCode.SUPERNODE_REST_ADDRESS_INVALID)
        return http_request_response, address, RequestsConnectionError

    # All gRPC-based transports report connection problems as `RpcError`
    if selected == TRANSPORT_TYPE_GRPC_RERE:
        return grpc_request_response, address, RpcError
    if selected == TRANSPORT_TYPE_GRPC_ADAPTER:
        return grpc_adapter, address, RpcError
    if selected == TRANSPORT_TYPE_GRPC_BIDI:
        return grpc_connection, address, RpcError

    raise ValueError(
        f"Unknown transport type: {selected} (possible: {TRANSPORT_TYPES})"
    )
|
573
|
+
|
574
|
+
|
575
|
+
def _run_flwr_clientapp(args: list[str], main_pid: int) -> None:
    """Run `flwr_clientapp` with `args` installed as `sys.argv`.

    A daemon thread checks once per second whether the parent PID still
    equals `main_pid` and sends signal 9 to this process as soon as it does
    not, so the process does not outlive a killed main process.

    Parameters
    ----------
    args : list[str]
        The command line to expose to `flwr_clientapp` through `sys.argv`.
    main_pid : int
        The PID of the process that is expected to be the parent.
    """

    # Monitor the main process in case of SIGKILL
    def _kill_self_when_orphaned() -> None:
        while True:
            time.sleep(1)
            if os.getppid() == main_pid:
                continue
            os.kill(os.getpid(), 9)

    watchdog = threading.Thread(target=_kill_self_when_orphaned, daemon=True)
    watchdog.start()

    # Run the command
    sys.argv = args
    flwr_clientapp()
|
588
|
+
|
589
|
+
|
590
|
+
def run_clientappio_api_grpc(
    address: str,
    certificates: Optional[tuple[bytes, bytes, bytes]],
) -> tuple[grpc.Server, ClientAppIoServicer]:
    """Run ClientAppIo API gRPC server.

    Parameters
    ----------
    address : str
        The address the gRPC server is created for.
    certificates : Optional[tuple[bytes, bytes, bytes]]
        Passed through unchanged to `generic_create_grpc_server`.

    Returns
    -------
    tuple[grpc.Server, ClientAppIoServicer]
        The started gRPC server and the servicer registered with it.
    """
    # This is the servicer, not the server, so it must not be typed `grpc.Server`
    clientappio_servicer = ClientAppIoServicer()
    clientappio_grpc_server = generic_create_grpc_server(
        servicer_and_add_fn=(
            clientappio_servicer,
            add_ClientAppIoServicer_to_server,
        ),
        server_address=address,
        max_message_length=GRPC_MAX_MESSAGE_LENGTH,
        certificates=certificates,
    )
    log(INFO, "Starting Flower ClientAppIo gRPC server on %s", address)
    clientappio_grpc_server.start()
    return clientappio_grpc_server, clientappio_servicer
|
flwr/client/supernode/app.py
CHANGED
@@ -43,8 +43,8 @@ from flwr.common.exit import ExitCode, flwr_exit
|
|
43
43
|
from flwr.common.exit_handlers import register_exit_handlers
|
44
44
|
from flwr.common.logger import log
|
45
45
|
|
46
|
-
from ..app import start_client_internal
|
47
46
|
from ..clientapp.utils import get_load_client_app_fn
|
47
|
+
from ..start_client_internal import start_client_internal
|
48
48
|
|
49
49
|
|
50
50
|
def run_supernode() -> None:
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# Copyright 2025 Flower Labs GmbH. All Rights Reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
# ==============================================================================
|
15
|
+
"""Public Flower ClientApp APIs."""
|
flwr/common/__init__.py
CHANGED
@@ -15,6 +15,8 @@
|
|
15
15
|
"""Common components shared between server and client."""
|
16
16
|
|
17
17
|
|
18
|
+
from ..app.error import Error as Error
|
19
|
+
from ..app.metadata import Metadata as Metadata
|
18
20
|
from .constant import MessageType as MessageType
|
19
21
|
from .constant import MessageTypeLegacy as MessageTypeLegacy
|
20
22
|
from .context import Context as Context
|
@@ -23,9 +25,7 @@ from .grpc import GRPC_MAX_MESSAGE_LENGTH
|
|
23
25
|
from .logger import configure as configure
|
24
26
|
from .logger import log as log
|
25
27
|
from .message import DEFAULT_TTL
|
26
|
-
from .message import Error as Error
|
27
28
|
from .message import Message as Message
|
28
|
-
from .message import Metadata as Metadata
|
29
29
|
from .parameter import bytes_to_ndarray as bytes_to_ndarray
|
30
30
|
from .parameter import ndarray_to_bytes as ndarray_to_bytes
|
31
31
|
from .parameter import ndarrays_to_parameters as ndarrays_to_parameters
|