flwr-nightly 1.19.0.dev20250521__py3-none-any.whl → 1.19.0.dev20250523__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flwr/client/grpc_adapter_client/connection.py +4 -4
- flwr/client/grpc_rere_client/connection.py +4 -4
- flwr/client/rest_client/connection.py +4 -4
- flwr/common/inflatable.py +23 -0
- flwr/common/inflatable_grpc_utils.py +2 -0
- flwr/compat/client/app.py +2 -2
- flwr/proto/run_pb2.py +19 -27
- flwr/proto/run_pb2.pyi +0 -51
- flwr/proto/serverappio_pb2.py +2 -2
- flwr/proto/serverappio_pb2_grpc.py +0 -34
- flwr/proto/serverappio_pb2_grpc.pyi +0 -13
- flwr/server/app.py +12 -1
- flwr/server/superlink/fleet/grpc_rere/fleet_servicer.py +6 -1
- flwr/server/superlink/serverappio/serverappio_grpc.py +3 -0
- flwr/server/superlink/serverappio/serverappio_servicer.py +7 -32
- flwr/supercore/object_store/__init__.py +23 -0
- flwr/supercore/object_store/in_memory_object_store.py +65 -0
- flwr/supercore/object_store/object_store.py +86 -0
- flwr/supercore/object_store/object_store_factory.py +44 -0
- flwr/{client/supernode → supernode/cli}/__init__.py +3 -5
- flwr/{client/supernode/app.py → supernode/cli/flower_supernode.py} +2 -10
- flwr/{client → supernode}/start_client_internal.py +179 -301
- {flwr_nightly-1.19.0.dev20250521.dist-info → flwr_nightly-1.19.0.dev20250523.dist-info}/METADATA +1 -1
- {flwr_nightly-1.19.0.dev20250521.dist-info → flwr_nightly-1.19.0.dev20250523.dist-info}/RECORD +28 -24
- {flwr_nightly-1.19.0.dev20250521.dist-info → flwr_nightly-1.19.0.dev20250523.dist-info}/entry_points.txt +1 -1
- /flwr/{client → compat/client}/grpc_client/__init__.py +0 -0
- /flwr/{client → compat/client}/grpc_client/connection.py +0 -0
- {flwr_nightly-1.19.0.dev20250521.dist-info → flwr_nightly-1.19.0.dev20250523.dist-info}/WHEEL +0 -0
@@ -20,11 +20,12 @@ import os
|
|
20
20
|
import sys
|
21
21
|
import threading
|
22
22
|
import time
|
23
|
-
from
|
24
|
-
from
|
23
|
+
from collections.abc import Iterator
|
24
|
+
from contextlib import contextmanager
|
25
|
+
from logging import INFO, WARN
|
25
26
|
from os import urandom
|
26
27
|
from pathlib import Path
|
27
|
-
from typing import Callable, Optional, Union
|
28
|
+
from typing import Callable, Optional, Union
|
28
29
|
|
29
30
|
import grpc
|
30
31
|
from cryptography.hazmat.primitives.asymmetric import ec
|
@@ -32,32 +33,25 @@ from grpc import RpcError
|
|
32
33
|
|
33
34
|
from flwr.app.error import Error
|
34
35
|
from flwr.cli.config_utils import get_fab_metadata
|
35
|
-
from flwr.cli.install import install_from_fab
|
36
|
-
from flwr.client.client import Client
|
37
|
-
from flwr.client.client_app import ClientApp, LoadClientAppError
|
38
36
|
from flwr.client.clientapp.app import flwr_clientapp
|
39
37
|
from flwr.client.clientapp.clientappio_servicer import (
|
40
38
|
ClientAppInputs,
|
41
39
|
ClientAppIoServicer,
|
42
40
|
)
|
43
41
|
from flwr.client.grpc_adapter_client.connection import grpc_adapter
|
44
|
-
from flwr.client.grpc_client.connection import grpc_connection
|
45
42
|
from flwr.client.grpc_rere_client.connection import grpc_request_response
|
46
43
|
from flwr.client.message_handler.message_handler import handle_control_message
|
47
44
|
from flwr.client.run_info_store import DeprecatedRunInfoStore
|
48
|
-
from flwr.
|
49
|
-
from flwr.common import GRPC_MAX_MESSAGE_LENGTH, Context, Message
|
45
|
+
from flwr.common import GRPC_MAX_MESSAGE_LENGTH, Message
|
50
46
|
from flwr.common.address import parse_address
|
51
47
|
from flwr.common.constant import (
|
52
48
|
CLIENT_OCTET,
|
53
49
|
CLIENTAPPIO_API_DEFAULT_SERVER_ADDRESS,
|
54
|
-
ISOLATION_MODE_PROCESS,
|
55
50
|
ISOLATION_MODE_SUBPROCESS,
|
56
51
|
MAX_RETRY_DELAY,
|
57
52
|
RUN_ID_NUM_BYTES,
|
58
53
|
SERVER_OCTET,
|
59
54
|
TRANSPORT_TYPE_GRPC_ADAPTER,
|
60
|
-
TRANSPORT_TYPE_GRPC_BIDI,
|
61
55
|
TRANSPORT_TYPE_GRPC_RERE,
|
62
56
|
TRANSPORT_TYPE_REST,
|
63
57
|
TRANSPORT_TYPES,
|
@@ -72,20 +66,6 @@ from flwr.proto.clientappio_pb2_grpc import add_ClientAppIoServicer_to_server
|
|
72
66
|
from flwr.supernode.nodestate import NodeStateFactory
|
73
67
|
|
74
68
|
|
75
|
-
def _check_actionable_client(
|
76
|
-
client: Optional[Client], client_fn: Optional[ClientFnExt]
|
77
|
-
) -> None:
|
78
|
-
if client_fn is None and client is None:
|
79
|
-
raise ValueError(
|
80
|
-
"Both `client_fn` and `client` are `None`, but one is required"
|
81
|
-
)
|
82
|
-
|
83
|
-
if client_fn is not None and client is not None:
|
84
|
-
raise ValueError(
|
85
|
-
"Both `client_fn` and `client` are provided, but only one is allowed"
|
86
|
-
)
|
87
|
-
|
88
|
-
|
89
69
|
# pylint: disable=import-outside-toplevel
|
90
70
|
# pylint: disable=too-many-branches
|
91
71
|
# pylint: disable=too-many-locals
|
@@ -95,21 +75,17 @@ def start_client_internal(
|
|
95
75
|
*,
|
96
76
|
server_address: str,
|
97
77
|
node_config: UserConfig,
|
98
|
-
load_client_app_fn: Optional[Callable[[str, str, str], ClientApp]] = None,
|
99
|
-
client_fn: Optional[ClientFnExt] = None,
|
100
|
-
client: Optional[Client] = None,
|
101
|
-
grpc_max_message_length: int = GRPC_MAX_MESSAGE_LENGTH,
|
102
78
|
root_certificates: Optional[Union[bytes, str]] = None,
|
103
79
|
insecure: Optional[bool] = None,
|
104
|
-
transport:
|
80
|
+
transport: str,
|
105
81
|
authentication_keys: Optional[
|
106
82
|
tuple[ec.EllipticCurvePrivateKey, ec.EllipticCurvePublicKey]
|
107
83
|
] = None,
|
108
84
|
max_retries: Optional[int] = None,
|
109
85
|
max_wait_time: Optional[float] = None,
|
110
86
|
flwr_path: Optional[Path] = None,
|
111
|
-
isolation:
|
112
|
-
clientappio_api_address:
|
87
|
+
isolation: str = ISOLATION_MODE_SUBPROCESS,
|
88
|
+
clientappio_api_address: str = CLIENTAPPIO_API_DEFAULT_SERVER_ADDRESS,
|
113
89
|
) -> None:
|
114
90
|
"""Start a Flower client node which connects to a Flower server.
|
115
91
|
|
@@ -121,20 +97,6 @@ def start_client_internal(
|
|
121
97
|
would be `"[::]:8080"`.
|
122
98
|
node_config: UserConfig
|
123
99
|
The configuration of the node.
|
124
|
-
load_client_app_fn : Optional[Callable[[], ClientApp]] (default: None)
|
125
|
-
A function that can be used to load a `ClientApp` instance.
|
126
|
-
client_fn : Optional[ClientFnExt]
|
127
|
-
A callable that instantiates a Client. (default: None)
|
128
|
-
client : Optional[flwr.client.Client]
|
129
|
-
An implementation of the abstract base
|
130
|
-
class `flwr.client.Client` (default: None)
|
131
|
-
grpc_max_message_length : int (default: 536_870_912, this equals 512MB)
|
132
|
-
The maximum length of gRPC messages that can be exchanged with the
|
133
|
-
Flower server. The default should be sufficient for most models.
|
134
|
-
Users who train very large models might need to increase this
|
135
|
-
value. Note that the Flower server needs to be started with the
|
136
|
-
same value (see `flwr.server.start_server`), otherwise it will not
|
137
|
-
know about the increased limit and block larger messages.
|
138
100
|
root_certificates : Optional[Union[bytes, str]] (default: None)
|
139
101
|
The PEM-encoded root certificates as a byte string or a path string.
|
140
102
|
If provided, a secure connection using the certificates will be
|
@@ -142,10 +104,10 @@ def start_client_internal(
|
|
142
104
|
insecure : Optional[bool] (default: None)
|
143
105
|
Starts an insecure gRPC connection when True. Enables HTTPS connection
|
144
106
|
when False, using system certificates if `root_certificates` is None.
|
145
|
-
transport :
|
107
|
+
transport : str
|
146
108
|
Configure the transport layer. Allowed values:
|
147
|
-
- 'grpc-
|
148
|
-
- 'grpc-
|
109
|
+
- 'grpc-rere': gRPC, request-response
|
110
|
+
- 'grpc-adapter': gRPC via 3rd party adapter (experimental)
|
149
111
|
- 'rest': HTTP (experimental)
|
150
112
|
authentication_keys : Optional[Tuple[PrivateKey, PublicKey]] (default: None)
|
151
113
|
Tuple containing the elliptic curve private key and public key for
|
@@ -162,98 +124,23 @@ def start_client_internal(
|
|
162
124
|
If set to None, there is no limit to the total time.
|
163
125
|
flwr_path: Optional[Path] (default: None)
|
164
126
|
The fully resolved path containing installed Flower Apps.
|
165
|
-
isolation :
|
127
|
+
isolation : str (default: ISOLATION_MODE_SUBPROCESS)
|
166
128
|
Isolation mode for `ClientApp`. Possible values are `subprocess` and
|
167
|
-
`process`.
|
168
|
-
as the SuperNode. If `subprocess`, the `ClientApp` runs in a subprocess started
|
129
|
+
`process`. If `subprocess`, the `ClientApp` runs in a subprocess started
|
169
130
|
by the SuperNode and communicates using gRPC at the address
|
170
131
|
`clientappio_api_address`. If `process`, the `ClientApp` runs in a separate
|
171
132
|
isolated process and communicates using gRPC at the address
|
172
133
|
`clientappio_api_address`.
|
173
|
-
clientappio_api_address :
|
134
|
+
clientappio_api_address : str
|
174
135
|
(default: `CLIENTAPPIO_API_DEFAULT_SERVER_ADDRESS`)
|
175
136
|
The SuperNode gRPC server address.
|
176
137
|
"""
|
177
138
|
if insecure is None:
|
178
139
|
insecure = root_certificates is None
|
179
140
|
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
if client_fn is None:
|
184
|
-
# Wrap `Client` instance in `client_fn`
|
185
|
-
def single_client_factory(
|
186
|
-
context: Context, # pylint: disable=unused-argument
|
187
|
-
) -> Client:
|
188
|
-
if client is None: # Added this to keep mypy happy
|
189
|
-
raise ValueError(
|
190
|
-
"Both `client_fn` and `client` are `None`, but one is required"
|
191
|
-
)
|
192
|
-
return client # Always return the same instance
|
193
|
-
|
194
|
-
client_fn = single_client_factory
|
195
|
-
|
196
|
-
def _load_client_app(_1: str, _2: str, _3: str) -> ClientApp:
|
197
|
-
return ClientApp(client_fn=client_fn)
|
198
|
-
|
199
|
-
load_client_app_fn = _load_client_app
|
200
|
-
|
201
|
-
if isolation:
|
202
|
-
if clientappio_api_address is None:
|
203
|
-
raise ValueError(
|
204
|
-
f"`clientappio_api_address` required when `isolation` is "
|
205
|
-
f"{ISOLATION_MODE_SUBPROCESS} or {ISOLATION_MODE_PROCESS}",
|
206
|
-
)
|
207
|
-
_clientappio_grpc_server, clientappio_servicer = run_clientappio_api_grpc(
|
208
|
-
address=clientappio_api_address,
|
209
|
-
certificates=None,
|
210
|
-
)
|
211
|
-
clientappio_api_address = cast(str, clientappio_api_address)
|
212
|
-
|
213
|
-
# At this point, only `load_client_app_fn` should be used
|
214
|
-
# Both `client` and `client_fn` must not be used directly
|
215
|
-
|
216
|
-
# Initialize connection context manager
|
217
|
-
connection, address, connection_error_type = _init_connection(
|
218
|
-
transport, server_address
|
219
|
-
)
|
220
|
-
|
221
|
-
def _on_sucess(retry_state: RetryState) -> None:
|
222
|
-
if retry_state.tries > 1:
|
223
|
-
log(
|
224
|
-
INFO,
|
225
|
-
"Connection successful after %.2f seconds and %s tries.",
|
226
|
-
retry_state.elapsed_time,
|
227
|
-
retry_state.tries,
|
228
|
-
)
|
229
|
-
|
230
|
-
def _on_backoff(retry_state: RetryState) -> None:
|
231
|
-
if retry_state.tries == 1:
|
232
|
-
log(WARN, "Connection attempt failed, retrying...")
|
233
|
-
else:
|
234
|
-
log(
|
235
|
-
WARN,
|
236
|
-
"Connection attempt failed, retrying in %.2f seconds",
|
237
|
-
retry_state.actual_wait,
|
238
|
-
)
|
239
|
-
|
240
|
-
retry_invoker = RetryInvoker(
|
241
|
-
wait_gen_factory=lambda: exponential(max_delay=MAX_RETRY_DELAY),
|
242
|
-
recoverable_exceptions=connection_error_type,
|
243
|
-
max_tries=max_retries + 1 if max_retries is not None else None,
|
244
|
-
max_time=max_wait_time,
|
245
|
-
on_giveup=lambda retry_state: (
|
246
|
-
log(
|
247
|
-
WARN,
|
248
|
-
"Giving up reconnection after %.2f seconds and %s tries.",
|
249
|
-
retry_state.elapsed_time,
|
250
|
-
retry_state.tries,
|
251
|
-
)
|
252
|
-
if retry_state.tries > 1
|
253
|
-
else None
|
254
|
-
),
|
255
|
-
on_success=_on_sucess,
|
256
|
-
on_backoff=_on_backoff,
|
141
|
+
_clientappio_grpc_server, clientappio_servicer = run_clientappio_api_grpc(
|
142
|
+
address=clientappio_api_address,
|
143
|
+
certificates=None,
|
257
144
|
)
|
258
145
|
|
259
146
|
# DeprecatedRunInfoStore gets initialized when the first connection is established
|
@@ -266,42 +153,28 @@ def start_client_internal(
|
|
266
153
|
|
267
154
|
while True:
|
268
155
|
sleep_duration: int = 0
|
269
|
-
with
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
156
|
+
with _init_connection(
|
157
|
+
transport=transport,
|
158
|
+
server_address=server_address,
|
159
|
+
insecure=insecure,
|
160
|
+
root_certificates=root_certificates,
|
161
|
+
authentication_keys=authentication_keys,
|
162
|
+
max_retries=max_retries,
|
163
|
+
max_wait_time=max_wait_time,
|
276
164
|
) as conn:
|
277
165
|
receive, send, create_node, delete_node, get_run, get_fab = conn
|
278
166
|
|
279
167
|
# Register node when connecting the first time
|
280
168
|
if run_info_store is None:
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
node_id=-1,
|
291
|
-
node_config={},
|
292
|
-
)
|
293
|
-
else:
|
294
|
-
# Call create_node fn to register node
|
295
|
-
# and store node_id in state
|
296
|
-
if (node_id := create_node()) is None:
|
297
|
-
raise ValueError(
|
298
|
-
"Failed to register SuperNode with the SuperLink"
|
299
|
-
)
|
300
|
-
state.set_node_id(node_id)
|
301
|
-
run_info_store = DeprecatedRunInfoStore(
|
302
|
-
node_id=state.get_node_id(),
|
303
|
-
node_config=node_config,
|
304
|
-
)
|
169
|
+
# Call create_node fn to register node
|
170
|
+
# and store node_id in state
|
171
|
+
if (node_id := create_node()) is None:
|
172
|
+
raise ValueError("Failed to register SuperNode with the SuperLink")
|
173
|
+
state.set_node_id(node_id)
|
174
|
+
run_info_store = DeprecatedRunInfoStore(
|
175
|
+
node_id=state.get_node_id(),
|
176
|
+
node_config=node_config,
|
177
|
+
)
|
305
178
|
|
306
179
|
# pylint: disable=too-many-nested-blocks
|
307
180
|
while True:
|
@@ -336,18 +209,11 @@ def start_client_internal(
|
|
336
209
|
# Get run info
|
337
210
|
run_id = message.metadata.run_id
|
338
211
|
if run_id not in runs:
|
339
|
-
|
340
|
-
runs[run_id] = get_run(run_id)
|
341
|
-
# If get_run is None, i.e., in grpc-bidi mode
|
342
|
-
else:
|
343
|
-
runs[run_id] = Run.create_empty(run_id=run_id)
|
212
|
+
runs[run_id] = get_run(run_id)
|
344
213
|
|
345
214
|
run: Run = runs[run_id]
|
346
215
|
if get_fab is not None and run.fab_hash:
|
347
216
|
fab = get_fab(run.fab_hash, run_id)
|
348
|
-
if not isolation:
|
349
|
-
# If `ClientApp` runs in the same process, install the FAB
|
350
|
-
install_from_fab(fab.content, flwr_path, True)
|
351
217
|
fab_id, fab_version = get_fab_metadata(fab.content)
|
352
218
|
else:
|
353
219
|
fab = None
|
@@ -372,110 +238,68 @@ def start_client_internal(
|
|
372
238
|
reply_to=message,
|
373
239
|
)
|
374
240
|
|
375
|
-
#
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
clientappio_servicer.set_inputs(
|
393
|
-
clientapp_input=ClientAppInputs(
|
394
|
-
message=message,
|
395
|
-
context=context,
|
396
|
-
run=run,
|
397
|
-
fab=fab,
|
398
|
-
token=token,
|
399
|
-
),
|
400
|
-
token_returned=start_subprocess,
|
401
|
-
)
|
402
|
-
|
403
|
-
if start_subprocess:
|
404
|
-
_octet, _colon, _port = (
|
405
|
-
clientappio_api_address.rpartition(":")
|
406
|
-
)
|
407
|
-
io_address = (
|
408
|
-
f"{CLIENT_OCTET}:{_port}"
|
409
|
-
if _octet == SERVER_OCTET
|
410
|
-
else clientappio_api_address
|
411
|
-
)
|
412
|
-
# Start ClientApp subprocess
|
413
|
-
command = [
|
414
|
-
"flwr-clientapp",
|
415
|
-
"--clientappio-api-address",
|
416
|
-
io_address,
|
417
|
-
"--token",
|
418
|
-
str(token),
|
419
|
-
]
|
420
|
-
command.append("--insecure")
|
421
|
-
|
422
|
-
proc = mp_spawn_context.Process(
|
423
|
-
target=_run_flwr_clientapp,
|
424
|
-
args=(command, os.getpid()),
|
425
|
-
daemon=True,
|
426
|
-
)
|
427
|
-
proc.start()
|
428
|
-
proc.join()
|
429
|
-
else:
|
430
|
-
# Wait for output to become available
|
431
|
-
while not clientappio_servicer.has_outputs():
|
432
|
-
time.sleep(0.1)
|
433
|
-
|
434
|
-
outputs = clientappio_servicer.get_outputs()
|
435
|
-
reply_message, context = outputs.message, outputs.context
|
436
|
-
else:
|
437
|
-
# Load ClientApp instance
|
438
|
-
client_app: ClientApp = load_client_app_fn(
|
439
|
-
fab_id, fab_version, run.fab_hash
|
440
|
-
)
|
441
|
-
|
442
|
-
# Execute ClientApp
|
443
|
-
reply_message = client_app(message=message, context=context)
|
444
|
-
except Exception as ex: # pylint: disable=broad-exception-caught
|
445
|
-
|
446
|
-
# Legacy grpc-bidi
|
447
|
-
if transport in ["grpc-bidi", None]:
|
448
|
-
log(ERROR, "Client raised an exception.", exc_info=ex)
|
449
|
-
# Raise exception, crash process
|
450
|
-
raise ex
|
451
|
-
|
452
|
-
# Don't update/change DeprecatedRunInfoStore
|
453
|
-
|
454
|
-
e_code = ErrorCode.CLIENT_APP_RAISED_EXCEPTION
|
455
|
-
# Ex fmt: "<class 'ZeroDivisionError'>:<'division by zero'>"
|
456
|
-
reason = str(type(ex)) + ":<'" + str(ex) + "'>"
|
457
|
-
exc_entity = "ClientApp"
|
458
|
-
if isinstance(ex, LoadClientAppError):
|
459
|
-
reason = (
|
460
|
-
"An exception was raised when attempting to load "
|
461
|
-
"`ClientApp`"
|
462
|
-
)
|
463
|
-
e_code = ErrorCode.LOAD_CLIENT_APP_EXCEPTION
|
464
|
-
exc_entity = "SuperNode"
|
465
|
-
|
466
|
-
log(ERROR, "%s raised an exception", exc_entity, exc_info=ex)
|
467
|
-
|
468
|
-
# Create error message
|
469
|
-
reply_message = Message(
|
470
|
-
Error(code=e_code, reason=reason),
|
471
|
-
reply_to=message,
|
472
|
-
)
|
473
|
-
else:
|
474
|
-
# No exception, update node state
|
475
|
-
run_info_store.update_context(
|
476
|
-
run_id=run_id,
|
241
|
+
# Two isolation modes:
|
242
|
+
# 1. `subprocess`: SuperNode is starting the ClientApp
|
243
|
+
# process as a subprocess.
|
244
|
+
# 2. `process`: ClientApp process gets started separately
|
245
|
+
# (via `flwr-clientapp`), for example, in a separate
|
246
|
+
# Docker container.
|
247
|
+
|
248
|
+
# Generate SuperNode token
|
249
|
+
token = int.from_bytes(urandom(RUN_ID_NUM_BYTES), "little")
|
250
|
+
|
251
|
+
# Mode 1: SuperNode starts ClientApp as subprocess
|
252
|
+
start_subprocess = isolation == ISOLATION_MODE_SUBPROCESS
|
253
|
+
|
254
|
+
# Share Message and Context with servicer
|
255
|
+
clientappio_servicer.set_inputs(
|
256
|
+
clientapp_input=ClientAppInputs(
|
257
|
+
message=message,
|
477
258
|
context=context,
|
259
|
+
run=run,
|
260
|
+
fab=fab,
|
261
|
+
token=token,
|
262
|
+
),
|
263
|
+
token_returned=start_subprocess,
|
264
|
+
)
|
265
|
+
|
266
|
+
if start_subprocess:
|
267
|
+
_octet, _colon, _port = clientappio_api_address.rpartition(":")
|
268
|
+
io_address = (
|
269
|
+
f"{CLIENT_OCTET}:{_port}"
|
270
|
+
if _octet == SERVER_OCTET
|
271
|
+
else clientappio_api_address
|
478
272
|
)
|
273
|
+
# Start ClientApp subprocess
|
274
|
+
command = [
|
275
|
+
"flwr-clientapp",
|
276
|
+
"--clientappio-api-address",
|
277
|
+
io_address,
|
278
|
+
"--token",
|
279
|
+
str(token),
|
280
|
+
]
|
281
|
+
command.append("--insecure")
|
282
|
+
|
283
|
+
proc = mp_spawn_context.Process(
|
284
|
+
target=_run_flwr_clientapp,
|
285
|
+
args=(command, os.getpid()),
|
286
|
+
daemon=True,
|
287
|
+
)
|
288
|
+
proc.start()
|
289
|
+
proc.join()
|
290
|
+
else:
|
291
|
+
# Wait for output to become available
|
292
|
+
while not clientappio_servicer.has_outputs():
|
293
|
+
time.sleep(0.1)
|
294
|
+
|
295
|
+
outputs = clientappio_servicer.get_outputs()
|
296
|
+
reply_message, context = outputs.message, outputs.context
|
297
|
+
|
298
|
+
# Update node state
|
299
|
+
run_info_store.update_context(
|
300
|
+
run_id=run_id,
|
301
|
+
context=context,
|
302
|
+
)
|
479
303
|
|
480
304
|
# Send
|
481
305
|
send(reply_message)
|
@@ -509,30 +333,28 @@ def start_client_internal(
|
|
509
333
|
time.sleep(sleep_duration)
|
510
334
|
|
511
335
|
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
517
|
-
|
518
|
-
|
519
|
-
|
520
|
-
|
521
|
-
|
522
|
-
|
523
|
-
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
],
|
533
|
-
str,
|
534
|
-
type[Exception],
|
336
|
+
@contextmanager
|
337
|
+
def _init_connection( # pylint: disable=too-many-positional-arguments
|
338
|
+
transport: str,
|
339
|
+
server_address: str,
|
340
|
+
insecure: bool,
|
341
|
+
root_certificates: Optional[Union[bytes, str]] = None,
|
342
|
+
authentication_keys: Optional[
|
343
|
+
tuple[ec.EllipticCurvePrivateKey, ec.EllipticCurvePublicKey]
|
344
|
+
] = None,
|
345
|
+
max_retries: Optional[int] = None,
|
346
|
+
max_wait_time: Optional[float] = None,
|
347
|
+
) -> Iterator[
|
348
|
+
tuple[
|
349
|
+
Callable[[], Optional[Message]],
|
350
|
+
Callable[[Message], None],
|
351
|
+
Callable[[], Optional[int]],
|
352
|
+
Callable[[], None],
|
353
|
+
Callable[[int], Run],
|
354
|
+
Callable[[str, int], Fab],
|
355
|
+
]
|
535
356
|
]:
|
357
|
+
"""Establish a connection to the Fleet API server at SuperLink."""
|
536
358
|
# Parse IP address
|
537
359
|
parsed_address = parse_address(server_address)
|
538
360
|
if not parsed_address:
|
@@ -543,10 +365,6 @@ def _init_connection(transport: Optional[str], server_address: str) -> tuple[
|
|
543
365
|
host, port, is_v6 = parsed_address
|
544
366
|
address = f"[{host}]:{port}" if is_v6 else f"{host}:{port}"
|
545
367
|
|
546
|
-
# Set the default transport layer
|
547
|
-
if transport is None:
|
548
|
-
transport = TRANSPORT_TYPE_GRPC_BIDI
|
549
|
-
|
550
368
|
# Use either gRPC bidirectional streaming or REST request/response
|
551
369
|
if transport == TRANSPORT_TYPE_REST:
|
552
370
|
try:
|
@@ -562,14 +380,74 @@ def _init_connection(transport: Optional[str], server_address: str) -> tuple[
|
|
562
380
|
connection, error_type = grpc_request_response, RpcError
|
563
381
|
elif transport == TRANSPORT_TYPE_GRPC_ADAPTER:
|
564
382
|
connection, error_type = grpc_adapter, RpcError
|
565
|
-
elif transport == TRANSPORT_TYPE_GRPC_BIDI:
|
566
|
-
connection, error_type = grpc_connection, RpcError
|
567
383
|
else:
|
568
384
|
raise ValueError(
|
569
385
|
f"Unknown transport type: {transport} (possible: {TRANSPORT_TYPES})"
|
570
386
|
)
|
571
387
|
|
572
|
-
|
388
|
+
# Create RetryInvoker
|
389
|
+
retry_invoker = _make_fleet_connection_retry_invoker(
|
390
|
+
max_retries=max_retries,
|
391
|
+
max_wait_time=max_wait_time,
|
392
|
+
connection_error_type=error_type,
|
393
|
+
)
|
394
|
+
|
395
|
+
# Establish connection
|
396
|
+
with connection(
|
397
|
+
address,
|
398
|
+
insecure,
|
399
|
+
retry_invoker,
|
400
|
+
GRPC_MAX_MESSAGE_LENGTH,
|
401
|
+
root_certificates,
|
402
|
+
authentication_keys,
|
403
|
+
) as conn:
|
404
|
+
yield conn
|
405
|
+
|
406
|
+
|
407
|
+
def _make_fleet_connection_retry_invoker(
|
408
|
+
max_retries: Optional[int] = None,
|
409
|
+
max_wait_time: Optional[float] = None,
|
410
|
+
connection_error_type: type[Exception] = RpcError,
|
411
|
+
) -> RetryInvoker:
|
412
|
+
"""Create a retry invoker for fleet connection."""
|
413
|
+
|
414
|
+
def _on_success(retry_state: RetryState) -> None:
|
415
|
+
if retry_state.tries > 1:
|
416
|
+
log(
|
417
|
+
INFO,
|
418
|
+
"Connection successful after %.2f seconds and %s tries.",
|
419
|
+
retry_state.elapsed_time,
|
420
|
+
retry_state.tries,
|
421
|
+
)
|
422
|
+
|
423
|
+
def _on_backoff(retry_state: RetryState) -> None:
|
424
|
+
if retry_state.tries == 1:
|
425
|
+
log(WARN, "Connection attempt failed, retrying...")
|
426
|
+
else:
|
427
|
+
log(
|
428
|
+
WARN,
|
429
|
+
"Connection attempt failed, retrying in %.2f seconds",
|
430
|
+
retry_state.actual_wait,
|
431
|
+
)
|
432
|
+
|
433
|
+
return RetryInvoker(
|
434
|
+
wait_gen_factory=lambda: exponential(max_delay=MAX_RETRY_DELAY),
|
435
|
+
recoverable_exceptions=connection_error_type,
|
436
|
+
max_tries=max_retries + 1 if max_retries is not None else None,
|
437
|
+
max_time=max_wait_time,
|
438
|
+
on_giveup=lambda retry_state: (
|
439
|
+
log(
|
440
|
+
WARN,
|
441
|
+
"Giving up reconnection after %.2f seconds and %s tries.",
|
442
|
+
retry_state.elapsed_time,
|
443
|
+
retry_state.tries,
|
444
|
+
)
|
445
|
+
if retry_state.tries > 1
|
446
|
+
else None
|
447
|
+
),
|
448
|
+
on_success=_on_success,
|
449
|
+
on_backoff=_on_backoff,
|
450
|
+
)
|
573
451
|
|
574
452
|
|
575
453
|
def _run_flwr_clientapp(args: list[str], main_pid: int) -> None:
|
{flwr_nightly-1.19.0.dev20250521.dist-info → flwr_nightly-1.19.0.dev20250523.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: flwr-nightly
|
3
|
-
Version: 1.19.0.
|
3
|
+
Version: 1.19.0.dev20250523
|
4
4
|
Summary: Flower: A Friendly Federated AI Framework
|
5
5
|
License: Apache-2.0
|
6
6
|
Keywords: Artificial Intelligence,Federated AI,Federated Analytics,Federated Evaluation,Federated Learning,Flower,Machine Learning
|