flwr-nightly 1.19.0.dev20250521__py3-none-any.whl → 1.19.0.dev20250523__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. flwr/client/grpc_adapter_client/connection.py +4 -4
  2. flwr/client/grpc_rere_client/connection.py +4 -4
  3. flwr/client/rest_client/connection.py +4 -4
  4. flwr/common/inflatable.py +23 -0
  5. flwr/common/inflatable_grpc_utils.py +2 -0
  6. flwr/compat/client/app.py +2 -2
  7. flwr/proto/run_pb2.py +19 -27
  8. flwr/proto/run_pb2.pyi +0 -51
  9. flwr/proto/serverappio_pb2.py +2 -2
  10. flwr/proto/serverappio_pb2_grpc.py +0 -34
  11. flwr/proto/serverappio_pb2_grpc.pyi +0 -13
  12. flwr/server/app.py +12 -1
  13. flwr/server/superlink/fleet/grpc_rere/fleet_servicer.py +6 -1
  14. flwr/server/superlink/serverappio/serverappio_grpc.py +3 -0
  15. flwr/server/superlink/serverappio/serverappio_servicer.py +7 -32
  16. flwr/supercore/object_store/__init__.py +23 -0
  17. flwr/supercore/object_store/in_memory_object_store.py +65 -0
  18. flwr/supercore/object_store/object_store.py +86 -0
  19. flwr/supercore/object_store/object_store_factory.py +44 -0
  20. flwr/{client/supernode → supernode/cli}/__init__.py +3 -5
  21. flwr/{client/supernode/app.py → supernode/cli/flower_supernode.py} +2 -10
  22. flwr/{client → supernode}/start_client_internal.py +179 -301
  23. {flwr_nightly-1.19.0.dev20250521.dist-info → flwr_nightly-1.19.0.dev20250523.dist-info}/METADATA +1 -1
  24. {flwr_nightly-1.19.0.dev20250521.dist-info → flwr_nightly-1.19.0.dev20250523.dist-info}/RECORD +28 -24
  25. {flwr_nightly-1.19.0.dev20250521.dist-info → flwr_nightly-1.19.0.dev20250523.dist-info}/entry_points.txt +1 -1
  26. /flwr/{client → compat/client}/grpc_client/__init__.py +0 -0
  27. /flwr/{client → compat/client}/grpc_client/connection.py +0 -0
  28. {flwr_nightly-1.19.0.dev20250521.dist-info → flwr_nightly-1.19.0.dev20250523.dist-info}/WHEEL +0 -0
@@ -20,11 +20,12 @@ import os
20
20
  import sys
21
21
  import threading
22
22
  import time
23
- from contextlib import AbstractContextManager
24
- from logging import ERROR, INFO, WARN
23
+ from collections.abc import Iterator
24
+ from contextlib import contextmanager
25
+ from logging import INFO, WARN
25
26
  from os import urandom
26
27
  from pathlib import Path
27
- from typing import Callable, Optional, Union, cast
28
+ from typing import Callable, Optional, Union
28
29
 
29
30
  import grpc
30
31
  from cryptography.hazmat.primitives.asymmetric import ec
@@ -32,32 +33,25 @@ from grpc import RpcError
32
33
 
33
34
  from flwr.app.error import Error
34
35
  from flwr.cli.config_utils import get_fab_metadata
35
- from flwr.cli.install import install_from_fab
36
- from flwr.client.client import Client
37
- from flwr.client.client_app import ClientApp, LoadClientAppError
38
36
  from flwr.client.clientapp.app import flwr_clientapp
39
37
  from flwr.client.clientapp.clientappio_servicer import (
40
38
  ClientAppInputs,
41
39
  ClientAppIoServicer,
42
40
  )
43
41
  from flwr.client.grpc_adapter_client.connection import grpc_adapter
44
- from flwr.client.grpc_client.connection import grpc_connection
45
42
  from flwr.client.grpc_rere_client.connection import grpc_request_response
46
43
  from flwr.client.message_handler.message_handler import handle_control_message
47
44
  from flwr.client.run_info_store import DeprecatedRunInfoStore
48
- from flwr.client.typing import ClientFnExt
49
- from flwr.common import GRPC_MAX_MESSAGE_LENGTH, Context, Message
45
+ from flwr.common import GRPC_MAX_MESSAGE_LENGTH, Message
50
46
  from flwr.common.address import parse_address
51
47
  from flwr.common.constant import (
52
48
  CLIENT_OCTET,
53
49
  CLIENTAPPIO_API_DEFAULT_SERVER_ADDRESS,
54
- ISOLATION_MODE_PROCESS,
55
50
  ISOLATION_MODE_SUBPROCESS,
56
51
  MAX_RETRY_DELAY,
57
52
  RUN_ID_NUM_BYTES,
58
53
  SERVER_OCTET,
59
54
  TRANSPORT_TYPE_GRPC_ADAPTER,
60
- TRANSPORT_TYPE_GRPC_BIDI,
61
55
  TRANSPORT_TYPE_GRPC_RERE,
62
56
  TRANSPORT_TYPE_REST,
63
57
  TRANSPORT_TYPES,
@@ -72,20 +66,6 @@ from flwr.proto.clientappio_pb2_grpc import add_ClientAppIoServicer_to_server
72
66
  from flwr.supernode.nodestate import NodeStateFactory
73
67
 
74
68
 
75
- def _check_actionable_client(
76
- client: Optional[Client], client_fn: Optional[ClientFnExt]
77
- ) -> None:
78
- if client_fn is None and client is None:
79
- raise ValueError(
80
- "Both `client_fn` and `client` are `None`, but one is required"
81
- )
82
-
83
- if client_fn is not None and client is not None:
84
- raise ValueError(
85
- "Both `client_fn` and `client` are provided, but only one is allowed"
86
- )
87
-
88
-
89
69
  # pylint: disable=import-outside-toplevel
90
70
  # pylint: disable=too-many-branches
91
71
  # pylint: disable=too-many-locals
@@ -95,21 +75,17 @@ def start_client_internal(
95
75
  *,
96
76
  server_address: str,
97
77
  node_config: UserConfig,
98
- load_client_app_fn: Optional[Callable[[str, str, str], ClientApp]] = None,
99
- client_fn: Optional[ClientFnExt] = None,
100
- client: Optional[Client] = None,
101
- grpc_max_message_length: int = GRPC_MAX_MESSAGE_LENGTH,
102
78
  root_certificates: Optional[Union[bytes, str]] = None,
103
79
  insecure: Optional[bool] = None,
104
- transport: Optional[str] = None,
80
+ transport: str,
105
81
  authentication_keys: Optional[
106
82
  tuple[ec.EllipticCurvePrivateKey, ec.EllipticCurvePublicKey]
107
83
  ] = None,
108
84
  max_retries: Optional[int] = None,
109
85
  max_wait_time: Optional[float] = None,
110
86
  flwr_path: Optional[Path] = None,
111
- isolation: Optional[str] = None,
112
- clientappio_api_address: Optional[str] = CLIENTAPPIO_API_DEFAULT_SERVER_ADDRESS,
87
+ isolation: str = ISOLATION_MODE_SUBPROCESS,
88
+ clientappio_api_address: str = CLIENTAPPIO_API_DEFAULT_SERVER_ADDRESS,
113
89
  ) -> None:
114
90
  """Start a Flower client node which connects to a Flower server.
115
91
 
@@ -121,20 +97,6 @@ def start_client_internal(
121
97
  would be `"[::]:8080"`.
122
98
  node_config: UserConfig
123
99
  The configuration of the node.
124
- load_client_app_fn : Optional[Callable[[], ClientApp]] (default: None)
125
- A function that can be used to load a `ClientApp` instance.
126
- client_fn : Optional[ClientFnExt]
127
- A callable that instantiates a Client. (default: None)
128
- client : Optional[flwr.client.Client]
129
- An implementation of the abstract base
130
- class `flwr.client.Client` (default: None)
131
- grpc_max_message_length : int (default: 536_870_912, this equals 512MB)
132
- The maximum length of gRPC messages that can be exchanged with the
133
- Flower server. The default should be sufficient for most models.
134
- Users who train very large models might need to increase this
135
- value. Note that the Flower server needs to be started with the
136
- same value (see `flwr.server.start_server`), otherwise it will not
137
- know about the increased limit and block larger messages.
138
100
  root_certificates : Optional[Union[bytes, str]] (default: None)
139
101
  The PEM-encoded root certificates as a byte string or a path string.
140
102
  If provided, a secure connection using the certificates will be
@@ -142,10 +104,10 @@ def start_client_internal(
142
104
  insecure : Optional[bool] (default: None)
143
105
  Starts an insecure gRPC connection when True. Enables HTTPS connection
144
106
  when False, using system certificates if `root_certificates` is None.
145
- transport : Optional[str] (default: None)
107
+ transport : str
146
108
  Configure the transport layer. Allowed values:
147
- - 'grpc-bidi': gRPC, bidirectional streaming
148
- - 'grpc-rere': gRPC, request-response (experimental)
109
+ - 'grpc-rere': gRPC, request-response
110
+ - 'grpc-adapter': gRPC via 3rd party adapter (experimental)
149
111
  - 'rest': HTTP (experimental)
150
112
  authentication_keys : Optional[Tuple[PrivateKey, PublicKey]] (default: None)
151
113
  Tuple containing the elliptic curve private key and public key for
@@ -162,98 +124,23 @@ def start_client_internal(
162
124
  If set to None, there is no limit to the total time.
163
125
  flwr_path: Optional[Path] (default: None)
164
126
  The fully resolved path containing installed Flower Apps.
165
- isolation : Optional[str] (default: None)
127
+ isolation : str (default: ISOLATION_MODE_SUBPROCESS)
166
128
  Isolation mode for `ClientApp`. Possible values are `subprocess` and
167
- `process`. Defaults to `None`, which runs the `ClientApp` in the same process
168
- as the SuperNode. If `subprocess`, the `ClientApp` runs in a subprocess started
129
+ `process`. If `subprocess`, the `ClientApp` runs in a subprocess started
169
130
  by the SuperNode and communicates using gRPC at the address
170
131
  `clientappio_api_address`. If `process`, the `ClientApp` runs in a separate
171
132
  isolated process and communicates using gRPC at the address
172
133
  `clientappio_api_address`.
173
- clientappio_api_address : Optional[str]
134
+ clientappio_api_address : str
174
135
  (default: `CLIENTAPPIO_API_DEFAULT_SERVER_ADDRESS`)
175
136
  The SuperNode gRPC server address.
176
137
  """
177
138
  if insecure is None:
178
139
  insecure = root_certificates is None
179
140
 
180
- if load_client_app_fn is None:
181
- _check_actionable_client(client, client_fn)
182
-
183
- if client_fn is None:
184
- # Wrap `Client` instance in `client_fn`
185
- def single_client_factory(
186
- context: Context, # pylint: disable=unused-argument
187
- ) -> Client:
188
- if client is None: # Added this to keep mypy happy
189
- raise ValueError(
190
- "Both `client_fn` and `client` are `None`, but one is required"
191
- )
192
- return client # Always return the same instance
193
-
194
- client_fn = single_client_factory
195
-
196
- def _load_client_app(_1: str, _2: str, _3: str) -> ClientApp:
197
- return ClientApp(client_fn=client_fn)
198
-
199
- load_client_app_fn = _load_client_app
200
-
201
- if isolation:
202
- if clientappio_api_address is None:
203
- raise ValueError(
204
- f"`clientappio_api_address` required when `isolation` is "
205
- f"{ISOLATION_MODE_SUBPROCESS} or {ISOLATION_MODE_PROCESS}",
206
- )
207
- _clientappio_grpc_server, clientappio_servicer = run_clientappio_api_grpc(
208
- address=clientappio_api_address,
209
- certificates=None,
210
- )
211
- clientappio_api_address = cast(str, clientappio_api_address)
212
-
213
- # At this point, only `load_client_app_fn` should be used
214
- # Both `client` and `client_fn` must not be used directly
215
-
216
- # Initialize connection context manager
217
- connection, address, connection_error_type = _init_connection(
218
- transport, server_address
219
- )
220
-
221
- def _on_sucess(retry_state: RetryState) -> None:
222
- if retry_state.tries > 1:
223
- log(
224
- INFO,
225
- "Connection successful after %.2f seconds and %s tries.",
226
- retry_state.elapsed_time,
227
- retry_state.tries,
228
- )
229
-
230
- def _on_backoff(retry_state: RetryState) -> None:
231
- if retry_state.tries == 1:
232
- log(WARN, "Connection attempt failed, retrying...")
233
- else:
234
- log(
235
- WARN,
236
- "Connection attempt failed, retrying in %.2f seconds",
237
- retry_state.actual_wait,
238
- )
239
-
240
- retry_invoker = RetryInvoker(
241
- wait_gen_factory=lambda: exponential(max_delay=MAX_RETRY_DELAY),
242
- recoverable_exceptions=connection_error_type,
243
- max_tries=max_retries + 1 if max_retries is not None else None,
244
- max_time=max_wait_time,
245
- on_giveup=lambda retry_state: (
246
- log(
247
- WARN,
248
- "Giving up reconnection after %.2f seconds and %s tries.",
249
- retry_state.elapsed_time,
250
- retry_state.tries,
251
- )
252
- if retry_state.tries > 1
253
- else None
254
- ),
255
- on_success=_on_sucess,
256
- on_backoff=_on_backoff,
141
+ _clientappio_grpc_server, clientappio_servicer = run_clientappio_api_grpc(
142
+ address=clientappio_api_address,
143
+ certificates=None,
257
144
  )
258
145
 
259
146
  # DeprecatedRunInfoStore gets initialized when the first connection is established
@@ -266,42 +153,28 @@ def start_client_internal(
266
153
 
267
154
  while True:
268
155
  sleep_duration: int = 0
269
- with connection(
270
- address,
271
- insecure,
272
- retry_invoker,
273
- grpc_max_message_length,
274
- root_certificates,
275
- authentication_keys,
156
+ with _init_connection(
157
+ transport=transport,
158
+ server_address=server_address,
159
+ insecure=insecure,
160
+ root_certificates=root_certificates,
161
+ authentication_keys=authentication_keys,
162
+ max_retries=max_retries,
163
+ max_wait_time=max_wait_time,
276
164
  ) as conn:
277
165
  receive, send, create_node, delete_node, get_run, get_fab = conn
278
166
 
279
167
  # Register node when connecting the first time
280
168
  if run_info_store is None:
281
- if create_node is None:
282
- if transport not in ["grpc-bidi", None]:
283
- raise NotImplementedError(
284
- "All transports except `grpc-bidi` require "
285
- "an implementation for `create_node()`.'"
286
- )
287
- # gRPC-bidi doesn't have the concept of node_id,
288
- # so we set it to -1
289
- run_info_store = DeprecatedRunInfoStore(
290
- node_id=-1,
291
- node_config={},
292
- )
293
- else:
294
- # Call create_node fn to register node
295
- # and store node_id in state
296
- if (node_id := create_node()) is None:
297
- raise ValueError(
298
- "Failed to register SuperNode with the SuperLink"
299
- )
300
- state.set_node_id(node_id)
301
- run_info_store = DeprecatedRunInfoStore(
302
- node_id=state.get_node_id(),
303
- node_config=node_config,
304
- )
169
+ # Call create_node fn to register node
170
+ # and store node_id in state
171
+ if (node_id := create_node()) is None:
172
+ raise ValueError("Failed to register SuperNode with the SuperLink")
173
+ state.set_node_id(node_id)
174
+ run_info_store = DeprecatedRunInfoStore(
175
+ node_id=state.get_node_id(),
176
+ node_config=node_config,
177
+ )
305
178
 
306
179
  # pylint: disable=too-many-nested-blocks
307
180
  while True:
@@ -336,18 +209,11 @@ def start_client_internal(
336
209
  # Get run info
337
210
  run_id = message.metadata.run_id
338
211
  if run_id not in runs:
339
- if get_run is not None:
340
- runs[run_id] = get_run(run_id)
341
- # If get_run is None, i.e., in grpc-bidi mode
342
- else:
343
- runs[run_id] = Run.create_empty(run_id=run_id)
212
+ runs[run_id] = get_run(run_id)
344
213
 
345
214
  run: Run = runs[run_id]
346
215
  if get_fab is not None and run.fab_hash:
347
216
  fab = get_fab(run.fab_hash, run_id)
348
- if not isolation:
349
- # If `ClientApp` runs in the same process, install the FAB
350
- install_from_fab(fab.content, flwr_path, True)
351
217
  fab_id, fab_version = get_fab_metadata(fab.content)
352
218
  else:
353
219
  fab = None
@@ -372,110 +238,68 @@ def start_client_internal(
372
238
  reply_to=message,
373
239
  )
374
240
 
375
- # Handle app loading and task message
376
- try:
377
- if isolation:
378
- # Two isolation modes:
379
- # 1. `subprocess`: SuperNode is starting the ClientApp
380
- # process as a subprocess.
381
- # 2. `process`: ClientApp process gets started separately
382
- # (via `flwr-clientapp`), for example, in a separate
383
- # Docker container.
384
-
385
- # Generate SuperNode token
386
- token = int.from_bytes(urandom(RUN_ID_NUM_BYTES), "little")
387
-
388
- # Mode 1: SuperNode starts ClientApp as subprocess
389
- start_subprocess = isolation == ISOLATION_MODE_SUBPROCESS
390
-
391
- # Share Message and Context with servicer
392
- clientappio_servicer.set_inputs(
393
- clientapp_input=ClientAppInputs(
394
- message=message,
395
- context=context,
396
- run=run,
397
- fab=fab,
398
- token=token,
399
- ),
400
- token_returned=start_subprocess,
401
- )
402
-
403
- if start_subprocess:
404
- _octet, _colon, _port = (
405
- clientappio_api_address.rpartition(":")
406
- )
407
- io_address = (
408
- f"{CLIENT_OCTET}:{_port}"
409
- if _octet == SERVER_OCTET
410
- else clientappio_api_address
411
- )
412
- # Start ClientApp subprocess
413
- command = [
414
- "flwr-clientapp",
415
- "--clientappio-api-address",
416
- io_address,
417
- "--token",
418
- str(token),
419
- ]
420
- command.append("--insecure")
421
-
422
- proc = mp_spawn_context.Process(
423
- target=_run_flwr_clientapp,
424
- args=(command, os.getpid()),
425
- daemon=True,
426
- )
427
- proc.start()
428
- proc.join()
429
- else:
430
- # Wait for output to become available
431
- while not clientappio_servicer.has_outputs():
432
- time.sleep(0.1)
433
-
434
- outputs = clientappio_servicer.get_outputs()
435
- reply_message, context = outputs.message, outputs.context
436
- else:
437
- # Load ClientApp instance
438
- client_app: ClientApp = load_client_app_fn(
439
- fab_id, fab_version, run.fab_hash
440
- )
441
-
442
- # Execute ClientApp
443
- reply_message = client_app(message=message, context=context)
444
- except Exception as ex: # pylint: disable=broad-exception-caught
445
-
446
- # Legacy grpc-bidi
447
- if transport in ["grpc-bidi", None]:
448
- log(ERROR, "Client raised an exception.", exc_info=ex)
449
- # Raise exception, crash process
450
- raise ex
451
-
452
- # Don't update/change DeprecatedRunInfoStore
453
-
454
- e_code = ErrorCode.CLIENT_APP_RAISED_EXCEPTION
455
- # Ex fmt: "<class 'ZeroDivisionError'>:<'division by zero'>"
456
- reason = str(type(ex)) + ":<'" + str(ex) + "'>"
457
- exc_entity = "ClientApp"
458
- if isinstance(ex, LoadClientAppError):
459
- reason = (
460
- "An exception was raised when attempting to load "
461
- "`ClientApp`"
462
- )
463
- e_code = ErrorCode.LOAD_CLIENT_APP_EXCEPTION
464
- exc_entity = "SuperNode"
465
-
466
- log(ERROR, "%s raised an exception", exc_entity, exc_info=ex)
467
-
468
- # Create error message
469
- reply_message = Message(
470
- Error(code=e_code, reason=reason),
471
- reply_to=message,
472
- )
473
- else:
474
- # No exception, update node state
475
- run_info_store.update_context(
476
- run_id=run_id,
241
+ # Two isolation modes:
242
+ # 1. `subprocess`: SuperNode is starting the ClientApp
243
+ # process as a subprocess.
244
+ # 2. `process`: ClientApp process gets started separately
245
+ # (via `flwr-clientapp`), for example, in a separate
246
+ # Docker container.
247
+
248
+ # Generate SuperNode token
249
+ token = int.from_bytes(urandom(RUN_ID_NUM_BYTES), "little")
250
+
251
+ # Mode 1: SuperNode starts ClientApp as subprocess
252
+ start_subprocess = isolation == ISOLATION_MODE_SUBPROCESS
253
+
254
+ # Share Message and Context with servicer
255
+ clientappio_servicer.set_inputs(
256
+ clientapp_input=ClientAppInputs(
257
+ message=message,
477
258
  context=context,
259
+ run=run,
260
+ fab=fab,
261
+ token=token,
262
+ ),
263
+ token_returned=start_subprocess,
264
+ )
265
+
266
+ if start_subprocess:
267
+ _octet, _colon, _port = clientappio_api_address.rpartition(":")
268
+ io_address = (
269
+ f"{CLIENT_OCTET}:{_port}"
270
+ if _octet == SERVER_OCTET
271
+ else clientappio_api_address
478
272
  )
273
+ # Start ClientApp subprocess
274
+ command = [
275
+ "flwr-clientapp",
276
+ "--clientappio-api-address",
277
+ io_address,
278
+ "--token",
279
+ str(token),
280
+ ]
281
+ command.append("--insecure")
282
+
283
+ proc = mp_spawn_context.Process(
284
+ target=_run_flwr_clientapp,
285
+ args=(command, os.getpid()),
286
+ daemon=True,
287
+ )
288
+ proc.start()
289
+ proc.join()
290
+ else:
291
+ # Wait for output to become available
292
+ while not clientappio_servicer.has_outputs():
293
+ time.sleep(0.1)
294
+
295
+ outputs = clientappio_servicer.get_outputs()
296
+ reply_message, context = outputs.message, outputs.context
297
+
298
+ # Update node state
299
+ run_info_store.update_context(
300
+ run_id=run_id,
301
+ context=context,
302
+ )
479
303
 
480
304
  # Send
481
305
  send(reply_message)
@@ -509,30 +333,28 @@ def start_client_internal(
509
333
  time.sleep(sleep_duration)
510
334
 
511
335
 
512
- def _init_connection(transport: Optional[str], server_address: str) -> tuple[
513
- Callable[
514
- [
515
- str,
516
- bool,
517
- RetryInvoker,
518
- int,
519
- Union[bytes, str, None],
520
- Optional[tuple[ec.EllipticCurvePrivateKey, ec.EllipticCurvePublicKey]],
521
- ],
522
- AbstractContextManager[
523
- tuple[
524
- Callable[[], Optional[Message]],
525
- Callable[[Message], None],
526
- Optional[Callable[[], Optional[int]]],
527
- Optional[Callable[[], None]],
528
- Optional[Callable[[int], Run]],
529
- Optional[Callable[[str, int], Fab]],
530
- ]
531
- ],
532
- ],
533
- str,
534
- type[Exception],
336
+ @contextmanager
337
+ def _init_connection( # pylint: disable=too-many-positional-arguments
338
+ transport: str,
339
+ server_address: str,
340
+ insecure: bool,
341
+ root_certificates: Optional[Union[bytes, str]] = None,
342
+ authentication_keys: Optional[
343
+ tuple[ec.EllipticCurvePrivateKey, ec.EllipticCurvePublicKey]
344
+ ] = None,
345
+ max_retries: Optional[int] = None,
346
+ max_wait_time: Optional[float] = None,
347
+ ) -> Iterator[
348
+ tuple[
349
+ Callable[[], Optional[Message]],
350
+ Callable[[Message], None],
351
+ Callable[[], Optional[int]],
352
+ Callable[[], None],
353
+ Callable[[int], Run],
354
+ Callable[[str, int], Fab],
355
+ ]
535
356
  ]:
357
+ """Establish a connection to the Fleet API server at SuperLink."""
536
358
  # Parse IP address
537
359
  parsed_address = parse_address(server_address)
538
360
  if not parsed_address:
@@ -543,10 +365,6 @@ def _init_connection(transport: Optional[str], server_address: str) -> tuple[
543
365
  host, port, is_v6 = parsed_address
544
366
  address = f"[{host}]:{port}" if is_v6 else f"{host}:{port}"
545
367
 
546
- # Set the default transport layer
547
- if transport is None:
548
- transport = TRANSPORT_TYPE_GRPC_BIDI
549
-
550
368
  # Select the connection implementation for the requested transport
551
369
  if transport == TRANSPORT_TYPE_REST:
552
370
  try:
@@ -562,14 +380,74 @@ def _init_connection(transport: Optional[str], server_address: str) -> tuple[
562
380
  connection, error_type = grpc_request_response, RpcError
563
381
  elif transport == TRANSPORT_TYPE_GRPC_ADAPTER:
564
382
  connection, error_type = grpc_adapter, RpcError
565
- elif transport == TRANSPORT_TYPE_GRPC_BIDI:
566
- connection, error_type = grpc_connection, RpcError
567
383
  else:
568
384
  raise ValueError(
569
385
  f"Unknown transport type: {transport} (possible: {TRANSPORT_TYPES})"
570
386
  )
571
387
 
572
- return connection, address, error_type
388
+ # Create RetryInvoker
389
+ retry_invoker = _make_fleet_connection_retry_invoker(
390
+ max_retries=max_retries,
391
+ max_wait_time=max_wait_time,
392
+ connection_error_type=error_type,
393
+ )
394
+
395
+ # Establish connection
396
+ with connection(
397
+ address,
398
+ insecure,
399
+ retry_invoker,
400
+ GRPC_MAX_MESSAGE_LENGTH,
401
+ root_certificates,
402
+ authentication_keys,
403
+ ) as conn:
404
+ yield conn
405
+
406
+
407
+ def _make_fleet_connection_retry_invoker(
408
+ max_retries: Optional[int] = None,
409
+ max_wait_time: Optional[float] = None,
410
+ connection_error_type: type[Exception] = RpcError,
411
+ ) -> RetryInvoker:
412
+ """Create a retry invoker for fleet connection."""
413
+
414
+ def _on_success(retry_state: RetryState) -> None:
415
+ if retry_state.tries > 1:
416
+ log(
417
+ INFO,
418
+ "Connection successful after %.2f seconds and %s tries.",
419
+ retry_state.elapsed_time,
420
+ retry_state.tries,
421
+ )
422
+
423
+ def _on_backoff(retry_state: RetryState) -> None:
424
+ if retry_state.tries == 1:
425
+ log(WARN, "Connection attempt failed, retrying...")
426
+ else:
427
+ log(
428
+ WARN,
429
+ "Connection attempt failed, retrying in %.2f seconds",
430
+ retry_state.actual_wait,
431
+ )
432
+
433
+ return RetryInvoker(
434
+ wait_gen_factory=lambda: exponential(max_delay=MAX_RETRY_DELAY),
435
+ recoverable_exceptions=connection_error_type,
436
+ max_tries=max_retries + 1 if max_retries is not None else None,
437
+ max_time=max_wait_time,
438
+ on_giveup=lambda retry_state: (
439
+ log(
440
+ WARN,
441
+ "Giving up reconnection after %.2f seconds and %s tries.",
442
+ retry_state.elapsed_time,
443
+ retry_state.tries,
444
+ )
445
+ if retry_state.tries > 1
446
+ else None
447
+ ),
448
+ on_success=_on_success,
449
+ on_backoff=_on_backoff,
450
+ )
573
451
 
574
452
 
575
453
  def _run_flwr_clientapp(args: list[str], main_pid: int) -> None:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: flwr-nightly
3
- Version: 1.19.0.dev20250521
3
+ Version: 1.19.0.dev20250523
4
4
  Summary: Flower: A Friendly Federated AI Framework
5
5
  License: Apache-2.0
6
6
  Keywords: Artificial Intelligence,Federated AI,Federated Analytics,Federated Evaluation,Federated Learning,Flower,Machine Learning