flwr-nightly 1.15.0.dev20250108__py3-none-any.whl → 1.15.0.dev20250110__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flwr/client/app.py +17 -9
- flwr/client/grpc_rere_client/client_interceptor.py +6 -0
- flwr/client/grpc_rere_client/grpc_adapter.py +16 -0
- flwr/common/grpc.py +154 -3
- flwr/proto/fleet_pb2.py +40 -27
- flwr/proto/fleet_pb2.pyi +84 -0
- flwr/proto/fleet_pb2_grpc.py +66 -0
- flwr/proto/fleet_pb2_grpc.pyi +20 -0
- flwr/server/app.py +42 -20
- flwr/server/superlink/driver/serverappio_grpc.py +1 -1
- flwr/server/superlink/driver/serverappio_servicer.py +22 -8
- flwr/server/superlink/fleet/grpc_bidi/grpc_server.py +2 -165
- flwr/server/superlink/fleet/grpc_rere/fleet_servicer.py +16 -0
- flwr/server/superlink/fleet/grpc_rere/server_interceptor.py +2 -1
- flwr/server/superlink/linkstate/in_memory_linkstate.py +26 -22
- flwr/server/superlink/linkstate/linkstate.py +10 -4
- flwr/server/superlink/linkstate/sqlite_linkstate.py +42 -20
- flwr/server/superlink/simulation/simulationio_grpc.py +1 -1
- flwr/superexec/exec_grpc.py +1 -1
- {flwr_nightly-1.15.0.dev20250108.dist-info → flwr_nightly-1.15.0.dev20250110.dist-info}/METADATA +2 -2
- {flwr_nightly-1.15.0.dev20250108.dist-info → flwr_nightly-1.15.0.dev20250110.dist-info}/RECORD +24 -24
- {flwr_nightly-1.15.0.dev20250108.dist-info → flwr_nightly-1.15.0.dev20250110.dist-info}/LICENSE +0 -0
- {flwr_nightly-1.15.0.dev20250108.dist-info → flwr_nightly-1.15.0.dev20250110.dist-info}/WHEEL +0 -0
- {flwr_nightly-1.15.0.dev20250108.dist-info → flwr_nightly-1.15.0.dev20250110.dist-info}/entry_points.txt +0 -0
flwr/proto/fleet_pb2_grpc.pyi
CHANGED
@@ -30,6 +30,10 @@ class FleetStub:
|
|
30
30
|
HTTP API path: /api/v1/fleet/pull-task-ins
|
31
31
|
"""
|
32
32
|
|
33
|
+
PullMessages: grpc.UnaryUnaryMultiCallable[
|
34
|
+
flwr.proto.fleet_pb2.PullMessagesRequest,
|
35
|
+
flwr.proto.fleet_pb2.PullMessagesResponse]
|
36
|
+
|
33
37
|
PushTaskRes: grpc.UnaryUnaryMultiCallable[
|
34
38
|
flwr.proto.fleet_pb2.PushTaskResRequest,
|
35
39
|
flwr.proto.fleet_pb2.PushTaskResResponse]
|
@@ -38,6 +42,10 @@ class FleetStub:
|
|
38
42
|
HTTP API path: /api/v1/fleet/push-task-res
|
39
43
|
"""
|
40
44
|
|
45
|
+
PushMessages: grpc.UnaryUnaryMultiCallable[
|
46
|
+
flwr.proto.fleet_pb2.PushMessagesRequest,
|
47
|
+
flwr.proto.fleet_pb2.PushMessagesResponse]
|
48
|
+
|
41
49
|
GetRun: grpc.UnaryUnaryMultiCallable[
|
42
50
|
flwr.proto.run_pb2.GetRunRequest,
|
43
51
|
flwr.proto.run_pb2.GetRunResponse]
|
@@ -78,6 +86,12 @@ class FleetServicer(metaclass=abc.ABCMeta):
|
|
78
86
|
"""
|
79
87
|
pass
|
80
88
|
|
89
|
+
@abc.abstractmethod
|
90
|
+
def PullMessages(self,
|
91
|
+
request: flwr.proto.fleet_pb2.PullMessagesRequest,
|
92
|
+
context: grpc.ServicerContext,
|
93
|
+
) -> flwr.proto.fleet_pb2.PullMessagesResponse: ...
|
94
|
+
|
81
95
|
@abc.abstractmethod
|
82
96
|
def PushTaskRes(self,
|
83
97
|
request: flwr.proto.fleet_pb2.PushTaskResRequest,
|
@@ -89,6 +103,12 @@ class FleetServicer(metaclass=abc.ABCMeta):
|
|
89
103
|
"""
|
90
104
|
pass
|
91
105
|
|
106
|
+
@abc.abstractmethod
|
107
|
+
def PushMessages(self,
|
108
|
+
request: flwr.proto.fleet_pb2.PushMessagesRequest,
|
109
|
+
context: grpc.ServicerContext,
|
110
|
+
) -> flwr.proto.fleet_pb2.PushMessagesResponse: ...
|
111
|
+
|
92
112
|
@abc.abstractmethod
|
93
113
|
def GetRun(self,
|
94
114
|
request: flwr.proto.run_pb2.GetRunRequest,
|
flwr/server/app.py
CHANGED
@@ -18,7 +18,8 @@
|
|
18
18
|
import argparse
|
19
19
|
import csv
|
20
20
|
import importlib.util
|
21
|
-
import subprocess
|
21
|
+
import multiprocessing
|
22
|
+
import multiprocessing.context
|
22
23
|
import sys
|
23
24
|
import threading
|
24
25
|
from collections.abc import Sequence
|
@@ -59,6 +60,7 @@ from flwr.common.constant import (
|
|
59
60
|
TRANSPORT_TYPE_REST,
|
60
61
|
)
|
61
62
|
from flwr.common.exit_handlers import register_exit_handlers
|
63
|
+
from flwr.common.grpc import generic_create_grpc_server
|
62
64
|
from flwr.common.logger import log, warn_deprecated_feature
|
63
65
|
from flwr.common.secure_aggregation.crypto.symmetric_encryption import (
|
64
66
|
private_key_to_bytes,
|
@@ -68,6 +70,8 @@ from flwr.proto.fleet_pb2_grpc import ( # pylint: disable=E0611
|
|
68
70
|
add_FleetServicer_to_server,
|
69
71
|
)
|
70
72
|
from flwr.proto.grpcadapter_pb2_grpc import add_GrpcAdapterServicer_to_server
|
73
|
+
from flwr.server.serverapp.app import flwr_serverapp
|
74
|
+
from flwr.simulation.app import flwr_simulation
|
71
75
|
from flwr.superexec.app import load_executor
|
72
76
|
from flwr.superexec.exec_grpc import run_exec_api_grpc
|
73
77
|
|
@@ -79,10 +83,7 @@ from .strategy import Strategy
|
|
79
83
|
from .superlink.driver.serverappio_grpc import run_serverappio_api_grpc
|
80
84
|
from .superlink.ffs.ffs_factory import FfsFactory
|
81
85
|
from .superlink.fleet.grpc_adapter.grpc_adapter_servicer import GrpcAdapterServicer
|
82
|
-
from .superlink.fleet.grpc_bidi.grpc_server import (
|
83
|
-
generic_create_grpc_server,
|
84
|
-
start_grpc_server,
|
85
|
-
)
|
86
|
+
from .superlink.fleet.grpc_bidi.grpc_server import start_grpc_server
|
86
87
|
from .superlink.fleet.grpc_rere.fleet_servicer import FleetServicer
|
87
88
|
from .superlink.fleet.grpc_rere.server_interceptor import AuthenticateServerInterceptor
|
88
89
|
from .superlink.linkstate import LinkStateFactory
|
@@ -292,7 +293,7 @@ def run_superlink() -> None:
|
|
292
293
|
# Determine Exec plugin
|
293
294
|
# If simulation is used, don't start ServerAppIo and Fleet APIs
|
294
295
|
sim_exec = executor.__class__.__qualname__ == "SimulationEngine"
|
295
|
-
bckg_threads = []
|
296
|
+
bckg_threads: list[threading.Thread] = []
|
296
297
|
|
297
298
|
if sim_exec:
|
298
299
|
simulationio_server: grpc.Server = run_simulationio_api_grpc(
|
@@ -360,6 +361,7 @@ def run_superlink() -> None:
|
|
360
361
|
ffs_factory,
|
361
362
|
num_workers,
|
362
363
|
),
|
364
|
+
daemon=True,
|
363
365
|
)
|
364
366
|
fleet_thread.start()
|
365
367
|
bckg_threads.append(fleet_thread)
|
@@ -426,6 +428,7 @@ def run_superlink() -> None:
|
|
426
428
|
address,
|
427
429
|
cmd,
|
428
430
|
),
|
431
|
+
daemon=True,
|
429
432
|
)
|
430
433
|
scheduler_th.start()
|
431
434
|
bckg_threads.append(scheduler_th)
|
@@ -434,16 +437,24 @@ def run_superlink() -> None:
|
|
434
437
|
register_exit_handlers(
|
435
438
|
event_type=EventType.RUN_SUPERLINK_LEAVE,
|
436
439
|
grpc_servers=grpc_servers,
|
437
|
-
bckg_threads=bckg_threads,
|
438
440
|
)
|
439
441
|
|
440
|
-
# Block
|
441
|
-
while
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
442
|
+
# Block until a thread exits prematurely
|
443
|
+
while all(thread.is_alive() for thread in bckg_threads):
|
444
|
+
sleep(0.1)
|
445
|
+
|
446
|
+
# Exit if any thread has exited prematurely
|
447
|
+
sys.exit(1)
|
448
|
+
|
449
|
+
|
450
|
+
def _run_flwr_command(args: list[str]) -> None:
|
451
|
+
sys.argv = args
|
452
|
+
if args[0] == "flwr-serverapp":
|
453
|
+
flwr_serverapp()
|
454
|
+
elif args[0] == "flwr-simulation":
|
455
|
+
flwr_simulation()
|
456
|
+
else:
|
457
|
+
raise ValueError(f"Unknown command: {args[0]}")
|
447
458
|
|
448
459
|
|
449
460
|
def _flwr_scheduler(
|
@@ -453,15 +464,18 @@ def _flwr_scheduler(
|
|
453
464
|
cmd: str,
|
454
465
|
) -> None:
|
455
466
|
log(DEBUG, "Started %s scheduler thread.", cmd)
|
456
|
-
|
457
467
|
state = state_factory.state()
|
468
|
+
run_id_to_proc: dict[int, multiprocessing.context.SpawnProcess] = {}
|
469
|
+
|
470
|
+
# Use the "spawn" start method for multiprocessing.
|
471
|
+
mp_spawn_context = multiprocessing.get_context("spawn")
|
458
472
|
|
459
473
|
# Periodically check for a pending run in the LinkState
|
460
474
|
while True:
|
461
|
-
sleep(
|
475
|
+
sleep(0.1)
|
462
476
|
pending_run_id = state.get_pending_run_id()
|
463
477
|
|
464
|
-
if pending_run_id:
|
478
|
+
if pending_run_id and pending_run_id not in run_id_to_proc:
|
465
479
|
|
466
480
|
log(
|
467
481
|
INFO,
|
@@ -478,10 +492,18 @@ def _flwr_scheduler(
|
|
478
492
|
"--insecure",
|
479
493
|
]
|
480
494
|
|
481
|
-
|
482
|
-
command,
|
483
|
-
text=True,
|
495
|
+
proc = mp_spawn_context.Process(
|
496
|
+
target=_run_flwr_command, args=(command,), daemon=True
|
484
497
|
)
|
498
|
+
proc.start()
|
499
|
+
|
500
|
+
# Store the process
|
501
|
+
run_id_to_proc[pending_run_id] = proc
|
502
|
+
|
503
|
+
# Clean up finished processes
|
504
|
+
for run_id, proc in list(run_id_to_proc.items()):
|
505
|
+
if not proc.is_alive():
|
506
|
+
del run_id_to_proc[run_id]
|
485
507
|
|
486
508
|
|
487
509
|
def _format_address(address: str) -> tuple[str, str, int]:
|
flwr/server/superlink/driver/serverappio_grpc.py
CHANGED
@@ -21,6 +21,7 @@ from typing import Optional
|
|
21
21
|
import grpc
|
22
22
|
|
23
23
|
from flwr.common import GRPC_MAX_MESSAGE_LENGTH
|
24
|
+
from flwr.common.grpc import generic_create_grpc_server
|
24
25
|
from flwr.common.logger import log
|
25
26
|
from flwr.proto.serverappio_pb2_grpc import ( # pylint: disable=E0611
|
26
27
|
add_ServerAppIoServicer_to_server,
|
@@ -28,7 +29,6 @@ from flwr.proto.serverappio_pb2_grpc import ( # pylint: disable=E0611
|
|
28
29
|
from flwr.server.superlink.ffs.ffs_factory import FfsFactory
|
29
30
|
from flwr.server.superlink.linkstate import LinkStateFactory
|
30
31
|
|
31
|
-
from ..fleet.grpc_bidi.grpc_server import generic_create_grpc_server
|
32
32
|
from .serverappio_servicer import ServerAppIoServicer
|
33
33
|
|
34
34
|
|
flwr/server/superlink/driver/serverappio_servicer.py
CHANGED
@@ -118,8 +118,9 @@ class ServerAppIoServicer(serverappio_pb2_grpc.ServerAppIoServicer):
|
|
118
118
|
ffs: Ffs = self.ffs_factory.ffs()
|
119
119
|
fab_hash = ffs.put(fab.content, {})
|
120
120
|
_raise_if(
|
121
|
-
fab_hash != fab.hash_str,
|
122
|
-
|
121
|
+
validation_error=fab_hash != fab.hash_str,
|
122
|
+
request_name="CreateRun",
|
123
|
+
detail=f"FAB ({fab.hash_str}) hash from request doesn't match contents",
|
123
124
|
)
|
124
125
|
else:
|
125
126
|
fab_hash = ""
|
@@ -155,12 +156,22 @@ class ServerAppIoServicer(serverappio_pb2_grpc.ServerAppIoServicer):
|
|
155
156
|
task_ins.task.pushed_at = pushed_at
|
156
157
|
|
157
158
|
# Validate request
|
158
|
-
_raise_if(
|
159
|
+
_raise_if(
|
160
|
+
validation_error=len(request.task_ins_list) == 0,
|
161
|
+
request_name="PushTaskIns",
|
162
|
+
detail="`task_ins_list` must not be empty",
|
163
|
+
)
|
159
164
|
for task_ins in request.task_ins_list:
|
160
165
|
validation_errors = validate_task_ins_or_res(task_ins)
|
161
|
-
_raise_if(bool(validation_errors), ", ".join(validation_errors))
|
162
166
|
_raise_if(
|
163
|
-
|
167
|
+
validation_error=bool(validation_errors),
|
168
|
+
request_name="PushTaskIns",
|
169
|
+
detail=", ".join(validation_errors),
|
170
|
+
)
|
171
|
+
_raise_if(
|
172
|
+
validation_error=request.run_id != task_ins.run_id,
|
173
|
+
request_name="PushTaskIns",
|
174
|
+
detail="`task_ins` has mismatched `run_id`",
|
164
175
|
)
|
165
176
|
|
166
177
|
# Store each TaskIns
|
@@ -199,7 +210,9 @@ class ServerAppIoServicer(serverappio_pb2_grpc.ServerAppIoServicer):
|
|
199
210
|
# Validate request
|
200
211
|
for task_res in task_res_list:
|
201
212
|
_raise_if(
|
202
|
-
request.run_id != task_res.run_id,
|
213
|
+
validation_error=request.run_id != task_res.run_id,
|
214
|
+
request_name="PullTaskRes",
|
215
|
+
detail="`task_res` has mismatched `run_id`",
|
203
216
|
)
|
204
217
|
|
205
218
|
# Delete the TaskIns/TaskRes pairs if TaskRes is found
|
@@ -344,6 +357,7 @@ class ServerAppIoServicer(serverappio_pb2_grpc.ServerAppIoServicer):
|
|
344
357
|
return GetRunStatusResponse(run_status_dict=run_status_dict)
|
345
358
|
|
346
359
|
|
347
|
-
def _raise_if(validation_error: bool, detail: str) -> None:
|
360
|
+
def _raise_if(validation_error: bool, request_name: str, detail: str) -> None:
|
361
|
+
"""Raise a `ValueError` with a detailed message if a validation error occurs."""
|
348
362
|
if validation_error:
|
349
|
-
raise ValueError(f"Malformed
|
363
|
+
raise ValueError(f"Malformed {request_name}: {detail}")
|
flwr/server/superlink/fleet/grpc_bidi/grpc_server.py
CHANGED
@@ -15,49 +15,19 @@
|
|
15
15
|
"""Implements utility function to create a gRPC server."""
|
16
16
|
|
17
17
|
|
18
|
-
import concurrent.futures
|
19
|
-
import sys
|
20
|
-
from collections.abc import Sequence
|
21
|
-
from logging import ERROR
|
22
|
-
from typing import Any, Callable, Optional, Union
|
18
|
+
from typing import Optional
|
23
19
|
|
24
20
|
import grpc
|
25
21
|
|
26
22
|
from flwr.common import GRPC_MAX_MESSAGE_LENGTH
|
27
|
-
from flwr.common.address import is_port_in_use
|
28
|
-
from flwr.common.logger import log
|
23
|
+
from flwr.common.grpc import generic_create_grpc_server
|
29
24
|
from flwr.proto.transport_pb2_grpc import ( # pylint: disable=E0611
|
30
25
|
add_FlowerServiceServicer_to_server,
|
31
26
|
)
|
32
27
|
from flwr.server.client_manager import ClientManager
|
33
|
-
from flwr.server.superlink.driver.serverappio_servicer import ServerAppIoServicer
|
34
|
-
from flwr.server.superlink.fleet.grpc_adapter.grpc_adapter_servicer import (
|
35
|
-
GrpcAdapterServicer,
|
36
|
-
)
|
37
28
|
from flwr.server.superlink.fleet.grpc_bidi.flower_service_servicer import (
|
38
29
|
FlowerServiceServicer,
|
39
30
|
)
|
40
|
-
from flwr.server.superlink.fleet.grpc_rere.fleet_servicer import FleetServicer
|
41
|
-
|
42
|
-
INVALID_CERTIFICATES_ERR_MSG = """
|
43
|
-
When setting any of root_certificate, certificate, or private_key,
|
44
|
-
all of them need to be set.
|
45
|
-
"""
|
46
|
-
|
47
|
-
AddServicerToServerFn = Callable[..., Any]
|
48
|
-
|
49
|
-
|
50
|
-
def valid_certificates(certificates: tuple[bytes, bytes, bytes]) -> bool:
|
51
|
-
"""Validate certificates tuple."""
|
52
|
-
is_valid = (
|
53
|
-
all(isinstance(certificate, bytes) for certificate in certificates)
|
54
|
-
and len(certificates) == 3
|
55
|
-
)
|
56
|
-
|
57
|
-
if not is_valid:
|
58
|
-
log(ERROR, INVALID_CERTIFICATES_ERR_MSG)
|
59
|
-
|
60
|
-
return is_valid
|
61
31
|
|
62
32
|
|
63
33
|
def start_grpc_server( # pylint: disable=too-many-arguments,R0917
|
@@ -154,136 +124,3 @@ def start_grpc_server( # pylint: disable=too-many-arguments,R0917
|
|
154
124
|
server.start()
|
155
125
|
|
156
126
|
return server
|
157
|
-
|
158
|
-
|
159
|
-
def generic_create_grpc_server( # pylint: disable=too-many-arguments,R0917
|
160
|
-
servicer_and_add_fn: Union[
|
161
|
-
tuple[FleetServicer, AddServicerToServerFn],
|
162
|
-
tuple[GrpcAdapterServicer, AddServicerToServerFn],
|
163
|
-
tuple[FlowerServiceServicer, AddServicerToServerFn],
|
164
|
-
tuple[ServerAppIoServicer, AddServicerToServerFn],
|
165
|
-
],
|
166
|
-
server_address: str,
|
167
|
-
max_concurrent_workers: int = 1000,
|
168
|
-
max_message_length: int = GRPC_MAX_MESSAGE_LENGTH,
|
169
|
-
keepalive_time_ms: int = 210000,
|
170
|
-
certificates: Optional[tuple[bytes, bytes, bytes]] = None,
|
171
|
-
interceptors: Optional[Sequence[grpc.ServerInterceptor]] = None,
|
172
|
-
) -> grpc.Server:
|
173
|
-
"""Create a gRPC server with a single servicer.
|
174
|
-
|
175
|
-
Parameters
|
176
|
-
----------
|
177
|
-
servicer_and_add_fn : tuple
|
178
|
-
A tuple holding a servicer implementation and a matching
|
179
|
-
add_Servicer_to_server function.
|
180
|
-
server_address : str
|
181
|
-
Server address in the form of HOST:PORT e.g. "[::]:8080"
|
182
|
-
max_concurrent_workers : int
|
183
|
-
Maximum number of clients the server can process before returning
|
184
|
-
RESOURCE_EXHAUSTED status (default: 1000)
|
185
|
-
max_message_length : int
|
186
|
-
Maximum message length that the server can send or receive.
|
187
|
-
Int valued in bytes. -1 means unlimited. (default: GRPC_MAX_MESSAGE_LENGTH)
|
188
|
-
keepalive_time_ms : int
|
189
|
-
Flower uses a default gRPC keepalive time of 210000ms (3 minutes 30 seconds)
|
190
|
-
because some cloud providers (for example, Azure) agressively clean up idle
|
191
|
-
TCP connections by terminating them after some time (4 minutes in the case
|
192
|
-
of Azure). Flower does not use application-level keepalive signals and relies
|
193
|
-
on the assumption that the transport layer will fail in cases where the
|
194
|
-
connection is no longer active. `keepalive_time_ms` can be used to customize
|
195
|
-
the keepalive interval for specific environments. The default Flower gRPC
|
196
|
-
keepalive of 210000 ms (3 minutes 30 seconds) ensures that Flower can keep
|
197
|
-
the long running streaming connection alive in most environments. The actual
|
198
|
-
gRPC default of this setting is 7200000 (2 hours), which results in dropped
|
199
|
-
connections in some cloud environments.
|
200
|
-
|
201
|
-
These settings are related to the issue described here:
|
202
|
-
- https://github.com/grpc/proposal/blob/master/A8-client-side-keepalive.md
|
203
|
-
- https://github.com/grpc/grpc/blob/master/doc/keepalive.md
|
204
|
-
- https://grpc.io/docs/guides/performance/
|
205
|
-
|
206
|
-
Mobile Flower clients may choose to increase this value if their server
|
207
|
-
environment allows long-running idle TCP connections.
|
208
|
-
(default: 210000)
|
209
|
-
certificates : Tuple[bytes, bytes, bytes] (default: None)
|
210
|
-
Tuple containing root certificate, server certificate, and private key to
|
211
|
-
start a secure SSL-enabled server. The tuple is expected to have three bytes
|
212
|
-
elements in the following order:
|
213
|
-
|
214
|
-
* CA certificate.
|
215
|
-
* server certificate.
|
216
|
-
* server private key.
|
217
|
-
interceptors : Optional[Sequence[grpc.ServerInterceptor]] (default: None)
|
218
|
-
A list of gRPC interceptors.
|
219
|
-
|
220
|
-
Returns
|
221
|
-
-------
|
222
|
-
server : grpc.Server
|
223
|
-
A non-running instance of a gRPC server.
|
224
|
-
"""
|
225
|
-
# Check if port is in use
|
226
|
-
if is_port_in_use(server_address):
|
227
|
-
sys.exit(f"Port in server address {server_address} is already in use.")
|
228
|
-
|
229
|
-
# Deconstruct tuple into servicer and function
|
230
|
-
servicer, add_servicer_to_server_fn = servicer_and_add_fn
|
231
|
-
|
232
|
-
# Possible options:
|
233
|
-
# https://github.com/grpc/grpc/blob/v1.43.x/include/grpc/impl/codegen/grpc_types.h
|
234
|
-
options = [
|
235
|
-
# Maximum number of concurrent incoming streams to allow on a http2
|
236
|
-
# connection. Int valued.
|
237
|
-
("grpc.max_concurrent_streams", max(100, max_concurrent_workers)),
|
238
|
-
# Maximum message length that the channel can send.
|
239
|
-
# Int valued, bytes. -1 means unlimited.
|
240
|
-
("grpc.max_send_message_length", max_message_length),
|
241
|
-
# Maximum message length that the channel can receive.
|
242
|
-
# Int valued, bytes. -1 means unlimited.
|
243
|
-
("grpc.max_receive_message_length", max_message_length),
|
244
|
-
# The gRPC default for this setting is 7200000 (2 hours). Flower uses a
|
245
|
-
# customized default of 210000 (3 minutes and 30 seconds) to improve
|
246
|
-
# compatibility with popular cloud providers. Mobile Flower clients may
|
247
|
-
# choose to increase this value if their server environment allows
|
248
|
-
# long-running idle TCP connections.
|
249
|
-
("grpc.keepalive_time_ms", keepalive_time_ms),
|
250
|
-
# Setting this to zero will allow sending unlimited keepalive pings in between
|
251
|
-
# sending actual data frames.
|
252
|
-
("grpc.http2.max_pings_without_data", 0),
|
253
|
-
# Is it permissible to send keepalive pings from the client without
|
254
|
-
# any outstanding streams. More explanation here:
|
255
|
-
# https://github.com/adap/flower/pull/2197
|
256
|
-
("grpc.keepalive_permit_without_calls", 0),
|
257
|
-
]
|
258
|
-
|
259
|
-
server = grpc.server(
|
260
|
-
concurrent.futures.ThreadPoolExecutor(max_workers=max_concurrent_workers),
|
261
|
-
# Set the maximum number of concurrent RPCs this server will service before
|
262
|
-
# returning RESOURCE_EXHAUSTED status, or None to indicate no limit.
|
263
|
-
maximum_concurrent_rpcs=max_concurrent_workers,
|
264
|
-
options=options,
|
265
|
-
interceptors=interceptors,
|
266
|
-
)
|
267
|
-
add_servicer_to_server_fn(servicer, server)
|
268
|
-
|
269
|
-
if certificates is not None:
|
270
|
-
if not valid_certificates(certificates):
|
271
|
-
sys.exit(1)
|
272
|
-
|
273
|
-
root_certificate_b, certificate_b, private_key_b = certificates
|
274
|
-
|
275
|
-
server_credentials = grpc.ssl_server_credentials(
|
276
|
-
((private_key_b, certificate_b),),
|
277
|
-
root_certificates=root_certificate_b,
|
278
|
-
# A boolean indicating whether or not to require clients to be
|
279
|
-
# authenticated. May only be True if root_certificates is not None.
|
280
|
-
# We are explicitly setting the current gRPC default to document
|
281
|
-
# the option. For further reference see:
|
282
|
-
# https://grpc.github.io/grpc/python/grpc.html#create-server-credentials
|
283
|
-
require_client_auth=False,
|
284
|
-
)
|
285
|
-
server.add_secure_port(server_address, server_credentials)
|
286
|
-
else:
|
287
|
-
server.add_insecure_port(server_address)
|
288
|
-
|
289
|
-
return server
|
flwr/server/superlink/fleet/grpc_rere/fleet_servicer.py
CHANGED
@@ -30,8 +30,12 @@ from flwr.proto.fleet_pb2 import ( # pylint: disable=E0611
|
|
30
30
|
DeleteNodeResponse,
|
31
31
|
PingRequest,
|
32
32
|
PingResponse,
|
33
|
+
PullMessagesRequest,
|
34
|
+
PullMessagesResponse,
|
33
35
|
PullTaskInsRequest,
|
34
36
|
PullTaskInsResponse,
|
37
|
+
PushMessagesRequest,
|
38
|
+
PushMessagesResponse,
|
35
39
|
PushTaskResRequest,
|
36
40
|
PushTaskResResponse,
|
37
41
|
)
|
@@ -95,6 +99,12 @@ class FleetServicer(fleet_pb2_grpc.FleetServicer):
|
|
95
99
|
state=self.state_factory.state(),
|
96
100
|
)
|
97
101
|
|
102
|
+
def PullMessages(
|
103
|
+
self, request: PullMessagesRequest, context: grpc.ServicerContext
|
104
|
+
) -> PullMessagesResponse:
|
105
|
+
"""Pull Messages."""
|
106
|
+
return PullMessagesResponse()
|
107
|
+
|
98
108
|
def PushTaskRes(
|
99
109
|
self, request: PushTaskResRequest, context: grpc.ServicerContext
|
100
110
|
) -> PushTaskResResponse:
|
@@ -118,6 +128,12 @@ class FleetServicer(fleet_pb2_grpc.FleetServicer):
|
|
118
128
|
|
119
129
|
return res
|
120
130
|
|
131
|
+
def PushMessages(
|
132
|
+
self, request: PushMessagesRequest, context: grpc.ServicerContext
|
133
|
+
) -> PushMessagesResponse:
|
134
|
+
"""Push Messages."""
|
135
|
+
return PushMessagesResponse()
|
136
|
+
|
121
137
|
def GetRun(
|
122
138
|
self, request: GetRunRequest, context: grpc.ServicerContext
|
123
139
|
) -> GetRunResponse:
|
flwr/server/superlink/fleet/grpc_rere/server_interceptor.py
CHANGED
@@ -223,5 +223,6 @@ class AuthenticateServerInterceptor(grpc.ServerInterceptor): # type: ignore
|
|
223
223
|
# No `node_id` exists for the provided `public_key`
|
224
224
|
# Handle `CreateNode` here instead of calling the default method handler
|
225
225
|
# Note: the innermost `CreateNode` method will never be called
|
226
|
-
node_id = state.create_node(request.ping_interval, public_key_bytes)
|
226
|
+
node_id = state.create_node(request.ping_interval)
|
227
|
+
state.set_node_public_key(node_id, public_key_bytes)
|
227
228
|
return CreateNodeResponse(node=Node(node_id=node_id, anonymous=False))
|
flwr/server/superlink/linkstate/in_memory_linkstate.py
CHANGED
@@ -62,6 +62,7 @@ class InMemoryLinkState(LinkState): # pylint: disable=R0902,R0904
|
|
62
62
|
# Map node_id to (online_until, ping_interval)
|
63
63
|
self.node_ids: dict[int, tuple[float, float]] = {}
|
64
64
|
self.public_key_to_node_id: dict[bytes, int] = {}
|
65
|
+
self.node_id_to_public_key: dict[int, bytes] = {}
|
65
66
|
|
66
67
|
# Map run_id to RunRecord
|
67
68
|
self.run_ids: dict[int, RunRecord] = {}
|
@@ -306,9 +307,7 @@ class InMemoryLinkState(LinkState): # pylint: disable=R0902,R0904
|
|
306
307
|
"""
|
307
308
|
return len(self.task_res_store)
|
308
309
|
|
309
|
-
def create_node(
|
310
|
-
self, ping_interval: float, public_key: Optional[bytes] = None
|
311
|
-
) -> int:
|
310
|
+
def create_node(self, ping_interval: float) -> int:
|
312
311
|
"""Create, store in the link state, and return `node_id`."""
|
313
312
|
# Sample a random int64 as node_id
|
314
313
|
node_id = generate_rand_int_from_bytes(NODE_ID_NUM_BYTES)
|
@@ -318,33 +317,18 @@ class InMemoryLinkState(LinkState): # pylint: disable=R0902,R0904
|
|
318
317
|
log(ERROR, "Unexpected node registration failure.")
|
319
318
|
return 0
|
320
319
|
|
321
|
-
if public_key is not None:
|
322
|
-
if (
|
323
|
-
public_key in self.public_key_to_node_id
|
324
|
-
or node_id in self.public_key_to_node_id.values()
|
325
|
-
):
|
326
|
-
log(ERROR, "Unexpected node registration failure.")
|
327
|
-
return 0
|
328
|
-
|
329
|
-
self.public_key_to_node_id[public_key] = node_id
|
330
|
-
|
331
320
|
self.node_ids[node_id] = (time.time() + ping_interval, ping_interval)
|
332
321
|
return node_id
|
333
322
|
|
334
|
-
def delete_node(self, node_id: int, public_key: Optional[bytes] = None) -> None:
|
323
|
+
def delete_node(self, node_id: int) -> None:
|
335
324
|
"""Delete a node."""
|
336
325
|
with self.lock:
|
337
326
|
if node_id not in self.node_ids:
|
338
327
|
raise ValueError(f"Node {node_id} not found")
|
339
328
|
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
or node_id not in self.public_key_to_node_id.values()
|
344
|
-
):
|
345
|
-
raise ValueError("Public key or node_id not found")
|
346
|
-
|
347
|
-
del self.public_key_to_node_id[public_key]
|
329
|
+
# Remove node ID <> public key mappings
|
330
|
+
if pk := self.node_id_to_public_key.pop(node_id, None):
|
331
|
+
del self.public_key_to_node_id[pk]
|
348
332
|
|
349
333
|
del self.node_ids[node_id]
|
350
334
|
|
@@ -366,6 +350,26 @@ class InMemoryLinkState(LinkState): # pylint: disable=R0902,R0904
|
|
366
350
|
if online_until > current_time
|
367
351
|
}
|
368
352
|
|
353
|
+
def set_node_public_key(self, node_id: int, public_key: bytes) -> None:
|
354
|
+
"""Set `public_key` for the specified `node_id`."""
|
355
|
+
with self.lock:
|
356
|
+
if node_id not in self.node_ids:
|
357
|
+
raise ValueError(f"Node {node_id} not found")
|
358
|
+
|
359
|
+
if public_key in self.public_key_to_node_id:
|
360
|
+
raise ValueError("Public key already in use")
|
361
|
+
|
362
|
+
self.public_key_to_node_id[public_key] = node_id
|
363
|
+
self.node_id_to_public_key[node_id] = public_key
|
364
|
+
|
365
|
+
def get_node_public_key(self, node_id: int) -> Optional[bytes]:
|
366
|
+
"""Get `public_key` for the specified `node_id`."""
|
367
|
+
with self.lock:
|
368
|
+
if node_id not in self.node_ids:
|
369
|
+
raise ValueError(f"Node {node_id} not found")
|
370
|
+
|
371
|
+
return self.node_id_to_public_key.get(node_id)
|
372
|
+
|
369
373
|
def get_node_id(self, node_public_key: bytes) -> Optional[int]:
|
370
374
|
"""Retrieve stored `node_id` filtered by `node_public_keys`."""
|
371
375
|
return self.public_key_to_node_id.get(node_public_key)
|
flwr/server/superlink/linkstate/linkstate.py
CHANGED
@@ -154,13 +154,11 @@ class LinkState(abc.ABC): # pylint: disable=R0904
|
|
154
154
|
"""Get all TaskIns IDs for the given run_id."""
|
155
155
|
|
156
156
|
@abc.abstractmethod
|
157
|
-
def create_node(
|
158
|
-
self, ping_interval: float, public_key: Optional[bytes] = None
|
159
|
-
) -> int:
|
157
|
+
def create_node(self, ping_interval: float) -> int:
|
160
158
|
"""Create, store in the link state, and return `node_id`."""
|
161
159
|
|
162
160
|
@abc.abstractmethod
|
163
|
-
def delete_node(self, node_id: int, public_key: Optional[bytes] = None) -> None:
|
161
|
+
def delete_node(self, node_id: int) -> None:
|
164
162
|
"""Remove `node_id` from the link state."""
|
165
163
|
|
166
164
|
@abc.abstractmethod
|
@@ -173,6 +171,14 @@ class LinkState(abc.ABC): # pylint: disable=R0904
|
|
173
171
|
an empty `Set` MUST be returned.
|
174
172
|
"""
|
175
173
|
|
174
|
+
@abc.abstractmethod
|
175
|
+
def set_node_public_key(self, node_id: int, public_key: bytes) -> None:
|
176
|
+
"""Set `public_key` for the specified `node_id`."""
|
177
|
+
|
178
|
+
@abc.abstractmethod
|
179
|
+
def get_node_public_key(self, node_id: int) -> Optional[bytes]:
|
180
|
+
"""Get `public_key` for the specified `node_id`."""
|
181
|
+
|
176
182
|
@abc.abstractmethod
|
177
183
|
def get_node_id(self, node_public_key: bytes) -> Optional[int]:
|
178
184
|
"""Retrieve stored `node_id` filtered by `node_public_keys`."""
|