flwr-nightly 1.19.0.dev20250429__py3-none-any.whl → 1.19.0.dev20250430__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flwr/client/grpc_rere_client/connection.py +32 -32
- flwr/client/grpc_rere_client/grpc_adapter.py +6 -6
- flwr/client/rest_client/connection.py +24 -36
- flwr/common/constant.py +10 -7
- flwr/common/heartbeat.py +103 -0
- flwr/proto/fleet_pb2.py +26 -26
- flwr/proto/fleet_pb2.pyi +13 -13
- flwr/proto/fleet_pb2_grpc.py +13 -13
- flwr/proto/fleet_pb2_grpc.pyi +6 -6
- flwr/server/superlink/fleet/grpc_adapter/grpc_adapter_servicer.py +3 -3
- flwr/server/superlink/fleet/grpc_rere/fleet_servicer.py +12 -6
- flwr/server/superlink/fleet/message_handler/message_handler.py +8 -8
- flwr/server/superlink/fleet/rest_rere/rest_api.py +7 -7
- flwr/server/superlink/fleet/vce/vce_api.py +2 -2
- flwr/server/superlink/linkstate/in_memory_linkstate.py +16 -11
- flwr/server/superlink/linkstate/linkstate.py +13 -8
- flwr/server/superlink/linkstate/sqlite_linkstate.py +19 -15
- flwr/server/superlink/linkstate/utils.py +3 -2
- {flwr_nightly-1.19.0.dev20250429.dist-info → flwr_nightly-1.19.0.dev20250430.dist-info}/METADATA +1 -1
- {flwr_nightly-1.19.0.dev20250429.dist-info → flwr_nightly-1.19.0.dev20250430.dist-info}/RECORD +22 -22
- flwr/client/heartbeat.py +0 -74
- {flwr_nightly-1.19.0.dev20250429.dist-info → flwr_nightly-1.19.0.dev20250430.dist-info}/WHEEL +0 -0
- {flwr_nightly-1.19.0.dev20250429.dist-info → flwr_nightly-1.19.0.dev20250430.dist-info}/entry_points.txt +0 -0
@@ -15,8 +15,6 @@
|
|
15
15
|
"""Contextmanager for a gRPC request-response channel to the Flower server."""
|
16
16
|
|
17
17
|
|
18
|
-
import random
|
19
|
-
import threading
|
20
18
|
from collections.abc import Iterator, Sequence
|
21
19
|
from contextlib import contextmanager
|
22
20
|
from copy import copy
|
@@ -27,16 +25,11 @@ from typing import Callable, Optional, Union, cast
|
|
27
25
|
import grpc
|
28
26
|
from cryptography.hazmat.primitives.asymmetric import ec
|
29
27
|
|
30
|
-
from flwr.client.heartbeat import start_ping_loop
|
31
28
|
from flwr.client.message_handler.message_handler import validate_out_message
|
32
29
|
from flwr.common import GRPC_MAX_MESSAGE_LENGTH
|
33
|
-
from flwr.common.constant import
|
34
|
-
PING_BASE_MULTIPLIER,
|
35
|
-
PING_CALL_TIMEOUT,
|
36
|
-
PING_DEFAULT_INTERVAL,
|
37
|
-
PING_RANDOM_RANGE,
|
38
|
-
)
|
30
|
+
from flwr.common.constant import HEARTBEAT_CALL_TIMEOUT, HEARTBEAT_DEFAULT_INTERVAL
|
39
31
|
from flwr.common.grpc import create_channel, on_channel_state_change
|
32
|
+
from flwr.common.heartbeat import HeartbeatSender
|
40
33
|
from flwr.common.logger import log
|
41
34
|
from flwr.common.message import Message, Metadata
|
42
35
|
from flwr.common.retry_invoker import RetryInvoker
|
@@ -49,8 +42,8 @@ from flwr.proto.fab_pb2 import GetFabRequest, GetFabResponse # pylint: disable=
|
|
49
42
|
from flwr.proto.fleet_pb2 import ( # pylint: disable=E0611
|
50
43
|
CreateNodeRequest,
|
51
44
|
DeleteNodeRequest,
|
52
|
-
|
53
|
-
|
45
|
+
HeartbeatRequest,
|
46
|
+
HeartbeatResponse,
|
54
47
|
PullMessagesRequest,
|
55
48
|
PullMessagesResponse,
|
56
49
|
PushMessagesRequest,
|
@@ -151,8 +144,6 @@ def grpc_request_response( # pylint: disable=R0913,R0914,R0915,R0917
|
|
151
144
|
stub = adapter_cls(channel)
|
152
145
|
metadata: Optional[Metadata] = None
|
153
146
|
node: Optional[Node] = None
|
154
|
-
ping_thread: Optional[threading.Thread] = None
|
155
|
-
ping_stop_event = threading.Event()
|
156
147
|
|
157
148
|
def _should_giveup_fn(e: Exception) -> bool:
|
158
149
|
if e.code() == grpc.StatusCode.PERMISSION_DENIED: # type: ignore
|
@@ -166,45 +157,54 @@ def grpc_request_response( # pylint: disable=R0913,R0914,R0915,R0917
|
|
166
157
|
retry_invoker.should_giveup = _should_giveup_fn
|
167
158
|
|
168
159
|
###########################################################################
|
169
|
-
#
|
160
|
+
# heartbeat/create_node/delete_node/receive/send/get_run functions
|
170
161
|
###########################################################################
|
171
162
|
|
172
|
-
def
|
163
|
+
def heartbeat() -> bool:
|
173
164
|
# Get Node
|
174
165
|
if node is None:
|
175
166
|
log(ERROR, "Node instance missing")
|
176
|
-
return
|
167
|
+
return False
|
177
168
|
|
178
|
-
# Construct the
|
179
|
-
req =
|
169
|
+
# Construct the heartbeat request
|
170
|
+
req = HeartbeatRequest(node=node, heartbeat_interval=HEARTBEAT_DEFAULT_INTERVAL)
|
180
171
|
|
181
172
|
# Call FleetAPI
|
182
|
-
|
173
|
+
try:
|
174
|
+
res: HeartbeatResponse = stub.Heartbeat(req, timeout=HEARTBEAT_CALL_TIMEOUT)
|
175
|
+
except grpc.RpcError as e:
|
176
|
+
status_code = e.code()
|
177
|
+
if status_code == grpc.StatusCode.UNAVAILABLE:
|
178
|
+
return False
|
179
|
+
if status_code == grpc.StatusCode.DEADLINE_EXCEEDED:
|
180
|
+
return False
|
181
|
+
raise
|
183
182
|
|
184
183
|
# Check if success
|
185
184
|
if not res.success:
|
186
|
-
raise RuntimeError(
|
185
|
+
raise RuntimeError(
|
186
|
+
"Heartbeat failed unexpectedly. The SuperLink does not "
|
187
|
+
"recognize this SuperNode."
|
188
|
+
)
|
189
|
+
return True
|
187
190
|
|
188
|
-
|
189
|
-
rd = random.uniform(*PING_RANDOM_RANGE)
|
190
|
-
next_interval: float = PING_DEFAULT_INTERVAL - PING_CALL_TIMEOUT
|
191
|
-
next_interval *= PING_BASE_MULTIPLIER + rd
|
192
|
-
if not ping_stop_event.is_set():
|
193
|
-
ping_stop_event.wait(next_interval)
|
191
|
+
heartbeat_sender = HeartbeatSender(heartbeat)
|
194
192
|
|
195
193
|
def create_node() -> Optional[int]:
|
196
194
|
"""Set create_node."""
|
197
195
|
# Call FleetAPI
|
198
|
-
create_node_request = CreateNodeRequest(
|
196
|
+
create_node_request = CreateNodeRequest(
|
197
|
+
heartbeat_interval=HEARTBEAT_DEFAULT_INTERVAL
|
198
|
+
)
|
199
199
|
create_node_response = retry_invoker.invoke(
|
200
200
|
stub.CreateNode,
|
201
201
|
request=create_node_request,
|
202
202
|
)
|
203
203
|
|
204
|
-
# Remember the node and the
|
205
|
-
nonlocal node
|
204
|
+
# Remember the node and start the heartbeat sender
|
205
|
+
nonlocal node
|
206
206
|
node = cast(Node, create_node_response.node)
|
207
|
-
|
207
|
+
heartbeat_sender.start()
|
208
208
|
return node.node_id
|
209
209
|
|
210
210
|
def delete_node() -> None:
|
@@ -215,8 +215,8 @@ def grpc_request_response( # pylint: disable=R0913,R0914,R0915,R0917
|
|
215
215
|
log(ERROR, "Node instance missing")
|
216
216
|
return
|
217
217
|
|
218
|
-
# Stop the
|
219
|
-
|
218
|
+
# Stop the heartbeat sender
|
219
|
+
heartbeat_sender.stop()
|
220
220
|
|
221
221
|
# Call FleetAPI
|
222
222
|
delete_node_request = DeleteNodeRequest(node=node)
|
@@ -38,8 +38,8 @@ from flwr.proto.fleet_pb2 import ( # pylint: disable=E0611
|
|
38
38
|
CreateNodeResponse,
|
39
39
|
DeleteNodeRequest,
|
40
40
|
DeleteNodeResponse,
|
41
|
-
|
42
|
-
|
41
|
+
HeartbeatRequest,
|
42
|
+
HeartbeatResponse,
|
43
43
|
PullMessagesRequest,
|
44
44
|
PullMessagesResponse,
|
45
45
|
PushMessagesRequest,
|
@@ -120,11 +120,11 @@ class GrpcAdapter:
|
|
120
120
|
"""."""
|
121
121
|
return self._send_and_receive(request, DeleteNodeResponse, **kwargs)
|
122
122
|
|
123
|
-
def
|
124
|
-
self, request:
|
125
|
-
) ->
|
123
|
+
def Heartbeat( # pylint: disable=C0103
|
124
|
+
self, request: HeartbeatRequest, **kwargs: Any
|
125
|
+
) -> HeartbeatResponse:
|
126
126
|
"""."""
|
127
|
-
return self._send_and_receive(request,
|
127
|
+
return self._send_and_receive(request, HeartbeatResponse, **kwargs)
|
128
128
|
|
129
129
|
def PullMessages( # pylint: disable=C0103
|
130
130
|
self, request: PullMessagesRequest, **kwargs: Any
|
@@ -15,8 +15,6 @@
|
|
15
15
|
"""Contextmanager for a REST request-response channel to the Flower server."""
|
16
16
|
|
17
17
|
|
18
|
-
import random
|
19
|
-
import threading
|
20
18
|
from collections.abc import Iterator
|
21
19
|
from contextlib import contextmanager
|
22
20
|
from copy import copy
|
@@ -27,16 +25,11 @@ from cryptography.hazmat.primitives.asymmetric import ec
|
|
27
25
|
from google.protobuf.message import Message as GrpcMessage
|
28
26
|
from requests.exceptions import ConnectionError as RequestsConnectionError
|
29
27
|
|
30
|
-
from flwr.client.heartbeat import start_ping_loop
|
31
28
|
from flwr.client.message_handler.message_handler import validate_out_message
|
32
29
|
from flwr.common import GRPC_MAX_MESSAGE_LENGTH
|
33
|
-
from flwr.common.constant import
|
34
|
-
PING_BASE_MULTIPLIER,
|
35
|
-
PING_CALL_TIMEOUT,
|
36
|
-
PING_DEFAULT_INTERVAL,
|
37
|
-
PING_RANDOM_RANGE,
|
38
|
-
)
|
30
|
+
from flwr.common.constant import HEARTBEAT_DEFAULT_INTERVAL
|
39
31
|
from flwr.common.exit import ExitCode, flwr_exit
|
32
|
+
from flwr.common.heartbeat import HeartbeatSender
|
40
33
|
from flwr.common.logger import log
|
41
34
|
from flwr.common.message import Message, Metadata
|
42
35
|
from flwr.common.retry_invoker import RetryInvoker
|
@@ -48,8 +41,8 @@ from flwr.proto.fleet_pb2 import ( # pylint: disable=E0611
|
|
48
41
|
CreateNodeResponse,
|
49
42
|
DeleteNodeRequest,
|
50
43
|
DeleteNodeResponse,
|
51
|
-
|
52
|
-
|
44
|
+
HeartbeatRequest,
|
45
|
+
HeartbeatResponse,
|
53
46
|
PullMessagesRequest,
|
54
47
|
PullMessagesResponse,
|
55
48
|
PushMessagesRequest,
|
@@ -68,7 +61,7 @@ PATH_CREATE_NODE: str = "api/v0/fleet/create-node"
|
|
68
61
|
PATH_DELETE_NODE: str = "api/v0/fleet/delete-node"
|
69
62
|
PATH_PULL_MESSAGES: str = "/api/v0/fleet/pull-messages"
|
70
63
|
PATH_PUSH_MESSAGES: str = "/api/v0/fleet/push-messages"
|
71
|
-
PATH_PING: str = "api/v0/fleet/
|
64
|
+
PATH_PING: str = "api/v0/fleet/heartbeat"
|
72
65
|
PATH_GET_RUN: str = "/api/v0/fleet/get-run"
|
73
66
|
PATH_GET_FAB: str = "/api/v0/fleet/get-fab"
|
74
67
|
|
@@ -160,11 +153,9 @@ def http_request_response( # pylint: disable=R0913,R0914,R0915,R0917
|
|
160
153
|
# Shared variables for inner functions
|
161
154
|
metadata: Optional[Metadata] = None
|
162
155
|
node: Optional[Node] = None
|
163
|
-
ping_thread: Optional[threading.Thread] = None
|
164
|
-
ping_stop_event = threading.Event()
|
165
156
|
|
166
157
|
###########################################################################
|
167
|
-
#
|
158
|
+
# heartbeat/create_node/delete_node/receive/send/get_run functions
|
168
159
|
###########################################################################
|
169
160
|
|
170
161
|
def _request(
|
@@ -214,44 +205,43 @@ def http_request_response( # pylint: disable=R0913,R0914,R0915,R0917
|
|
214
205
|
grpc_res.ParseFromString(res.content)
|
215
206
|
return grpc_res
|
216
207
|
|
217
|
-
def
|
208
|
+
def heartbeat() -> bool:
|
218
209
|
# Get Node
|
219
210
|
if node is None:
|
220
211
|
log(ERROR, "Node instance missing")
|
221
|
-
return
|
212
|
+
return False
|
222
213
|
|
223
|
-
# Construct the
|
224
|
-
req =
|
214
|
+
# Construct the heartbeat request
|
215
|
+
req = HeartbeatRequest(node=node, heartbeat_interval=HEARTBEAT_DEFAULT_INTERVAL)
|
225
216
|
|
226
217
|
# Send the request
|
227
|
-
res = _request(req,
|
218
|
+
res = _request(req, HeartbeatResponse, PATH_PING, retry=False)
|
228
219
|
if res is None:
|
229
|
-
return
|
220
|
+
return False
|
230
221
|
|
231
222
|
# Check if success
|
232
223
|
if not res.success:
|
233
|
-
raise RuntimeError(
|
224
|
+
raise RuntimeError(
|
225
|
+
"Heartbeat failed unexpectedly. The SuperLink does not "
|
226
|
+
"recognize this SuperNode."
|
227
|
+
)
|
228
|
+
return True
|
234
229
|
|
235
|
-
|
236
|
-
rd = random.uniform(*PING_RANDOM_RANGE)
|
237
|
-
next_interval: float = PING_DEFAULT_INTERVAL - PING_CALL_TIMEOUT
|
238
|
-
next_interval *= PING_BASE_MULTIPLIER + rd
|
239
|
-
if not ping_stop_event.is_set():
|
240
|
-
ping_stop_event.wait(next_interval)
|
230
|
+
heartbeat_sender = HeartbeatSender(heartbeat)
|
241
231
|
|
242
232
|
def create_node() -> Optional[int]:
|
243
233
|
"""Set create_node."""
|
244
|
-
req = CreateNodeRequest(
|
234
|
+
req = CreateNodeRequest(heartbeat_interval=HEARTBEAT_DEFAULT_INTERVAL)
|
245
235
|
|
246
236
|
# Send the request
|
247
237
|
res = _request(req, CreateNodeResponse, PATH_CREATE_NODE)
|
248
238
|
if res is None:
|
249
239
|
return None
|
250
240
|
|
251
|
-
# Remember the node and the
|
252
|
-
nonlocal node
|
241
|
+
# Remember the node and start the heartbeat sender
|
242
|
+
nonlocal node
|
253
243
|
node = res.node
|
254
|
-
|
244
|
+
heartbeat_sender.start()
|
255
245
|
return node.node_id
|
256
246
|
|
257
247
|
def delete_node() -> None:
|
@@ -261,10 +251,8 @@ def http_request_response( # pylint: disable=R0913,R0914,R0915,R0917
|
|
261
251
|
log(ERROR, "Node instance missing")
|
262
252
|
return
|
263
253
|
|
264
|
-
# Stop the
|
265
|
-
|
266
|
-
if ping_thread is not None:
|
267
|
-
ping_thread.join()
|
254
|
+
# Stop the heartbeat sender
|
255
|
+
heartbeat_sender.stop()
|
268
256
|
|
269
257
|
# Send DeleteNode request
|
270
258
|
req = DeleteNodeRequest(node=node)
|
flwr/common/constant.py
CHANGED
@@ -55,13 +55,13 @@ EXEC_API_DEFAULT_SERVER_ADDRESS = f"{SERVER_OCTET}:{EXEC_API_PORT}"
|
|
55
55
|
SIMULATIONIO_API_DEFAULT_SERVER_ADDRESS = f"{SERVER_OCTET}:{SIMULATIONIO_PORT}"
|
56
56
|
SIMULATIONIO_API_DEFAULT_CLIENT_ADDRESS = f"{CLIENT_OCTET}:{SIMULATIONIO_PORT}"
|
57
57
|
|
58
|
-
# Constants for
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
58
|
+
# Constants for heartbeat
|
59
|
+
HEARTBEAT_DEFAULT_INTERVAL = 30
|
60
|
+
HEARTBEAT_CALL_TIMEOUT = 5
|
61
|
+
HEARTBEAT_BASE_MULTIPLIER = 0.8
|
62
|
+
HEARTBEAT_RANDOM_RANGE = (-0.1, 0.1)
|
63
|
+
HEARTBEAT_MAX_INTERVAL = 1e300
|
64
|
+
HEARTBEAT_PATIENCE = 2
|
65
65
|
|
66
66
|
# IDs
|
67
67
|
RUN_ID_NUM_BYTES = 8
|
@@ -121,6 +121,9 @@ TIMESTAMP_HEADER = "flwr-timestamp"
|
|
121
121
|
TIMESTAMP_TOLERANCE = 10 # General tolerance for timestamp verification
|
122
122
|
SYSTEM_TIME_TOLERANCE = 5 # Allowance for system time drift
|
123
123
|
|
124
|
+
# Constants for grpc retry
|
125
|
+
GRPC_RETRY_MAX_DELAY = 20 # Maximum delay duration between two consecutive retries.
|
126
|
+
|
124
127
|
# Constants for ArrayRecord
|
125
128
|
GC_THRESHOLD = 200_000_000 # 200 MB
|
126
129
|
|
flwr/common/heartbeat.py
ADDED
@@ -0,0 +1,103 @@
|
|
1
|
+
# Copyright 2025 Flower Labs GmbH. All Rights Reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
# ==============================================================================
|
15
|
+
"""Heartbeat sender."""
|
16
|
+
|
17
|
+
|
18
|
+
import random
|
19
|
+
import threading
|
20
|
+
from typing import Callable
|
21
|
+
|
22
|
+
from .constant import (
|
23
|
+
HEARTBEAT_BASE_MULTIPLIER,
|
24
|
+
HEARTBEAT_CALL_TIMEOUT,
|
25
|
+
HEARTBEAT_DEFAULT_INTERVAL,
|
26
|
+
HEARTBEAT_RANDOM_RANGE,
|
27
|
+
)
|
28
|
+
from .retry_invoker import RetryInvoker, exponential
|
29
|
+
|
30
|
+
|
31
|
+
class HeartbeatFailure(Exception):
|
32
|
+
"""Exception raised when a heartbeat fails."""
|
33
|
+
|
34
|
+
|
35
|
+
class HeartbeatSender:
|
36
|
+
"""Periodically send heartbeat signals to a server in a background thread.
|
37
|
+
|
38
|
+
This class uses the provided `heartbeat_fn` to send heartbeats. If a heartbeat
|
39
|
+
attempt fails, it will be retried using an exponential backoff strategy.
|
40
|
+
|
41
|
+
Parameters
|
42
|
+
----------
|
43
|
+
heartbeat_fn : Callable[[], bool]
|
44
|
+
Function used to send a heartbeat signal. It should return True if the heartbeat
|
45
|
+
succeeds, or False if it fails. Any internal exceptions (e.g., gRPC errors)
|
46
|
+
should be handled within this function to ensure boolean return values.
|
47
|
+
"""
|
48
|
+
|
49
|
+
def __init__(
|
50
|
+
self,
|
51
|
+
heartbeat_fn: Callable[[], bool],
|
52
|
+
) -> None:
|
53
|
+
self.heartbeat_fn = heartbeat_fn
|
54
|
+
self._stop_event = threading.Event()
|
55
|
+
self._thread = threading.Thread(target=self._run, daemon=True)
|
56
|
+
self._retry_invoker = RetryInvoker(
|
57
|
+
lambda: exponential(max_delay=20),
|
58
|
+
HeartbeatFailure, # The only exception we want to retry on
|
59
|
+
max_tries=None,
|
60
|
+
max_time=None,
|
61
|
+
# Allow the stop event to interrupt the wait
|
62
|
+
wait_function=self._stop_event.wait, # type: ignore
|
63
|
+
)
|
64
|
+
|
65
|
+
def start(self) -> None:
|
66
|
+
"""Start the heartbeat sender."""
|
67
|
+
if self._thread.is_alive():
|
68
|
+
raise RuntimeError("Heartbeat sender is already running.")
|
69
|
+
if self._stop_event.is_set():
|
70
|
+
raise RuntimeError("Cannot start a stopped heartbeat sender.")
|
71
|
+
self._thread.start()
|
72
|
+
|
73
|
+
def stop(self) -> None:
|
74
|
+
"""Stop the heartbeat sender."""
|
75
|
+
if not self._thread.is_alive():
|
76
|
+
raise RuntimeError("Heartbeat sender is not running.")
|
77
|
+
self._stop_event.set()
|
78
|
+
self._thread.join()
|
79
|
+
|
80
|
+
def _run(self) -> None:
|
81
|
+
"""Periodically send heartbeats until stopped."""
|
82
|
+
while not self._stop_event.is_set():
|
83
|
+
# Attempt to send a heartbeat with retry on failure
|
84
|
+
self._retry_invoker.invoke(self._heartbeat)
|
85
|
+
|
86
|
+
# Calculate the interval for the next heartbeat
|
87
|
+
# Formula: next_interval = (interval - timeout) * random.uniform(0.7, 0.9)
|
88
|
+
rd = random.uniform(*HEARTBEAT_RANDOM_RANGE)
|
89
|
+
next_interval: float = HEARTBEAT_DEFAULT_INTERVAL - HEARTBEAT_CALL_TIMEOUT
|
90
|
+
next_interval *= HEARTBEAT_BASE_MULTIPLIER + rd
|
91
|
+
|
92
|
+
# Wait for the calculated interval or exit early if stopped
|
93
|
+
self._stop_event.wait(next_interval)
|
94
|
+
|
95
|
+
def _heartbeat(self) -> None:
|
96
|
+
"""Send a single heartbeat and raise an exception if it fails.
|
97
|
+
|
98
|
+
Call the provided `heartbeat_fn`. If the function returns False,
|
99
|
+
a `HeartbeatFailure` exception is raised to trigger the retry mechanism.
|
100
|
+
"""
|
101
|
+
if not self._stop_event.is_set():
|
102
|
+
if not self.heartbeat_fn():
|
103
|
+
raise HeartbeatFailure
|
flwr/proto/fleet_pb2.py
CHANGED
@@ -18,7 +18,7 @@ from flwr.proto import fab_pb2 as flwr_dot_proto_dot_fab__pb2
|
|
18
18
|
from flwr.proto import message_pb2 as flwr_dot_proto_dot_message__pb2
|
19
19
|
|
20
20
|
|
21
|
-
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x16\x66lwr/proto/fleet.proto\x12\nflwr.proto\x1a\x15\x66lwr/proto/node.proto\x1a\x14\x66lwr/proto/run.proto\x1a\x14\x66lwr/proto/fab.proto\x1a\x18\x66lwr/proto/message.proto\"
|
21
|
+
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x16\x66lwr/proto/fleet.proto\x12\nflwr.proto\x1a\x15\x66lwr/proto/node.proto\x1a\x14\x66lwr/proto/run.proto\x1a\x14\x66lwr/proto/fab.proto\x1a\x18\x66lwr/proto/message.proto\"/\n\x11\x43reateNodeRequest\x12\x1a\n\x12heartbeat_interval\x18\x01 \x01(\x01\"4\n\x12\x43reateNodeResponse\x12\x1e\n\x04node\x18\x01 \x01(\x0b\x32\x10.flwr.proto.Node\"3\n\x11\x44\x65leteNodeRequest\x12\x1e\n\x04node\x18\x01 \x01(\x0b\x32\x10.flwr.proto.Node\"\x14\n\x12\x44\x65leteNodeResponse\"N\n\x10HeartbeatRequest\x12\x1e\n\x04node\x18\x01 \x01(\x0b\x32\x10.flwr.proto.Node\x12\x1a\n\x12heartbeat_interval\x18\x02 \x01(\x01\"$\n\x11HeartbeatResponse\x12\x0f\n\x07success\x18\x01 \x01(\x08\"J\n\x13PullMessagesRequest\x12\x1e\n\x04node\x18\x01 \x01(\x0b\x32\x10.flwr.proto.Node\x12\x13\n\x0bmessage_ids\x18\x02 \x03(\t\"l\n\x14PullMessagesResponse\x12(\n\treconnect\x18\x01 \x01(\x0b\x32\x15.flwr.proto.Reconnect\x12*\n\rmessages_list\x18\x02 \x03(\x0b\x32\x13.flwr.proto.Message\"a\n\x13PushMessagesRequest\x12\x1e\n\x04node\x18\x01 \x01(\x0b\x32\x10.flwr.proto.Node\x12*\n\rmessages_list\x18\x02 \x03(\x0b\x32\x13.flwr.proto.Message\"\xb0\x01\n\x14PushMessagesResponse\x12(\n\treconnect\x18\x01 \x01(\x0b\x32\x15.flwr.proto.Reconnect\x12>\n\x07results\x18\x02 \x03(\x0b\x32-.flwr.proto.PushMessagesResponse.ResultsEntry\x1a.\n\x0cResultsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\r:\x02\x38\x01\"\x1e\n\tReconnect\x12\x11\n\treconnect\x18\x01 \x01(\x04\x32\xa1\x04\n\x05\x46leet\x12M\n\nCreateNode\x12\x1d.flwr.proto.CreateNodeRequest\x1a\x1e.flwr.proto.CreateNodeResponse\"\x00\x12M\n\nDeleteNode\x12\x1d.flwr.proto.DeleteNodeRequest\x1a\x1e.flwr.proto.DeleteNodeResponse\"\x00\x12J\n\tHeartbeat\x12\x1c.flwr.proto.HeartbeatRequest\x1a\x1d.flwr.proto.HeartbeatResponse\"\x00\x12S\n\x0cPullMessages\x12\x1f.flwr.proto.PullMessagesRequest\x1a .flwr.proto.PullMessagesResponse\"\x00\x12S\n\x0cPushMessages\x12\x1f.flwr.proto.PushMessagesRequest\x1a .flwr.proto.PushMessagesResponse\"\x00\x12\x41\n\x06GetRun\x12\x19.flwr.proto.GetRunRequest\x1a\x1a.flwr.proto.GetRunResponse\"\x00\x12\x41\n\x06GetFab\x12\x19.flwr.proto.GetFabRequest\x1a\x1a.flwr.proto.GetFabResponse\"\x00\x62\x06proto3')
|
22
22
|
|
23
23
|
_globals = globals()
|
24
24
|
_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals)
|
@@ -28,29 +28,29 @@ if _descriptor._USE_C_DESCRIPTORS == False:
|
|
28
28
|
_globals['_PUSHMESSAGESRESPONSE_RESULTSENTRY']._options = None
|
29
29
|
_globals['_PUSHMESSAGESRESPONSE_RESULTSENTRY']._serialized_options = b'8\001'
|
30
30
|
_globals['_CREATENODEREQUEST']._serialized_start=131
|
31
|
-
_globals['_CREATENODEREQUEST']._serialized_end=
|
32
|
-
_globals['_CREATENODERESPONSE']._serialized_start=
|
33
|
-
_globals['_CREATENODERESPONSE']._serialized_end=
|
34
|
-
_globals['_DELETENODEREQUEST']._serialized_start=
|
35
|
-
_globals['_DELETENODEREQUEST']._serialized_end=
|
36
|
-
_globals['_DELETENODERESPONSE']._serialized_start=
|
37
|
-
_globals['_DELETENODERESPONSE']._serialized_end=
|
38
|
-
_globals['
|
39
|
-
_globals['
|
40
|
-
_globals['
|
41
|
-
_globals['
|
42
|
-
_globals['_PULLMESSAGESREQUEST']._serialized_start=
|
43
|
-
_globals['_PULLMESSAGESREQUEST']._serialized_end=
|
44
|
-
_globals['_PULLMESSAGESRESPONSE']._serialized_start=
|
45
|
-
_globals['_PULLMESSAGESRESPONSE']._serialized_end=
|
46
|
-
_globals['_PUSHMESSAGESREQUEST']._serialized_start=
|
47
|
-
_globals['_PUSHMESSAGESREQUEST']._serialized_end=
|
48
|
-
_globals['_PUSHMESSAGESRESPONSE']._serialized_start=
|
49
|
-
_globals['_PUSHMESSAGESRESPONSE']._serialized_end=
|
50
|
-
_globals['_PUSHMESSAGESRESPONSE_RESULTSENTRY']._serialized_start=
|
51
|
-
_globals['_PUSHMESSAGESRESPONSE_RESULTSENTRY']._serialized_end=
|
52
|
-
_globals['_RECONNECT']._serialized_start=
|
53
|
-
_globals['_RECONNECT']._serialized_end=
|
54
|
-
_globals['_FLEET']._serialized_start=
|
55
|
-
_globals['_FLEET']._serialized_end=
|
31
|
+
_globals['_CREATENODEREQUEST']._serialized_end=178
|
32
|
+
_globals['_CREATENODERESPONSE']._serialized_start=180
|
33
|
+
_globals['_CREATENODERESPONSE']._serialized_end=232
|
34
|
+
_globals['_DELETENODEREQUEST']._serialized_start=234
|
35
|
+
_globals['_DELETENODEREQUEST']._serialized_end=285
|
36
|
+
_globals['_DELETENODERESPONSE']._serialized_start=287
|
37
|
+
_globals['_DELETENODERESPONSE']._serialized_end=307
|
38
|
+
_globals['_HEARTBEATREQUEST']._serialized_start=309
|
39
|
+
_globals['_HEARTBEATREQUEST']._serialized_end=387
|
40
|
+
_globals['_HEARTBEATRESPONSE']._serialized_start=389
|
41
|
+
_globals['_HEARTBEATRESPONSE']._serialized_end=425
|
42
|
+
_globals['_PULLMESSAGESREQUEST']._serialized_start=427
|
43
|
+
_globals['_PULLMESSAGESREQUEST']._serialized_end=501
|
44
|
+
_globals['_PULLMESSAGESRESPONSE']._serialized_start=503
|
45
|
+
_globals['_PULLMESSAGESRESPONSE']._serialized_end=611
|
46
|
+
_globals['_PUSHMESSAGESREQUEST']._serialized_start=613
|
47
|
+
_globals['_PUSHMESSAGESREQUEST']._serialized_end=710
|
48
|
+
_globals['_PUSHMESSAGESRESPONSE']._serialized_start=713
|
49
|
+
_globals['_PUSHMESSAGESRESPONSE']._serialized_end=889
|
50
|
+
_globals['_PUSHMESSAGESRESPONSE_RESULTSENTRY']._serialized_start=843
|
51
|
+
_globals['_PUSHMESSAGESRESPONSE_RESULTSENTRY']._serialized_end=889
|
52
|
+
_globals['_RECONNECT']._serialized_start=891
|
53
|
+
_globals['_RECONNECT']._serialized_end=921
|
54
|
+
_globals['_FLEET']._serialized_start=924
|
55
|
+
_globals['_FLEET']._serialized_end=1469
|
56
56
|
# @@protoc_insertion_point(module_scope)
|
flwr/proto/fleet_pb2.pyi
CHANGED
@@ -16,13 +16,13 @@ DESCRIPTOR: google.protobuf.descriptor.FileDescriptor
|
|
16
16
|
class CreateNodeRequest(google.protobuf.message.Message):
|
17
17
|
"""CreateNode messages"""
|
18
18
|
DESCRIPTOR: google.protobuf.descriptor.Descriptor
|
19
|
-
|
20
|
-
|
19
|
+
HEARTBEAT_INTERVAL_FIELD_NUMBER: builtins.int
|
20
|
+
heartbeat_interval: builtins.float
|
21
21
|
def __init__(self,
|
22
22
|
*,
|
23
|
-
|
23
|
+
heartbeat_interval: builtins.float = ...,
|
24
24
|
) -> None: ...
|
25
|
-
def ClearField(self, field_name: typing_extensions.Literal["
|
25
|
+
def ClearField(self, field_name: typing_extensions.Literal["heartbeat_interval",b"heartbeat_interval"]) -> None: ...
|
26
26
|
global___CreateNodeRequest = CreateNodeRequest
|
27
27
|
|
28
28
|
class CreateNodeResponse(google.protobuf.message.Message):
|
@@ -58,24 +58,24 @@ class DeleteNodeResponse(google.protobuf.message.Message):
|
|
58
58
|
) -> None: ...
|
59
59
|
global___DeleteNodeResponse = DeleteNodeResponse
|
60
60
|
|
61
|
-
class
|
62
|
-
"""
|
61
|
+
class HeartbeatRequest(google.protobuf.message.Message):
|
62
|
+
"""Heartbeat messages"""
|
63
63
|
DESCRIPTOR: google.protobuf.descriptor.Descriptor
|
64
64
|
NODE_FIELD_NUMBER: builtins.int
|
65
|
-
|
65
|
+
HEARTBEAT_INTERVAL_FIELD_NUMBER: builtins.int
|
66
66
|
@property
|
67
67
|
def node(self) -> flwr.proto.node_pb2.Node: ...
|
68
|
-
|
68
|
+
heartbeat_interval: builtins.float
|
69
69
|
def __init__(self,
|
70
70
|
*,
|
71
71
|
node: typing.Optional[flwr.proto.node_pb2.Node] = ...,
|
72
|
-
|
72
|
+
heartbeat_interval: builtins.float = ...,
|
73
73
|
) -> None: ...
|
74
74
|
def HasField(self, field_name: typing_extensions.Literal["node",b"node"]) -> builtins.bool: ...
|
75
|
-
def ClearField(self, field_name: typing_extensions.Literal["
|
76
|
-
|
75
|
+
def ClearField(self, field_name: typing_extensions.Literal["heartbeat_interval",b"heartbeat_interval","node",b"node"]) -> None: ...
|
76
|
+
global___HeartbeatRequest = HeartbeatRequest
|
77
77
|
|
78
|
-
class
|
78
|
+
class HeartbeatResponse(google.protobuf.message.Message):
|
79
79
|
DESCRIPTOR: google.protobuf.descriptor.Descriptor
|
80
80
|
SUCCESS_FIELD_NUMBER: builtins.int
|
81
81
|
success: builtins.bool
|
@@ -84,7 +84,7 @@ class PingResponse(google.protobuf.message.Message):
|
|
84
84
|
success: builtins.bool = ...,
|
85
85
|
) -> None: ...
|
86
86
|
def ClearField(self, field_name: typing_extensions.Literal["success",b"success"]) -> None: ...
|
87
|
-
|
87
|
+
global___HeartbeatResponse = HeartbeatResponse
|
88
88
|
|
89
89
|
class PullMessagesRequest(google.protobuf.message.Message):
|
90
90
|
"""PullMessages messages"""
|
flwr/proto/fleet_pb2_grpc.py
CHANGED
@@ -26,10 +26,10 @@ class FleetStub(object):
|
|
26
26
|
request_serializer=flwr_dot_proto_dot_fleet__pb2.DeleteNodeRequest.SerializeToString,
|
27
27
|
response_deserializer=flwr_dot_proto_dot_fleet__pb2.DeleteNodeResponse.FromString,
|
28
28
|
)
|
29
|
-
self.
|
30
|
-
'/flwr.proto.Fleet/
|
31
|
-
request_serializer=flwr_dot_proto_dot_fleet__pb2.
|
32
|
-
response_deserializer=flwr_dot_proto_dot_fleet__pb2.
|
29
|
+
self.Heartbeat = channel.unary_unary(
|
30
|
+
'/flwr.proto.Fleet/Heartbeat',
|
31
|
+
request_serializer=flwr_dot_proto_dot_fleet__pb2.HeartbeatRequest.SerializeToString,
|
32
|
+
response_deserializer=flwr_dot_proto_dot_fleet__pb2.HeartbeatResponse.FromString,
|
33
33
|
)
|
34
34
|
self.PullMessages = channel.unary_unary(
|
35
35
|
'/flwr.proto.Fleet/PullMessages',
|
@@ -68,7 +68,7 @@ class FleetServicer(object):
|
|
68
68
|
context.set_details('Method not implemented!')
|
69
69
|
raise NotImplementedError('Method not implemented!')
|
70
70
|
|
71
|
-
def
|
71
|
+
def Heartbeat(self, request, context):
|
72
72
|
"""Missing associated documentation comment in .proto file."""
|
73
73
|
context.set_code(grpc.StatusCode.UNIMPLEMENTED)
|
74
74
|
context.set_details('Method not implemented!')
|
@@ -118,10 +118,10 @@ def add_FleetServicer_to_server(servicer, server):
|
|
118
118
|
request_deserializer=flwr_dot_proto_dot_fleet__pb2.DeleteNodeRequest.FromString,
|
119
119
|
response_serializer=flwr_dot_proto_dot_fleet__pb2.DeleteNodeResponse.SerializeToString,
|
120
120
|
),
|
121
|
-
'
|
122
|
-
servicer.
|
123
|
-
request_deserializer=flwr_dot_proto_dot_fleet__pb2.
|
124
|
-
response_serializer=flwr_dot_proto_dot_fleet__pb2.
|
121
|
+
'Heartbeat': grpc.unary_unary_rpc_method_handler(
|
122
|
+
servicer.Heartbeat,
|
123
|
+
request_deserializer=flwr_dot_proto_dot_fleet__pb2.HeartbeatRequest.FromString,
|
124
|
+
response_serializer=flwr_dot_proto_dot_fleet__pb2.HeartbeatResponse.SerializeToString,
|
125
125
|
),
|
126
126
|
'PullMessages': grpc.unary_unary_rpc_method_handler(
|
127
127
|
servicer.PullMessages,
|
@@ -188,7 +188,7 @@ class Fleet(object):
|
|
188
188
|
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
189
189
|
|
190
190
|
@staticmethod
|
191
|
-
def
|
191
|
+
def Heartbeat(request,
|
192
192
|
target,
|
193
193
|
options=(),
|
194
194
|
channel_credentials=None,
|
@@ -198,9 +198,9 @@ class Fleet(object):
|
|
198
198
|
wait_for_ready=None,
|
199
199
|
timeout=None,
|
200
200
|
metadata=None):
|
201
|
-
return grpc.experimental.unary_unary(request, target, '/flwr.proto.Fleet/
|
202
|
-
flwr_dot_proto_dot_fleet__pb2.
|
203
|
-
flwr_dot_proto_dot_fleet__pb2.
|
201
|
+
return grpc.experimental.unary_unary(request, target, '/flwr.proto.Fleet/Heartbeat',
|
202
|
+
flwr_dot_proto_dot_fleet__pb2.HeartbeatRequest.SerializeToString,
|
203
|
+
flwr_dot_proto_dot_fleet__pb2.HeartbeatResponse.FromString,
|
204
204
|
options, channel_credentials,
|
205
205
|
insecure, call_credentials, compression, wait_for_ready, timeout, metadata)
|
206
206
|
|
flwr/proto/fleet_pb2_grpc.pyi
CHANGED
@@ -18,9 +18,9 @@ class FleetStub:
|
|
18
18
|
flwr.proto.fleet_pb2.DeleteNodeRequest,
|
19
19
|
flwr.proto.fleet_pb2.DeleteNodeResponse]
|
20
20
|
|
21
|
-
|
22
|
-
flwr.proto.fleet_pb2.
|
23
|
-
flwr.proto.fleet_pb2.
|
21
|
+
Heartbeat: grpc.UnaryUnaryMultiCallable[
|
22
|
+
flwr.proto.fleet_pb2.HeartbeatRequest,
|
23
|
+
flwr.proto.fleet_pb2.HeartbeatResponse]
|
24
24
|
|
25
25
|
PullMessages: grpc.UnaryUnaryMultiCallable[
|
26
26
|
flwr.proto.fleet_pb2.PullMessagesRequest,
|
@@ -62,10 +62,10 @@ class FleetServicer(metaclass=abc.ABCMeta):
|
|
62
62
|
) -> flwr.proto.fleet_pb2.DeleteNodeResponse: ...
|
63
63
|
|
64
64
|
@abc.abstractmethod
|
65
|
-
def
|
66
|
-
request: flwr.proto.fleet_pb2.
|
65
|
+
def Heartbeat(self,
|
66
|
+
request: flwr.proto.fleet_pb2.HeartbeatRequest,
|
67
67
|
context: grpc.ServicerContext,
|
68
|
-
) -> flwr.proto.fleet_pb2.
|
68
|
+
) -> flwr.proto.fleet_pb2.HeartbeatResponse: ...
|
69
69
|
|
70
70
|
@abc.abstractmethod
|
71
71
|
def PullMessages(self,
|
@@ -35,7 +35,7 @@ from flwr.proto.fab_pb2 import GetFabRequest # pylint: disable=E0611
|
|
35
35
|
from flwr.proto.fleet_pb2 import ( # pylint: disable=E0611
|
36
36
|
CreateNodeRequest,
|
37
37
|
DeleteNodeRequest,
|
38
|
-
|
38
|
+
HeartbeatRequest,
|
39
39
|
PullMessagesRequest,
|
40
40
|
PushMessagesRequest,
|
41
41
|
)
|
@@ -81,8 +81,8 @@ class GrpcAdapterServicer(grpcadapter_pb2_grpc.GrpcAdapterServicer, FleetService
|
|
81
81
|
return _handle(request, context, CreateNodeRequest, self.CreateNode)
|
82
82
|
if request.grpc_message_name == DeleteNodeRequest.__qualname__:
|
83
83
|
return _handle(request, context, DeleteNodeRequest, self.DeleteNode)
|
84
|
-
if request.grpc_message_name ==
|
85
|
-
return _handle(request, context,
|
84
|
+
if request.grpc_message_name == HeartbeatRequest.__qualname__:
|
85
|
+
return _handle(request, context, HeartbeatRequest, self.Heartbeat)
|
86
86
|
if request.grpc_message_name == GetRunRequest.__qualname__:
|
87
87
|
return _handle(request, context, GetRunRequest, self.GetRun)
|
88
88
|
if request.grpc_message_name == GetFabRequest.__qualname__:
|
@@ -29,8 +29,8 @@ from flwr.proto.fleet_pb2 import ( # pylint: disable=E0611
|
|
29
29
|
CreateNodeResponse,
|
30
30
|
DeleteNodeRequest,
|
31
31
|
DeleteNodeResponse,
|
32
|
-
|
33
|
-
|
32
|
+
HeartbeatRequest,
|
33
|
+
HeartbeatResponse,
|
34
34
|
PullMessagesRequest,
|
35
35
|
PullMessagesResponse,
|
36
36
|
PushMessagesRequest,
|
@@ -56,7 +56,11 @@ class FleetServicer(fleet_pb2_grpc.FleetServicer):
|
|
56
56
|
self, request: CreateNodeRequest, context: grpc.ServicerContext
|
57
57
|
) -> CreateNodeResponse:
|
58
58
|
"""."""
|
59
|
-
log(
|
59
|
+
log(
|
60
|
+
INFO,
|
61
|
+
"[Fleet.CreateNode] Request heartbeat_interval=%s",
|
62
|
+
request.heartbeat_interval,
|
63
|
+
)
|
60
64
|
log(DEBUG, "[Fleet.CreateNode] Request: %s", MessageToDict(request))
|
61
65
|
response = message_handler.create_node(
|
62
66
|
request=request,
|
@@ -77,10 +81,12 @@ class FleetServicer(fleet_pb2_grpc.FleetServicer):
|
|
77
81
|
state=self.state_factory.state(),
|
78
82
|
)
|
79
83
|
|
80
|
-
def
|
84
|
+
def Heartbeat(
|
85
|
+
self, request: HeartbeatRequest, context: grpc.ServicerContext
|
86
|
+
) -> HeartbeatResponse:
|
81
87
|
"""."""
|
82
|
-
log(DEBUG, "[Fleet.
|
83
|
-
return message_handler.
|
88
|
+
log(DEBUG, "[Fleet.Heartbeat] Request: %s", MessageToDict(request))
|
89
|
+
return message_handler.heartbeat(
|
84
90
|
request=request,
|
85
91
|
state=self.state_factory.state(),
|
86
92
|
)
|
@@ -33,8 +33,8 @@ from flwr.proto.fleet_pb2 import ( # pylint: disable=E0611
|
|
33
33
|
CreateNodeResponse,
|
34
34
|
DeleteNodeRequest,
|
35
35
|
DeleteNodeResponse,
|
36
|
-
|
37
|
-
|
36
|
+
HeartbeatRequest,
|
37
|
+
HeartbeatResponse,
|
38
38
|
PullMessagesRequest,
|
39
39
|
PullMessagesResponse,
|
40
40
|
PushMessagesRequest,
|
@@ -58,7 +58,7 @@ def create_node(
|
|
58
58
|
) -> CreateNodeResponse:
|
59
59
|
"""."""
|
60
60
|
# Create node
|
61
|
-
node_id = state.create_node(
|
61
|
+
node_id = state.create_node(heartbeat_interval=request.heartbeat_interval)
|
62
62
|
return CreateNodeResponse(node=Node(node_id=node_id))
|
63
63
|
|
64
64
|
|
@@ -73,13 +73,13 @@ def delete_node(request: DeleteNodeRequest, state: LinkState) -> DeleteNodeRespo
|
|
73
73
|
return DeleteNodeResponse()
|
74
74
|
|
75
75
|
|
76
|
-
def
|
77
|
-
request:
|
76
|
+
def heartbeat(
|
77
|
+
request: HeartbeatRequest, # pylint: disable=unused-argument
|
78
78
|
state: LinkState, # pylint: disable=unused-argument
|
79
|
-
) ->
|
79
|
+
) -> HeartbeatResponse:
|
80
80
|
"""."""
|
81
|
-
res = state.
|
82
|
-
return
|
81
|
+
res = state.acknowledge_heartbeat(request.node.node_id, request.heartbeat_interval)
|
82
|
+
return HeartbeatResponse(success=res)
|
83
83
|
|
84
84
|
|
85
85
|
def pull_messages(
|
@@ -29,8 +29,8 @@ from flwr.proto.fleet_pb2 import ( # pylint: disable=E0611
|
|
29
29
|
CreateNodeResponse,
|
30
30
|
DeleteNodeRequest,
|
31
31
|
DeleteNodeResponse,
|
32
|
-
|
33
|
-
|
32
|
+
HeartbeatRequest,
|
33
|
+
HeartbeatResponse,
|
34
34
|
PullMessagesRequest,
|
35
35
|
PullMessagesResponse,
|
36
36
|
PushMessagesRequest,
|
@@ -126,14 +126,14 @@ async def push_message(request: PushMessagesRequest) -> PushMessagesResponse:
|
|
126
126
|
return message_handler.push_messages(request=request, state=state)
|
127
127
|
|
128
128
|
|
129
|
-
@rest_request_response(
|
130
|
-
async def
|
131
|
-
"""
|
129
|
+
@rest_request_response(HeartbeatRequest)
|
130
|
+
async def heartbeat(request: HeartbeatRequest) -> HeartbeatResponse:
|
131
|
+
"""Heartbeat."""
|
132
132
|
# Get state from app
|
133
133
|
state: LinkState = cast(LinkStateFactory, app.state.STATE_FACTORY).state()
|
134
134
|
|
135
135
|
# Handle message
|
136
|
-
return message_handler.
|
136
|
+
return message_handler.heartbeat(request=request, state=state)
|
137
137
|
|
138
138
|
|
139
139
|
@rest_request_response(GetRunRequest)
|
@@ -164,7 +164,7 @@ routes = [
|
|
164
164
|
Route("/api/v0/fleet/delete-node", delete_node, methods=["POST"]),
|
165
165
|
Route("/api/v0/fleet/pull-messages", pull_message, methods=["POST"]),
|
166
166
|
Route("/api/v0/fleet/push-messages", push_message, methods=["POST"]),
|
167
|
-
Route("/api/v0/fleet/
|
167
|
+
Route("/api/v0/fleet/heartbeat", heartbeat, methods=["POST"]),
|
168
168
|
Route("/api/v0/fleet/get-run", get_run, methods=["POST"]),
|
169
169
|
Route("/api/v0/fleet/get-fab", get_fab, methods=["POST"]),
|
170
170
|
]
|
@@ -31,9 +31,9 @@ from flwr.client.clientapp.utils import get_load_client_app_fn
|
|
31
31
|
from flwr.client.run_info_store import DeprecatedRunInfoStore
|
32
32
|
from flwr.common import Message
|
33
33
|
from flwr.common.constant import (
|
34
|
+
HEARTBEAT_MAX_INTERVAL,
|
34
35
|
NUM_PARTITIONS_KEY,
|
35
36
|
PARTITION_ID_KEY,
|
36
|
-
PING_MAX_INTERVAL,
|
37
37
|
ErrorCode,
|
38
38
|
)
|
39
39
|
from flwr.common.logger import log
|
@@ -53,7 +53,7 @@ def _register_nodes(
|
|
53
53
|
nodes_mapping: NodeToPartitionMapping = {}
|
54
54
|
state = state_factory.state()
|
55
55
|
for i in range(num_nodes):
|
56
|
-
node_id = state.create_node(
|
56
|
+
node_id = state.create_node(heartbeat_interval=HEARTBEAT_MAX_INTERVAL)
|
57
57
|
nodes_mapping[node_id] = i
|
58
58
|
log(DEBUG, "Registered %i nodes", len(nodes_mapping))
|
59
59
|
return nodes_mapping
|
@@ -25,9 +25,9 @@ from uuid import UUID, uuid4
|
|
25
25
|
|
26
26
|
from flwr.common import Context, Message, log, now
|
27
27
|
from flwr.common.constant import (
|
28
|
+
HEARTBEAT_PATIENCE,
|
28
29
|
MESSAGE_TTL_TOLERANCE,
|
29
30
|
NODE_ID_NUM_BYTES,
|
30
|
-
PING_PATIENCE,
|
31
31
|
RUN_ID_NUM_BYTES,
|
32
32
|
SUPERLINK_NODE_ID,
|
33
33
|
Status,
|
@@ -61,7 +61,7 @@ class InMemoryLinkState(LinkState): # pylint: disable=R0902,R0904
|
|
61
61
|
|
62
62
|
def __init__(self) -> None:
|
63
63
|
|
64
|
-
# Map node_id to (online_until,
|
64
|
+
# Map node_id to (online_until, heartbeat_interval)
|
65
65
|
self.node_ids: dict[int, tuple[float, float]] = {}
|
66
66
|
self.public_key_to_node_id: dict[bytes, int] = {}
|
67
67
|
self.node_id_to_public_key: dict[int, bytes] = {}
|
@@ -322,7 +322,7 @@ class InMemoryLinkState(LinkState): # pylint: disable=R0902,R0904
|
|
322
322
|
"""
|
323
323
|
return len(self.message_res_store)
|
324
324
|
|
325
|
-
def create_node(self,
|
325
|
+
def create_node(self, heartbeat_interval: float) -> int:
|
326
326
|
"""Create, store in the link state, and return `node_id`."""
|
327
327
|
# Sample a random int64 as node_id
|
328
328
|
node_id = generate_rand_int_from_bytes(
|
@@ -334,8 +334,11 @@ class InMemoryLinkState(LinkState): # pylint: disable=R0902,R0904
|
|
334
334
|
log(ERROR, "Unexpected node registration failure.")
|
335
335
|
return 0
|
336
336
|
|
337
|
-
# Mark the node online
|
338
|
-
self.node_ids[node_id] = (
|
337
|
+
# Mark the node online until time.time() + heartbeat_interval
|
338
|
+
self.node_ids[node_id] = (
|
339
|
+
time.time() + heartbeat_interval,
|
340
|
+
heartbeat_interval,
|
341
|
+
)
|
339
342
|
return node_id
|
340
343
|
|
341
344
|
def delete_node(self, node_id: int) -> None:
|
@@ -536,17 +539,19 @@ class InMemoryLinkState(LinkState): # pylint: disable=R0902,R0904
|
|
536
539
|
return None
|
537
540
|
return self.federation_options[run_id]
|
538
541
|
|
539
|
-
def
|
540
|
-
"""Acknowledge a
|
542
|
+
def acknowledge_heartbeat(self, node_id: int, heartbeat_interval: float) -> bool:
|
543
|
+
"""Acknowledge a heartbeat received from a node, serving as a heartbeat.
|
541
544
|
|
542
|
-
|
543
|
-
|
545
|
+
A node is considered online as long as it sends heartbeats within
|
546
|
+
the tolerated interval: HEARTBEAT_PATIENCE × heartbeat_interval.
|
547
|
+
HEARTBEAT_PATIENCE = N allows for N-1 missed heartbeat before
|
548
|
+
the node is marked as offline.
|
544
549
|
"""
|
545
550
|
with self.lock:
|
546
551
|
if node_id in self.node_ids:
|
547
552
|
self.node_ids[node_id] = (
|
548
|
-
time.time() +
|
549
|
-
|
553
|
+
time.time() + HEARTBEAT_PATIENCE * heartbeat_interval,
|
554
|
+
heartbeat_interval,
|
550
555
|
)
|
551
556
|
return True
|
552
557
|
return False
|
@@ -128,7 +128,7 @@ class LinkState(abc.ABC): # pylint: disable=R0904
|
|
128
128
|
"""Get all instruction Message IDs for the given run_id."""
|
129
129
|
|
130
130
|
@abc.abstractmethod
|
131
|
-
def create_node(self,
|
131
|
+
def create_node(self, heartbeat_interval: float) -> int:
|
132
132
|
"""Create, store in the link state, and return `node_id`."""
|
133
133
|
|
134
134
|
@abc.abstractmethod
|
@@ -267,22 +267,27 @@ class LinkState(abc.ABC): # pylint: disable=R0904
|
|
267
267
|
"""Retrieve all currently stored `node_public_keys` as a set."""
|
268
268
|
|
269
269
|
@abc.abstractmethod
|
270
|
-
def
|
271
|
-
"""Acknowledge a
|
270
|
+
def acknowledge_heartbeat(self, node_id: int, heartbeat_interval: float) -> bool:
|
271
|
+
"""Acknowledge a heartbeat received from a node.
|
272
|
+
|
273
|
+
A node is considered online as long as it sends heartbeats within
|
274
|
+
the tolerated interval: HEARTBEAT_PATIENCE × heartbeat_interval.
|
275
|
+
HEARTBEAT_PATIENCE = N allows for N-1 missed heartbeat before
|
276
|
+
the node is marked as offline.
|
272
277
|
|
273
278
|
Parameters
|
274
279
|
----------
|
275
280
|
node_id : int
|
276
|
-
The `node_id` from which the
|
277
|
-
|
281
|
+
The `node_id` from which the heartbeat was received.
|
282
|
+
heartbeat_interval : float
|
278
283
|
The interval (in seconds) from the current timestamp within which the next
|
279
|
-
|
280
|
-
an accurate assessment of the node's availability.
|
284
|
+
heartbeat from this node must be received. This acts as a hard deadline to
|
285
|
+
ensure an accurate assessment of the node's availability.
|
281
286
|
|
282
287
|
Returns
|
283
288
|
-------
|
284
289
|
is_acknowledged : bool
|
285
|
-
True if the
|
290
|
+
True if the heartbeat is successfully acknowledged; otherwise, False.
|
286
291
|
"""
|
287
292
|
|
288
293
|
@abc.abstractmethod
|
@@ -28,9 +28,9 @@ from uuid import UUID, uuid4
|
|
28
28
|
|
29
29
|
from flwr.common import Context, Message, Metadata, log, now
|
30
30
|
from flwr.common.constant import (
|
31
|
+
HEARTBEAT_PATIENCE,
|
31
32
|
MESSAGE_TTL_TOLERANCE,
|
32
33
|
NODE_ID_NUM_BYTES,
|
33
|
-
PING_PATIENCE,
|
34
34
|
RUN_ID_NUM_BYTES,
|
35
35
|
SUPERLINK_NODE_ID,
|
36
36
|
Status,
|
@@ -74,7 +74,7 @@ SQL_CREATE_TABLE_NODE = """
|
|
74
74
|
CREATE TABLE IF NOT EXISTS node(
|
75
75
|
node_id INTEGER UNIQUE,
|
76
76
|
online_until REAL,
|
77
|
-
|
77
|
+
heartbeat_interval REAL,
|
78
78
|
public_key BLOB
|
79
79
|
);
|
80
80
|
"""
|
@@ -595,7 +595,7 @@ class SqliteLinkState(LinkState): # pylint: disable=R0904
|
|
595
595
|
|
596
596
|
return {UUID(row["message_id"]) for row in rows}
|
597
597
|
|
598
|
-
def create_node(self,
|
598
|
+
def create_node(self, heartbeat_interval: float) -> int:
|
599
599
|
"""Create, store in the link state, and return `node_id`."""
|
600
600
|
# Sample a random uint64 as node_id
|
601
601
|
uint64_node_id = generate_rand_int_from_bytes(
|
@@ -607,18 +607,18 @@ class SqliteLinkState(LinkState): # pylint: disable=R0904
|
|
607
607
|
|
608
608
|
query = (
|
609
609
|
"INSERT INTO node "
|
610
|
-
"(node_id, online_until,
|
610
|
+
"(node_id, online_until, heartbeat_interval, public_key) "
|
611
611
|
"VALUES (?, ?, ?, ?)"
|
612
612
|
)
|
613
613
|
|
614
|
-
# Mark the node online util time.time() +
|
614
|
+
# Mark the node online util time.time() + heartbeat_interval
|
615
615
|
try:
|
616
616
|
self.query(
|
617
617
|
query,
|
618
618
|
(
|
619
619
|
sint64_node_id,
|
620
|
-
time.time() +
|
621
|
-
|
620
|
+
time.time() + heartbeat_interval,
|
621
|
+
heartbeat_interval,
|
622
622
|
b"", # Initialize with an empty public key
|
623
623
|
),
|
624
624
|
)
|
@@ -926,11 +926,13 @@ class SqliteLinkState(LinkState): # pylint: disable=R0904
|
|
926
926
|
row = rows[0]
|
927
927
|
return configrecord_from_bytes(row["federation_options"])
|
928
928
|
|
929
|
-
def
|
930
|
-
"""Acknowledge a
|
929
|
+
def acknowledge_heartbeat(self, node_id: int, heartbeat_interval: float) -> bool:
|
930
|
+
"""Acknowledge a heartbeat received from a node, serving as a heartbeat.
|
931
931
|
|
932
|
-
|
933
|
-
|
932
|
+
A node is considered online as long as it sends heartbeats within
|
933
|
+
the tolerated interval: HEARTBEAT_PATIENCE × heartbeat_interval.
|
934
|
+
HEARTBEAT_PATIENCE = N allows for N-1 missed heartbeat before
|
935
|
+
the node is marked as offline.
|
934
936
|
"""
|
935
937
|
sint64_node_id = convert_uint64_to_sint64(node_id)
|
936
938
|
|
@@ -939,13 +941,15 @@ class SqliteLinkState(LinkState): # pylint: disable=R0904
|
|
939
941
|
if not self.query(query, (sint64_node_id,)):
|
940
942
|
return False
|
941
943
|
|
942
|
-
# Update `online_until` and `
|
943
|
-
query =
|
944
|
+
# Update `online_until` and `heartbeat_interval` for the given `node_id`
|
945
|
+
query = (
|
946
|
+
"UPDATE node SET online_until = ?, heartbeat_interval = ? WHERE node_id = ?"
|
947
|
+
)
|
944
948
|
self.query(
|
945
949
|
query,
|
946
950
|
(
|
947
|
-
time.time() +
|
948
|
-
|
951
|
+
time.time() + HEARTBEAT_PATIENCE * heartbeat_interval,
|
952
|
+
heartbeat_interval,
|
949
953
|
sint64_node_id,
|
950
954
|
),
|
951
955
|
)
|
@@ -21,6 +21,7 @@ from uuid import UUID, uuid4
|
|
21
21
|
|
22
22
|
from flwr.common import ConfigRecord, Context, Error, Message, Metadata, now, serde
|
23
23
|
from flwr.common.constant import (
|
24
|
+
HEARTBEAT_PATIENCE,
|
24
25
|
SUPERLINK_NODE_ID,
|
25
26
|
ErrorCode,
|
26
27
|
MessageType,
|
@@ -56,8 +57,8 @@ REPLY_MESSAGE_UNAVAILABLE_ERROR_REASON = (
|
|
56
57
|
"Error: Reply Message Unavailable - The reply message has expired."
|
57
58
|
)
|
58
59
|
NODE_UNAVAILABLE_ERROR_REASON = (
|
59
|
-
"Error: Node Unavailable
|
60
|
-
"
|
60
|
+
"Error: Node Unavailable — The destination node failed to report a heartbeat "
|
61
|
+
f"within {HEARTBEAT_PATIENCE} × its expected interval."
|
61
62
|
)
|
62
63
|
|
63
64
|
|
{flwr_nightly-1.19.0.dev20250429.dist-info → flwr_nightly-1.19.0.dev20250430.dist-info}/RECORD
RENAMED
@@ -86,9 +86,8 @@ flwr/client/grpc_client/__init__.py,sha256=MDOckOODn-FJnkkFEfb2JO-2G97wrBr_TTqht
|
|
86
86
|
flwr/client/grpc_client/connection.py,sha256=xAyvcTVr7bkwUfR5P3D_LKlZYiyySpt5sEwORA1h8Gc,9189
|
87
87
|
flwr/client/grpc_rere_client/__init__.py,sha256=i7iS0Lt8B7q0E2L72e4F_YrKm6ClRKnd71PNA6PW2O0,752
|
88
88
|
flwr/client/grpc_rere_client/client_interceptor.py,sha256=zFaVHw6AxeNO-7eCKKb-RxrPa7zbM5Z-2-1Efc4adQY,2451
|
89
|
-
flwr/client/grpc_rere_client/connection.py,sha256=
|
90
|
-
flwr/client/grpc_rere_client/grpc_adapter.py,sha256=
|
91
|
-
flwr/client/heartbeat.py,sha256=bhmZMbdJTtn4zL-Y9aFtOf0skaymJvdf9tH8gyAWaVk,2404
|
89
|
+
flwr/client/grpc_rere_client/connection.py,sha256=YUqq7RoRuJok2HKcwK1k28B5lFZAhLLyOZ0ByesfqfM,11847
|
90
|
+
flwr/client/grpc_rere_client/grpc_adapter.py,sha256=s8kaM8uM4hdF1DLt8vniUH1FmygE2S-rdiolZOpHKHE,5631
|
92
91
|
flwr/client/message_handler/__init__.py,sha256=0lyljDVqre3WljiZbPcwCCf8GiIaSVI_yo_ylEyPwSE,719
|
93
92
|
flwr/client/message_handler/message_handler.py,sha256=-vZKGg2gP81182LFXDmiZtajLlIfZjV6FyMS43qQVwU,6532
|
94
93
|
flwr/client/mod/__init__.py,sha256=AtV4Y5UGuYqJdTg7bJ--KtfOZUYLGDPMy616LvtP5W4,1151
|
@@ -105,7 +104,7 @@ flwr/client/nodestate/nodestate.py,sha256=-LAjZOnS7VyHC05ll3b31cYDjwAt6l4WmYt7du
|
|
105
104
|
flwr/client/nodestate/nodestate_factory.py,sha256=UYTDCcwK_baHUmkzkJDxL0UEqvtTfOMlQRrROMCd0Xo,1430
|
106
105
|
flwr/client/numpy_client.py,sha256=Qq6ghsIAop2slKqAfgiI5NiHJ4LIxGmrik3Ror4_XVc,9581
|
107
106
|
flwr/client/rest_client/__init__.py,sha256=MBiuK62hj439m9rtwSwI184Hth6Tt5GbmpNMyl3zkZY,735
|
108
|
-
flwr/client/rest_client/connection.py,sha256=
|
107
|
+
flwr/client/rest_client/connection.py,sha256=xDjs7y50adX-SL8AiAQvELQcVzmxSu5vYkLGL_s1Epo,12575
|
109
108
|
flwr/client/run_info_store.py,sha256=MaJ3UQ-07hWtK67wnWu0zR29jrk0fsfgJX506dvEOfE,4042
|
110
109
|
flwr/client/supernode/__init__.py,sha256=i3gFbV5ie_FGyRMpzOvqtZAi0Z0ChIEJ7I2Kr0ym0PM,793
|
111
110
|
flwr/client/supernode/app.py,sha256=lURLjP8jiOWhlX3-uh-7t_l1o_JEUz_FmkuNY91xmUQ,8975
|
@@ -116,7 +115,7 @@ flwr/common/args.py,sha256=-aX_jVnSaDrJR2KZ8Wq0Y3dQHII4R4MJtJOIXzVUA0c,5417
|
|
116
115
|
flwr/common/auth_plugin/__init__.py,sha256=m271m9YjK2QfKDOuIIhcTvGmv1GWh1PL97QB05NTSHs,887
|
117
116
|
flwr/common/auth_plugin/auth_plugin.py,sha256=GaXw4IiU2DkVNkp5S9ue821sbkU9zWSu6HSVZetEdjs,3938
|
118
117
|
flwr/common/config.py,sha256=glcZDjco-amw1YfQcYTFJ4S1pt9APoexT-mf1QscuHs,13960
|
119
|
-
flwr/common/constant.py,sha256=
|
118
|
+
flwr/common/constant.py,sha256=_UFQfhBpDZ72uSRQqi3UNN4GlzEbog6sszA0it3m81Q,7132
|
120
119
|
flwr/common/context.py,sha256=Be8obQR_OvEDy1OmshuUKxGRQ7Qx89mf5F4xlhkR10s,2407
|
121
120
|
flwr/common/date.py,sha256=1ZT2cRSpC2DJqprOVTLXYCR_O2_OZR0zXO_brJ3LqWc,1554
|
122
121
|
flwr/common/differential_privacy.py,sha256=FdlpdpPl_H_2HJa8CQM1iCUGBBQ5Dc8CzxmHERM-EoE,6148
|
@@ -129,6 +128,7 @@ flwr/common/exit/exit.py,sha256=mJgbqMlVlwAgYtq-Vedj53wO4VxcDcy_P-GzqGK-1GQ,3452
|
|
129
128
|
flwr/common/exit/exit_code.py,sha256=PNEnCrZfOILjfDAFu5m-2YWEJBrk97xglq4zCUlqV7E,3470
|
130
129
|
flwr/common/exit_handlers.py,sha256=MEk5_savTLphn-6lW57UQlos-XrFA39XEBn-OF1vXXg,3174
|
131
130
|
flwr/common/grpc.py,sha256=manTaHaPiyYngUq1ErZvvV2B2GxlXUUUGRy3jc3TBIQ,9798
|
131
|
+
flwr/common/heartbeat.py,sha256=yzi-gWH5wswdg0hfQwxwGkjI5twxIHBBVW45MD5QITI,3924
|
132
132
|
flwr/common/logger.py,sha256=JbRf6E2vQxXzpDBq1T8IDUJo_usu3gjWEBPQ6uKcmdg,13049
|
133
133
|
flwr/common/message.py,sha256=znr205Erq2hkxwFbvNNCsQTRS2UKv_Qsyu0sFNEhEAw,23721
|
134
134
|
flwr/common/object_ref.py,sha256=p3SfTeqo3Aj16SkB-vsnNn01zswOPdGNBitcbRnqmUk,9134
|
@@ -173,10 +173,10 @@ flwr/proto/fab_pb2.py,sha256=2Nu0WaWxDZ8TbutMtctjdcGM7OtXiyP4kmCgg5o7Jjw,1627
|
|
173
173
|
flwr/proto/fab_pb2.pyi,sha256=AMXpiDK0fo3nZWjxsC2E4otSaVjyQbU7iiWKrsSZavs,2395
|
174
174
|
flwr/proto/fab_pb2_grpc.py,sha256=1oboBPFxaTEXt9Aw7EAj8gXHDCNMhZD2VXqocC9l_gk,159
|
175
175
|
flwr/proto/fab_pb2_grpc.pyi,sha256=ff2TSiLVnG6IVQcTGzb2DIH3XRSoAvAo_RMcvbMFyc0,76
|
176
|
-
flwr/proto/fleet_pb2.py,sha256=
|
177
|
-
flwr/proto/fleet_pb2.pyi,sha256=
|
178
|
-
flwr/proto/fleet_pb2_grpc.py,sha256=
|
179
|
-
flwr/proto/fleet_pb2_grpc.pyi,sha256=
|
176
|
+
flwr/proto/fleet_pb2.py,sha256=mjTDr6tFlP8TjSIH7nODmJ3P02gUBqdIsaE7ysjFc58,4872
|
177
|
+
flwr/proto/fleet_pb2.pyi,sha256=Zy8XTn-vRAUIYMZmwfZQ9beotu01EQEUR1BROsmiExk,7980
|
178
|
+
flwr/proto/fleet_pb2_grpc.py,sha256=hyjCT3Rm44ZJoYww1XXHjsgfA7mSGEYdesAdpy--OFY,12325
|
179
|
+
flwr/proto/fleet_pb2_grpc.pyi,sha256=w-7uzmuUEoPHFX97exLi-gK9ddHIlsA3qoDPuIx4TOY,3270
|
180
180
|
flwr/proto/grpcadapter_pb2.py,sha256=PJ8DtfeV29g_y4Z3aNZlSZocLqSxeLmTsYCdOZDYCiE,1843
|
181
181
|
flwr/proto/grpcadapter_pb2.pyi,sha256=AR77gDsF6f8zqSIQp3877DUd7S8lP95lFak5Ir_WPkw,1716
|
182
182
|
flwr/proto/grpcadapter_pb2_grpc.py,sha256=rRNuNES5nBugUZWfeA8oAy8dMHgzqU_PF1srTseo3b8,2634
|
@@ -268,30 +268,30 @@ flwr/server/superlink/ffs/ffs.py,sha256=6w7wy71i7tbuJwqEgdeCa49JejXMEof3jujURN_R
|
|
268
268
|
flwr/server/superlink/ffs/ffs_factory.py,sha256=pK-g3LMelvWTV6N9Cd-j-_-FdcGbRFTKNsWaqmlBDSk,1490
|
269
269
|
flwr/server/superlink/fleet/__init__.py,sha256=Uiwr33yfW_eL-pEfj80c_JUhIKRkCPsN1JSs2v4aglU,711
|
270
270
|
flwr/server/superlink/fleet/grpc_adapter/__init__.py,sha256=fUu1V63YrzjxAOZnBJx99WjuD4Mro7dJIFH-1V4NLV8,742
|
271
|
-
flwr/server/superlink/fleet/grpc_adapter/grpc_adapter_servicer.py,sha256=
|
271
|
+
flwr/server/superlink/fleet/grpc_adapter/grpc_adapter_servicer.py,sha256=2hp617zisimdxm1KyMc5JapfMHGG_2X4lAacDFIbQcc,4180
|
272
272
|
flwr/server/superlink/fleet/grpc_bidi/__init__.py,sha256=dOM49q1b9MrtUr5jldjEnQ38NhcUyYs-zC3gsJb1TtI,735
|
273
273
|
flwr/server/superlink/fleet/grpc_bidi/flower_service_servicer.py,sha256=UKEp-3YBaTvNt7vKZW7KLgK5xsAiO7jxU-omG7CaO_s,6021
|
274
274
|
flwr/server/superlink/fleet/grpc_bidi/grpc_bridge.py,sha256=KouR9PUcrPmMtoLooF4O9SRAwIvfiroo8mPmqUc2EZc,6485
|
275
275
|
flwr/server/superlink/fleet/grpc_bidi/grpc_client_proxy.py,sha256=iSf0mbBAlig7G6subQwBSVjcUCgSihONKdZ1RmQPTOk,4887
|
276
276
|
flwr/server/superlink/fleet/grpc_bidi/grpc_server.py,sha256=OsS-6GgCIzMMZDVu5Y-OKjynHVUrpdc_5OrtuB-IbU0,5174
|
277
277
|
flwr/server/superlink/fleet/grpc_rere/__init__.py,sha256=ahDJJ1e-lDxBpeBMgPk7YZt2wB38_QltcpOC0gLbpFs,758
|
278
|
-
flwr/server/superlink/fleet/grpc_rere/fleet_servicer.py,sha256=
|
278
|
+
flwr/server/superlink/fleet/grpc_rere/fleet_servicer.py,sha256=IiFqOykZuGL4ASTJ96jHDO-WKEF5tzb5E85BCsMdg7M,5634
|
279
279
|
flwr/server/superlink/fleet/grpc_rere/server_interceptor.py,sha256=DrHubsaLgJCwCeeJPYogQTiP0xYqjxwnT9rh7OP7BoU,6984
|
280
280
|
flwr/server/superlink/fleet/message_handler/__init__.py,sha256=fHsRV0KvJ8HtgSA4_YBsEzuhJLjO8p6xx4aCY2oE1p4,731
|
281
|
-
flwr/server/superlink/fleet/message_handler/message_handler.py,sha256=
|
281
|
+
flwr/server/superlink/fleet/message_handler/message_handler.py,sha256=rxhrjhsvpc8RVi_EZUHB0O47vbI5r6iTaJZYaBxCuuo,5292
|
282
282
|
flwr/server/superlink/fleet/rest_rere/__init__.py,sha256=Lzc93nA7tDqoy-zRUaPG316oqFiZX1HUCL5ELaXY_xw,735
|
283
|
-
flwr/server/superlink/fleet/rest_rere/rest_api.py,sha256=
|
283
|
+
flwr/server/superlink/fleet/rest_rere/rest_api.py,sha256=5I6tf9UCYJ92mIckcFvmX-h8Emvo-sN6hp-R82CPz2M,6760
|
284
284
|
flwr/server/superlink/fleet/vce/__init__.py,sha256=XOKbAWOzlCqEOQ3M2cBYkH7HKA7PxlbCJMunt-ty-DY,784
|
285
285
|
flwr/server/superlink/fleet/vce/backend/__init__.py,sha256=PPH89Yqd1XKm-sRJN6R0WQlKT_b4v54Kzl2yzHAFzM8,1437
|
286
286
|
flwr/server/superlink/fleet/vce/backend/backend.py,sha256=-wDHjgAy5mrfEgXj0GxkJI7lhEbgSUyPwmNAf9ZcDzc,2193
|
287
287
|
flwr/server/superlink/fleet/vce/backend/raybackend.py,sha256=Hx9hxL7lju1_VJoAwkhBOGerZ3628u0P1zgkPhGWRPY,7154
|
288
|
-
flwr/server/superlink/fleet/vce/vce_api.py,sha256=
|
288
|
+
flwr/server/superlink/fleet/vce/vce_api.py,sha256=m7WUiHRl-jTqzjH3cqNCj3RXe3ohT6V6I0JIR6zWZj8,12780
|
289
289
|
flwr/server/superlink/linkstate/__init__.py,sha256=OtsgvDTnZLU3k0sUbkHbqoVwW6ql2FDmb6uT6DbNkZo,1064
|
290
|
-
flwr/server/superlink/linkstate/in_memory_linkstate.py,sha256=
|
291
|
-
flwr/server/superlink/linkstate/linkstate.py,sha256=
|
290
|
+
flwr/server/superlink/linkstate/in_memory_linkstate.py,sha256=wnjwVUyAZBaXY9PviGHWTa5kOvH9JiiJ3vnUEx6Z4Ss,22501
|
291
|
+
flwr/server/superlink/linkstate/linkstate.py,sha256=6ANy92MaT0eA1ocrzEbblGNRuIqw6nIB4infGHGkDD8,12121
|
292
292
|
flwr/server/superlink/linkstate/linkstate_factory.py,sha256=8RlosqSpKOoD_vhUUQPY0jtE3A84GeF96Z7sWNkRRcA,2069
|
293
|
-
flwr/server/superlink/linkstate/sqlite_linkstate.py,sha256=
|
294
|
-
flwr/server/superlink/linkstate/utils.py,sha256=
|
293
|
+
flwr/server/superlink/linkstate/sqlite_linkstate.py,sha256=B42abgovs027Z3G9iI_HPEPP4zIjMKaTjvYVhbfhupc,39828
|
294
|
+
flwr/server/superlink/linkstate/utils.py,sha256=AJs9jTAEK7JnjF2AODXnOfy0pKAKpe6oUWPCanAP57s,15382
|
295
295
|
flwr/server/superlink/serverappio/__init__.py,sha256=Fy4zJuoccZe5mZSEIpOmQvU6YeXFBa1M4eZuXXmJcn8,717
|
296
296
|
flwr/server/superlink/serverappio/serverappio_grpc.py,sha256=opJ6SYwIAbu4NWEo3K-VxFO-tMSFmE4H3i2HwHIVRzw,2173
|
297
297
|
flwr/server/superlink/serverappio/serverappio_servicer.py,sha256=olLC0PvFY09hRWSZyJvJo-ituSDX45fvEVGaTlKufu8,13122
|
@@ -327,7 +327,7 @@ flwr/superexec/exec_servicer.py,sha256=Z0YYfs6eNPhqn8rY0x_R04XgR2mKFpggt07IH0EhU
|
|
327
327
|
flwr/superexec/exec_user_auth_interceptor.py,sha256=iqygALkOMBUu_s_R9G0mFThZA7HTUzuXCLgxLCefiwI,4440
|
328
328
|
flwr/superexec/executor.py,sha256=M5ucqSE53jfRtuCNf59WFLqQvA1Mln4741TySeZE7qQ,3112
|
329
329
|
flwr/superexec/simulation.py,sha256=j6YwUvBN7EQ09ID7MYOCVZ70PGbuyBy8f9bXU0EszEM,4088
|
330
|
-
flwr_nightly-1.19.0.
|
331
|
-
flwr_nightly-1.19.0.
|
332
|
-
flwr_nightly-1.19.0.
|
333
|
-
flwr_nightly-1.19.0.
|
330
|
+
flwr_nightly-1.19.0.dev20250430.dist-info/METADATA,sha256=vEFJGaaBBFiNbovUnzUXajKU8SWtwGGOiP-lIt-Biuk,15868
|
331
|
+
flwr_nightly-1.19.0.dev20250430.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
|
332
|
+
flwr_nightly-1.19.0.dev20250430.dist-info/entry_points.txt,sha256=2-1L-GNKhwGw2_7_RoH55vHw2SIHjdAQy3HAVAWl9PY,374
|
333
|
+
flwr_nightly-1.19.0.dev20250430.dist-info/RECORD,,
|
flwr/client/heartbeat.py
DELETED
@@ -1,74 +0,0 @@
|
|
1
|
-
# Copyright 2025 Flower Labs GmbH. All Rights Reserved.
|
2
|
-
#
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
-
# you may not use this file except in compliance with the License.
|
5
|
-
# You may obtain a copy of the License at
|
6
|
-
#
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
#
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
# See the License for the specific language governing permissions and
|
13
|
-
# limitations under the License.
|
14
|
-
# ==============================================================================
|
15
|
-
"""Heartbeat utility functions."""
|
16
|
-
|
17
|
-
|
18
|
-
import threading
|
19
|
-
from typing import Callable
|
20
|
-
|
21
|
-
import grpc
|
22
|
-
|
23
|
-
from flwr.common.constant import PING_CALL_TIMEOUT
|
24
|
-
from flwr.common.retry_invoker import RetryInvoker, RetryState, exponential
|
25
|
-
|
26
|
-
|
27
|
-
def _ping_loop(ping_fn: Callable[[], None], stop_event: threading.Event) -> None:
|
28
|
-
def wait_fn(wait_time: float) -> None:
|
29
|
-
if not stop_event.is_set():
|
30
|
-
stop_event.wait(wait_time)
|
31
|
-
|
32
|
-
def on_backoff(state: RetryState) -> None:
|
33
|
-
err = state.exception
|
34
|
-
if not isinstance(err, grpc.RpcError):
|
35
|
-
return
|
36
|
-
status_code = err.code()
|
37
|
-
# If ping call timeout is triggered
|
38
|
-
if status_code == grpc.StatusCode.DEADLINE_EXCEEDED:
|
39
|
-
# Avoid long wait time.
|
40
|
-
if state.actual_wait is None:
|
41
|
-
return
|
42
|
-
state.actual_wait = max(state.actual_wait - PING_CALL_TIMEOUT, 0.0)
|
43
|
-
|
44
|
-
def wrapped_ping() -> None:
|
45
|
-
if not stop_event.is_set():
|
46
|
-
ping_fn()
|
47
|
-
|
48
|
-
retrier = RetryInvoker(
|
49
|
-
exponential,
|
50
|
-
grpc.RpcError,
|
51
|
-
max_tries=None,
|
52
|
-
max_time=None,
|
53
|
-
on_backoff=on_backoff,
|
54
|
-
wait_function=wait_fn,
|
55
|
-
)
|
56
|
-
while not stop_event.is_set():
|
57
|
-
retrier.invoke(wrapped_ping)
|
58
|
-
|
59
|
-
|
60
|
-
def start_ping_loop(
|
61
|
-
ping_fn: Callable[[], None], stop_event: threading.Event
|
62
|
-
) -> threading.Thread:
|
63
|
-
"""Start a ping loop in a separate thread.
|
64
|
-
|
65
|
-
This function initializes a new thread that runs a ping loop, allowing for
|
66
|
-
asynchronous ping operations. The loop can be terminated through the provided stop
|
67
|
-
event.
|
68
|
-
"""
|
69
|
-
thread = threading.Thread(
|
70
|
-
target=_ping_loop, args=(ping_fn, stop_event), daemon=True
|
71
|
-
)
|
72
|
-
thread.start()
|
73
|
-
|
74
|
-
return thread
|
{flwr_nightly-1.19.0.dev20250429.dist-info → flwr_nightly-1.19.0.dev20250430.dist-info}/WHEEL
RENAMED
File without changes
|
File without changes
|