flwr-nightly 1.19.0.dev20250607__py3-none-any.whl → 1.19.0.dev20250610__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flwr/client/grpc_rere_client/connection.py +3 -1
- flwr/common/inflatable.py +0 -12
- flwr/common/inflatable_grpc_utils.py +2 -265
- flwr/common/inflatable_utils.py +268 -2
- flwr/server/grid/grpc_grid.py +3 -1
- flwr/server/superlink/fleet/grpc_rere/fleet_servicer.py +25 -63
- flwr/server/superlink/fleet/message_handler/message_handler.py +57 -1
- flwr/server/superlink/serverappio/serverappio_servicer.py +7 -7
- flwr/supercore/object_store/in_memory_object_store.py +5 -1
- flwr/supernode/start_client_internal.py +101 -59
- {flwr_nightly-1.19.0.dev20250607.dist-info → flwr_nightly-1.19.0.dev20250610.dist-info}/METADATA +1 -1
- {flwr_nightly-1.19.0.dev20250607.dist-info → flwr_nightly-1.19.0.dev20250610.dist-info}/RECORD +14 -14
- {flwr_nightly-1.19.0.dev20250607.dist-info → flwr_nightly-1.19.0.dev20250610.dist-info}/WHEEL +0 -0
- {flwr_nightly-1.19.0.dev20250607.dist-info → flwr_nightly-1.19.0.dev20250610.dist-info}/entry_points.txt +0 -0
@@ -32,9 +32,11 @@ from flwr.common.grpc import create_channel, on_channel_state_change
|
|
32
32
|
from flwr.common.heartbeat import HeartbeatSender
|
33
33
|
from flwr.common.inflatable import get_all_nested_objects
|
34
34
|
from flwr.common.inflatable_grpc_utils import (
|
35
|
-
inflate_object_from_contents,
|
36
35
|
make_pull_object_fn_grpc,
|
37
36
|
make_push_object_fn_grpc,
|
37
|
+
)
|
38
|
+
from flwr.common.inflatable_utils import (
|
39
|
+
inflate_object_from_contents,
|
38
40
|
pull_objects,
|
39
41
|
push_objects,
|
40
42
|
)
|
flwr/common/inflatable.py
CHANGED
@@ -18,11 +18,9 @@
|
|
18
18
|
from __future__ import annotations
|
19
19
|
|
20
20
|
import hashlib
|
21
|
-
from logging import ERROR
|
22
21
|
from typing import TypeVar, cast
|
23
22
|
|
24
23
|
from .constant import HEAD_BODY_DIVIDER, HEAD_VALUE_DIVIDER
|
25
|
-
from .logger import log
|
26
24
|
|
27
25
|
|
28
26
|
class UnexpectedObjectContentError(Exception):
|
@@ -175,16 +173,6 @@ def get_object_body_len_from_object_content(object_content: bytes) -> int:
|
|
175
173
|
return get_object_head_values_from_object_content(object_content)[2]
|
176
174
|
|
177
175
|
|
178
|
-
def check_body_len_consistency(object_content: bytes) -> bool:
|
179
|
-
"""Check that the object body is of length as specified in the head."""
|
180
|
-
try:
|
181
|
-
body_len = get_object_body_len_from_object_content(object_content)
|
182
|
-
return body_len == len(_get_object_body(object_content))
|
183
|
-
except ValueError:
|
184
|
-
log(ERROR, "Object content does match the expected format.")
|
185
|
-
return False
|
186
|
-
|
187
|
-
|
188
176
|
def get_object_head_values_from_object_content(
|
189
177
|
object_content: bytes,
|
190
178
|
) -> tuple[str, list[str], int]:
|
@@ -15,11 +15,7 @@
|
|
15
15
|
"""InflatableObject utils."""
|
16
16
|
|
17
17
|
|
18
|
-
import concurrent.futures
|
19
|
-
import random
|
20
|
-
import threading
|
21
|
-
import time
|
22
|
-
from typing import Callable, Optional
|
18
|
+
from typing import Callable
|
23
19
|
|
24
20
|
from flwr.proto.message_pb2 import ( # pylint: disable=E0611
|
25
21
|
PullObjectRequest,
|
@@ -29,42 +25,7 @@ from flwr.proto.message_pb2 import ( # pylint: disable=E0611
|
|
29
25
|
)
|
30
26
|
from flwr.proto.node_pb2 import Node # pylint: disable=E0611
|
31
27
|
|
32
|
-
from .constant import (
|
33
|
-
MAX_CONCURRENT_PULLS,
|
34
|
-
MAX_CONCURRENT_PUSHES,
|
35
|
-
PULL_BACKOFF_CAP,
|
36
|
-
PULL_INITIAL_BACKOFF,
|
37
|
-
PULL_MAX_TIME,
|
38
|
-
PULL_MAX_TRIES_PER_OBJECT,
|
39
|
-
)
|
40
|
-
from .inflatable import InflatableObject, get_object_head_values_from_object_content
|
41
|
-
from .message import Message
|
42
|
-
from .record import Array, ArrayRecord, ConfigRecord, MetricRecord, RecordDict
|
43
|
-
|
44
|
-
# Helper registry that maps names of classes to their type
|
45
|
-
inflatable_class_registry: dict[str, type[InflatableObject]] = {
|
46
|
-
Array.__qualname__: Array,
|
47
|
-
ArrayRecord.__qualname__: ArrayRecord,
|
48
|
-
ConfigRecord.__qualname__: ConfigRecord,
|
49
|
-
Message.__qualname__: Message,
|
50
|
-
MetricRecord.__qualname__: MetricRecord,
|
51
|
-
RecordDict.__qualname__: RecordDict,
|
52
|
-
}
|
53
|
-
|
54
|
-
|
55
|
-
class ObjectUnavailableError(Exception):
|
56
|
-
"""Exception raised when an object has been pre-registered but is not yet
|
57
|
-
available."""
|
58
|
-
|
59
|
-
def __init__(self, object_id: str):
|
60
|
-
super().__init__(f"Object with ID '{object_id}' is not yet available.")
|
61
|
-
|
62
|
-
|
63
|
-
class ObjectIdNotPreregisteredError(Exception):
|
64
|
-
"""Exception raised when an object ID is not pre-registered."""
|
65
|
-
|
66
|
-
def __init__(self, object_id: str):
|
67
|
-
super().__init__(f"Object with ID '{object_id}' could not be found.")
|
28
|
+
from .inflatable_utils import ObjectIdNotPreregisteredError, ObjectUnavailableError
|
68
29
|
|
69
30
|
|
70
31
|
def make_pull_object_fn_grpc(
|
@@ -136,227 +97,3 @@ def make_push_object_fn_grpc(
|
|
136
97
|
raise ObjectIdNotPreregisteredError(object_id)
|
137
98
|
|
138
99
|
return push_object_fn
|
139
|
-
|
140
|
-
|
141
|
-
def push_objects(
|
142
|
-
objects: dict[str, InflatableObject],
|
143
|
-
push_object_fn: Callable[[str, bytes], None],
|
144
|
-
*,
|
145
|
-
object_ids_to_push: Optional[set[str]] = None,
|
146
|
-
keep_objects: bool = False,
|
147
|
-
max_concurrent_pushes: int = MAX_CONCURRENT_PUSHES,
|
148
|
-
) -> None:
|
149
|
-
"""Push multiple objects to the servicer.
|
150
|
-
|
151
|
-
Parameters
|
152
|
-
----------
|
153
|
-
objects : dict[str, InflatableObject]
|
154
|
-
A dictionary of objects to push, where keys are object IDs and values are
|
155
|
-
`InflatableObject` instances.
|
156
|
-
push_object_fn : Callable[[str, bytes], None]
|
157
|
-
A function that takes an object ID and its content as bytes, and pushes
|
158
|
-
it to the servicer. This function should raise `ObjectIdNotPreregisteredError`
|
159
|
-
if the object ID is not pre-registered.
|
160
|
-
object_ids_to_push : Optional[set[str]] (default: None)
|
161
|
-
A set of object IDs to push. If not provided, all objects will be pushed.
|
162
|
-
keep_objects : bool (default: False)
|
163
|
-
If `True`, the original objects will be kept in the `objects` dictionary
|
164
|
-
after pushing. If `False`, they will be removed from the dictionary to avoid
|
165
|
-
high memory usage.
|
166
|
-
max_concurrent_pushes : int (default: MAX_CONCURRENT_PUSHES)
|
167
|
-
The maximum number of concurrent pushes to perform.
|
168
|
-
"""
|
169
|
-
if object_ids_to_push is not None:
|
170
|
-
# Filter objects to push only those with IDs in the set
|
171
|
-
objects = {k: v for k, v in objects.items() if k in object_ids_to_push}
|
172
|
-
|
173
|
-
lock = threading.Lock()
|
174
|
-
|
175
|
-
def push(obj_id: str) -> None:
|
176
|
-
"""Push a single object."""
|
177
|
-
object_content = objects[obj_id].deflate()
|
178
|
-
if not keep_objects:
|
179
|
-
with lock:
|
180
|
-
del objects[obj_id]
|
181
|
-
push_object_fn(obj_id, object_content)
|
182
|
-
|
183
|
-
with concurrent.futures.ThreadPoolExecutor(
|
184
|
-
max_workers=max_concurrent_pushes
|
185
|
-
) as executor:
|
186
|
-
list(executor.map(push, list(objects.keys())))
|
187
|
-
|
188
|
-
|
189
|
-
def pull_objects( # pylint: disable=too-many-arguments
|
190
|
-
object_ids: list[str],
|
191
|
-
pull_object_fn: Callable[[str], bytes],
|
192
|
-
*,
|
193
|
-
max_concurrent_pulls: int = MAX_CONCURRENT_PULLS,
|
194
|
-
max_time: Optional[float] = PULL_MAX_TIME,
|
195
|
-
max_tries_per_object: Optional[int] = PULL_MAX_TRIES_PER_OBJECT,
|
196
|
-
initial_backoff: float = PULL_INITIAL_BACKOFF,
|
197
|
-
backoff_cap: float = PULL_BACKOFF_CAP,
|
198
|
-
) -> dict[str, bytes]:
|
199
|
-
"""Pull multiple objects from the servicer.
|
200
|
-
|
201
|
-
Parameters
|
202
|
-
----------
|
203
|
-
object_ids : list[str]
|
204
|
-
A list of object IDs to pull.
|
205
|
-
pull_object_fn : Callable[[str], bytes]
|
206
|
-
A function that takes an object ID and returns the object content as bytes.
|
207
|
-
The function should raise `ObjectUnavailableError` if the object is not yet
|
208
|
-
available, or `ObjectIdNotPreregisteredError` if the object ID is not
|
209
|
-
pre-registered.
|
210
|
-
max_concurrent_pulls : int (default: MAX_CONCURRENT_PULLS)
|
211
|
-
The maximum number of concurrent pulls to perform.
|
212
|
-
max_time : Optional[float] (default: PULL_MAX_TIME)
|
213
|
-
The maximum time to wait for all pulls to complete. If `None`, waits
|
214
|
-
indefinitely.
|
215
|
-
max_tries_per_object : Optional[int] (default: PULL_MAX_TRIES_PER_OBJECT)
|
216
|
-
The maximum number of attempts to pull each object. If `None`, pulls
|
217
|
-
indefinitely until the object is available.
|
218
|
-
initial_backoff : float (default: PULL_INITIAL_BACKOFF)
|
219
|
-
The initial backoff time in seconds for retrying pulls after an
|
220
|
-
`ObjectUnavailableError`.
|
221
|
-
backoff_cap : float (default: PULL_BACKOFF_CAP)
|
222
|
-
The maximum backoff time in seconds. Backoff times will not exceed this value.
|
223
|
-
|
224
|
-
Returns
|
225
|
-
-------
|
226
|
-
dict[str, bytes]
|
227
|
-
A dictionary where keys are object IDs and values are the pulled
|
228
|
-
object contents.
|
229
|
-
"""
|
230
|
-
if max_tries_per_object is None:
|
231
|
-
max_tries_per_object = int(1e9)
|
232
|
-
if max_time is None:
|
233
|
-
max_time = float("inf")
|
234
|
-
|
235
|
-
results: dict[str, bytes] = {}
|
236
|
-
results_lock = threading.Lock()
|
237
|
-
err_to_raise: Optional[Exception] = None
|
238
|
-
early_stop = threading.Event()
|
239
|
-
start = time.monotonic()
|
240
|
-
|
241
|
-
def pull_with_retries(object_id: str) -> None:
|
242
|
-
"""Attempt to pull a single object with retry and backoff."""
|
243
|
-
nonlocal err_to_raise
|
244
|
-
tries = 0
|
245
|
-
delay = initial_backoff
|
246
|
-
|
247
|
-
while not early_stop.is_set():
|
248
|
-
try:
|
249
|
-
object_content = pull_object_fn(object_id)
|
250
|
-
with results_lock:
|
251
|
-
results[object_id] = object_content
|
252
|
-
return
|
253
|
-
|
254
|
-
except ObjectUnavailableError as err:
|
255
|
-
tries += 1
|
256
|
-
if (
|
257
|
-
tries >= max_tries_per_object
|
258
|
-
or time.monotonic() - start >= max_time
|
259
|
-
):
|
260
|
-
# Stop all work if one object exhausts retries
|
261
|
-
early_stop.set()
|
262
|
-
with results_lock:
|
263
|
-
if err_to_raise is None:
|
264
|
-
err_to_raise = err
|
265
|
-
return
|
266
|
-
|
267
|
-
# Apply exponential backoff with ±20% jitter
|
268
|
-
sleep_time = delay * (1 + random.uniform(-0.2, 0.2))
|
269
|
-
early_stop.wait(sleep_time)
|
270
|
-
delay = min(delay * 2, backoff_cap)
|
271
|
-
|
272
|
-
except ObjectIdNotPreregisteredError as err:
|
273
|
-
# Permanent failure: object ID is invalid
|
274
|
-
early_stop.set()
|
275
|
-
with results_lock:
|
276
|
-
if err_to_raise is None:
|
277
|
-
err_to_raise = err
|
278
|
-
return
|
279
|
-
|
280
|
-
# Submit all pull tasks concurrently
|
281
|
-
with concurrent.futures.ThreadPoolExecutor(
|
282
|
-
max_workers=max_concurrent_pulls
|
283
|
-
) as executor:
|
284
|
-
futures = {
|
285
|
-
executor.submit(pull_with_retries, obj_id): obj_id for obj_id in object_ids
|
286
|
-
}
|
287
|
-
|
288
|
-
# Wait for completion
|
289
|
-
concurrent.futures.wait(futures)
|
290
|
-
|
291
|
-
if err_to_raise is not None:
|
292
|
-
raise err_to_raise
|
293
|
-
|
294
|
-
return results
|
295
|
-
|
296
|
-
|
297
|
-
def inflate_object_from_contents(
|
298
|
-
object_id: str,
|
299
|
-
object_contents: dict[str, bytes],
|
300
|
-
*,
|
301
|
-
keep_object_contents: bool = False,
|
302
|
-
objects: Optional[dict[str, InflatableObject]] = None,
|
303
|
-
) -> InflatableObject:
|
304
|
-
"""Inflate an object from object contents.
|
305
|
-
|
306
|
-
Parameters
|
307
|
-
----------
|
308
|
-
object_id : str
|
309
|
-
The ID of the object to inflate.
|
310
|
-
object_contents : dict[str, bytes]
|
311
|
-
A dictionary mapping object IDs to their contents as bytes.
|
312
|
-
All descendant objects must be present in this dictionary.
|
313
|
-
keep_object_contents : bool (default: False)
|
314
|
-
If `True`, the object content will be kept in the `object_contents`
|
315
|
-
dictionary after inflation. If `False`, the object content will be
|
316
|
-
removed from the dictionary to save memory.
|
317
|
-
objects : Optional[dict[str, InflatableObject]] (default: None)
|
318
|
-
No need to provide this parameter. A dictionary to store already
|
319
|
-
inflated objects, mapping object IDs to their corresponding
|
320
|
-
`InflatableObject` instances.
|
321
|
-
|
322
|
-
Returns
|
323
|
-
-------
|
324
|
-
InflatableObject
|
325
|
-
The inflated object.
|
326
|
-
"""
|
327
|
-
if objects is None:
|
328
|
-
# Initialize objects dictionary
|
329
|
-
objects = {}
|
330
|
-
|
331
|
-
if object_id in objects:
|
332
|
-
# If the object is already in the objects dictionary, return it
|
333
|
-
return objects[object_id]
|
334
|
-
|
335
|
-
# Extract object class and object_ids of children
|
336
|
-
object_content = object_contents[object_id]
|
337
|
-
obj_type, children_obj_ids, _ = get_object_head_values_from_object_content(
|
338
|
-
object_content=object_contents[object_id]
|
339
|
-
)
|
340
|
-
|
341
|
-
# Remove the object content from the dictionary to save memory
|
342
|
-
if not keep_object_contents:
|
343
|
-
del object_contents[object_id]
|
344
|
-
|
345
|
-
# Resolve object class
|
346
|
-
cls_type = inflatable_class_registry[obj_type]
|
347
|
-
|
348
|
-
# Inflate all children objects
|
349
|
-
children: dict[str, InflatableObject] = {}
|
350
|
-
for child_obj_id in children_obj_ids:
|
351
|
-
children[child_obj_id] = inflate_object_from_contents(
|
352
|
-
child_obj_id,
|
353
|
-
object_contents,
|
354
|
-
keep_object_contents=keep_object_contents,
|
355
|
-
objects=objects,
|
356
|
-
)
|
357
|
-
|
358
|
-
# Inflate object passing its children
|
359
|
-
obj = cls_type.inflate(object_content, children=children)
|
360
|
-
del object_content # Free memory after inflation
|
361
|
-
objects[object_id] = obj
|
362
|
-
return obj
|
flwr/common/inflatable_utils.py
CHANGED
@@ -14,15 +14,281 @@
|
|
14
14
|
# ==============================================================================
|
15
15
|
"""InflatableObject utilities."""
|
16
16
|
|
17
|
+
import concurrent.futures
|
18
|
+
import random
|
19
|
+
import threading
|
20
|
+
import time
|
21
|
+
from typing import Callable, Optional
|
17
22
|
|
18
|
-
from .constant import HEAD_BODY_DIVIDER, HEAD_VALUE_DIVIDER
|
23
|
+
from .constant import (
|
24
|
+
HEAD_BODY_DIVIDER,
|
25
|
+
HEAD_VALUE_DIVIDER,
|
26
|
+
MAX_CONCURRENT_PULLS,
|
27
|
+
MAX_CONCURRENT_PUSHES,
|
28
|
+
PULL_BACKOFF_CAP,
|
29
|
+
PULL_INITIAL_BACKOFF,
|
30
|
+
PULL_MAX_TIME,
|
31
|
+
PULL_MAX_TRIES_PER_OBJECT,
|
32
|
+
)
|
19
33
|
from .inflatable import (
|
34
|
+
InflatableObject,
|
20
35
|
UnexpectedObjectContentError,
|
21
36
|
_get_object_head,
|
37
|
+
get_object_head_values_from_object_content,
|
22
38
|
get_object_id,
|
23
39
|
is_valid_sha256_hash,
|
24
40
|
)
|
25
|
-
from .
|
41
|
+
from .message import Message
|
42
|
+
from .record import Array, ArrayRecord, ConfigRecord, MetricRecord, RecordDict
|
43
|
+
|
44
|
+
# Helper registry that maps names of classes to their type
|
45
|
+
inflatable_class_registry: dict[str, type[InflatableObject]] = {
|
46
|
+
Array.__qualname__: Array,
|
47
|
+
ArrayRecord.__qualname__: ArrayRecord,
|
48
|
+
ConfigRecord.__qualname__: ConfigRecord,
|
49
|
+
Message.__qualname__: Message,
|
50
|
+
MetricRecord.__qualname__: MetricRecord,
|
51
|
+
RecordDict.__qualname__: RecordDict,
|
52
|
+
}
|
53
|
+
|
54
|
+
|
55
|
+
class ObjectUnavailableError(Exception):
|
56
|
+
"""Exception raised when an object has been pre-registered but is not yet
|
57
|
+
available."""
|
58
|
+
|
59
|
+
def __init__(self, object_id: str):
|
60
|
+
super().__init__(f"Object with ID '{object_id}' is not yet available.")
|
61
|
+
|
62
|
+
|
63
|
+
class ObjectIdNotPreregisteredError(Exception):
|
64
|
+
"""Exception raised when an object ID is not pre-registered."""
|
65
|
+
|
66
|
+
def __init__(self, object_id: str):
|
67
|
+
super().__init__(f"Object with ID '{object_id}' could not be found.")
|
68
|
+
|
69
|
+
|
70
|
+
def push_objects(
|
71
|
+
objects: dict[str, InflatableObject],
|
72
|
+
push_object_fn: Callable[[str, bytes], None],
|
73
|
+
*,
|
74
|
+
object_ids_to_push: Optional[set[str]] = None,
|
75
|
+
keep_objects: bool = False,
|
76
|
+
max_concurrent_pushes: int = MAX_CONCURRENT_PUSHES,
|
77
|
+
) -> None:
|
78
|
+
"""Push multiple objects to the servicer.
|
79
|
+
|
80
|
+
Parameters
|
81
|
+
----------
|
82
|
+
objects : dict[str, InflatableObject]
|
83
|
+
A dictionary of objects to push, where keys are object IDs and values are
|
84
|
+
`InflatableObject` instances.
|
85
|
+
push_object_fn : Callable[[str, bytes], None]
|
86
|
+
A function that takes an object ID and its content as bytes, and pushes
|
87
|
+
it to the servicer. This function should raise `ObjectIdNotPreregisteredError`
|
88
|
+
if the object ID is not pre-registered.
|
89
|
+
object_ids_to_push : Optional[set[str]] (default: None)
|
90
|
+
A set of object IDs to push. If not provided, all objects will be pushed.
|
91
|
+
keep_objects : bool (default: False)
|
92
|
+
If `True`, the original objects will be kept in the `objects` dictionary
|
93
|
+
after pushing. If `False`, they will be removed from the dictionary to avoid
|
94
|
+
high memory usage.
|
95
|
+
max_concurrent_pushes : int (default: MAX_CONCURRENT_PUSHES)
|
96
|
+
The maximum number of concurrent pushes to perform.
|
97
|
+
"""
|
98
|
+
if object_ids_to_push is not None:
|
99
|
+
# Filter objects to push only those with IDs in the set
|
100
|
+
objects = {k: v for k, v in objects.items() if k in object_ids_to_push}
|
101
|
+
|
102
|
+
lock = threading.Lock()
|
103
|
+
|
104
|
+
def push(obj_id: str) -> None:
|
105
|
+
"""Push a single object."""
|
106
|
+
object_content = objects[obj_id].deflate()
|
107
|
+
if not keep_objects:
|
108
|
+
with lock:
|
109
|
+
del objects[obj_id]
|
110
|
+
push_object_fn(obj_id, object_content)
|
111
|
+
|
112
|
+
with concurrent.futures.ThreadPoolExecutor(
|
113
|
+
max_workers=max_concurrent_pushes
|
114
|
+
) as executor:
|
115
|
+
list(executor.map(push, list(objects.keys())))
|
116
|
+
|
117
|
+
|
118
|
+
def pull_objects( # pylint: disable=too-many-arguments
|
119
|
+
object_ids: list[str],
|
120
|
+
pull_object_fn: Callable[[str], bytes],
|
121
|
+
*,
|
122
|
+
max_concurrent_pulls: int = MAX_CONCURRENT_PULLS,
|
123
|
+
max_time: Optional[float] = PULL_MAX_TIME,
|
124
|
+
max_tries_per_object: Optional[int] = PULL_MAX_TRIES_PER_OBJECT,
|
125
|
+
initial_backoff: float = PULL_INITIAL_BACKOFF,
|
126
|
+
backoff_cap: float = PULL_BACKOFF_CAP,
|
127
|
+
) -> dict[str, bytes]:
|
128
|
+
"""Pull multiple objects from the servicer.
|
129
|
+
|
130
|
+
Parameters
|
131
|
+
----------
|
132
|
+
object_ids : list[str]
|
133
|
+
A list of object IDs to pull.
|
134
|
+
pull_object_fn : Callable[[str], bytes]
|
135
|
+
A function that takes an object ID and returns the object content as bytes.
|
136
|
+
The function should raise `ObjectUnavailableError` if the object is not yet
|
137
|
+
available, or `ObjectIdNotPreregisteredError` if the object ID is not
|
138
|
+
pre-registered.
|
139
|
+
max_concurrent_pulls : int (default: MAX_CONCURRENT_PULLS)
|
140
|
+
The maximum number of concurrent pulls to perform.
|
141
|
+
max_time : Optional[float] (default: PULL_MAX_TIME)
|
142
|
+
The maximum time to wait for all pulls to complete. If `None`, waits
|
143
|
+
indefinitely.
|
144
|
+
max_tries_per_object : Optional[int] (default: PULL_MAX_TRIES_PER_OBJECT)
|
145
|
+
The maximum number of attempts to pull each object. If `None`, pulls
|
146
|
+
indefinitely until the object is available.
|
147
|
+
initial_backoff : float (default: PULL_INITIAL_BACKOFF)
|
148
|
+
The initial backoff time in seconds for retrying pulls after an
|
149
|
+
`ObjectUnavailableError`.
|
150
|
+
backoff_cap : float (default: PULL_BACKOFF_CAP)
|
151
|
+
The maximum backoff time in seconds. Backoff times will not exceed this value.
|
152
|
+
|
153
|
+
Returns
|
154
|
+
-------
|
155
|
+
dict[str, bytes]
|
156
|
+
A dictionary where keys are object IDs and values are the pulled
|
157
|
+
object contents.
|
158
|
+
"""
|
159
|
+
if max_tries_per_object is None:
|
160
|
+
max_tries_per_object = int(1e9)
|
161
|
+
if max_time is None:
|
162
|
+
max_time = float("inf")
|
163
|
+
|
164
|
+
results: dict[str, bytes] = {}
|
165
|
+
results_lock = threading.Lock()
|
166
|
+
err_to_raise: Optional[Exception] = None
|
167
|
+
early_stop = threading.Event()
|
168
|
+
start = time.monotonic()
|
169
|
+
|
170
|
+
def pull_with_retries(object_id: str) -> None:
|
171
|
+
"""Attempt to pull a single object with retry and backoff."""
|
172
|
+
nonlocal err_to_raise
|
173
|
+
tries = 0
|
174
|
+
delay = initial_backoff
|
175
|
+
|
176
|
+
while not early_stop.is_set():
|
177
|
+
try:
|
178
|
+
object_content = pull_object_fn(object_id)
|
179
|
+
with results_lock:
|
180
|
+
results[object_id] = object_content
|
181
|
+
return
|
182
|
+
|
183
|
+
except ObjectUnavailableError as err:
|
184
|
+
tries += 1
|
185
|
+
if (
|
186
|
+
tries >= max_tries_per_object
|
187
|
+
or time.monotonic() - start >= max_time
|
188
|
+
):
|
189
|
+
# Stop all work if one object exhausts retries
|
190
|
+
early_stop.set()
|
191
|
+
with results_lock:
|
192
|
+
if err_to_raise is None:
|
193
|
+
err_to_raise = err
|
194
|
+
return
|
195
|
+
|
196
|
+
# Apply exponential backoff with ±20% jitter
|
197
|
+
sleep_time = delay * (1 + random.uniform(-0.2, 0.2))
|
198
|
+
early_stop.wait(sleep_time)
|
199
|
+
delay = min(delay * 2, backoff_cap)
|
200
|
+
|
201
|
+
except ObjectIdNotPreregisteredError as err:
|
202
|
+
# Permanent failure: object ID is invalid
|
203
|
+
early_stop.set()
|
204
|
+
with results_lock:
|
205
|
+
if err_to_raise is None:
|
206
|
+
err_to_raise = err
|
207
|
+
return
|
208
|
+
|
209
|
+
# Submit all pull tasks concurrently
|
210
|
+
with concurrent.futures.ThreadPoolExecutor(
|
211
|
+
max_workers=max_concurrent_pulls
|
212
|
+
) as executor:
|
213
|
+
futures = {
|
214
|
+
executor.submit(pull_with_retries, obj_id): obj_id for obj_id in object_ids
|
215
|
+
}
|
216
|
+
|
217
|
+
# Wait for completion
|
218
|
+
concurrent.futures.wait(futures)
|
219
|
+
|
220
|
+
if err_to_raise is not None:
|
221
|
+
raise err_to_raise
|
222
|
+
|
223
|
+
return results
|
224
|
+
|
225
|
+
|
226
|
+
def inflate_object_from_contents(
|
227
|
+
object_id: str,
|
228
|
+
object_contents: dict[str, bytes],
|
229
|
+
*,
|
230
|
+
keep_object_contents: bool = False,
|
231
|
+
objects: Optional[dict[str, InflatableObject]] = None,
|
232
|
+
) -> InflatableObject:
|
233
|
+
"""Inflate an object from object contents.
|
234
|
+
|
235
|
+
Parameters
|
236
|
+
----------
|
237
|
+
object_id : str
|
238
|
+
The ID of the object to inflate.
|
239
|
+
object_contents : dict[str, bytes]
|
240
|
+
A dictionary mapping object IDs to their contents as bytes.
|
241
|
+
All descendant objects must be present in this dictionary.
|
242
|
+
keep_object_contents : bool (default: False)
|
243
|
+
If `True`, the object content will be kept in the `object_contents`
|
244
|
+
dictionary after inflation. If `False`, the object content will be
|
245
|
+
removed from the dictionary to save memory.
|
246
|
+
objects : Optional[dict[str, InflatableObject]] (default: None)
|
247
|
+
No need to provide this parameter. A dictionary to store already
|
248
|
+
inflated objects, mapping object IDs to their corresponding
|
249
|
+
`InflatableObject` instances.
|
250
|
+
|
251
|
+
Returns
|
252
|
+
-------
|
253
|
+
InflatableObject
|
254
|
+
The inflated object.
|
255
|
+
"""
|
256
|
+
if objects is None:
|
257
|
+
# Initialize objects dictionary
|
258
|
+
objects = {}
|
259
|
+
|
260
|
+
if object_id in objects:
|
261
|
+
# If the object is already in the objects dictionary, return it
|
262
|
+
return objects[object_id]
|
263
|
+
|
264
|
+
# Extract object class and object_ids of children
|
265
|
+
object_content = object_contents[object_id]
|
266
|
+
obj_type, children_obj_ids, _ = get_object_head_values_from_object_content(
|
267
|
+
object_content=object_contents[object_id]
|
268
|
+
)
|
269
|
+
|
270
|
+
# Remove the object content from the dictionary to save memory
|
271
|
+
if not keep_object_contents:
|
272
|
+
del object_contents[object_id]
|
273
|
+
|
274
|
+
# Resolve object class
|
275
|
+
cls_type = inflatable_class_registry[obj_type]
|
276
|
+
|
277
|
+
# Inflate all children objects
|
278
|
+
children: dict[str, InflatableObject] = {}
|
279
|
+
for child_obj_id in children_obj_ids:
|
280
|
+
children[child_obj_id] = inflate_object_from_contents(
|
281
|
+
child_obj_id,
|
282
|
+
object_contents,
|
283
|
+
keep_object_contents=keep_object_contents,
|
284
|
+
objects=objects,
|
285
|
+
)
|
286
|
+
|
287
|
+
# Inflate object passing its children
|
288
|
+
obj = cls_type.inflate(object_content, children=children)
|
289
|
+
del object_content # Free memory after inflation
|
290
|
+
objects[object_id] = obj
|
291
|
+
return obj
|
26
292
|
|
27
293
|
|
28
294
|
def validate_object_content(content: bytes) -> None:
|
flwr/server/grid/grpc_grid.py
CHANGED
@@ -30,9 +30,11 @@ from flwr.common.constant import (
|
|
30
30
|
from flwr.common.grpc import create_channel, on_channel_state_change
|
31
31
|
from flwr.common.inflatable import get_all_nested_objects
|
32
32
|
from flwr.common.inflatable_grpc_utils import (
|
33
|
-
inflate_object_from_contents,
|
34
33
|
make_pull_object_fn_grpc,
|
35
34
|
make_push_object_fn_grpc,
|
35
|
+
)
|
36
|
+
from flwr.common.inflatable_utils import (
|
37
|
+
inflate_object_from_contents,
|
36
38
|
pull_objects,
|
37
39
|
push_objects,
|
38
40
|
)
|
@@ -15,13 +15,12 @@
|
|
15
15
|
"""Fleet API gRPC request-response servicer."""
|
16
16
|
|
17
17
|
|
18
|
-
from logging import DEBUG, ERROR, INFO
|
18
|
+
from logging import DEBUG, INFO
|
19
19
|
|
20
20
|
import grpc
|
21
21
|
from google.protobuf.json_format import MessageToDict
|
22
22
|
|
23
|
-
from flwr.common.constant import Status
|
24
|
-
from flwr.common.inflatable import check_body_len_consistency
|
23
|
+
from flwr.common.inflatable import UnexpectedObjectContentError
|
25
24
|
from flwr.common.logger import log
|
26
25
|
from flwr.common.typing import InvalidRunStatusException
|
27
26
|
from flwr.proto import fleet_pb2_grpc # pylint: disable=E0611
|
@@ -50,9 +49,8 @@ from flwr.proto.run_pb2 import GetRunRequest, GetRunResponse # pylint: disable=
|
|
50
49
|
from flwr.server.superlink.ffs.ffs_factory import FfsFactory
|
51
50
|
from flwr.server.superlink.fleet.message_handler import message_handler
|
52
51
|
from flwr.server.superlink.linkstate import LinkStateFactory
|
53
|
-
from flwr.server.superlink.utils import abort_grpc_context, check_abort
|
52
|
+
from flwr.server.superlink.utils import abort_grpc_context
|
54
53
|
from flwr.supercore.object_store import ObjectStoreFactory
|
55
|
-
from flwr.supercore.object_store.object_store import NoObjectInStoreError
|
56
54
|
|
57
55
|
|
58
56
|
class FleetServicer(fleet_pb2_grpc.FleetServicer):
|
@@ -185,39 +183,20 @@ class FleetServicer(fleet_pb2_grpc.FleetServicer):
|
|
185
183
|
request.object_id,
|
186
184
|
)
|
187
185
|
|
188
|
-
state = self.state_factory.state()
|
189
|
-
|
190
|
-
# Abort if the run is not running
|
191
|
-
abort_msg = check_abort(
|
192
|
-
request.run_id,
|
193
|
-
[Status.PENDING, Status.STARTING, Status.FINISHED],
|
194
|
-
state,
|
195
|
-
)
|
196
|
-
if abort_msg:
|
197
|
-
abort_grpc_context(abort_msg, context)
|
198
|
-
|
199
|
-
if request.node.node_id not in state.get_nodes(run_id=request.run_id):
|
200
|
-
# Cancel insertion in ObjectStore
|
201
|
-
context.abort(grpc.StatusCode.FAILED_PRECONDITION, "Unexpected node ID.")
|
202
|
-
|
203
|
-
if not check_body_len_consistency(request.object_content):
|
204
|
-
# Cancel insertion in ObjectStore
|
205
|
-
context.abort(
|
206
|
-
grpc.StatusCode.FAILED_PRECONDITION, "Unexpected object length"
|
207
|
-
)
|
208
|
-
|
209
|
-
# Init store
|
210
|
-
store = self.objectstore_factory.store()
|
211
|
-
|
212
|
-
# Insert in store
|
213
|
-
stored = False
|
214
186
|
try:
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
187
|
+
# Insert in Store
|
188
|
+
res = message_handler.push_object(
|
189
|
+
request=request,
|
190
|
+
state=self.state_factory.state(),
|
191
|
+
store=self.objectstore_factory.store(),
|
192
|
+
)
|
193
|
+
except InvalidRunStatusException as e:
|
194
|
+
abort_grpc_context(e.message, context)
|
195
|
+
except UnexpectedObjectContentError as e:
|
196
|
+
# Object content is not valid
|
197
|
+
context.abort(grpc.StatusCode.FAILED_PRECONDITION, str(e))
|
219
198
|
|
220
|
-
return
|
199
|
+
return res
|
221
200
|
|
222
201
|
def PullObject(
|
223
202
|
self, request: PullObjectRequest, context: grpc.ServicerContext
|
@@ -229,31 +208,14 @@ class FleetServicer(fleet_pb2_grpc.FleetServicer):
|
|
229
208
|
request.object_id,
|
230
209
|
)
|
231
210
|
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
state,
|
239
|
-
)
|
240
|
-
if abort_msg:
|
241
|
-
abort_grpc_context(abort_msg, context)
|
242
|
-
|
243
|
-
if request.node.node_id not in state.get_nodes(run_id=request.run_id):
|
244
|
-
# Cancel insertion in ObjectStore
|
245
|
-
context.abort(grpc.StatusCode.FAILED_PRECONDITION, "Unexpected node ID.")
|
246
|
-
|
247
|
-
# Init store
|
248
|
-
store = self.objectstore_factory.store()
|
249
|
-
|
250
|
-
# Fetch from store
|
251
|
-
content = store.get(request.object_id)
|
252
|
-
if content is not None:
|
253
|
-
object_available = content != b""
|
254
|
-
return PullObjectResponse(
|
255
|
-
object_found=True,
|
256
|
-
object_available=object_available,
|
257
|
-
object_content=content,
|
211
|
+
try:
|
212
|
+
# Fetch from store
|
213
|
+
res = message_handler.pull_object(
|
214
|
+
request=request,
|
215
|
+
state=self.state_factory.state(),
|
216
|
+
store=self.objectstore_factory.store(),
|
258
217
|
)
|
259
|
-
|
218
|
+
except InvalidRunStatusException as e:
|
219
|
+
abort_grpc_context(e.message, context)
|
220
|
+
|
221
|
+
return res
|
@@ -19,6 +19,7 @@ from typing import Optional
|
|
19
19
|
|
20
20
|
from flwr.common import Message, log
|
21
21
|
from flwr.common.constant import Status
|
22
|
+
from flwr.common.inflatable import UnexpectedObjectContentError
|
22
23
|
from flwr.common.serde import (
|
23
24
|
fab_to_proto,
|
24
25
|
message_from_proto,
|
@@ -42,7 +43,13 @@ from flwr.proto.heartbeat_pb2 import ( # pylint: disable=E0611
|
|
42
43
|
SendNodeHeartbeatRequest,
|
43
44
|
SendNodeHeartbeatResponse,
|
44
45
|
)
|
45
|
-
from flwr.proto.message_pb2 import
|
46
|
+
from flwr.proto.message_pb2 import ( # pylint: disable=E0611
|
47
|
+
ObjectIDs,
|
48
|
+
PullObjectRequest,
|
49
|
+
PullObjectResponse,
|
50
|
+
PushObjectRequest,
|
51
|
+
PushObjectResponse,
|
52
|
+
)
|
46
53
|
from flwr.proto.node_pb2 import Node # pylint: disable=E0611
|
47
54
|
from flwr.proto.run_pb2 import ( # pylint: disable=E0611
|
48
55
|
GetRunRequest,
|
@@ -203,3 +210,52 @@ def get_fab(
|
|
203
210
|
return GetFabResponse(fab=fab_to_proto(fab))
|
204
211
|
|
205
212
|
raise ValueError(f"Found no FAB with hash: {request.hash_str}")
|
213
|
+
|
214
|
+
|
215
|
+
def push_object(
|
216
|
+
request: PushObjectRequest, state: LinkState, store: ObjectStore
|
217
|
+
) -> PushObjectResponse:
|
218
|
+
"""Push Object."""
|
219
|
+
abort_msg = check_abort(
|
220
|
+
request.run_id,
|
221
|
+
[Status.PENDING, Status.STARTING, Status.FINISHED],
|
222
|
+
state,
|
223
|
+
)
|
224
|
+
if abort_msg:
|
225
|
+
raise InvalidRunStatusException(abort_msg)
|
226
|
+
|
227
|
+
stored = False
|
228
|
+
try:
|
229
|
+
store.put(request.object_id, request.object_content)
|
230
|
+
stored = True
|
231
|
+
except (NoObjectInStoreError, ValueError) as e:
|
232
|
+
log(ERROR, str(e))
|
233
|
+
except UnexpectedObjectContentError as e:
|
234
|
+
# Object content is not valid
|
235
|
+
log(ERROR, str(e))
|
236
|
+
raise
|
237
|
+
return PushObjectResponse(stored=stored)
|
238
|
+
|
239
|
+
|
240
|
+
def pull_object(
|
241
|
+
request: PullObjectRequest, state: LinkState, store: ObjectStore
|
242
|
+
) -> PullObjectResponse:
|
243
|
+
"""Pull Object."""
|
244
|
+
abort_msg = check_abort(
|
245
|
+
request.run_id,
|
246
|
+
[Status.PENDING, Status.STARTING, Status.FINISHED],
|
247
|
+
state,
|
248
|
+
)
|
249
|
+
if abort_msg:
|
250
|
+
raise InvalidRunStatusException(abort_msg)
|
251
|
+
|
252
|
+
# Fetch from store
|
253
|
+
content = store.get(request.object_id)
|
254
|
+
if content is not None:
|
255
|
+
object_available = content != b""
|
256
|
+
return PullObjectResponse(
|
257
|
+
object_found=True,
|
258
|
+
object_available=object_available,
|
259
|
+
object_content=content,
|
260
|
+
)
|
261
|
+
return PullObjectResponse(object_found=False, object_available=False)
|
@@ -23,7 +23,10 @@ import grpc
|
|
23
23
|
|
24
24
|
from flwr.common import Message
|
25
25
|
from flwr.common.constant import SUPERLINK_NODE_ID, Status
|
26
|
-
from flwr.common.inflatable import
|
26
|
+
from flwr.common.inflatable import (
|
27
|
+
UnexpectedObjectContentError,
|
28
|
+
get_descendant_object_ids,
|
29
|
+
)
|
27
30
|
from flwr.common.logger import log
|
28
31
|
from flwr.common.serde import (
|
29
32
|
context_from_proto,
|
@@ -424,12 +427,6 @@ class ServerAppIoServicer(serverappio_pb2_grpc.ServerAppIoServicer):
|
|
424
427
|
# Cancel insertion in ObjectStore
|
425
428
|
context.abort(grpc.StatusCode.FAILED_PRECONDITION, "Unexpected node ID.")
|
426
429
|
|
427
|
-
if not check_body_len_consistency(request.object_content):
|
428
|
-
# Cancel insertion in ObjectStore
|
429
|
-
context.abort(
|
430
|
-
grpc.StatusCode.FAILED_PRECONDITION, "Unexpected object length."
|
431
|
-
)
|
432
|
-
|
433
430
|
# Init store
|
434
431
|
store = self.objectstore_factory.store()
|
435
432
|
|
@@ -440,6 +437,9 @@ class ServerAppIoServicer(serverappio_pb2_grpc.ServerAppIoServicer):
|
|
440
437
|
stored = True
|
441
438
|
except (NoObjectInStoreError, ValueError) as e:
|
442
439
|
log(ERROR, str(e))
|
440
|
+
except UnexpectedObjectContentError as e:
|
441
|
+
# Object content is not valid
|
442
|
+
context.abort(grpc.StatusCode.FAILED_PRECONDITION, str(e))
|
443
443
|
|
444
444
|
return PushObjectResponse(stored=stored)
|
445
445
|
|
@@ -18,6 +18,7 @@
|
|
18
18
|
from typing import Optional
|
19
19
|
|
20
20
|
from flwr.common.inflatable import get_object_id, is_valid_sha256_hash
|
21
|
+
from flwr.common.inflatable_utils import validate_object_content
|
21
22
|
|
22
23
|
from .object_store import NoObjectInStoreError, ObjectStore
|
23
24
|
|
@@ -52,12 +53,15 @@ class InMemoryObjectStore(ObjectStore):
|
|
52
53
|
f"Object with ID '{object_id}' was not pre-registered."
|
53
54
|
)
|
54
55
|
|
55
|
-
# Verify object_id and object_content match
|
56
56
|
if self.verify:
|
57
|
+
# Verify object_id and object_content match
|
57
58
|
object_id_from_content = get_object_id(object_content)
|
58
59
|
if object_id != object_id_from_content:
|
59
60
|
raise ValueError(f"Object ID {object_id} does not match content hash")
|
60
61
|
|
62
|
+
# Validate object content
|
63
|
+
validate_object_content(content=object_content)
|
64
|
+
|
61
65
|
# Return if object is already present in the store
|
62
66
|
if self.store[object_id] != b"":
|
63
67
|
return
|
@@ -52,9 +52,9 @@ from flwr.common.logger import log
|
|
52
52
|
from flwr.common.retry_invoker import RetryInvoker, RetryState, exponential
|
53
53
|
from flwr.common.typing import Fab, Run, RunNotRunningException, UserConfig
|
54
54
|
from flwr.proto.clientappio_pb2_grpc import add_ClientAppIoServicer_to_server
|
55
|
-
from flwr.server.superlink.ffs import FfsFactory
|
56
|
-
from flwr.supercore.object_store import ObjectStoreFactory
|
57
|
-
from flwr.supernode.nodestate import NodeStateFactory
|
55
|
+
from flwr.server.superlink.ffs import Ffs, FfsFactory
|
56
|
+
from flwr.supercore.object_store import ObjectStore, ObjectStoreFactory
|
57
|
+
from flwr.supernode.nodestate import NodeState, NodeStateFactory
|
58
58
|
from flwr.supernode.servicer.clientappio import ClientAppInputs, ClientAppIoServicer
|
59
59
|
|
60
60
|
DEFAULT_FFS_DIR = get_flwr_dir() / "supernode" / "ffs"
|
@@ -145,7 +145,7 @@ def start_client_internal(
|
|
145
145
|
# Initialize NodeState, Ffs, and ObjectStore
|
146
146
|
state = state_factory.state()
|
147
147
|
ffs = ffs_factory.ffs()
|
148
|
-
|
148
|
+
store = object_store_factory.store()
|
149
149
|
|
150
150
|
with _init_connection(
|
151
151
|
transport=transport,
|
@@ -166,63 +166,20 @@ def start_client_internal(
|
|
166
166
|
|
167
167
|
# pylint: disable=too-many-nested-blocks
|
168
168
|
while True:
|
169
|
-
#
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
"[RUN %s, ROUND %s]",
|
180
|
-
message.metadata.run_id,
|
181
|
-
message.metadata.group_id,
|
182
|
-
)
|
183
|
-
else:
|
184
|
-
log(INFO, "[RUN %s]", message.metadata.run_id)
|
185
|
-
log(
|
186
|
-
INFO,
|
187
|
-
"Received: %s message %s",
|
188
|
-
message.metadata.message_type,
|
189
|
-
message.metadata.message_id,
|
169
|
+
# The signature of the function will change after
|
170
|
+
# completing the transition to the `NodeState`-based SuperNode
|
171
|
+
run_id = _pull_and_store_message(
|
172
|
+
state=state,
|
173
|
+
ffs=ffs,
|
174
|
+
object_store=store,
|
175
|
+
node_config=node_config,
|
176
|
+
receive=receive,
|
177
|
+
get_run=get_run,
|
178
|
+
get_fab=get_fab,
|
190
179
|
)
|
191
180
|
|
192
|
-
|
193
|
-
|
194
|
-
try:
|
195
|
-
# Check if the message is from an unknown run
|
196
|
-
if (run_info := state.get_run(run_id)) is None:
|
197
|
-
# Pull run info from SuperLink
|
198
|
-
run_info = get_run(run_id)
|
199
|
-
state.store_run(run_info)
|
200
|
-
|
201
|
-
# Pull and store the FAB
|
202
|
-
fab = get_fab(run_info.fab_hash, run_id)
|
203
|
-
ffs.put(fab.content, {})
|
204
|
-
|
205
|
-
# Initialize the context
|
206
|
-
run_cfg = get_fused_config_from_fab(fab.content, run_info)
|
207
|
-
run_ctx = Context(
|
208
|
-
run_id=run_id,
|
209
|
-
node_id=state.get_node_id(),
|
210
|
-
node_config=node_config,
|
211
|
-
state=RecordDict(),
|
212
|
-
run_config=run_cfg,
|
213
|
-
)
|
214
|
-
state.store_context(run_ctx)
|
215
|
-
|
216
|
-
# Store the message in the state
|
217
|
-
state.store_message(message)
|
218
|
-
except RunNotRunningException:
|
219
|
-
log(
|
220
|
-
INFO,
|
221
|
-
"Run ID %s is not in `RUNNING` status. Ignoring message %s.",
|
222
|
-
run_id,
|
223
|
-
message.metadata.message_id,
|
224
|
-
)
|
225
|
-
time.sleep(3)
|
181
|
+
if run_id is None:
|
182
|
+
time.sleep(3) # Wait for 3s before asking again
|
226
183
|
continue
|
227
184
|
|
228
185
|
try:
|
@@ -302,6 +259,91 @@ def start_client_internal(
|
|
302
259
|
log(INFO, "")
|
303
260
|
|
304
261
|
|
262
|
+
def _pull_and_store_message( # pylint: disable=too-many-positional-arguments
|
263
|
+
state: NodeState,
|
264
|
+
ffs: Ffs,
|
265
|
+
object_store: ObjectStore, # pylint: disable=unused-argument
|
266
|
+
node_config: UserConfig,
|
267
|
+
receive: Callable[[], Optional[Message]],
|
268
|
+
get_run: Callable[[int], Run],
|
269
|
+
get_fab: Callable[[str, int], Fab],
|
270
|
+
) -> Optional[int]:
|
271
|
+
"""Pull a message from the SuperLink and store it in the state.
|
272
|
+
|
273
|
+
This function current returns None if no message is received,
|
274
|
+
or run_id if a message is received and processed successfully.
|
275
|
+
This behavior will change in the future to return None after
|
276
|
+
completing transition to the `NodeState`-based SuperNode.
|
277
|
+
"""
|
278
|
+
message = None
|
279
|
+
try:
|
280
|
+
# Pull message
|
281
|
+
if (message := receive()) is None:
|
282
|
+
return None
|
283
|
+
|
284
|
+
# Log message reception
|
285
|
+
log(INFO, "")
|
286
|
+
if message.metadata.group_id:
|
287
|
+
log(
|
288
|
+
INFO,
|
289
|
+
"[RUN %s, ROUND %s]",
|
290
|
+
message.metadata.run_id,
|
291
|
+
message.metadata.group_id,
|
292
|
+
)
|
293
|
+
else:
|
294
|
+
log(INFO, "[RUN %s]", message.metadata.run_id)
|
295
|
+
log(
|
296
|
+
INFO,
|
297
|
+
"Received: %s message %s",
|
298
|
+
message.metadata.message_type,
|
299
|
+
message.metadata.message_id,
|
300
|
+
)
|
301
|
+
|
302
|
+
# Ensure the run and FAB are available
|
303
|
+
run_id = message.metadata.run_id
|
304
|
+
|
305
|
+
# Check if the message is from an unknown run
|
306
|
+
if (run_info := state.get_run(run_id)) is None:
|
307
|
+
# Pull run info from SuperLink
|
308
|
+
run_info = get_run(run_id)
|
309
|
+
state.store_run(run_info)
|
310
|
+
|
311
|
+
# Pull and store the FAB
|
312
|
+
fab = get_fab(run_info.fab_hash, run_id)
|
313
|
+
ffs.put(fab.content, {})
|
314
|
+
|
315
|
+
# Initialize the context
|
316
|
+
run_cfg = get_fused_config_from_fab(fab.content, run_info)
|
317
|
+
run_ctx = Context(
|
318
|
+
run_id=run_id,
|
319
|
+
node_id=state.get_node_id(),
|
320
|
+
node_config=node_config,
|
321
|
+
state=RecordDict(),
|
322
|
+
run_config=run_cfg,
|
323
|
+
)
|
324
|
+
state.store_context(run_ctx)
|
325
|
+
|
326
|
+
# Store the message in the state
|
327
|
+
state.store_message(message)
|
328
|
+
except RunNotRunningException:
|
329
|
+
if message is None:
|
330
|
+
log(
|
331
|
+
INFO,
|
332
|
+
"Run transitioned to a non-`RUNNING` status while receiving a message. "
|
333
|
+
"Ignoring the message.",
|
334
|
+
)
|
335
|
+
else:
|
336
|
+
log(
|
337
|
+
INFO,
|
338
|
+
"Run ID %s is not in `RUNNING` status. Ignoring message %s.",
|
339
|
+
run_id,
|
340
|
+
message.metadata.message_id,
|
341
|
+
)
|
342
|
+
return None
|
343
|
+
|
344
|
+
return run_id
|
345
|
+
|
346
|
+
|
305
347
|
@contextmanager
|
306
348
|
def _init_connection( # pylint: disable=too-many-positional-arguments
|
307
349
|
transport: str,
|
{flwr_nightly-1.19.0.dev20250607.dist-info → flwr_nightly-1.19.0.dev20250610.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: flwr-nightly
|
3
|
-
Version: 1.19.0.
|
3
|
+
Version: 1.19.0.dev20250610
|
4
4
|
Summary: Flower: A Friendly Federated AI Framework
|
5
5
|
License: Apache-2.0
|
6
6
|
Keywords: Artificial Intelligence,Federated AI,Federated Analytics,Federated Evaluation,Federated Learning,Flower,Machine Learning
|
{flwr_nightly-1.19.0.dev20250607.dist-info → flwr_nightly-1.19.0.dev20250610.dist-info}/RECORD
RENAMED
@@ -84,7 +84,7 @@ flwr/client/grpc_adapter_client/__init__.py,sha256=RQWP5mFPROLHKgombiRvPXVWSoVrQ
|
|
84
84
|
flwr/client/grpc_adapter_client/connection.py,sha256=aj5tTYyE8z2hQLXPPydsJiz8gBDIWLUhfWvqYkAL1L4,3966
|
85
85
|
flwr/client/grpc_rere_client/__init__.py,sha256=i7iS0Lt8B7q0E2L72e4F_YrKm6ClRKnd71PNA6PW2O0,752
|
86
86
|
flwr/client/grpc_rere_client/client_interceptor.py,sha256=zFaVHw6AxeNO-7eCKKb-RxrPa7zbM5Z-2-1Efc4adQY,2451
|
87
|
-
flwr/client/grpc_rere_client/connection.py,sha256=
|
87
|
+
flwr/client/grpc_rere_client/connection.py,sha256=zxnr1Ppp3PpffvqhRlTBwTWHnek15lIDEBLfgqCSW40,14039
|
88
88
|
flwr/client/grpc_rere_client/grpc_adapter.py,sha256=JvMZ7vCFTaTEo6AzKYh3zDmeQAU7VSjdysbC6t3ufWg,6351
|
89
89
|
flwr/client/message_handler/__init__.py,sha256=0lyljDVqre3WljiZbPcwCCf8GiIaSVI_yo_ylEyPwSE,719
|
90
90
|
flwr/client/message_handler/message_handler.py,sha256=X9SXX6et97Lw9_DGD93HKsEBGNjXClcFgc_5aLK0oiU,6541
|
@@ -122,9 +122,9 @@ flwr/common/exit/exit_code.py,sha256=PNEnCrZfOILjfDAFu5m-2YWEJBrk97xglq4zCUlqV7E
|
|
122
122
|
flwr/common/exit_handlers.py,sha256=IaqJ60fXZuu7McaRYnoYKtlbH9t4Yl9goNExKqtmQbs,4304
|
123
123
|
flwr/common/grpc.py,sha256=manTaHaPiyYngUq1ErZvvV2B2GxlXUUUGRy3jc3TBIQ,9798
|
124
124
|
flwr/common/heartbeat.py,sha256=SyEpNDnmJ0lni0cWO67rcoJVKasCLmkNHm3dKLeNrLU,5749
|
125
|
-
flwr/common/inflatable.py,sha256=
|
126
|
-
flwr/common/inflatable_grpc_utils.py,sha256
|
127
|
-
flwr/common/inflatable_utils.py,sha256
|
125
|
+
flwr/common/inflatable.py,sha256=gdAICtXklkQRMrxoTYEbzJl7AeFqZtUm4JU6f2it9FM,7264
|
126
|
+
flwr/common/inflatable_grpc_utils.py,sha256=-cdu-VNBK7aNdYRtjtTz3pnXmK-_q3XIzpr14vhanto,3549
|
127
|
+
flwr/common/inflatable_utils.py,sha256=-GTdgR1zLS9WtXrbOGJMpaoyVEL8KmoQ2yF4HeLxTI0,12406
|
128
128
|
flwr/common/logger.py,sha256=JbRf6E2vQxXzpDBq1T8IDUJo_usu3gjWEBPQ6uKcmdg,13049
|
129
129
|
flwr/common/message.py,sha256=xAL7iZN5-n-xPQpgoSFvxNrzs8fmiiPfoU0DjNQEhRw,19953
|
130
130
|
flwr/common/object_ref.py,sha256=p3SfTeqo3Aj16SkB-vsnNn01zswOPdGNBitcbRnqmUk,9134
|
@@ -237,7 +237,7 @@ flwr/server/criterion.py,sha256=G4e-6B48Pc7d5rmGVUpIzNKb6UF88O3VmTRuUltgjzM,1061
|
|
237
237
|
flwr/server/fleet_event_log_interceptor.py,sha256=AkL7Y5d3xm2vRhL3ahmEVVoOvAP7PA7dRgB-je4v-Ys,3774
|
238
238
|
flwr/server/grid/__init__.py,sha256=aWZHezoR2UGMJISB_gPMCm2N_2GSbm97A3lAp7ruhRQ,888
|
239
239
|
flwr/server/grid/grid.py,sha256=naGCYt5J6dnmUvrcGkdNyKPe3MBd-0awGm1ALmgahqY,6625
|
240
|
-
flwr/server/grid/grpc_grid.py,sha256=
|
240
|
+
flwr/server/grid/grpc_grid.py,sha256=qhJPS4tCWATHx24dDZoxtEPq6rjFu0lNggX70lstLkw,13302
|
241
241
|
flwr/server/grid/inmemory_grid.py,sha256=RjejYT-d-hHuTs1KSs_5wvOdAWKLus8w5_UAcnGt4iw,6168
|
242
242
|
flwr/server/history.py,sha256=cCkFhBN4GoHsYYNk5GG1Y089eKJh2DH_ZJbYPwLaGyk,5026
|
243
243
|
flwr/server/run_serverapp.py,sha256=v0p6jXj2dFxlRUdoEeF1mnaFd9XRQi6dZCflPY6d3qI,2063
|
@@ -285,10 +285,10 @@ flwr/server/superlink/fleet/grpc_bidi/grpc_bridge.py,sha256=KouR9PUcrPmMtoLooF4O
|
|
285
285
|
flwr/server/superlink/fleet/grpc_bidi/grpc_client_proxy.py,sha256=iSf0mbBAlig7G6subQwBSVjcUCgSihONKdZ1RmQPTOk,4887
|
286
286
|
flwr/server/superlink/fleet/grpc_bidi/grpc_server.py,sha256=OsS-6GgCIzMMZDVu5Y-OKjynHVUrpdc_5OrtuB-IbU0,5174
|
287
287
|
flwr/server/superlink/fleet/grpc_rere/__init__.py,sha256=ahDJJ1e-lDxBpeBMgPk7YZt2wB38_QltcpOC0gLbpFs,758
|
288
|
-
flwr/server/superlink/fleet/grpc_rere/fleet_servicer.py,sha256=
|
288
|
+
flwr/server/superlink/fleet/grpc_rere/fleet_servicer.py,sha256=wp6I4BKv82dBlIJA8D2gx9CRgahkwnsVnKTkU6M06Og,7802
|
289
289
|
flwr/server/superlink/fleet/grpc_rere/server_interceptor.py,sha256=DrHubsaLgJCwCeeJPYogQTiP0xYqjxwnT9rh7OP7BoU,6984
|
290
290
|
flwr/server/superlink/fleet/message_handler/__init__.py,sha256=fHsRV0KvJ8HtgSA4_YBsEzuhJLjO8p6xx4aCY2oE1p4,731
|
291
|
-
flwr/server/superlink/fleet/message_handler/message_handler.py,sha256=
|
291
|
+
flwr/server/superlink/fleet/message_handler/message_handler.py,sha256=hbaukJ7EpfBiaXHa_R50MZKTkesxosx5IwFtDyIAO-0,8076
|
292
292
|
flwr/server/superlink/fleet/rest_rere/__init__.py,sha256=Lzc93nA7tDqoy-zRUaPG316oqFiZX1HUCL5ELaXY_xw,735
|
293
293
|
flwr/server/superlink/fleet/rest_rere/rest_api.py,sha256=jIljUNMvZ8dDvSlkyn1c2y9sDAf_QoBr-q_o3BWxJ7o,7199
|
294
294
|
flwr/server/superlink/fleet/vce/__init__.py,sha256=XOKbAWOzlCqEOQ3M2cBYkH7HKA7PxlbCJMunt-ty-DY,784
|
@@ -304,7 +304,7 @@ flwr/server/superlink/linkstate/sqlite_linkstate.py,sha256=sHJPK1w0tP0m2WCXH2F9l
|
|
304
304
|
flwr/server/superlink/linkstate/utils.py,sha256=IeLh7iGRCHU5MEWOl7iriaSE4L__8GWOa2OleXadK5M,15444
|
305
305
|
flwr/server/superlink/serverappio/__init__.py,sha256=Fy4zJuoccZe5mZSEIpOmQvU6YeXFBa1M4eZuXXmJcn8,717
|
306
306
|
flwr/server/superlink/serverappio/serverappio_grpc.py,sha256=6-FUUt0GiLcBPljj8bBrUNeAITUoDQOLzaMihKo52hg,2326
|
307
|
-
flwr/server/superlink/serverappio/serverappio_servicer.py,sha256=
|
307
|
+
flwr/server/superlink/serverappio/serverappio_servicer.py,sha256=mRDMklbizitbDcN6hrCSp0NpsFegdAE84MHEulW7eBQ,17232
|
308
308
|
flwr/server/superlink/simulation/__init__.py,sha256=Ry8DrNaZCMcQXvUc4FoCN2m3dvUQgWjasfp015o3Ec4,718
|
309
309
|
flwr/server/superlink/simulation/simulationio_grpc.py,sha256=0l0F-UjYEk6W7HZmI28PbJQLFxSi_vBHRkdchgdaSMQ,2224
|
310
310
|
flwr/server/superlink/simulation/simulationio_servicer.py,sha256=aJezU8RSJswcmWm7Eoy0BqsU13jrcfuFwX3ljm-cORM,7719
|
@@ -331,7 +331,7 @@ flwr/simulation/run_simulation.py,sha256=Nvw_6hI71aE2nU95_tt1F9VSo3OJWrvA97e3XZu
|
|
331
331
|
flwr/simulation/simulationio_connection.py,sha256=mzS1C6EEREwQDPceDo30anAasmTDLb9qqV2tXlBhOUA,3494
|
332
332
|
flwr/supercore/__init__.py,sha256=pqkFoow_E6UhbBlhmoD1gmTH-33yJRhBsIZqxRPFZ7U,755
|
333
333
|
flwr/supercore/object_store/__init__.py,sha256=cdfPAmjINY6iOp8oI_LdcVh2simg469Mkdl4LLV4kHI,911
|
334
|
-
flwr/supercore/object_store/in_memory_object_store.py,sha256=
|
334
|
+
flwr/supercore/object_store/in_memory_object_store.py,sha256=8EfTJHb6-RObWmzb2ZxBgxMobCod6NP820DzrMnYdbY,4081
|
335
335
|
flwr/supercore/object_store/object_store.py,sha256=yZ6A_JgK_aGF54zlPISLK_d9FvxpYJlI2qNfmQBdlzM,4328
|
336
336
|
flwr/supercore/object_store/object_store_factory.py,sha256=QVwE2ywi7vsj2iKfvWWnNw3N_I7Rz91NUt2RpcbJ7iM,1527
|
337
337
|
flwr/superexec/__init__.py,sha256=YFqER0IJc1XEWfsX6AxZ9LSRq0sawPYrNYki-brvTIc,715
|
@@ -357,8 +357,8 @@ flwr/supernode/runtime/run_clientapp.py,sha256=cvWSby7u31u97QapWHxJM-Wer6F1k6mbb
|
|
357
357
|
flwr/supernode/servicer/__init__.py,sha256=lucTzre5WPK7G1YLCfaqg3rbFWdNSb7ZTt-ca8gxdEo,717
|
358
358
|
flwr/supernode/servicer/clientappio/__init__.py,sha256=vJyOjO2FXZ2URbnthmdsgs6948wbYfdq1L1V8Um-Lr8,895
|
359
359
|
flwr/supernode/servicer/clientappio/clientappio_servicer.py,sha256=LmzkxtNQBn5vVrHc0Bhq2WqaK6-LM2v4kfLBN0PiNNM,8522
|
360
|
-
flwr/supernode/start_client_internal.py,sha256=
|
361
|
-
flwr_nightly-1.19.0.
|
362
|
-
flwr_nightly-1.19.0.
|
363
|
-
flwr_nightly-1.19.0.
|
364
|
-
flwr_nightly-1.19.0.
|
360
|
+
flwr/supernode/start_client_internal.py,sha256=AkJ1FsBK6EpK7cmIGcae5WZazPhU71gileiSQogTZ-k,18164
|
361
|
+
flwr_nightly-1.19.0.dev20250610.dist-info/METADATA,sha256=98gf7-LsujpWAZw4bsDkaitvVM4M0NT44a2lIGFtjCE,15910
|
362
|
+
flwr_nightly-1.19.0.dev20250610.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
363
|
+
flwr_nightly-1.19.0.dev20250610.dist-info/entry_points.txt,sha256=jNpDXGBGgs21RqUxelF_jwGaxtqFwm-MQyfz-ZqSjrA,367
|
364
|
+
flwr_nightly-1.19.0.dev20250610.dist-info/RECORD,,
|
{flwr_nightly-1.19.0.dev20250607.dist-info → flwr_nightly-1.19.0.dev20250610.dist-info}/WHEEL
RENAMED
File without changes
|
File without changes
|