flwr-nightly 1.19.0.dev20250603__py3-none-any.whl → 1.19.0.dev20250604__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flwr/client/grpc_rere_client/connection.py +44 -7
- flwr/common/inflatable_grpc_utils.py +9 -4
- flwr/server/grid/grpc_grid.py +73 -34
- flwr/server/superlink/ffs/__init__.py +2 -0
- flwr/supernode/cli/flwr_clientapp.py +8 -0
- flwr/supernode/runtime/run_clientapp.py +22 -1
- flwr/supernode/start_client_internal.py +90 -114
- {flwr_nightly-1.19.0.dev20250603.dist-info → flwr_nightly-1.19.0.dev20250604.dist-info}/METADATA +1 -1
- {flwr_nightly-1.19.0.dev20250603.dist-info → flwr_nightly-1.19.0.dev20250604.dist-info}/RECORD +11 -11
- {flwr_nightly-1.19.0.dev20250603.dist-info → flwr_nightly-1.19.0.dev20250604.dist-info}/WHEEL +0 -0
- {flwr_nightly-1.19.0.dev20250603.dist-info → flwr_nightly-1.19.0.dev20250604.dist-info}/entry_points.txt +0 -0
@@ -14,11 +14,10 @@
|
|
14
14
|
# ==============================================================================
|
15
15
|
"""Contextmanager for a gRPC request-response channel to the Flower server."""
|
16
16
|
|
17
|
-
|
18
17
|
from collections.abc import Iterator, Sequence
|
19
18
|
from contextlib import contextmanager
|
20
19
|
from copy import copy
|
21
|
-
from logging import ERROR
|
20
|
+
from logging import DEBUG, ERROR
|
22
21
|
from pathlib import Path
|
23
22
|
from typing import Callable, Optional, Union, cast
|
24
23
|
|
@@ -31,13 +30,17 @@ from flwr.common import GRPC_MAX_MESSAGE_LENGTH
|
|
31
30
|
from flwr.common.constant import HEARTBEAT_CALL_TIMEOUT, HEARTBEAT_DEFAULT_INTERVAL
|
32
31
|
from flwr.common.grpc import create_channel, on_channel_state_change
|
33
32
|
from flwr.common.heartbeat import HeartbeatSender
|
33
|
+
from flwr.common.inflatable_grpc_utils import (
|
34
|
+
pull_object_from_servicer,
|
35
|
+
push_object_to_servicer,
|
36
|
+
)
|
34
37
|
from flwr.common.logger import log
|
35
|
-
from flwr.common.message import Message
|
38
|
+
from flwr.common.message import Message, get_message_to_descendant_id_mapping
|
36
39
|
from flwr.common.retry_invoker import RetryInvoker, _wrap_stub
|
37
40
|
from flwr.common.secure_aggregation.crypto.symmetric_encryption import (
|
38
41
|
generate_key_pairs,
|
39
42
|
)
|
40
|
-
from flwr.common.serde import
|
43
|
+
from flwr.common.serde import message_to_proto, run_from_proto
|
41
44
|
from flwr.common.typing import Fab, Run, RunNotRunningException
|
42
45
|
from flwr.proto.fab_pb2 import GetFabRequest, GetFabResponse # pylint: disable=E0611
|
43
46
|
from flwr.proto.fleet_pb2 import ( # pylint: disable=E0611
|
@@ -46,6 +49,7 @@ from flwr.proto.fleet_pb2 import ( # pylint: disable=E0611
|
|
46
49
|
PullMessagesRequest,
|
47
50
|
PullMessagesResponse,
|
48
51
|
PushMessagesRequest,
|
52
|
+
PushMessagesResponse,
|
49
53
|
)
|
50
54
|
from flwr.proto.fleet_pb2_grpc import FleetStub # pylint: disable=E0611
|
51
55
|
from flwr.proto.heartbeat_pb2 import ( # pylint: disable=E0611
|
@@ -254,7 +258,24 @@ def grpc_request_response( # pylint: disable=R0913,R0914,R0915,R0917
|
|
254
258
|
message_proto = None
|
255
259
|
|
256
260
|
# Construct the Message
|
257
|
-
in_message =
|
261
|
+
in_message: Optional[Message] = None
|
262
|
+
|
263
|
+
if message_proto:
|
264
|
+
in_message = cast(
|
265
|
+
Message,
|
266
|
+
pull_object_from_servicer(
|
267
|
+
object_id=message_proto.metadata.message_id,
|
268
|
+
stub=stub,
|
269
|
+
node=node,
|
270
|
+
run_id=message_proto.metadata.run_id,
|
271
|
+
),
|
272
|
+
)
|
273
|
+
|
274
|
+
if in_message:
|
275
|
+
# The deflated message doesn't contain the message_id (its own object_id)
|
276
|
+
# Inject
|
277
|
+
# pylint: disable-next=W0212
|
278
|
+
in_message.metadata._message_id = message_proto.metadata.message_id # type: ignore
|
258
279
|
|
259
280
|
# Remember `metadata` of the in message
|
260
281
|
nonlocal metadata
|
@@ -285,8 +306,24 @@ def grpc_request_response( # pylint: disable=R0913,R0914,R0915,R0917
|
|
285
306
|
|
286
307
|
# Serialize Message
|
287
308
|
message_proto = message_to_proto(message=message)
|
288
|
-
|
289
|
-
|
309
|
+
descendants_mapping = get_message_to_descendant_id_mapping(message)
|
310
|
+
request = PushMessagesRequest(
|
311
|
+
node=node,
|
312
|
+
messages_list=[message_proto],
|
313
|
+
msg_to_descendant_mapping=descendants_mapping,
|
314
|
+
)
|
315
|
+
response: PushMessagesResponse = stub.PushMessages(request=request)
|
316
|
+
|
317
|
+
if response.objects_to_push:
|
318
|
+
objs_to_push = set(response.objects_to_push[message.object_id].object_ids)
|
319
|
+
push_object_to_servicer(
|
320
|
+
message,
|
321
|
+
stub,
|
322
|
+
node,
|
323
|
+
run_id=message.metadata.run_id,
|
324
|
+
object_ids_to_push=objs_to_push,
|
325
|
+
)
|
326
|
+
log(DEBUG, "Pushed %s objects to servicer.", len(objs_to_push))
|
290
327
|
|
291
328
|
# Cleanup
|
292
329
|
metadata = None
|
@@ -15,6 +15,7 @@
|
|
15
15
|
"""InflatableObject utils."""
|
16
16
|
|
17
17
|
|
18
|
+
from time import sleep
|
18
19
|
from typing import Optional, Union
|
19
20
|
|
20
21
|
from flwr.client.grpc_rere_client.grpc_adapter import GrpcAdapter
|
@@ -94,10 +95,14 @@ def pull_object_from_servicer(
|
|
94
95
|
) -> InflatableObject:
|
95
96
|
"""Recursively inflate an object by pulling it from the servicer."""
|
96
97
|
# Pull object
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
98
|
+
object_available = False
|
99
|
+
while not object_available:
|
100
|
+
object_proto: PullObjectResponse = stub.PullObject(
|
101
|
+
PullObjectRequest(node=node, run_id=run_id, object_id=object_id)
|
102
|
+
)
|
103
|
+
object_available = object_proto.object_available
|
104
|
+
object_content = object_proto.object_content
|
105
|
+
sleep(0.1)
|
101
106
|
|
102
107
|
# Extract object class and object_ids of children
|
103
108
|
obj_type, children_obj_ids, _ = get_object_head_values_from_object_content(
|
flwr/server/grid/grpc_grid.py
CHANGED
@@ -28,11 +28,15 @@ from flwr.common.constant import (
|
|
28
28
|
SUPERLINK_NODE_ID,
|
29
29
|
)
|
30
30
|
from flwr.common.grpc import create_channel, on_channel_state_change
|
31
|
+
from flwr.common.inflatable_grpc_utils import (
|
32
|
+
pull_object_from_servicer,
|
33
|
+
push_object_to_servicer,
|
34
|
+
)
|
31
35
|
from flwr.common.logger import log, warn_deprecated_feature
|
36
|
+
from flwr.common.message import get_message_to_descendant_id_mapping
|
32
37
|
from flwr.common.retry_invoker import _make_simple_grpc_retry_invoker, _wrap_stub
|
33
|
-
from flwr.common.serde import
|
38
|
+
from flwr.common.serde import message_to_proto, run_from_proto
|
34
39
|
from flwr.common.typing import Run
|
35
|
-
from flwr.proto.message_pb2 import Message as ProtoMessage # pylint: disable=E0611
|
36
40
|
from flwr.proto.node_pb2 import Node # pylint: disable=E0611
|
37
41
|
from flwr.proto.run_pb2 import GetRunRequest, GetRunResponse # pylint: disable=E0611
|
38
42
|
from flwr.proto.serverappio_pb2 import ( # pylint: disable=E0611
|
@@ -198,6 +202,35 @@ class GrpcGrid(Grid):
|
|
198
202
|
)
|
199
203
|
return [node.node_id for node in res.nodes]
|
200
204
|
|
205
|
+
def _try_push_message(self, run_id: int, message: Message) -> str:
|
206
|
+
"""Push one message and its associated objects."""
|
207
|
+
# Compute mapping of message descendants
|
208
|
+
descendants_mapping = get_message_to_descendant_id_mapping(message)
|
209
|
+
|
210
|
+
# Call GrpcServerAppIoStub method
|
211
|
+
res: PushInsMessagesResponse = self._stub.PushMessages(
|
212
|
+
PushInsMessagesRequest(
|
213
|
+
messages_list=[message_to_proto(message)],
|
214
|
+
run_id=run_id,
|
215
|
+
msg_to_descendant_mapping=descendants_mapping,
|
216
|
+
)
|
217
|
+
)
|
218
|
+
|
219
|
+
# Push objects
|
220
|
+
msg_id = res.message_ids[0]
|
221
|
+
# If Message was added to the LinkState correctly
|
222
|
+
if msg_id is not None:
|
223
|
+
obj_ids_to_push = set(res.objects_to_push[msg_id].object_ids)
|
224
|
+
# Push only object that are not in the store
|
225
|
+
push_object_to_servicer(
|
226
|
+
message,
|
227
|
+
self._stub,
|
228
|
+
node=self.node,
|
229
|
+
run_id=run_id,
|
230
|
+
object_ids_to_push=obj_ids_to_push,
|
231
|
+
)
|
232
|
+
return msg_id
|
233
|
+
|
201
234
|
def push_messages(self, messages: Iterable[Message]) -> Iterable[str]:
|
202
235
|
"""Push messages to specified node IDs.
|
203
236
|
|
@@ -206,58 +239,64 @@ class GrpcGrid(Grid):
|
|
206
239
|
"""
|
207
240
|
# Construct Messages
|
208
241
|
run_id = cast(Run, self._run).run_id
|
209
|
-
|
210
|
-
for msg in messages:
|
211
|
-
# Populate metadata
|
212
|
-
msg.metadata.__dict__["_run_id"] = run_id
|
213
|
-
msg.metadata.__dict__["_src_node_id"] = self.node.node_id
|
214
|
-
msg.metadata.__dict__["_message_id"] = msg.object_id
|
215
|
-
# Check message
|
216
|
-
self._check_message(msg)
|
217
|
-
# Convert to proto
|
218
|
-
msg_proto = message_to_proto(msg)
|
219
|
-
# Add to list
|
220
|
-
message_proto_list.append(msg_proto)
|
221
|
-
|
242
|
+
message_ids: list[str] = []
|
222
243
|
try:
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
"list has `None` for those messages (the order is preserved as "
|
234
|
-
"passed to `push_messages`). This could be due to a malformed "
|
235
|
-
"message.",
|
236
|
-
)
|
237
|
-
return list(res.message_ids)
|
244
|
+
for msg in messages:
|
245
|
+
# Populate metadata
|
246
|
+
msg.metadata.__dict__["_run_id"] = run_id
|
247
|
+
msg.metadata.__dict__["_src_node_id"] = self.node.node_id
|
248
|
+
msg.metadata.__dict__["_message_id"] = msg.object_id
|
249
|
+
# Check message
|
250
|
+
self._check_message(msg)
|
251
|
+
# Try pushing message and its objects
|
252
|
+
message_ids.append(self._try_push_message(run_id, msg))
|
253
|
+
|
238
254
|
except grpc.RpcError as e:
|
239
255
|
if e.code() == grpc.StatusCode.RESOURCE_EXHAUSTED: # pylint: disable=E1101
|
240
256
|
log(ERROR, ERROR_MESSAGE_PUSH_MESSAGES_RESOURCE_EXHAUSTED)
|
241
257
|
return []
|
242
258
|
raise
|
243
259
|
|
260
|
+
if None in message_ids:
|
261
|
+
log(
|
262
|
+
WARNING,
|
263
|
+
"Not all messages could be pushed to the SuperLink. The returned "
|
264
|
+
"list has `None` for those messages (the order is preserved as "
|
265
|
+
"passed to `push_messages`). This could be due to a malformed "
|
266
|
+
"message.",
|
267
|
+
)
|
268
|
+
|
269
|
+
return message_ids
|
270
|
+
|
244
271
|
def pull_messages(self, message_ids: Iterable[str]) -> Iterable[Message]:
|
245
272
|
"""Pull messages based on message IDs.
|
246
273
|
|
247
274
|
This method is used to collect messages from the SuperLink that correspond to a
|
248
275
|
set of given message IDs.
|
249
276
|
"""
|
277
|
+
run_id = cast(Run, self._run).run_id
|
250
278
|
try:
|
251
279
|
# Pull Messages
|
252
280
|
res: PullResMessagesResponse = self._stub.PullMessages(
|
253
281
|
PullResMessagesRequest(
|
254
282
|
message_ids=message_ids,
|
255
|
-
run_id=
|
283
|
+
run_id=run_id,
|
256
284
|
)
|
257
285
|
)
|
258
|
-
#
|
259
|
-
|
260
|
-
|
286
|
+
# Pull Messages from store
|
287
|
+
inflated_msgs: list[Message] = []
|
288
|
+
for msg_proto in res.messages_list:
|
289
|
+
|
290
|
+
message = pull_object_from_servicer(
|
291
|
+
msg_proto.metadata.message_id,
|
292
|
+
self._stub,
|
293
|
+
node=self.node,
|
294
|
+
run_id=run_id,
|
295
|
+
)
|
296
|
+
inflated_msgs.append(cast(Message, message))
|
297
|
+
|
298
|
+
return inflated_msgs
|
299
|
+
|
261
300
|
except grpc.RpcError as e:
|
262
301
|
if e.code() == grpc.StatusCode.RESOURCE_EXHAUSTED: # pylint: disable=E1101
|
263
302
|
log(ERROR, ERROR_MESSAGE_PULL_MESSAGES_RESOURCE_EXHAUSTED)
|
@@ -48,6 +48,7 @@ def flwr_clientapp() -> None:
|
|
48
48
|
token=args.token,
|
49
49
|
flwr_dir=args.flwr_dir,
|
50
50
|
certificates=None,
|
51
|
+
parent_pid=args.parent_pid,
|
51
52
|
)
|
52
53
|
|
53
54
|
|
@@ -69,5 +70,12 @@ def _parse_args_run_flwr_clientapp() -> argparse.ArgumentParser:
|
|
69
70
|
required=False,
|
70
71
|
help="Unique token generated by SuperNode for each ClientApp execution",
|
71
72
|
)
|
73
|
+
parser.add_argument(
|
74
|
+
"--parent-pid",
|
75
|
+
type=int,
|
76
|
+
default=None,
|
77
|
+
help="The PID of the parent process. When set, the process will terminate "
|
78
|
+
"when the parent process exits.",
|
79
|
+
)
|
72
80
|
add_args_flwr_app_common(parser=parser)
|
73
81
|
return parser
|
@@ -16,6 +16,8 @@
|
|
16
16
|
|
17
17
|
|
18
18
|
import gc
|
19
|
+
import os
|
20
|
+
import threading
|
19
21
|
import time
|
20
22
|
from logging import DEBUG, ERROR, INFO
|
21
23
|
from typing import Optional
|
@@ -54,14 +56,19 @@ from flwr.proto.clientappio_pb2 import (
|
|
54
56
|
from flwr.proto.clientappio_pb2_grpc import ClientAppIoStub
|
55
57
|
|
56
58
|
|
57
|
-
def run_clientapp( # pylint: disable=R0914
|
59
|
+
def run_clientapp( # pylint: disable=R0913, R0914, R0917
|
58
60
|
clientappio_api_address: str,
|
59
61
|
run_once: bool,
|
60
62
|
token: Optional[int] = None,
|
61
63
|
flwr_dir: Optional[str] = None,
|
62
64
|
certificates: Optional[bytes] = None,
|
65
|
+
parent_pid: Optional[int] = None,
|
63
66
|
) -> None:
|
64
67
|
"""Run Flower ClientApp process."""
|
68
|
+
# Monitor the main process in case of SIGKILL
|
69
|
+
if parent_pid is not None:
|
70
|
+
start_parent_process_monitor(parent_pid)
|
71
|
+
|
65
72
|
channel = create_channel(
|
66
73
|
server_address=clientappio_api_address,
|
67
74
|
insecure=(certificates is None),
|
@@ -151,6 +158,20 @@ def run_clientapp( # pylint: disable=R0914
|
|
151
158
|
channel.close()
|
152
159
|
|
153
160
|
|
161
|
+
def start_parent_process_monitor(
|
162
|
+
parent_pid: int,
|
163
|
+
) -> None:
|
164
|
+
"""Monitor the parent process and exit if it terminates."""
|
165
|
+
|
166
|
+
def monitor() -> None:
|
167
|
+
while True:
|
168
|
+
time.sleep(0.2)
|
169
|
+
if os.getppid() != parent_pid:
|
170
|
+
os.kill(os.getpid(), 9)
|
171
|
+
|
172
|
+
threading.Thread(target=monitor, daemon=True).start()
|
173
|
+
|
174
|
+
|
154
175
|
def get_token(stub: grpc.Channel) -> Optional[int]:
|
155
176
|
"""Get a token from SuperNode."""
|
156
177
|
log(DEBUG, "[flwr-clientapp] Request token")
|
@@ -15,28 +15,25 @@
|
|
15
15
|
"""Main loop for Flower SuperNode."""
|
16
16
|
|
17
17
|
|
18
|
-
import multiprocessing
|
19
18
|
import os
|
20
|
-
import
|
19
|
+
import subprocess
|
21
20
|
import time
|
22
21
|
from collections.abc import Iterator
|
23
22
|
from contextlib import contextmanager
|
24
23
|
from logging import INFO, WARN
|
25
24
|
from os import urandom
|
26
25
|
from pathlib import Path
|
27
|
-
from typing import Callable, Optional, Union
|
26
|
+
from typing import Callable, Optional, Union, cast
|
28
27
|
|
29
28
|
import grpc
|
30
29
|
from cryptography.hazmat.primitives.asymmetric import ec
|
31
30
|
from grpc import RpcError
|
32
31
|
|
33
|
-
from flwr.app.error import Error
|
34
|
-
from flwr.cli.config_utils import get_fab_metadata
|
35
32
|
from flwr.client.grpc_adapter_client.connection import grpc_adapter
|
36
33
|
from flwr.client.grpc_rere_client.connection import grpc_request_response
|
37
|
-
from flwr.
|
38
|
-
from flwr.common import GRPC_MAX_MESSAGE_LENGTH, Message
|
34
|
+
from flwr.common import GRPC_MAX_MESSAGE_LENGTH, Context, Message, RecordDict
|
39
35
|
from flwr.common.address import parse_address
|
36
|
+
from flwr.common.config import get_flwr_dir, get_fused_config_from_fab
|
40
37
|
from flwr.common.constant import (
|
41
38
|
CLIENT_OCTET,
|
42
39
|
CLIENTAPPIO_API_DEFAULT_SERVER_ADDRESS,
|
@@ -48,7 +45,6 @@ from flwr.common.constant import (
|
|
48
45
|
TRANSPORT_TYPE_GRPC_RERE,
|
49
46
|
TRANSPORT_TYPE_REST,
|
50
47
|
TRANSPORT_TYPES,
|
51
|
-
ErrorCode,
|
52
48
|
)
|
53
49
|
from flwr.common.exit import ExitCode, flwr_exit
|
54
50
|
from flwr.common.grpc import generic_create_grpc_server
|
@@ -56,10 +52,13 @@ from flwr.common.logger import log
|
|
56
52
|
from flwr.common.retry_invoker import RetryInvoker, RetryState, exponential
|
57
53
|
from flwr.common.typing import Fab, Run, RunNotRunningException, UserConfig
|
58
54
|
from flwr.proto.clientappio_pb2_grpc import add_ClientAppIoServicer_to_server
|
55
|
+
from flwr.server.superlink.ffs import FfsFactory
|
56
|
+
from flwr.supercore.object_store import ObjectStoreFactory
|
59
57
|
from flwr.supernode.nodestate import NodeStateFactory
|
60
|
-
from flwr.supernode.runtime.run_clientapp import run_clientapp
|
61
58
|
from flwr.supernode.servicer.clientappio import ClientAppInputs, ClientAppIoServicer
|
62
59
|
|
60
|
+
DEFAULT_FFS_DIR = get_flwr_dir() / "supernode" / "ffs"
|
61
|
+
|
63
62
|
|
64
63
|
# pylint: disable=import-outside-toplevel
|
65
64
|
# pylint: disable=too-many-branches
|
@@ -138,13 +137,15 @@ def start_client_internal(
|
|
138
137
|
certificates=None,
|
139
138
|
)
|
140
139
|
|
141
|
-
#
|
142
|
-
run_info_store: Optional[DeprecatedRunInfoStore] = None
|
140
|
+
# Initialize factories
|
143
141
|
state_factory = NodeStateFactory()
|
144
|
-
|
145
|
-
|
142
|
+
ffs_factory = FfsFactory(get_flwr_dir(flwr_path) / "supernode" / "ffs") # type: ignore
|
143
|
+
object_store_factory = ObjectStoreFactory()
|
146
144
|
|
147
|
-
|
145
|
+
# Initialize NodeState, Ffs, and ObjectStore
|
146
|
+
state = state_factory.state()
|
147
|
+
ffs = ffs_factory.ffs()
|
148
|
+
_store = object_store_factory.store()
|
148
149
|
|
149
150
|
with _init_connection(
|
150
151
|
transport=transport,
|
@@ -157,73 +158,79 @@ def start_client_internal(
|
|
157
158
|
) as conn:
|
158
159
|
receive, send, create_node, _, get_run, get_fab = conn
|
159
160
|
|
160
|
-
#
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
raise ValueError("Failed to register SuperNode with the SuperLink")
|
166
|
-
state.set_node_id(node_id)
|
167
|
-
run_info_store = DeprecatedRunInfoStore(
|
168
|
-
node_id=state.get_node_id(),
|
169
|
-
node_config=node_config,
|
170
|
-
)
|
161
|
+
# Call create_node fn to register node
|
162
|
+
# and store node_id in state
|
163
|
+
if (node_id := create_node()) is None:
|
164
|
+
raise ValueError("Failed to register SuperNode with the SuperLink")
|
165
|
+
state.set_node_id(node_id)
|
171
166
|
|
172
167
|
# pylint: disable=too-many-nested-blocks
|
173
168
|
while True:
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
if len(message.metadata.group_id) > 0:
|
183
|
-
log(
|
184
|
-
INFO,
|
185
|
-
"[RUN %s, ROUND %s]",
|
186
|
-
message.metadata.run_id,
|
187
|
-
message.metadata.group_id,
|
188
|
-
)
|
169
|
+
# Pull message
|
170
|
+
if (message := receive()) is None:
|
171
|
+
time.sleep(3)
|
172
|
+
continue
|
173
|
+
|
174
|
+
# Log message reception
|
175
|
+
log(INFO, "")
|
176
|
+
if message.metadata.group_id:
|
189
177
|
log(
|
190
178
|
INFO,
|
191
|
-
"
|
192
|
-
message.metadata.
|
193
|
-
message.metadata.
|
179
|
+
"[RUN %s, ROUND %s]",
|
180
|
+
message.metadata.run_id,
|
181
|
+
message.metadata.group_id,
|
194
182
|
)
|
183
|
+
else:
|
184
|
+
log(INFO, "[RUN %s]", message.metadata.run_id)
|
185
|
+
log(
|
186
|
+
INFO,
|
187
|
+
"Received: %s message %s",
|
188
|
+
message.metadata.message_type,
|
189
|
+
message.metadata.message_id,
|
190
|
+
)
|
195
191
|
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
fab =
|
207
|
-
|
208
|
-
|
209
|
-
|
192
|
+
# Ensure the run and FAB are available
|
193
|
+
run_id = message.metadata.run_id
|
194
|
+
try:
|
195
|
+
# Check if the message is from an unknown run
|
196
|
+
if (run_info := state.get_run(run_id)) is None:
|
197
|
+
# Pull run info from SuperLink
|
198
|
+
run_info = get_run(run_id)
|
199
|
+
state.store_run(run_info)
|
200
|
+
|
201
|
+
# Pull and store the FAB
|
202
|
+
fab = get_fab(run_info.fab_hash, run_id)
|
203
|
+
ffs.put(fab.content, {})
|
204
|
+
|
205
|
+
# Initialize the context
|
206
|
+
run_cfg = get_fused_config_from_fab(fab.content, run_info)
|
207
|
+
run_ctx = Context(
|
208
|
+
run_id=run_id,
|
209
|
+
node_id=state.get_node_id(),
|
210
|
+
node_config=node_config,
|
211
|
+
state=RecordDict(),
|
212
|
+
run_config=run_cfg,
|
213
|
+
)
|
214
|
+
state.store_context(run_ctx)
|
210
215
|
|
211
|
-
#
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
216
|
+
# Store the message in the state
|
217
|
+
state.store_message(message)
|
218
|
+
except RunNotRunningException:
|
219
|
+
log(
|
220
|
+
INFO,
|
221
|
+
"Run ID %s is not in `RUNNING` status. Ignoring message %s.",
|
222
|
+
run_id,
|
223
|
+
message.metadata.message_id,
|
217
224
|
)
|
225
|
+
time.sleep(3)
|
226
|
+
continue
|
218
227
|
|
219
|
-
|
220
|
-
context
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
reply_to=message,
|
226
|
-
)
|
228
|
+
try:
|
229
|
+
# Retrieve message, context, run and fab for this run
|
230
|
+
message = state.get_messages(run_ids=[run_id], is_reply=False)[0]
|
231
|
+
context = cast(Context, state.get_context(run_id))
|
232
|
+
run = cast(Run, state.get_run(run_id))
|
233
|
+
fab = Fab(run.fab_hash, ffs.get(run.fab_hash)[0]) # type: ignore
|
227
234
|
|
228
235
|
# Two isolation modes:
|
229
236
|
# 1. `subprocess`: SuperNode is starting the ClientApp
|
@@ -258,18 +265,17 @@ def start_client_internal(
|
|
258
265
|
else clientappio_api_address
|
259
266
|
)
|
260
267
|
# Start ClientApp subprocess
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
proc.join()
|
268
|
+
command = [
|
269
|
+
"flwr-clientapp",
|
270
|
+
"--clientappio-api-address",
|
271
|
+
io_address,
|
272
|
+
"--token",
|
273
|
+
str(token),
|
274
|
+
"--parent-pid",
|
275
|
+
str(os.getpid()),
|
276
|
+
"--insecure",
|
277
|
+
]
|
278
|
+
subprocess.run(command, check=False)
|
273
279
|
else:
|
274
280
|
# Wait for output to become available
|
275
281
|
while not clientappio_servicer.has_outputs():
|
@@ -279,10 +285,7 @@ def start_client_internal(
|
|
279
285
|
reply_message, context = outputs.message, outputs.context
|
280
286
|
|
281
287
|
# Update node state
|
282
|
-
|
283
|
-
run_id=run_id,
|
284
|
-
context=context,
|
285
|
-
)
|
288
|
+
state.store_context(context)
|
286
289
|
|
287
290
|
# Send
|
288
291
|
send(reply_message)
|
@@ -416,33 +419,6 @@ def _make_fleet_connection_retry_invoker(
|
|
416
419
|
)
|
417
420
|
|
418
421
|
|
419
|
-
def _run_flwr_clientapp( # pylint: disable=R0917
|
420
|
-
main_pid: int,
|
421
|
-
clientappio_api_address: str,
|
422
|
-
run_once: bool,
|
423
|
-
token: Optional[int] = None,
|
424
|
-
flwr_dir: Optional[str] = None,
|
425
|
-
certificates: Optional[bytes] = None,
|
426
|
-
) -> None:
|
427
|
-
# Monitor the main process in case of SIGKILL
|
428
|
-
def main_process_monitor() -> None:
|
429
|
-
while True:
|
430
|
-
time.sleep(1)
|
431
|
-
if os.getppid() != main_pid:
|
432
|
-
os.kill(os.getpid(), 9)
|
433
|
-
|
434
|
-
threading.Thread(target=main_process_monitor, daemon=True).start()
|
435
|
-
|
436
|
-
# Run flwr-clientapp
|
437
|
-
run_clientapp(
|
438
|
-
clientappio_api_address=clientappio_api_address,
|
439
|
-
run_once=run_once,
|
440
|
-
token=token,
|
441
|
-
flwr_dir=flwr_dir,
|
442
|
-
certificates=certificates,
|
443
|
-
)
|
444
|
-
|
445
|
-
|
446
422
|
def run_clientappio_api_grpc(
|
447
423
|
address: str,
|
448
424
|
certificates: Optional[tuple[bytes, bytes, bytes]],
|
{flwr_nightly-1.19.0.dev20250603.dist-info → flwr_nightly-1.19.0.dev20250604.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: flwr-nightly
|
3
|
-
Version: 1.19.0.
|
3
|
+
Version: 1.19.0.dev20250604
|
4
4
|
Summary: Flower: A Friendly Federated AI Framework
|
5
5
|
License: Apache-2.0
|
6
6
|
Keywords: Artificial Intelligence,Federated AI,Federated Analytics,Federated Evaluation,Federated Learning,Flower,Machine Learning
|
{flwr_nightly-1.19.0.dev20250603.dist-info → flwr_nightly-1.19.0.dev20250604.dist-info}/RECORD
RENAMED
@@ -84,7 +84,7 @@ flwr/client/grpc_adapter_client/__init__.py,sha256=RQWP5mFPROLHKgombiRvPXVWSoVrQ
|
|
84
84
|
flwr/client/grpc_adapter_client/connection.py,sha256=aj5tTYyE8z2hQLXPPydsJiz8gBDIWLUhfWvqYkAL1L4,3966
|
85
85
|
flwr/client/grpc_rere_client/__init__.py,sha256=i7iS0Lt8B7q0E2L72e4F_YrKm6ClRKnd71PNA6PW2O0,752
|
86
86
|
flwr/client/grpc_rere_client/client_interceptor.py,sha256=zFaVHw6AxeNO-7eCKKb-RxrPa7zbM5Z-2-1Efc4adQY,2451
|
87
|
-
flwr/client/grpc_rere_client/connection.py,sha256=
|
87
|
+
flwr/client/grpc_rere_client/connection.py,sha256=kjnbUNLNz3fn-79fOQ-EDPIkzK8W0GckYIUkvWCXDD0,13283
|
88
88
|
flwr/client/grpc_rere_client/grpc_adapter.py,sha256=JvMZ7vCFTaTEo6AzKYh3zDmeQAU7VSjdysbC6t3ufWg,6351
|
89
89
|
flwr/client/message_handler/__init__.py,sha256=0lyljDVqre3WljiZbPcwCCf8GiIaSVI_yo_ylEyPwSE,719
|
90
90
|
flwr/client/message_handler/message_handler.py,sha256=X9SXX6et97Lw9_DGD93HKsEBGNjXClcFgc_5aLK0oiU,6541
|
@@ -123,7 +123,7 @@ flwr/common/exit_handlers.py,sha256=IaqJ60fXZuu7McaRYnoYKtlbH9t4Yl9goNExKqtmQbs,
|
|
123
123
|
flwr/common/grpc.py,sha256=manTaHaPiyYngUq1ErZvvV2B2GxlXUUUGRy3jc3TBIQ,9798
|
124
124
|
flwr/common/heartbeat.py,sha256=SyEpNDnmJ0lni0cWO67rcoJVKasCLmkNHm3dKLeNrLU,5749
|
125
125
|
flwr/common/inflatable.py,sha256=9yPsSFOfNM2OIb15JQ6-wY5kblwXiC5zNX-tsp2ZwW0,7017
|
126
|
-
flwr/common/inflatable_grpc_utils.py,sha256=
|
126
|
+
flwr/common/inflatable_grpc_utils.py,sha256=YGP8oJRfnkwvY6segWH1DUf_ljDIkku7-2zH66tv3HA,4337
|
127
127
|
flwr/common/logger.py,sha256=JbRf6E2vQxXzpDBq1T8IDUJo_usu3gjWEBPQ6uKcmdg,13049
|
128
128
|
flwr/common/message.py,sha256=HfSeqxwXgf90ilbMlM0vrF4cJWqJVx3jJ0gNmTfgdFw,19628
|
129
129
|
flwr/common/object_ref.py,sha256=p3SfTeqo3Aj16SkB-vsnNn01zswOPdGNBitcbRnqmUk,9134
|
@@ -236,7 +236,7 @@ flwr/server/criterion.py,sha256=G4e-6B48Pc7d5rmGVUpIzNKb6UF88O3VmTRuUltgjzM,1061
|
|
236
236
|
flwr/server/fleet_event_log_interceptor.py,sha256=AkL7Y5d3xm2vRhL3ahmEVVoOvAP7PA7dRgB-je4v-Ys,3774
|
237
237
|
flwr/server/grid/__init__.py,sha256=aWZHezoR2UGMJISB_gPMCm2N_2GSbm97A3lAp7ruhRQ,888
|
238
238
|
flwr/server/grid/grid.py,sha256=naGCYt5J6dnmUvrcGkdNyKPe3MBd-0awGm1ALmgahqY,6625
|
239
|
-
flwr/server/grid/grpc_grid.py,sha256=
|
239
|
+
flwr/server/grid/grpc_grid.py,sha256=MWESNIUbBp8ownNE1JvWW-xQ7Hb7AyxcorVsOZIkI18,12321
|
240
240
|
flwr/server/grid/inmemory_grid.py,sha256=RjejYT-d-hHuTs1KSs_5wvOdAWKLus8w5_UAcnGt4iw,6168
|
241
241
|
flwr/server/history.py,sha256=cCkFhBN4GoHsYYNk5GG1Y089eKJh2DH_ZJbYPwLaGyk,5026
|
242
242
|
flwr/server/run_serverapp.py,sha256=v0p6jXj2dFxlRUdoEeF1mnaFd9XRQi6dZCflPY6d3qI,2063
|
@@ -271,7 +271,7 @@ flwr/server/strategy/krum.py,sha256=9hjB-5l7lwo7Er2xRauYvNEKAv9KoPCin_TCdYJwQe4,
|
|
271
271
|
flwr/server/strategy/qfedavg.py,sha256=-siSzfuVX8GRkjmyvbj68fnjk02E3EYHl8Ory6v1QzI,10131
|
272
272
|
flwr/server/strategy/strategy.py,sha256=n4r52i5gK4KGToZvcJUeWuEif1tuI0HZUT3YJPTC7UE,7524
|
273
273
|
flwr/server/superlink/__init__.py,sha256=GNSuJ4-N6Z8wun2iZNlXqENt5beUyzC0Gi_tN396bbM,707
|
274
|
-
flwr/server/superlink/ffs/__init__.py,sha256=
|
274
|
+
flwr/server/superlink/ffs/__init__.py,sha256=U3KXwG_SplEvchat27K0LYPoPHzh-cwwT_NHsGlYMt8,908
|
275
275
|
flwr/server/superlink/ffs/disk_ffs.py,sha256=tkJiUa9cIq6Po-9UYMtFpI-GEyY5FMg4RcDKenaky74,3297
|
276
276
|
flwr/server/superlink/ffs/ffs.py,sha256=6w7wy71i7tbuJwqEgdeCa49JejXMEof3jujURN_R7Rg,2395
|
277
277
|
flwr/server/superlink/ffs/ffs_factory.py,sha256=pK-g3LMelvWTV6N9Cd-j-_-FdcGbRFTKNsWaqmlBDSk,1490
|
@@ -346,18 +346,18 @@ flwr/superlink/__init__.py,sha256=GNSuJ4-N6Z8wun2iZNlXqENt5beUyzC0Gi_tN396bbM,70
|
|
346
346
|
flwr/supernode/__init__.py,sha256=KgeCaVvXWrU3rptNR1y0oBp4YtXbAcrnCcJAiOoWkI4,707
|
347
347
|
flwr/supernode/cli/__init__.py,sha256=JuEMr0-s9zv-PEWKuLB9tj1ocNfroSyNJ-oyv7ati9A,887
|
348
348
|
flwr/supernode/cli/flower_supernode.py,sha256=ly2AQhbla2sufDaMsENaEALDEd0a4CS4D0eUrUOkHzY,8778
|
349
|
-
flwr/supernode/cli/flwr_clientapp.py,sha256=
|
349
|
+
flwr/supernode/cli/flwr_clientapp.py,sha256=KfVUO20ZMnUDSGZTJ9I1KkMawFsRV6kdRUmGIRNbg_8,2812
|
350
350
|
flwr/supernode/nodestate/__init__.py,sha256=CyLLObbmmVgfRO88UCM0VMait1dL57mUauUDfuSHsbU,976
|
351
351
|
flwr/supernode/nodestate/in_memory_nodestate.py,sha256=4ZiLA45fMi2bJgmfDNLtiv-gVNru95Bi48xBy7xtatA,5212
|
352
352
|
flwr/supernode/nodestate/nodestate.py,sha256=SgblnKtqzTHRiODwg4QUREw1-uYPQrLzoeTBlROHf_0,4571
|
353
353
|
flwr/supernode/nodestate/nodestate_factory.py,sha256=UYTDCcwK_baHUmkzkJDxL0UEqvtTfOMlQRrROMCd0Xo,1430
|
354
354
|
flwr/supernode/runtime/__init__.py,sha256=JQdqd2EMTn-ORMeTvewYYh52ls0YKP68jrps1qioxu4,718
|
355
|
-
flwr/supernode/runtime/run_clientapp.py,sha256=
|
355
|
+
flwr/supernode/runtime/run_clientapp.py,sha256=cvWSby7u31u97QapWHxJM-Wer6F1k6mbbD-d1gxwxZA,7962
|
356
356
|
flwr/supernode/servicer/__init__.py,sha256=lucTzre5WPK7G1YLCfaqg3rbFWdNSb7ZTt-ca8gxdEo,717
|
357
357
|
flwr/supernode/servicer/clientappio/__init__.py,sha256=vJyOjO2FXZ2URbnthmdsgs6948wbYfdq1L1V8Um-Lr8,895
|
358
358
|
flwr/supernode/servicer/clientappio/clientappio_servicer.py,sha256=LmzkxtNQBn5vVrHc0Bhq2WqaK6-LM2v4kfLBN0PiNNM,8522
|
359
|
-
flwr/supernode/start_client_internal.py,sha256=
|
360
|
-
flwr_nightly-1.19.0.
|
361
|
-
flwr_nightly-1.19.0.
|
362
|
-
flwr_nightly-1.19.0.
|
363
|
-
flwr_nightly-1.19.0.
|
359
|
+
flwr/supernode/start_client_internal.py,sha256=5CwTNV-XmIhwR1jv3G7aQAXGhf6OFWS6U-vmxY1iKGA,16984
|
360
|
+
flwr_nightly-1.19.0.dev20250604.dist-info/METADATA,sha256=xUqCj0YV0Yt1jAVWil6lHAaQkDUMpQqRIFL5tX2yUQo,15910
|
361
|
+
flwr_nightly-1.19.0.dev20250604.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
362
|
+
flwr_nightly-1.19.0.dev20250604.dist-info/entry_points.txt,sha256=jNpDXGBGgs21RqUxelF_jwGaxtqFwm-MQyfz-ZqSjrA,367
|
363
|
+
flwr_nightly-1.19.0.dev20250604.dist-info/RECORD,,
|
{flwr_nightly-1.19.0.dev20250603.dist-info → flwr_nightly-1.19.0.dev20250604.dist-info}/WHEEL
RENAMED
File without changes
|
File without changes
|