flwr-nightly 1.13.0.dev20241106__py3-none-any.whl → 1.13.0.dev20241117__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of flwr-nightly might be problematic. Click here for more details.
- flwr/cli/app.py +2 -0
- flwr/cli/build.py +37 -0
- flwr/cli/install.py +5 -3
- flwr/cli/ls.py +228 -0
- flwr/cli/run/run.py +16 -5
- flwr/client/app.py +68 -19
- flwr/client/clientapp/app.py +51 -35
- flwr/client/grpc_rere_client/connection.py +2 -12
- flwr/client/nodestate/__init__.py +25 -0
- flwr/client/nodestate/in_memory_nodestate.py +38 -0
- flwr/client/nodestate/nodestate.py +30 -0
- flwr/client/nodestate/nodestate_factory.py +37 -0
- flwr/client/rest_client/connection.py +4 -14
- flwr/client/supernode/app.py +57 -53
- flwr/common/args.py +148 -0
- flwr/common/config.py +10 -0
- flwr/common/constant.py +21 -7
- flwr/common/date.py +18 -0
- flwr/common/logger.py +6 -2
- flwr/common/object_ref.py +47 -16
- flwr/common/serde.py +10 -0
- flwr/common/typing.py +32 -11
- flwr/proto/exec_pb2.py +23 -17
- flwr/proto/exec_pb2.pyi +50 -20
- flwr/proto/exec_pb2_grpc.py +34 -0
- flwr/proto/exec_pb2_grpc.pyi +13 -0
- flwr/proto/run_pb2.py +32 -27
- flwr/proto/run_pb2.pyi +44 -1
- flwr/proto/simulationio_pb2.py +2 -2
- flwr/proto/simulationio_pb2_grpc.py +34 -0
- flwr/proto/simulationio_pb2_grpc.pyi +13 -0
- flwr/server/app.py +83 -87
- flwr/server/driver/driver.py +1 -1
- flwr/server/driver/grpc_driver.py +6 -20
- flwr/server/driver/inmemory_driver.py +1 -3
- flwr/server/run_serverapp.py +8 -238
- flwr/server/serverapp/app.py +44 -89
- flwr/server/strategy/aggregate.py +4 -4
- flwr/server/superlink/fleet/rest_rere/rest_api.py +10 -9
- flwr/server/superlink/linkstate/in_memory_linkstate.py +76 -62
- flwr/server/superlink/linkstate/linkstate.py +24 -9
- flwr/server/superlink/linkstate/sqlite_linkstate.py +87 -128
- flwr/server/superlink/linkstate/utils.py +191 -32
- flwr/server/superlink/simulation/simulationio_servicer.py +22 -1
- flwr/simulation/__init__.py +3 -1
- flwr/simulation/app.py +245 -352
- flwr/simulation/legacy_app.py +402 -0
- flwr/simulation/run_simulation.py +8 -19
- flwr/simulation/simulationio_connection.py +2 -2
- flwr/superexec/deployment.py +13 -7
- flwr/superexec/exec_servicer.py +32 -3
- flwr/superexec/executor.py +4 -3
- flwr/superexec/simulation.py +52 -145
- {flwr_nightly-1.13.0.dev20241106.dist-info → flwr_nightly-1.13.0.dev20241117.dist-info}/METADATA +10 -7
- {flwr_nightly-1.13.0.dev20241106.dist-info → flwr_nightly-1.13.0.dev20241117.dist-info}/RECORD +58 -51
- {flwr_nightly-1.13.0.dev20241106.dist-info → flwr_nightly-1.13.0.dev20241117.dist-info}/entry_points.txt +1 -0
- {flwr_nightly-1.13.0.dev20241106.dist-info → flwr_nightly-1.13.0.dev20241117.dist-info}/LICENSE +0 -0
- {flwr_nightly-1.13.0.dev20241106.dist-info → flwr_nightly-1.13.0.dev20241117.dist-info}/WHEEL +0 -0
|
@@ -15,21 +15,23 @@
|
|
|
15
15
|
"""Utility functions for State."""
|
|
16
16
|
|
|
17
17
|
|
|
18
|
-
import time
|
|
19
18
|
from logging import ERROR
|
|
20
19
|
from os import urandom
|
|
21
|
-
from uuid import uuid4
|
|
20
|
+
from typing import Optional, Union
|
|
21
|
+
from uuid import UUID, uuid4
|
|
22
22
|
|
|
23
|
-
from flwr.common import ConfigsRecord, Context, log, serde
|
|
23
|
+
from flwr.common import ConfigsRecord, Context, log, now, serde
|
|
24
24
|
from flwr.common.constant import ErrorCode, Status, SubStatus
|
|
25
25
|
from flwr.common.typing import RunStatus
|
|
26
|
-
from flwr.proto.error_pb2 import Error # pylint: disable=E0611
|
|
27
|
-
from flwr.proto.message_pb2 import Context as ProtoContext # pylint: disable=E0611
|
|
28
|
-
from flwr.proto.node_pb2 import Node # pylint: disable=E0611
|
|
29
26
|
|
|
30
27
|
# pylint: disable=E0611
|
|
28
|
+
from flwr.proto.error_pb2 import Error
|
|
29
|
+
from flwr.proto.message_pb2 import Context as ProtoContext
|
|
30
|
+
from flwr.proto.node_pb2 import Node
|
|
31
31
|
from flwr.proto.recordset_pb2 import ConfigsRecord as ProtoConfigsRecord
|
|
32
|
-
from flwr.proto.task_pb2 import Task, TaskIns, TaskRes
|
|
32
|
+
from flwr.proto.task_pb2 import Task, TaskIns, TaskRes
|
|
33
|
+
|
|
34
|
+
# pylint: enable=E0611
|
|
33
35
|
|
|
34
36
|
NODE_UNAVAILABLE_ERROR_REASON = (
|
|
35
37
|
"Error: Node Unavailable - The destination node is currently unavailable. "
|
|
@@ -40,12 +42,22 @@ VALID_RUN_STATUS_TRANSITIONS = {
|
|
|
40
42
|
(Status.PENDING, Status.STARTING),
|
|
41
43
|
(Status.STARTING, Status.RUNNING),
|
|
42
44
|
(Status.RUNNING, Status.FINISHED),
|
|
45
|
+
# Any non-FINISHED status can transition to FINISHED
|
|
46
|
+
(Status.PENDING, Status.FINISHED),
|
|
47
|
+
(Status.STARTING, Status.FINISHED),
|
|
43
48
|
}
|
|
44
49
|
# Sub-statuses accepted by `has_valid_sub_status` for a run whose status is
# `Status.FINISHED`.
VALID_RUN_SUB_STATUSES = {
    SubStatus.COMPLETED,
    SubStatus.FAILED,
    SubStatus.STOPPED,
}
# Error reason placed in the TaskRes built by
# `create_taskres_for_unavailable_taskins`.
MESSAGE_UNAVAILABLE_ERROR_REASON = (
    "Error: Message Unavailable - The requested message could not be found in the "
    "database. It may have expired due to its TTL or never existed."
)
# Error reason placed in the TaskRes built by
# `create_taskres_for_unavailable_taskres`.
REPLY_MESSAGE_UNAVAILABLE_ERROR_REASON = (
    "Error: Reply Message Unavailable - The reply message has expired."
)
|
|
49
61
|
|
|
50
62
|
|
|
51
63
|
def generate_rand_int_from_bytes(num_bytes: int) -> int:
|
|
@@ -161,31 +173,6 @@ def configsrecord_from_bytes(configsrecord_bytes: bytes) -> ConfigsRecord:
|
|
|
161
173
|
)
|
|
162
174
|
|
|
163
175
|
|
|
164
|
-
def make_node_unavailable_taskres(ref_taskins: TaskIns) -> TaskRes:
|
|
165
|
-
"""Generate a TaskRes with a node unavailable error from a TaskIns."""
|
|
166
|
-
current_time = time.time()
|
|
167
|
-
ttl = ref_taskins.task.ttl - (current_time - ref_taskins.task.created_at)
|
|
168
|
-
if ttl < 0:
|
|
169
|
-
log(ERROR, "Creating TaskRes for TaskIns that exceeds its TTL.")
|
|
170
|
-
ttl = 0
|
|
171
|
-
return TaskRes(
|
|
172
|
-
task_id=str(uuid4()),
|
|
173
|
-
group_id=ref_taskins.group_id,
|
|
174
|
-
run_id=ref_taskins.run_id,
|
|
175
|
-
task=Task(
|
|
176
|
-
producer=Node(node_id=ref_taskins.task.consumer.node_id, anonymous=False),
|
|
177
|
-
consumer=Node(node_id=ref_taskins.task.producer.node_id, anonymous=False),
|
|
178
|
-
created_at=current_time,
|
|
179
|
-
ttl=ttl,
|
|
180
|
-
ancestry=[ref_taskins.task_id],
|
|
181
|
-
task_type=ref_taskins.task.task_type,
|
|
182
|
-
error=Error(
|
|
183
|
-
code=ErrorCode.NODE_UNAVAILABLE, reason=NODE_UNAVAILABLE_ERROR_REASON
|
|
184
|
-
),
|
|
185
|
-
),
|
|
186
|
-
)
|
|
187
|
-
|
|
188
|
-
|
|
189
176
|
def is_valid_transition(current_status: RunStatus, new_status: RunStatus) -> bool:
|
|
190
177
|
"""Check if a transition between two run statuses is valid.
|
|
191
178
|
|
|
@@ -201,6 +188,14 @@ def is_valid_transition(current_status: RunStatus, new_status: RunStatus) -> boo
|
|
|
201
188
|
bool
|
|
202
189
|
True if the transition is valid, False otherwise.
|
|
203
190
|
"""
|
|
191
|
+
# Transition to FINISHED from a non-RUNNING status is only allowed
|
|
192
|
+
# if the sub-status is not COMPLETED
|
|
193
|
+
if (
|
|
194
|
+
current_status.status in [Status.PENDING, Status.STARTING]
|
|
195
|
+
and new_status.status == Status.FINISHED
|
|
196
|
+
):
|
|
197
|
+
return new_status.sub_status != SubStatus.COMPLETED
|
|
198
|
+
|
|
204
199
|
return (
|
|
205
200
|
current_status.status,
|
|
206
201
|
new_status.status,
|
|
@@ -228,3 +223,167 @@ def has_valid_sub_status(status: RunStatus) -> bool:
|
|
|
228
223
|
if status.status == Status.FINISHED:
|
|
229
224
|
return status.sub_status in VALID_RUN_SUB_STATUSES
|
|
230
225
|
return status.sub_status == ""
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
def create_taskres_for_unavailable_taskins(taskins_id: Union[str, UUID]) -> TaskRes:
    """Build an error TaskRes for a TaskIns that is not available.

    Parameters
    ----------
    taskins_id : Union[str, UUID]
        ID of the TaskIns that could not be served. It is stringified and
        stored as the only ancestry entry of the returned TaskRes.

    Returns
    -------
    TaskRes
        A TaskRes with a fresh random task ID whose task carries an `Error`
        with code `ErrorCode.MESSAGE_UNAVAILABLE`. Group ID, run ID and task
        type are left at placeholder values because they are unknown here.
    """
    created_at = now().timestamp()
    unavailable_error = Error(
        code=ErrorCode.MESSAGE_UNAVAILABLE,
        reason=MESSAGE_UNAVAILABLE_ERROR_REASON,
    )
    error_task = Task(
        # This function is only called by SuperLink, and thus it's the producer.
        producer=Node(node_id=0, anonymous=False),
        consumer=Node(node_id=0, anonymous=False),
        created_at=created_at,
        ttl=0,
        ancestry=[str(taskins_id)],
        task_type="",  # Unknown message type
        error=unavailable_error,
    )
    return TaskRes(
        task_id=str(uuid4()),
        group_id="",  # Unknown group ID
        run_id=0,  # Unknown run ID
        task=error_task,
    )
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
def create_taskres_for_unavailable_taskres(ref_taskins: TaskIns) -> TaskRes:
    """Build an error TaskRes standing in for an unavailable reply to a TaskIns.

    Parameters
    ----------
    ref_taskins : TaskIns
        The TaskIns whose reply is unavailable. Its group ID, run ID, task ID
        (as ancestry) and task type are copied into the result.

    Returns
    -------
    TaskRes
        A TaskRes with a fresh random task ID whose task carries an `Error`
        with code `ErrorCode.REPLY_MESSAGE_UNAVAILABLE`. Its TTL is the time
        the reference TaskIns still has left, clamped to zero.
    """
    created_at = now().timestamp()
    elapsed = created_at - ref_taskins.task.created_at
    remaining_ttl = ref_taskins.task.ttl - elapsed
    if remaining_ttl < 0:
        # The reference TaskIns is itself past its TTL: report it and clamp.
        log(ERROR, "Creating TaskRes for TaskIns that exceeds its TTL.")
        remaining_ttl = 0

    reply_error = Error(
        code=ErrorCode.REPLY_MESSAGE_UNAVAILABLE,
        reason=REPLY_MESSAGE_UNAVAILABLE_ERROR_REASON,
    )
    error_task = Task(
        # This function is only called by SuperLink, and thus it's the producer.
        producer=Node(node_id=0, anonymous=False),
        consumer=Node(node_id=0, anonymous=False),
        created_at=created_at,
        ttl=remaining_ttl,
        ancestry=[ref_taskins.task_id],
        task_type=ref_taskins.task.task_type,
        error=reply_error,
    )
    return TaskRes(
        task_id=str(uuid4()),
        group_id=ref_taskins.group_id,
        run_id=ref_taskins.run_id,
        task=error_task,
    )
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
def has_expired(task_ins_or_res: Union[TaskIns, TaskRes], current_time: float) -> bool:
    """Return True if the task's creation time plus TTL is before `current_time`."""
    task = task_ins_or_res.task
    expires_at = task.ttl + task.created_at
    return expires_at < current_time
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
def verify_taskins_ids(
    inquired_taskins_ids: set[UUID],
    found_taskins_dict: dict[UUID, TaskIns],
    current_time: Optional[float] = None,
    update_set: bool = True,
) -> dict[UUID, TaskRes]:
    """Verify found TaskIns and generate error TaskRes for invalid ones.

    Parameters
    ----------
    inquired_taskins_ids : set[UUID]
        Set of TaskIns IDs for which to generate error TaskRes if invalid.
    found_taskins_dict : dict[UUID, TaskIns]
        Dictionary containing all found TaskIns indexed by their IDs.
    current_time : Optional[float] (default: None)
        The current time to check for expiration. If set to `None`, the current
        timestamp from `now().timestamp()` is used. Any explicit value,
        including `0.0`, is used as given.
    update_set : bool (default: True)
        If True, `inquired_taskins_ids` is modified in place to remove the
        invalid IDs.

    Returns
    -------
    dict[UUID, TaskRes]
        A dictionary of error TaskRes indexed by the corresponding TaskIns ID.
    """
    ret_dict: dict[UUID, TaskRes] = {}
    # Compare against None so that an explicit timestamp of 0.0 is honoured
    current = now().timestamp() if current_time is None else current_time
    # Iterate over a snapshot because the set may be mutated inside the loop
    for taskins_id in list(inquired_taskins_ids):
        taskins = found_taskins_dict.get(taskins_id)
        if taskins is not None and not has_expired(taskins, current):
            continue
        # The TaskIns doesn't exist or has expired: generate an error TaskRes
        if update_set:
            inquired_taskins_ids.remove(taskins_id)
        ret_dict[taskins_id] = create_taskres_for_unavailable_taskins(taskins_id)
    return ret_dict
|
|
344
|
+
|
|
345
|
+
|
|
346
|
+
def verify_found_taskres(
    inquired_taskins_ids: set[UUID],
    found_taskins_dict: dict[UUID, TaskIns],
    found_taskres_list: list[TaskRes],
    current_time: Optional[float] = None,
    update_set: bool = True,
) -> dict[UUID, TaskRes]:
    """Verify found TaskRes and generate error TaskRes for invalid ones.

    Parameters
    ----------
    inquired_taskins_ids : set[UUID]
        Set of TaskIns IDs for which to generate error TaskRes if invalid.
    found_taskins_dict : dict[UUID, TaskIns]
        Dictionary containing all found TaskIns indexed by their IDs.
    found_taskres_list : list[TaskRes]
        List of found TaskRes to be verified. The first ancestry entry of each
        TaskRes is parsed as the ID of the TaskIns it replies to.
    current_time : Optional[float] (default: None)
        The current time to check for expiration. If set to `None`, the current
        timestamp from `now().timestamp()` is used. Any explicit value,
        including `0.0`, is used as given.
    update_set : bool (default: True)
        If True, `inquired_taskins_ids` is modified in place to remove the IDs
        that have a TaskRes.

    Returns
    -------
    dict[UUID, TaskRes]
        A dictionary of TaskRes indexed by the corresponding TaskIns ID. An
        expired TaskRes is replaced by an error TaskRes. Every returned TaskRes
        has `task.delivered_at` set.

    Raises
    ------
    KeyError
        If `update_set` is True and a TaskRes refers to a TaskIns ID that is not
        in `inquired_taskins_ids`, or if an expired TaskRes refers to a TaskIns
        ID that is not in `found_taskins_dict`.
    """
    ret_dict: dict[UUID, TaskRes] = {}
    # Compare against None so that an explicit timestamp of 0.0 is honoured
    current = now().timestamp() if current_time is None else current_time
    for taskres in found_taskres_list:
        taskins_id = UUID(taskres.task.ancestry[0])
        if update_set:
            inquired_taskins_ids.remove(taskins_id)
        # Check if the TaskRes has expired
        if has_expired(taskres, current):
            # No need to insert the error TaskRes
            taskres = create_taskres_for_unavailable_taskres(
                found_taskins_dict[taskins_id]
            )
        taskres.task.delivered_at = now().isoformat()
        ret_dict[taskins_id] = taskres
    return ret_dict
|
|
@@ -23,6 +23,7 @@ from grpc import ServicerContext
|
|
|
23
23
|
from flwr.common.constant import Status
|
|
24
24
|
from flwr.common.logger import log
|
|
25
25
|
from flwr.common.serde import (
|
|
26
|
+
configs_record_to_proto,
|
|
26
27
|
context_from_proto,
|
|
27
28
|
context_to_proto,
|
|
28
29
|
fab_to_proto,
|
|
@@ -36,6 +37,8 @@ from flwr.proto.log_pb2 import ( # pylint: disable=E0611
|
|
|
36
37
|
PushLogsResponse,
|
|
37
38
|
)
|
|
38
39
|
from flwr.proto.run_pb2 import ( # pylint: disable=E0611
|
|
40
|
+
GetFederationOptionsRequest,
|
|
41
|
+
GetFederationOptionsResponse,
|
|
39
42
|
UpdateRunStatusRequest,
|
|
40
43
|
UpdateRunStatusResponse,
|
|
41
44
|
)
|
|
@@ -123,10 +126,28 @@ class SimulationIoServicer(simulationio_pb2_grpc.SimulationIoServicer):
|
|
|
123
126
|
self, request: PushLogsRequest, context: grpc.ServicerContext
|
|
124
127
|
) -> PushLogsResponse:
|
|
125
128
|
"""Push logs."""
|
|
126
|
-
log(DEBUG, "
|
|
129
|
+
log(DEBUG, "SimultionIoServicer.PushLogs")
|
|
127
130
|
state = self.state_factory.state()
|
|
128
131
|
|
|
129
132
|
# Add logs to LinkState
|
|
130
133
|
merged_logs = "".join(request.logs)
|
|
131
134
|
state.add_serverapp_log(request.run_id, merged_logs)
|
|
132
135
|
return PushLogsResponse()
|
|
136
|
+
|
|
137
|
+
def GetFederationOptions(
    self, request: GetFederationOptionsRequest, context: ServicerContext
) -> GetFederationOptionsResponse:
    """Get Federation Options associated with a run.

    Looks up the federation options stored for `request.run_id` in the state
    returned by `self.state_factory.state()`. If none are stored, the RPC is
    aborted with `FAILED_PRECONDITION`; otherwise the options are returned
    serialized via `configs_record_to_proto`.
    """
    log(DEBUG, "SimulationIoServicer.GetFederationOptions")
    state = self.state_factory.state()

    federation_options = state.get_federation_options(request.run_id)
    if federation_options is None:
        context.abort(
            grpc.StatusCode.FAILED_PRECONDITION,
            "Expected federation options to be set, but none available.",
        )
        # NOTE(review): grpc's `abort` is documented to raise and end the RPC,
        # so this return is presumably only a fallback that satisfies the
        # declared return type -- confirm.
        return GetFederationOptionsResponse()
    return GetFederationOptionsResponse(
        federation_options=configs_record_to_proto(federation_options)
    )
|
flwr/simulation/__init__.py
CHANGED
|
@@ -17,13 +17,14 @@
|
|
|
17
17
|
|
|
18
18
|
import importlib
|
|
19
19
|
|
|
20
|
+
from flwr.simulation.app import run_simulation_process
|
|
20
21
|
from flwr.simulation.run_simulation import run_simulation
|
|
21
22
|
from flwr.simulation.simulationio_connection import SimulationIoConnection
|
|
22
23
|
|
|
23
24
|
is_ray_installed = importlib.util.find_spec("ray") is not None
|
|
24
25
|
|
|
25
26
|
if is_ray_installed:
|
|
26
|
-
from flwr.simulation.app import start_simulation
|
|
27
|
+
from flwr.simulation.legacy_app import start_simulation
|
|
27
28
|
else:
|
|
28
29
|
RAY_IMPORT_ERROR: str = """Unable to import module `ray`.
|
|
29
30
|
|
|
@@ -40,5 +41,6 @@ To install the necessary dependencies, install `flwr` with the `simulation` extr
|
|
|
40
41
|
__all__ = [
|
|
41
42
|
"SimulationIoConnection",
|
|
42
43
|
"run_simulation",
|
|
44
|
+
"run_simulation_process",
|
|
43
45
|
"start_simulation",
|
|
44
46
|
]
|