flwr-nightly 1.13.0.dev20241111__py3-none-any.whl → 1.13.0.dev20241117__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of flwr-nightly might be problematic. Click here for more details.
- flwr/cli/app.py +2 -0
- flwr/cli/build.py +37 -0
- flwr/cli/install.py +5 -3
- flwr/cli/ls.py +228 -0
- flwr/client/app.py +58 -13
- flwr/client/clientapp/app.py +34 -23
- flwr/client/grpc_rere_client/connection.py +2 -12
- flwr/client/rest_client/connection.py +4 -14
- flwr/client/supernode/app.py +57 -53
- flwr/common/args.py +72 -7
- flwr/common/constant.py +21 -6
- flwr/common/date.py +18 -0
- flwr/common/serde.py +10 -0
- flwr/common/typing.py +31 -10
- flwr/proto/exec_pb2.py +22 -13
- flwr/proto/exec_pb2.pyi +44 -0
- flwr/proto/exec_pb2_grpc.py +34 -0
- flwr/proto/exec_pb2_grpc.pyi +13 -0
- flwr/proto/run_pb2.py +30 -30
- flwr/proto/run_pb2.pyi +18 -1
- flwr/server/app.py +39 -68
- flwr/server/driver/grpc_driver.py +4 -14
- flwr/server/run_serverapp.py +8 -238
- flwr/server/serverapp/app.py +34 -23
- flwr/server/superlink/fleet/rest_rere/rest_api.py +10 -9
- flwr/server/superlink/linkstate/in_memory_linkstate.py +71 -46
- flwr/server/superlink/linkstate/linkstate.py +19 -5
- flwr/server/superlink/linkstate/sqlite_linkstate.py +81 -113
- flwr/server/superlink/linkstate/utils.py +193 -3
- flwr/simulation/app.py +6 -41
- flwr/simulation/legacy_app.py +21 -1
- flwr/simulation/run_simulation.py +7 -18
- flwr/simulation/simulationio_connection.py +2 -2
- flwr/superexec/deployment.py +12 -6
- flwr/superexec/exec_servicer.py +31 -2
- flwr/superexec/simulation.py +11 -46
- {flwr_nightly-1.13.0.dev20241111.dist-info → flwr_nightly-1.13.0.dev20241117.dist-info}/METADATA +6 -4
- {flwr_nightly-1.13.0.dev20241111.dist-info → flwr_nightly-1.13.0.dev20241117.dist-info}/RECORD +41 -40
- {flwr_nightly-1.13.0.dev20241111.dist-info → flwr_nightly-1.13.0.dev20241117.dist-info}/LICENSE +0 -0
- {flwr_nightly-1.13.0.dev20241111.dist-info → flwr_nightly-1.13.0.dev20241117.dist-info}/WHEEL +0 -0
- {flwr_nightly-1.13.0.dev20241111.dist-info → flwr_nightly-1.13.0.dev20241117.dist-info}/entry_points.txt +0 -0
|
@@ -15,15 +15,23 @@
|
|
|
15
15
|
"""Utility functions for State."""
|
|
16
16
|
|
|
17
17
|
|
|
18
|
+
from logging import ERROR
|
|
18
19
|
from os import urandom
|
|
20
|
+
from typing import Optional, Union
|
|
21
|
+
from uuid import UUID, uuid4
|
|
19
22
|
|
|
20
|
-
from flwr.common import ConfigsRecord, Context, serde
|
|
21
|
-
from flwr.common.constant import Status, SubStatus
|
|
23
|
+
from flwr.common import ConfigsRecord, Context, log, now, serde
|
|
24
|
+
from flwr.common.constant import ErrorCode, Status, SubStatus
|
|
22
25
|
from flwr.common.typing import RunStatus
|
|
23
|
-
from flwr.proto.message_pb2 import Context as ProtoContext # pylint: disable=E0611
|
|
24
26
|
|
|
25
27
|
# pylint: disable=E0611
|
|
28
|
+
from flwr.proto.error_pb2 import Error
|
|
29
|
+
from flwr.proto.message_pb2 import Context as ProtoContext
|
|
30
|
+
from flwr.proto.node_pb2 import Node
|
|
26
31
|
from flwr.proto.recordset_pb2 import ConfigsRecord as ProtoConfigsRecord
|
|
32
|
+
from flwr.proto.task_pb2 import Task, TaskIns, TaskRes
|
|
33
|
+
|
|
34
|
+
# pylint: enable=E0611
|
|
27
35
|
|
|
28
36
|
NODE_UNAVAILABLE_ERROR_REASON = (
|
|
29
37
|
"Error: Node Unavailable - The destination node is currently unavailable. "
|
|
@@ -34,12 +42,22 @@ VALID_RUN_STATUS_TRANSITIONS = {
|
|
|
34
42
|
(Status.PENDING, Status.STARTING),
|
|
35
43
|
(Status.STARTING, Status.RUNNING),
|
|
36
44
|
(Status.RUNNING, Status.FINISHED),
|
|
45
|
+
# Any non-FINISHED status can transition to FINISHED
|
|
46
|
+
(Status.PENDING, Status.FINISHED),
|
|
47
|
+
(Status.STARTING, Status.FINISHED),
|
|
37
48
|
}
|
|
38
49
|
VALID_RUN_SUB_STATUSES = {
|
|
39
50
|
SubStatus.COMPLETED,
|
|
40
51
|
SubStatus.FAILED,
|
|
41
52
|
SubStatus.STOPPED,
|
|
42
53
|
}
|
|
54
|
+
MESSAGE_UNAVAILABLE_ERROR_REASON = (
|
|
55
|
+
"Error: Message Unavailable - The requested message could not be found in the "
|
|
56
|
+
"database. It may have expired due to its TTL or never existed."
|
|
57
|
+
)
|
|
58
|
+
REPLY_MESSAGE_UNAVAILABLE_ERROR_REASON = (
|
|
59
|
+
"Error: Reply Message Unavailable - The reply message has expired."
|
|
60
|
+
)
|
|
43
61
|
|
|
44
62
|
|
|
45
63
|
def generate_rand_int_from_bytes(num_bytes: int) -> int:
|
|
@@ -170,6 +188,14 @@ def is_valid_transition(current_status: RunStatus, new_status: RunStatus) -> boo
|
|
|
170
188
|
bool
|
|
171
189
|
True if the transition is valid, False otherwise.
|
|
172
190
|
"""
|
|
191
|
+
# Transition to FINISHED from a non-RUNNING status is only allowed
|
|
192
|
+
# if the sub-status is not COMPLETED
|
|
193
|
+
if (
|
|
194
|
+
current_status.status in [Status.PENDING, Status.STARTING]
|
|
195
|
+
and new_status.status == Status.FINISHED
|
|
196
|
+
):
|
|
197
|
+
return new_status.sub_status != SubStatus.COMPLETED
|
|
198
|
+
|
|
173
199
|
return (
|
|
174
200
|
current_status.status,
|
|
175
201
|
new_status.status,
|
|
@@ -197,3 +223,167 @@ def has_valid_sub_status(status: RunStatus) -> bool:
|
|
|
197
223
|
if status.status == Status.FINISHED:
|
|
198
224
|
return status.sub_status in VALID_RUN_SUB_STATUSES
|
|
199
225
|
return status.sub_status == ""
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
def create_taskres_for_unavailable_taskins(taskins_id: Union[str, UUID]) -> TaskRes:
    """Build the error reply used when an inquired TaskIns is unavailable.

    Parameters
    ----------
    taskins_id : Union[str, UUID]
        ID of the TaskIns that could not be served.

    Returns
    -------
    TaskRes
        A new TaskRes with a random task ID whose error carries the code
        ``ErrorCode.MESSAGE_UNAVAILABLE``. Its ancestry contains the string
        form of ``taskins_id``; group ID, run ID and task type are left at
        placeholder values because they are unknown here.
    """
    created_at = now().timestamp()
    reply_id = str(uuid4())

    # This function is only called by SuperLink, and thus it's the producer.
    superlink_as_producer = Node(node_id=0, anonymous=False)
    superlink_as_consumer = Node(node_id=0, anonymous=False)

    unavailable_error = Error(
        code=ErrorCode.MESSAGE_UNAVAILABLE,
        reason=MESSAGE_UNAVAILABLE_ERROR_REASON,
    )
    error_task = Task(
        producer=superlink_as_producer,
        consumer=superlink_as_consumer,
        created_at=created_at,
        ttl=0,
        ancestry=[str(taskins_id)],
        task_type="",  # Unknown message type
        error=unavailable_error,
    )
    return TaskRes(
        task_id=reply_id,
        group_id="",  # Unknown group ID
        run_id=0,  # Unknown run ID
        task=error_task,
    )
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
def create_taskres_for_unavailable_taskres(ref_taskins: TaskIns) -> TaskRes:
    """Build the error reply used when the reply to a TaskIns is unavailable.

    Parameters
    ----------
    ref_taskins : TaskIns
        The TaskIns whose reply is being substituted. Its group ID, run ID,
        task ID and task type are copied into the generated TaskRes.

    Returns
    -------
    TaskRes
        A new TaskRes with a random task ID whose error carries the code
        ``ErrorCode.REPLY_MESSAGE_UNAVAILABLE``. Its TTL is the time the
        reference TaskIns has left to live, clamped to zero.
    """
    created_at = now().timestamp()
    ref_task = ref_taskins.task

    # Remaining lifetime of the reference TaskIns, measured from now
    elapsed = created_at - ref_task.created_at
    remaining_ttl = ref_task.ttl - elapsed
    if remaining_ttl < 0:
        log(ERROR, "Creating TaskRes for TaskIns that exceeds its TTL.")
        remaining_ttl = 0

    reply_id = str(uuid4())
    # This function is only called by SuperLink, and thus it's the producer.
    superlink_as_producer = Node(node_id=0, anonymous=False)
    superlink_as_consumer = Node(node_id=0, anonymous=False)

    error_task = Task(
        producer=superlink_as_producer,
        consumer=superlink_as_consumer,
        created_at=created_at,
        ttl=remaining_ttl,
        ancestry=[ref_taskins.task_id],
        task_type=ref_task.task_type,
        error=Error(
            code=ErrorCode.REPLY_MESSAGE_UNAVAILABLE,
            reason=REPLY_MESSAGE_UNAVAILABLE_ERROR_REASON,
        ),
    )
    return TaskRes(
        task_id=reply_id,
        group_id=ref_taskins.group_id,
        run_id=ref_taskins.run_id,
        task=error_task,
    )
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
def has_expired(task_ins_or_res: Union[TaskIns, TaskRes], current_time: float) -> bool:
    """Return True if the TaskIns/TaskRes expired strictly before `current_time`.

    The expiry instant is the task's `created_at` plus its `ttl`; a message
    whose expiry instant equals `current_time` is not yet considered expired.
    """
    task = task_ins_or_res.task
    expires_at = task.ttl + task.created_at
    return expires_at < current_time
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
def verify_taskins_ids(
    inquired_taskins_ids: set[UUID],
    found_taskins_dict: dict[UUID, TaskIns],
    current_time: Optional[float] = None,
    update_set: bool = True,
) -> dict[UUID, TaskRes]:
    """Verify found TaskIns and generate error TaskRes for invalid ones.

    A TaskIns ID is invalid when it has no entry in `found_taskins_dict` or
    when the found TaskIns has expired at `current_time`.

    Parameters
    ----------
    inquired_taskins_ids : set[UUID]
        Set of TaskIns IDs for which to generate error TaskRes if invalid.
    found_taskins_dict : dict[UUID, TaskIns]
        Dictionary containing all found TaskIns indexed by their IDs.
    current_time : Optional[float] (default: None)
        The current time to check for expiration. If set to `None`, the current
        time will automatically be set to the current timestamp using
        `now().timestamp()`. Any other value, including `0.0`, is used as given.
    update_set : bool (default: True)
        If True, `inquired_taskins_ids` is updated in place to remove the
        invalid IDs.

    Returns
    -------
    dict[UUID, TaskRes]
        A dictionary of error TaskRes indexed by the corresponding TaskIns ID.
    """
    ret_dict: dict[UUID, TaskRes] = {}
    # Test against None explicitly: a truthiness check would silently replace
    # an explicit timestamp of 0.0 with the wall-clock time.
    current = now().timestamp() if current_time is None else current_time
    # Iterate over a snapshot because the set may shrink inside the loop
    for taskins_id in list(inquired_taskins_ids):
        # Generate error TaskRes if the task_ins doesn't exist or has expired
        taskins = found_taskins_dict.get(taskins_id)
        if taskins is None or has_expired(taskins, current):
            if update_set:
                inquired_taskins_ids.remove(taskins_id)
            ret_dict[taskins_id] = create_taskres_for_unavailable_taskins(taskins_id)
    return ret_dict
|
|
344
|
+
|
|
345
|
+
|
|
346
|
+
def verify_found_taskres(
    inquired_taskins_ids: set[UUID],
    found_taskins_dict: dict[UUID, TaskIns],
    found_taskres_list: list[TaskRes],
    current_time: Optional[float] = None,
    update_set: bool = True,
) -> dict[UUID, TaskRes]:
    """Verify found TaskRes and generate error TaskRes for invalid ones.

    Each found TaskRes is matched to its TaskIns through the first entry of
    its ancestry. An expired TaskRes is replaced by an error TaskRes built
    from the matching TaskIns. Every returned TaskRes, original or
    replacement, gets its `delivered_at` set to the current time.

    Parameters
    ----------
    inquired_taskins_ids : set[UUID]
        Set of TaskIns IDs for which to generate error TaskRes if invalid.
    found_taskins_dict : dict[UUID, TaskIns]
        Dictionary containing all found TaskIns indexed by their IDs.
    found_taskres_list : list[TaskRes]
        List of found TaskRes to be verified.
    current_time : Optional[float] (default: None)
        The current time to check for expiration. If set to `None`, the current
        time will automatically be set to the current timestamp using
        `now().timestamp()`. Any other value, including `0.0`, is used as given.
    update_set : bool (default: True)
        If True, `inquired_taskins_ids` is updated in place to remove the IDs
        that have a TaskRes.

    Returns
    -------
    dict[UUID, TaskRes]
        A dictionary of TaskRes indexed by the corresponding TaskIns ID.

    Raises
    ------
    KeyError
        If `update_set` is True and a TaskRes refers to a TaskIns ID that is
        not in `inquired_taskins_ids`, or if an expired TaskRes refers to a
        TaskIns ID that is missing from `found_taskins_dict`.
    """
    ret_dict: dict[UUID, TaskRes] = {}
    # Test against None explicitly: a truthiness check would silently replace
    # an explicit timestamp of 0.0 with the wall-clock time.
    current = now().timestamp() if current_time is None else current_time
    for taskres in found_taskres_list:
        taskins_id = UUID(taskres.task.ancestry[0])
        if update_set:
            inquired_taskins_ids.remove(taskins_id)
        # Check if the TaskRes has expired
        if has_expired(taskres, current):
            # No need to insert the error TaskRes
            taskres = create_taskres_for_unavailable_taskres(
                found_taskins_dict[taskins_id]
            )
        taskres.task.delivered_at = now().isoformat()
        ret_dict[taskins_id] = taskres
    return ret_dict
|
flwr/simulation/app.py
CHANGED
|
@@ -16,10 +16,7 @@
|
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
import argparse
|
|
19
|
-
import
|
|
20
|
-
from logging import DEBUG, ERROR, INFO, WARN
|
|
21
|
-
from os.path import isfile
|
|
22
|
-
from pathlib import Path
|
|
19
|
+
from logging import DEBUG, ERROR, INFO
|
|
23
20
|
from queue import Queue
|
|
24
21
|
from time import sleep
|
|
25
22
|
from typing import Optional
|
|
@@ -27,6 +24,7 @@ from typing import Optional
|
|
|
27
24
|
from flwr.cli.config_utils import get_fab_metadata
|
|
28
25
|
from flwr.cli.install import install_from_fab
|
|
29
26
|
from flwr.common import EventType
|
|
27
|
+
from flwr.common.args import try_obtain_root_certificates
|
|
30
28
|
from flwr.common.config import (
|
|
31
29
|
get_flwr_dir,
|
|
32
30
|
get_fused_config_from_dir,
|
|
@@ -113,7 +111,7 @@ def flwr_simulation() -> None:
|
|
|
113
111
|
args = parser.parse_args()
|
|
114
112
|
|
|
115
113
|
log(INFO, "Starting Flower Simulation")
|
|
116
|
-
certificates =
|
|
114
|
+
certificates = try_obtain_root_certificates(args, args.superlink)
|
|
117
115
|
|
|
118
116
|
log(
|
|
119
117
|
DEBUG,
|
|
@@ -121,7 +119,7 @@ def flwr_simulation() -> None:
|
|
|
121
119
|
args.superlink,
|
|
122
120
|
)
|
|
123
121
|
run_simulation_process(
|
|
124
|
-
|
|
122
|
+
simulationio_api_address=args.superlink,
|
|
125
123
|
log_queue=log_queue,
|
|
126
124
|
run_once=args.run_once,
|
|
127
125
|
flwr_dir_=args.flwr_dir,
|
|
@@ -132,41 +130,8 @@ def flwr_simulation() -> None:
|
|
|
132
130
|
restore_output()
|
|
133
131
|
|
|
134
132
|
|
|
135
|
-
def _try_obtain_certificates(
|
|
136
|
-
args: argparse.Namespace,
|
|
137
|
-
) -> Optional[bytes]:
|
|
138
|
-
|
|
139
|
-
if args.insecure:
|
|
140
|
-
if args.root_certificates is not None:
|
|
141
|
-
sys.exit(
|
|
142
|
-
"Conflicting options: The '--insecure' flag disables HTTPS, "
|
|
143
|
-
"but '--root-certificates' was also specified. Please remove "
|
|
144
|
-
"the '--root-certificates' option when running in insecure mode, "
|
|
145
|
-
"or omit '--insecure' to use HTTPS."
|
|
146
|
-
)
|
|
147
|
-
log(
|
|
148
|
-
WARN,
|
|
149
|
-
"Option `--insecure` was set. Starting insecure HTTP channel to %s.",
|
|
150
|
-
args.superlink,
|
|
151
|
-
)
|
|
152
|
-
root_certificates = None
|
|
153
|
-
else:
|
|
154
|
-
# Load the certificates if provided, or load the system certificates
|
|
155
|
-
if not isfile(args.root_certificates):
|
|
156
|
-
sys.exit("Path argument `--root-certificates` does not point to a file.")
|
|
157
|
-
root_certificates = Path(args.root_certificates).read_bytes()
|
|
158
|
-
log(
|
|
159
|
-
DEBUG,
|
|
160
|
-
"Starting secure HTTPS channel to %s "
|
|
161
|
-
"with the following certificates: %s.",
|
|
162
|
-
args.superlink,
|
|
163
|
-
args.root_certificates,
|
|
164
|
-
)
|
|
165
|
-
return root_certificates
|
|
166
|
-
|
|
167
|
-
|
|
168
133
|
def run_simulation_process( # pylint: disable=R0914, disable=W0212, disable=R0915
|
|
169
|
-
|
|
134
|
+
simulationio_api_address: str,
|
|
170
135
|
log_queue: Queue[Optional[str]],
|
|
171
136
|
run_once: bool,
|
|
172
137
|
flwr_dir_: Optional[str] = None,
|
|
@@ -174,7 +139,7 @@ def run_simulation_process( # pylint: disable=R0914, disable=W0212, disable=R09
|
|
|
174
139
|
) -> None:
|
|
175
140
|
"""Run Flower Simulation process."""
|
|
176
141
|
conn = SimulationIoConnection(
|
|
177
|
-
simulationio_service_address=
|
|
142
|
+
simulationio_service_address=simulationio_api_address,
|
|
178
143
|
root_certificates=certificates,
|
|
179
144
|
)
|
|
180
145
|
|
flwr/simulation/legacy_app.py
CHANGED
|
@@ -30,7 +30,12 @@ from ray.util.scheduling_strategies import NodeAffinitySchedulingStrategy
|
|
|
30
30
|
from flwr.client import ClientFnExt
|
|
31
31
|
from flwr.common import EventType, event
|
|
32
32
|
from flwr.common.constant import NODE_ID_NUM_BYTES
|
|
33
|
-
from flwr.common.logger import
|
|
33
|
+
from flwr.common.logger import (
|
|
34
|
+
log,
|
|
35
|
+
set_logger_propagation,
|
|
36
|
+
warn_deprecated_feature,
|
|
37
|
+
warn_unsupported_feature,
|
|
38
|
+
)
|
|
34
39
|
from flwr.server.client_manager import ClientManager
|
|
35
40
|
from flwr.server.history import History
|
|
36
41
|
from flwr.server.server import Server, init_defaults, run_fl
|
|
@@ -108,6 +113,11 @@ def start_simulation(
|
|
|
108
113
|
) -> History:
|
|
109
114
|
"""Start a Ray-based Flower simulation server.
|
|
110
115
|
|
|
116
|
+
Warning
|
|
117
|
+
-------
|
|
118
|
+
This function is deprecated since 1.13.0. Use :code:`flwr run` to start a Flower
|
|
119
|
+
simulation.
|
|
120
|
+
|
|
111
121
|
Parameters
|
|
112
122
|
----------
|
|
113
123
|
client_fn : ClientFnExt
|
|
@@ -183,6 +193,16 @@ def start_simulation(
|
|
|
183
193
|
Object containing metrics from training.
|
|
184
194
|
""" # noqa: E501
|
|
185
195
|
# pylint: disable-msg=too-many-locals
|
|
196
|
+
msg = (
|
|
197
|
+
"flwr.simulation.start_simulation() is deprecated."
|
|
198
|
+
"\n\tInstead, use the `flwr run` CLI command to start a local simulation "
|
|
199
|
+
"in your Flower app, as shown for example below:"
|
|
200
|
+
"\n\n\t\t$ flwr new # Create a new Flower app from a template"
|
|
201
|
+
"\n\n\t\t$ flwr run # Run the Flower app in Simulation Mode"
|
|
202
|
+
"\n\n\tUsing `start_simulation()` is deprecated."
|
|
203
|
+
)
|
|
204
|
+
warn_deprecated_feature(name=msg)
|
|
205
|
+
|
|
186
206
|
event(
|
|
187
207
|
EventType.START_SIMULATION_ENTER,
|
|
188
208
|
{"num_clients": len(clients_ids) if clients_ids is not None else num_clients},
|
|
@@ -123,13 +123,8 @@ def run_simulation_from_cli() -> None:
|
|
|
123
123
|
fused_config = get_fused_config_from_dir(app_path, override_config)
|
|
124
124
|
|
|
125
125
|
# Create run
|
|
126
|
-
run = Run(
|
|
127
|
-
|
|
128
|
-
fab_id="",
|
|
129
|
-
fab_version="",
|
|
130
|
-
fab_hash="",
|
|
131
|
-
override_config=override_config,
|
|
132
|
-
)
|
|
126
|
+
run = Run.create_empty(run_id)
|
|
127
|
+
run.override_config = override_config
|
|
133
128
|
|
|
134
129
|
_run_simulation(
|
|
135
130
|
server_app_attr=server_app_attr,
|
|
@@ -333,14 +328,10 @@ def _main_loop(
|
|
|
333
328
|
try:
|
|
334
329
|
# Register run
|
|
335
330
|
log(DEBUG, "Pre-registering run with id %s", run.run_id)
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
starting_at=now().isoformat(),
|
|
341
|
-
running_at=now().isoformat(),
|
|
342
|
-
finished_at="",
|
|
343
|
-
)
|
|
331
|
+
run.status = RunStatus(Status.RUNNING, "", "")
|
|
332
|
+
run.starting_at = now().isoformat()
|
|
333
|
+
run.running_at = run.starting_at
|
|
334
|
+
state_factory.state().run_ids[run.run_id] = RunRecord(run=run) # type: ignore
|
|
344
335
|
|
|
345
336
|
if server_app_run_config is None:
|
|
346
337
|
server_app_run_config = {}
|
|
@@ -457,9 +448,7 @@ def _run_simulation(
|
|
|
457
448
|
# If no `Run` object is set, create one
|
|
458
449
|
if run is None:
|
|
459
450
|
run_id = generate_rand_int_from_bytes(RUN_ID_NUM_BYTES)
|
|
460
|
-
run = Run(
|
|
461
|
-
run_id=run_id, fab_id="", fab_version="", fab_hash="", override_config={}
|
|
462
|
-
)
|
|
451
|
+
run = Run.create_empty(run_id=run_id)
|
|
463
452
|
|
|
464
453
|
args = (
|
|
465
454
|
num_supernodes,
|
|
@@ -20,7 +20,7 @@ from typing import Optional, cast
|
|
|
20
20
|
|
|
21
21
|
import grpc
|
|
22
22
|
|
|
23
|
-
from flwr.common.constant import
|
|
23
|
+
from flwr.common.constant import SIMULATIONIO_API_DEFAULT_CLIENT_ADDRESS
|
|
24
24
|
from flwr.common.grpc import create_channel
|
|
25
25
|
from flwr.common.logger import log
|
|
26
26
|
from flwr.proto.simulationio_pb2_grpc import SimulationIoStub # pylint: disable=E0611
|
|
@@ -41,7 +41,7 @@ class SimulationIoConnection:
|
|
|
41
41
|
|
|
42
42
|
def __init__( # pylint: disable=too-many-arguments
|
|
43
43
|
self,
|
|
44
|
-
simulationio_service_address: str =
|
|
44
|
+
simulationio_service_address: str = SIMULATIONIO_API_DEFAULT_CLIENT_ADDRESS,
|
|
45
45
|
root_certificates: Optional[bytes] = None,
|
|
46
46
|
) -> None:
|
|
47
47
|
self._addr = simulationio_service_address
|
flwr/superexec/deployment.py
CHANGED
|
@@ -21,8 +21,13 @@ from typing import Optional
|
|
|
21
21
|
|
|
22
22
|
from typing_extensions import override
|
|
23
23
|
|
|
24
|
+
from flwr.cli.config_utils import get_fab_metadata
|
|
24
25
|
from flwr.common import ConfigsRecord, Context, RecordSet
|
|
25
|
-
from flwr.common.constant import
|
|
26
|
+
from flwr.common.constant import (
|
|
27
|
+
SERVERAPPIO_API_DEFAULT_CLIENT_ADDRESS,
|
|
28
|
+
Status,
|
|
29
|
+
SubStatus,
|
|
30
|
+
)
|
|
26
31
|
from flwr.common.logger import log
|
|
27
32
|
from flwr.common.typing import Fab, RunStatus, UserConfig
|
|
28
33
|
from flwr.server.superlink.ffs import Ffs
|
|
@@ -37,7 +42,7 @@ class DeploymentEngine(Executor):
|
|
|
37
42
|
|
|
38
43
|
Parameters
|
|
39
44
|
----------
|
|
40
|
-
|
|
45
|
+
serverappio_api_address: str (default: "127.0.0.1:9091")
|
|
41
46
|
Address of the SuperLink to connect to.
|
|
42
47
|
root_certificates: Optional[str] (default: None)
|
|
43
48
|
Specifies the path to the PEM-encoded root certificate file for
|
|
@@ -48,11 +53,11 @@ class DeploymentEngine(Executor):
|
|
|
48
53
|
|
|
49
54
|
def __init__(
|
|
50
55
|
self,
|
|
51
|
-
|
|
56
|
+
serverappio_api_address: str = SERVERAPPIO_API_DEFAULT_CLIENT_ADDRESS,
|
|
52
57
|
root_certificates: Optional[str] = None,
|
|
53
58
|
flwr_dir: Optional[str] = None,
|
|
54
59
|
) -> None:
|
|
55
|
-
self.
|
|
60
|
+
self.serverappio_api_address = serverappio_api_address
|
|
56
61
|
if root_certificates is None:
|
|
57
62
|
self.root_certificates = None
|
|
58
63
|
self.root_certificates_bytes = None
|
|
@@ -109,7 +114,7 @@ class DeploymentEngine(Executor):
|
|
|
109
114
|
if superlink_address := config.get("superlink"):
|
|
110
115
|
if not isinstance(superlink_address, str):
|
|
111
116
|
raise ValueError("The `superlink` value should be of type `str`.")
|
|
112
|
-
self.
|
|
117
|
+
self.serverappio_api_address = superlink_address
|
|
113
118
|
if root_certificates := config.get("root-certificates"):
|
|
114
119
|
if not isinstance(root_certificates, str):
|
|
115
120
|
raise ValueError(
|
|
@@ -132,9 +137,10 @@ class DeploymentEngine(Executor):
|
|
|
132
137
|
raise RuntimeError(
|
|
133
138
|
f"FAB ({fab.hash_str}) hash from request doesn't match contents"
|
|
134
139
|
)
|
|
140
|
+
fab_id, fab_version = get_fab_metadata(fab.content)
|
|
135
141
|
|
|
136
142
|
run_id = self.linkstate.create_run(
|
|
137
|
-
|
|
143
|
+
fab_id, fab_version, fab_hash, override_config, ConfigsRecord()
|
|
138
144
|
)
|
|
139
145
|
return run_id
|
|
140
146
|
|
flwr/superexec/exec_servicer.py
CHANGED
|
@@ -22,18 +22,25 @@ from typing import Any
|
|
|
22
22
|
|
|
23
23
|
import grpc
|
|
24
24
|
|
|
25
|
+
from flwr.common import now
|
|
25
26
|
from flwr.common.constant import LOG_STREAM_INTERVAL, Status
|
|
26
27
|
from flwr.common.logger import log
|
|
27
|
-
from flwr.common.serde import
|
|
28
|
+
from flwr.common.serde import (
|
|
29
|
+
configs_record_from_proto,
|
|
30
|
+
run_to_proto,
|
|
31
|
+
user_config_from_proto,
|
|
32
|
+
)
|
|
28
33
|
from flwr.proto import exec_pb2_grpc # pylint: disable=E0611
|
|
29
34
|
from flwr.proto.exec_pb2 import ( # pylint: disable=E0611
|
|
35
|
+
ListRunsRequest,
|
|
36
|
+
ListRunsResponse,
|
|
30
37
|
StartRunRequest,
|
|
31
38
|
StartRunResponse,
|
|
32
39
|
StreamLogsRequest,
|
|
33
40
|
StreamLogsResponse,
|
|
34
41
|
)
|
|
35
42
|
from flwr.server.superlink.ffs.ffs_factory import FfsFactory
|
|
36
|
-
from flwr.server.superlink.linkstate import LinkStateFactory
|
|
43
|
+
from flwr.server.superlink.linkstate import LinkState, LinkStateFactory
|
|
37
44
|
|
|
38
45
|
from .executor import Executor
|
|
39
46
|
|
|
@@ -105,3 +112,25 @@ class ExecServicer(exec_pb2_grpc.ExecServicer):
|
|
|
105
112
|
context.cancel()
|
|
106
113
|
|
|
107
114
|
time.sleep(LOG_STREAM_INTERVAL) # Sleep briefly to avoid busy waiting
|
|
115
|
+
|
|
116
|
+
def ListRuns(
    self, request: ListRunsRequest, context: grpc.ServicerContext
) -> ListRunsResponse:
    """Handle the `flwr ls` command for a single run or for all runs."""
    log(INFO, "ExecServicer.List")
    link_state = self.linkstate_factory.state()

    if request.HasField("run_id"):
        # `flwr ls --run-id <run_id>`: report only the requested run
        selected_run_ids = {request.run_id}
    else:
        # `flwr ls --runs`: report every run ID returned by the link state
        selected_run_ids = link_state.get_run_ids()
    return _create_list_runs_response(selected_run_ids, link_state)
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def _create_list_runs_response(run_ids: set[int], state: LinkState) -> ListRunsResponse:
    """Create response for `flwr ls --runs` and `flwr ls --run-id <run_id>`.

    Every run ID is looked up in `state`; IDs whose lookup yields a falsy
    result are left out of the response. The response also carries the
    current time as an ISO-formatted string.
    """
    # Look up all runs first, then serialize the ones that were found
    looked_up = {}
    for run_id in run_ids:
        looked_up[run_id] = state.get_run(run_id)

    serialized = {}
    for run_id, run in looked_up.items():
        if run:
            serialized[run_id] = run_to_proto(run)

    timestamp = now().isoformat()
    return ListRunsResponse(run_dict=serialized, now=timestamp)
|
flwr/superexec/simulation.py
CHANGED
|
@@ -21,6 +21,7 @@ from typing import Optional
|
|
|
21
21
|
|
|
22
22
|
from typing_extensions import override
|
|
23
23
|
|
|
24
|
+
from flwr.cli.config_utils import get_fab_metadata
|
|
24
25
|
from flwr.common import ConfigsRecord, Context, RecordSet
|
|
25
26
|
from flwr.common.logger import log
|
|
26
27
|
from flwr.common.typing import Fab, UserConfig
|
|
@@ -32,21 +33,11 @@ from .executor import Executor
|
|
|
32
33
|
|
|
33
34
|
|
|
34
35
|
class SimulationEngine(Executor):
|
|
35
|
-
"""Simulation engine executor.
|
|
36
|
-
|
|
37
|
-
Parameters
|
|
38
|
-
----------
|
|
39
|
-
num_supernodes: Opitonal[str] (default: None)
|
|
40
|
-
Total number of nodes to involve in the simulation.
|
|
41
|
-
"""
|
|
36
|
+
"""Simulation engine executor."""
|
|
42
37
|
|
|
43
38
|
def __init__(
|
|
44
39
|
self,
|
|
45
|
-
num_supernodes: Optional[int] = None,
|
|
46
|
-
verbose: Optional[bool] = False,
|
|
47
40
|
) -> None:
|
|
48
|
-
self.num_supernodes = num_supernodes
|
|
49
|
-
self.verbose = verbose
|
|
50
41
|
self.linkstate_factory: Optional[LinkStateFactory] = None
|
|
51
42
|
self.ffs_factory: Optional[FfsFactory] = None
|
|
52
43
|
|
|
@@ -77,40 +68,7 @@ class SimulationEngine(Executor):
|
|
|
77
68
|
self,
|
|
78
69
|
config: UserConfig,
|
|
79
70
|
) -> None:
|
|
80
|
-
"""Set executor config arguments.
|
|
81
|
-
|
|
82
|
-
Parameters
|
|
83
|
-
----------
|
|
84
|
-
config : UserConfig
|
|
85
|
-
A dictionary for configuration values.
|
|
86
|
-
Supported configuration key/value pairs:
|
|
87
|
-
- "num-supernodes": int
|
|
88
|
-
Number of nodes to register for the simulation.
|
|
89
|
-
- "verbose": bool
|
|
90
|
-
Set verbosity of logs.
|
|
91
|
-
"""
|
|
92
|
-
if num_supernodes := config.get("num-supernodes"):
|
|
93
|
-
if not isinstance(num_supernodes, int):
|
|
94
|
-
raise ValueError("The `num-supernodes` value should be of type `int`.")
|
|
95
|
-
self.num_supernodes = num_supernodes
|
|
96
|
-
elif self.num_supernodes is None:
|
|
97
|
-
log(
|
|
98
|
-
ERROR,
|
|
99
|
-
"To start a run with the simulation plugin, please specify "
|
|
100
|
-
"the number of SuperNodes. This can be done by using the "
|
|
101
|
-
"`--executor-config` argument when launching the SuperExec.",
|
|
102
|
-
)
|
|
103
|
-
raise ValueError(
|
|
104
|
-
"`num-supernodes` must not be `None`, it must be a valid "
|
|
105
|
-
"positive integer."
|
|
106
|
-
)
|
|
107
|
-
|
|
108
|
-
if verbose := config.get("verbose"):
|
|
109
|
-
if not isinstance(verbose, bool):
|
|
110
|
-
raise ValueError(
|
|
111
|
-
"The `verbose` value must be a string `true` or `false`."
|
|
112
|
-
)
|
|
113
|
-
self.verbose = verbose
|
|
71
|
+
"""Set executor config arguments."""
|
|
114
72
|
|
|
115
73
|
# pylint: disable=too-many-locals
|
|
116
74
|
@override
|
|
@@ -122,6 +80,12 @@ class SimulationEngine(Executor):
|
|
|
122
80
|
) -> Optional[int]:
|
|
123
81
|
"""Start run using the Flower Simulation Engine."""
|
|
124
82
|
try:
|
|
83
|
+
# Check that num-supernodes is set
|
|
84
|
+
if "num-supernodes" not in federation_options:
|
|
85
|
+
raise ValueError(
|
|
86
|
+
"Federation options doesn't contain key `num-supernodes`."
|
|
87
|
+
)
|
|
88
|
+
|
|
125
89
|
# Create run
|
|
126
90
|
fab = Fab(hashlib.sha256(fab_file).hexdigest(), fab_file)
|
|
127
91
|
fab_hash = self.ffs.put(fab.content, {})
|
|
@@ -129,9 +93,10 @@ class SimulationEngine(Executor):
|
|
|
129
93
|
raise RuntimeError(
|
|
130
94
|
f"FAB ({fab.hash_str}) hash from request doesn't match contents"
|
|
131
95
|
)
|
|
96
|
+
fab_id, fab_version = get_fab_metadata(fab.content)
|
|
132
97
|
|
|
133
98
|
run_id = self.linkstate.create_run(
|
|
134
|
-
|
|
99
|
+
fab_id, fab_version, fab_hash, override_config, federation_options
|
|
135
100
|
)
|
|
136
101
|
|
|
137
102
|
# Create an empty context for the Run
|
{flwr_nightly-1.13.0.dev20241111.dist-info → flwr_nightly-1.13.0.dev20241117.dist-info}/METADATA
RENAMED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: flwr-nightly
|
|
3
|
-
Version: 1.13.0.
|
|
4
|
-
Summary: Flower: A Friendly Federated
|
|
3
|
+
Version: 1.13.0.dev20241117
|
|
4
|
+
Summary: Flower: A Friendly Federated AI Framework
|
|
5
5
|
Home-page: https://flower.ai
|
|
6
6
|
License: Apache-2.0
|
|
7
7
|
Keywords: Artificial Intelligence,Federated AI,Federated Analytics,Federated Evaluation,Federated Learning,Flower,Machine Learning
|
|
@@ -34,6 +34,7 @@ Provides-Extra: rest
|
|
|
34
34
|
Provides-Extra: simulation
|
|
35
35
|
Requires-Dist: cryptography (>=42.0.4,<43.0.0)
|
|
36
36
|
Requires-Dist: grpcio (>=1.60.0,<2.0.0,!=1.64.2,<=1.64.3)
|
|
37
|
+
Requires-Dist: hatchling (>=1.25.0,<2.0.0)
|
|
37
38
|
Requires-Dist: iterators (>=0.0.2,<0.0.3)
|
|
38
39
|
Requires-Dist: numpy (>=1.26.0,<3.0.0)
|
|
39
40
|
Requires-Dist: pathspec (>=0.12.1,<0.13.0)
|
|
@@ -41,6 +42,7 @@ Requires-Dist: protobuf (>=4.25.2,<5.0.0)
|
|
|
41
42
|
Requires-Dist: pycryptodome (>=3.18.0,<4.0.0)
|
|
42
43
|
Requires-Dist: ray (==2.10.0) ; (python_version >= "3.9" and python_version < "3.12") and (extra == "simulation")
|
|
43
44
|
Requires-Dist: requests (>=2.31.0,<3.0.0) ; extra == "rest"
|
|
45
|
+
Requires-Dist: rich (>=13.5.0,<14.0.0)
|
|
44
46
|
Requires-Dist: starlette (>=0.31.0,<0.32.0) ; extra == "rest"
|
|
45
47
|
Requires-Dist: tomli (>=2.0.1,<3.0.0)
|
|
46
48
|
Requires-Dist: tomli-w (>=1.0.0,<2.0.0)
|
|
@@ -50,7 +52,7 @@ Project-URL: Documentation, https://flower.ai
|
|
|
50
52
|
Project-URL: Repository, https://github.com/adap/flower
|
|
51
53
|
Description-Content-Type: text/markdown
|
|
52
54
|
|
|
53
|
-
# Flower: A Friendly Federated
|
|
55
|
+
# Flower: A Friendly Federated AI Framework
|
|
54
56
|
|
|
55
57
|
<p align="center">
|
|
56
58
|
<a href="https://flower.ai/">
|
|
@@ -73,7 +75,7 @@ Description-Content-Type: text/markdown
|
|
|
73
75
|
[](https://hub.docker.com/u/flwr)
|
|
74
76
|
[](https://flower.ai/join-slack)
|
|
75
77
|
|
|
76
|
-
Flower (`flwr`) is a framework for building federated
|
|
78
|
+
Flower (`flwr`) is a framework for building federated AI systems. The
|
|
77
79
|
design of Flower is based on a few guiding principles:
|
|
78
80
|
|
|
79
81
|
- **Customizable**: Federated learning systems vary wildly from one use case to
|