flwr-nightly 1.13.0.dev20241106__py3-none-any.whl → 1.13.0.dev20241117__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of flwr-nightly might be problematic. Click here for more details.

Files changed (58) hide show
  1. flwr/cli/app.py +2 -0
  2. flwr/cli/build.py +37 -0
  3. flwr/cli/install.py +5 -3
  4. flwr/cli/ls.py +228 -0
  5. flwr/cli/run/run.py +16 -5
  6. flwr/client/app.py +68 -19
  7. flwr/client/clientapp/app.py +51 -35
  8. flwr/client/grpc_rere_client/connection.py +2 -12
  9. flwr/client/nodestate/__init__.py +25 -0
  10. flwr/client/nodestate/in_memory_nodestate.py +38 -0
  11. flwr/client/nodestate/nodestate.py +30 -0
  12. flwr/client/nodestate/nodestate_factory.py +37 -0
  13. flwr/client/rest_client/connection.py +4 -14
  14. flwr/client/supernode/app.py +57 -53
  15. flwr/common/args.py +148 -0
  16. flwr/common/config.py +10 -0
  17. flwr/common/constant.py +21 -7
  18. flwr/common/date.py +18 -0
  19. flwr/common/logger.py +6 -2
  20. flwr/common/object_ref.py +47 -16
  21. flwr/common/serde.py +10 -0
  22. flwr/common/typing.py +32 -11
  23. flwr/proto/exec_pb2.py +23 -17
  24. flwr/proto/exec_pb2.pyi +50 -20
  25. flwr/proto/exec_pb2_grpc.py +34 -0
  26. flwr/proto/exec_pb2_grpc.pyi +13 -0
  27. flwr/proto/run_pb2.py +32 -27
  28. flwr/proto/run_pb2.pyi +44 -1
  29. flwr/proto/simulationio_pb2.py +2 -2
  30. flwr/proto/simulationio_pb2_grpc.py +34 -0
  31. flwr/proto/simulationio_pb2_grpc.pyi +13 -0
  32. flwr/server/app.py +83 -87
  33. flwr/server/driver/driver.py +1 -1
  34. flwr/server/driver/grpc_driver.py +6 -20
  35. flwr/server/driver/inmemory_driver.py +1 -3
  36. flwr/server/run_serverapp.py +8 -238
  37. flwr/server/serverapp/app.py +44 -89
  38. flwr/server/strategy/aggregate.py +4 -4
  39. flwr/server/superlink/fleet/rest_rere/rest_api.py +10 -9
  40. flwr/server/superlink/linkstate/in_memory_linkstate.py +76 -62
  41. flwr/server/superlink/linkstate/linkstate.py +24 -9
  42. flwr/server/superlink/linkstate/sqlite_linkstate.py +87 -128
  43. flwr/server/superlink/linkstate/utils.py +191 -32
  44. flwr/server/superlink/simulation/simulationio_servicer.py +22 -1
  45. flwr/simulation/__init__.py +3 -1
  46. flwr/simulation/app.py +245 -352
  47. flwr/simulation/legacy_app.py +402 -0
  48. flwr/simulation/run_simulation.py +8 -19
  49. flwr/simulation/simulationio_connection.py +2 -2
  50. flwr/superexec/deployment.py +13 -7
  51. flwr/superexec/exec_servicer.py +32 -3
  52. flwr/superexec/executor.py +4 -3
  53. flwr/superexec/simulation.py +52 -145
  54. {flwr_nightly-1.13.0.dev20241106.dist-info → flwr_nightly-1.13.0.dev20241117.dist-info}/METADATA +10 -7
  55. {flwr_nightly-1.13.0.dev20241106.dist-info → flwr_nightly-1.13.0.dev20241117.dist-info}/RECORD +58 -51
  56. {flwr_nightly-1.13.0.dev20241106.dist-info → flwr_nightly-1.13.0.dev20241117.dist-info}/entry_points.txt +1 -0
  57. {flwr_nightly-1.13.0.dev20241106.dist-info → flwr_nightly-1.13.0.dev20241117.dist-info}/LICENSE +0 -0
  58. {flwr_nightly-1.13.0.dev20241106.dist-info → flwr_nightly-1.13.0.dev20241117.dist-info}/WHEEL +0 -0
@@ -0,0 +1,402 @@
1
+ # Copyright 2024 Flower Labs GmbH. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+ """Flower simulation app."""
16
+
17
+
18
+ import asyncio
19
+ import logging
20
+ import sys
21
+ import threading
22
+ import traceback
23
+ import warnings
24
+ from logging import ERROR, INFO
25
+ from typing import Any, Optional, Union
26
+
27
+ import ray
28
+ from ray.util.scheduling_strategies import NodeAffinitySchedulingStrategy
29
+
30
+ from flwr.client import ClientFnExt
31
+ from flwr.common import EventType, event
32
+ from flwr.common.constant import NODE_ID_NUM_BYTES
33
+ from flwr.common.logger import (
34
+ log,
35
+ set_logger_propagation,
36
+ warn_deprecated_feature,
37
+ warn_unsupported_feature,
38
+ )
39
+ from flwr.server.client_manager import ClientManager
40
+ from flwr.server.history import History
41
+ from flwr.server.server import Server, init_defaults, run_fl
42
+ from flwr.server.server_config import ServerConfig
43
+ from flwr.server.strategy import Strategy
44
+ from flwr.server.superlink.linkstate.utils import generate_rand_int_from_bytes
45
+ from flwr.simulation.ray_transport.ray_actor import (
46
+ ClientAppActor,
47
+ VirtualClientEngineActor,
48
+ VirtualClientEngineActorPool,
49
+ pool_size_from_resources,
50
+ )
51
+ from flwr.simulation.ray_transport.ray_client_proxy import RayActorClientProxy
52
+
53
# Help text describing which `num_clients` / `clients_ids` combinations
# `start_simulation` accepts.
# NOTE(review): this constant is not referenced anywhere else in this module
# as shown in the diff - confirm whether it is still needed.
+ INVALID_ARGUMENTS_START_SIMULATION = """
54
+ INVALID ARGUMENTS ERROR
55
+
56
+ Invalid Arguments in method:
57
+
58
+ `start_simulation(
59
+ *,
60
+ client_fn: ClientFn,
61
+ num_clients: int,
62
+ clients_ids: Optional[List[str]] = None,
63
+ client_resources: Optional[Dict[str, float]] = None,
64
+ server: Optional[Server] = None,
65
+ config: ServerConfig = None,
66
+ strategy: Optional[Strategy] = None,
67
+ client_manager: Optional[ClientManager] = None,
68
+ ray_init_args: Optional[Dict[str, Any]] = None,
69
+ ) -> None:`
70
+
71
+ REASON:
72
+ Method requires:
73
+ - Either `num_clients`[int] or `clients_ids`[List[str]]
74
+ to be set exclusively.
75
+ OR
76
+ - `len(clients_ids)` == `num_clients`
77
+
78
+ """
79
+
80
# Alias for the mapping built by `_create_node_id_to_partition_mapping`:
# key = randomly generated node ID, value = partition index in
# `range(num_clients)`.
+ NodeToPartitionMapping = dict[int, int]
81
+
82
+
83
def _create_node_id_to_partition_mapping(
    num_clients: int,
) -> NodeToPartitionMapping:
    """Build a mapping from freshly drawn node IDs to partition IDs.

    One random node ID is drawn per partition index in ``range(num_clients)``.
    A draw that collides with an already assigned node ID is discarded and
    repeated, so the returned mapping always holds ``num_clients`` entries.
    """
    mapping: NodeToPartitionMapping = {}  # {node-id: partition-id}
    for partition_id in range(num_clients):
        candidate = generate_rand_int_from_bytes(NODE_ID_NUM_BYTES)
        # Redraw until the candidate is not already taken
        while candidate in mapping:
            candidate = generate_rand_int_from_bytes(NODE_ID_NUM_BYTES)
        mapping[candidate] = partition_id
    return mapping
95
+
96
+
97
+ # pylint: disable=too-many-arguments,too-many-statements,too-many-branches
98
def start_simulation(
    *,
    client_fn: ClientFnExt,
    num_clients: int,
    clients_ids: Optional[list[str]] = None,  # UNSUPPORTED, WILL BE REMOVED
    client_resources: Optional[dict[str, float]] = None,
    server: Optional[Server] = None,
    config: Optional[ServerConfig] = None,
    strategy: Optional[Strategy] = None,
    client_manager: Optional[ClientManager] = None,
    ray_init_args: Optional[dict[str, Any]] = None,
    keep_initialised: Optional[bool] = False,
    actor_type: type[VirtualClientEngineActor] = ClientAppActor,
    actor_kwargs: Optional[dict[str, Any]] = None,
    actor_scheduling: Union[str, NodeAffinitySchedulingStrategy] = "DEFAULT",
) -> History:
    """Start a Ray-based Flower simulation server.

    Warning
    -------
    This function is deprecated since 1.13.0. Use :code:`flwr run` to start a
    Flower simulation.

    Parameters
    ----------
    client_fn : ClientFnExt
        A function creating `Client` instances. The function must have the signature
        `client_fn(context: Context)`. It should return
        a single client instance of type `Client`. Note that the created client
        instances are ephemeral and will often be destroyed after a single method
        invocation. Since client instances are not long-lived, they should not attempt
        to carry state over method invocations. Any state required by the instance
        (model, dataset, hyperparameters, ...) should be (re-)created in either the
        call to `client_fn` or the call to any of the client methods (e.g., load
        evaluation data in the `evaluate` method itself).
    num_clients : int
        The total number of clients in this simulation.
    clients_ids : Optional[List[str]]
        UNSUPPORTED, WILL BE REMOVED. USE `num_clients` INSTEAD.
        List `client_id`s for each client. This is only required if
        `num_clients` is not set. Setting both `num_clients` and `clients_ids`
        with `len(clients_ids)` not equal to `num_clients` generates an error.
        Using this argument will raise an error.
    client_resources : Optional[Dict[str, float]] (default: `{"num_cpus": 1, "num_gpus": 0.0}`)
        CPU and GPU resources for a single client. Supported keys
        are `num_cpus` and `num_gpus`. To understand the GPU utilization caused by
        `num_gpus`, as well as using custom resources, please consult the Ray
        documentation. The dictionary passed by the caller is never modified;
        a missing `num_cpus` entry is filled in on an internal copy.
    server : Optional[flwr.server.Server] (default: None).
        An implementation of the abstract base class `flwr.server.Server`. If no
        instance is provided, then `start_server` will create one.
    config: ServerConfig (default: None).
        Currently supported values are `num_rounds` (int, default: 1) and
        `round_timeout` in seconds (float, default: None).
    strategy : Optional[flwr.server.Strategy] (default: None)
        An implementation of the abstract base class `flwr.server.Strategy`. If
        no strategy is provided, then `start_server` will use
        `flwr.server.strategy.FedAvg`.
    client_manager : Optional[flwr.server.ClientManager] (default: None)
        An implementation of the abstract base class `flwr.server.ClientManager`.
        If no implementation is provided, then `start_simulation` will use
        `flwr.server.client_manager.SimpleClientManager`.
    ray_init_args : Optional[Dict[str, Any]] (default: None)
        Optional dictionary containing arguments for the call to `ray.init`.
        If ray_init_args is None (the default), Ray will be initialized with
        the following default args:

        { "ignore_reinit_error": True, "include_dashboard": False }

        An empty dictionary can be used (ray_init_args={}) to prevent any
        arguments from being passed to ray.init.
    keep_initialised: Optional[bool] (default: False)
        Set to True to prevent `ray.shutdown()` in case `ray.is_initialized()=True`.

    actor_type: VirtualClientEngineActor (default: ClientAppActor)
        Optionally specify the type of actor to use. The actor object, which
        persists throughout the simulation, will be the process in charge of
        executing a ClientApp wrapping input argument `client_fn`.

    actor_kwargs: Optional[Dict[str, Any]] (default: None)
        If you want to create your own Actor classes, you might need to pass
        some input argument. You can use this dictionary for such purpose.

    actor_scheduling: Optional[Union[str, NodeAffinitySchedulingStrategy]]
        (default: "DEFAULT")
        Optional string ("DEFAULT" or "SPREAD") for the VCE to choose in which
        node the actor is placed. If you are an advanced user needing more control
        you can use lower-level scheduling strategies to pin actors to specific
        compute nodes (e.g. via NodeAffinitySchedulingStrategy). Please note this
        is an advanced feature. For all details, please refer to the Ray documentation:
        https://docs.ray.io/en/latest/ray-core/scheduling/index.html

    Returns
    -------
    hist : flwr.server.history.History
        Object containing metrics from training.

    Raises
    ------
    RuntimeError
        If `run_fl` raises; the original exception is chained as the cause.
    SystemExit
        If `clients_ids` is passed (exit status 1).
    """  # noqa: E501
    # pylint: disable-msg=too-many-locals
    msg = (
        "flwr.simulation.start_simulation() is deprecated."
        "\n\tInstead, use the `flwr run` CLI command to start a local simulation "
        "in your Flower app, as shown for example below:"
        "\n\n\t\t$ flwr new # Create a new Flower app from a template"
        "\n\n\t\t$ flwr run # Run the Flower app in Simulation Mode"
        "\n\n\tUsing `start_simulation()` is deprecated."
    )
    warn_deprecated_feature(name=msg)

    event(
        EventType.START_SIMULATION_ENTER,
        {"num_clients": len(clients_ids) if clients_ids is not None else num_clients},
    )

    if clients_ids is not None:
        warn_unsupported_feature(
            "Passing `clients_ids` to `start_simulation` is deprecated and not longer "
            "used by `start_simulation`. Use `num_clients` exclusively instead."
        )
        log(ERROR, "`clients_ids` argument used.")
        # This is an error path: exit with a non-zero status so shells and CI
        # do not mistake it for a successful run.
        sys.exit(1)

    # Set logger propagation
    loop: Optional[asyncio.AbstractEventLoop]
    try:
        loop = asyncio.get_running_loop()
    except RuntimeError:
        # No event loop is running in this thread
        loop = None
    if loop and loop.is_running():
        # Set logger propagation to False to prevent duplicated log output in Colab.
        logger = logging.getLogger("flwr")
        _ = set_logger_propagation(logger, False)

    # Initialize server and server config
    initialized_server, initialized_config = init_defaults(
        server=server,
        config=config,
        strategy=strategy,
        client_manager=client_manager,
    )

    log(
        INFO,
        "Starting Flower simulation, config: %s",
        initialized_config,
    )

    # Create node-id to partition-id mapping
    nodes_mapping = _create_node_id_to_partition_mapping(num_clients)

    # Default arguments for Ray initialization. Only `None` selects the
    # defaults: an explicit empty dict means "pass nothing to ray.init", as
    # documented above.
    if ray_init_args is None:
        ray_init_args = {
            "ignore_reinit_error": True,
            "include_dashboard": False,
        }

    # Shut down Ray if it has already been initialized (unless asked not to)
    if ray.is_initialized() and not keep_initialised:
        ray.shutdown()

    # Initialize Ray
    ray.init(**ray_init_args)
    cluster_resources = ray.cluster_resources()
    log(
        INFO,
        "Flower VCE: Ray initialized with resources: %s",
        cluster_resources,
    )

    log(
        INFO,
        "Optimize your simulation with Flower VCE: "
        "https://flower.ai/docs/framework/how-to-run-simulations.html",
    )

    # Log the resources that a single client will be able to use
    resources: dict[str, float]
    if client_resources is None:
        log(
            INFO,
            "No `client_resources` specified. Using minimal resources for clients.",
        )
        resources = {"num_cpus": 1, "num_gpus": 0.0}
    else:
        # Work on a copy so the caller's dictionary is never mutated below
        resources = dict(client_resources)

    # Each client needs at the very least one CPU
    if "num_cpus" not in resources:
        warnings.warn(
            "No `num_cpus` specified in `client_resources`. "
            "Using `num_cpus=1` for each client.",
            stacklevel=2,
        )
        resources["num_cpus"] = 1

    log(
        INFO,
        "Flower VCE: Resources for each Virtual Client: %s",
        resources,
    )

    actor_args = {} if actor_kwargs is None else actor_kwargs

    # An actor factory. This is called N times to add N actors
    # to the pool. If at some point the pool can accommodate more actors
    # this will be called again.
    def create_actor_fn() -> type[VirtualClientEngineActor]:
        return actor_type.options(  # type: ignore
            **resources,
            scheduling_strategy=actor_scheduling,
        ).remote(**actor_args)

    # Instantiate ActorPool
    pool = VirtualClientEngineActorPool(
        create_actor_fn=create_actor_fn,
        client_resources=resources,
    )

    f_stop = threading.Event()

    # Periodically, check if the cluster has grown (i.e. a new
    # node has been added). If this happens, we likely want to grow
    # the actor pool by adding more Actors to it.
    def update_resources(f_stop: threading.Event) -> None:
        """Periodically check if more actors can be added to the pool.

        If so, extend the pool.
        """
        if not f_stop.is_set():
            num_max_actors = pool_size_from_resources(resources)
            if num_max_actors > pool.num_actors:
                num_new = num_max_actors - pool.num_actors
                log(
                    INFO, "The cluster expanded. Adding %s actors to the pool.", num_new
                )
                pool.add_actors_to_pool(num_actors=num_new)

            # Re-arm the check; the chain ends once `f_stop` is set
            threading.Timer(10, update_resources, [f_stop]).start()

    update_resources(f_stop)

    # From here on the monitor timer is armed, so everything runs under a
    # `finally` that stops it - including a failure while registering clients.
    try:
        log(
            INFO,
            "Flower VCE: Creating %s with %s actors",
            pool.__class__.__name__,
            pool.num_actors,
        )

        # Register one RayClientProxy object for each client with the ClientManager
        for node_id, partition_id in nodes_mapping.items():
            client_proxy = RayActorClientProxy(
                client_fn=client_fn,
                node_id=node_id,
                partition_id=partition_id,
                num_partitions=num_clients,
                actor_pool=pool,
            )
            initialized_server.client_manager().register(client=client_proxy)

        # pylint: disable=broad-except
        try:
            # Start training
            hist = run_fl(
                server=initialized_server,
                config=initialized_config,
            )
        except Exception as ex:
            log(ERROR, ex)
            log(ERROR, traceback.format_exc())
            log(
                ERROR,
                "Your simulation crashed :(. This could be because of several reasons. "
                "The most common are: "
                "\n\t > Sometimes, issues in the simulation code itself can cause crashes. "
                "It's always a good idea to double-check your code for any potential bugs "
                "or inconsistencies that might be contributing to the problem. "
                "For example: "
                "\n\t\t - You might be using a class attribute in your clients that "
                "hasn't been defined."
                "\n\t\t - There could be an incorrect method call to a 3rd party library "
                "(e.g., PyTorch)."
                "\n\t\t - The return types of methods in your clients/strategies might be "
                "incorrect."
                "\n\t > Your system couldn't fit a single VirtualClient: try lowering "
                "`client_resources`."
                "\n\t > All the actors in your pool crashed. This could be because: "
                "\n\t\t - You clients hit an out-of-memory (OOM) error and actors couldn't "
                "recover from it. Try launching your simulation with more generous "
                "`client_resources` setting (i.e. it seems %s is "
                "not enough for your run). Use fewer concurrent actors. "
                "\n\t\t - You were running a multi-node simulation and all worker nodes "
                "disconnected. The head node might still be alive but cannot accommodate "
                "any actor with resources: %s."
                "\nTake a look at the Flower simulation examples for guidance "
                "<https://flower.ai/docs/framework/how-to-run-simulations.html>.",
                resources,
                resources,
            )
            raise RuntimeError("Simulation crashed.") from ex
    finally:
        # Stop time monitoring resources in cluster
        f_stop.set()
        event(EventType.START_SIMULATION_LEAVE)

    return hist
@@ -123,13 +123,8 @@ def run_simulation_from_cli() -> None:
123
123
  fused_config = get_fused_config_from_dir(app_path, override_config)
124
124
 
125
125
  # Create run
126
- run = Run(
127
- run_id=run_id,
128
- fab_id="",
129
- fab_version="",
130
- fab_hash="",
131
- override_config=override_config,
132
- )
126
+ run = Run.create_empty(run_id)
127
+ run.override_config = override_config
133
128
 
134
129
  _run_simulation(
135
130
  server_app_attr=server_app_attr,
@@ -333,21 +328,17 @@ def _main_loop(
333
328
  try:
334
329
  # Register run
335
330
  log(DEBUG, "Pre-registering run with id %s", run.run_id)
336
- init_status = RunStatus(Status.RUNNING, "", "")
337
- state_factory.state().run_ids[run.run_id] = RunRecord( # type: ignore
338
- run=run,
339
- status=init_status,
340
- starting_at=now().isoformat(),
341
- running_at=now().isoformat(),
342
- finished_at="",
343
- )
331
+ run.status = RunStatus(Status.RUNNING, "", "")
332
+ run.starting_at = now().isoformat()
333
+ run.running_at = run.starting_at
334
+ state_factory.state().run_ids[run.run_id] = RunRecord(run=run) # type: ignore
344
335
 
345
336
  if server_app_run_config is None:
346
337
  server_app_run_config = {}
347
338
 
348
339
  # Initialize Driver
349
340
  driver = InMemoryDriver(state_factory=state_factory)
350
- driver.init_run(run_id=run.run_id)
341
+ driver.set_run(run_id=run.run_id)
351
342
 
352
343
  # Get and run ServerApp thread
353
344
  serverapp_th = run_serverapp_th(
@@ -457,9 +448,7 @@ def _run_simulation(
457
448
  # If no `Run` object is set, create one
458
449
  if run is None:
459
450
  run_id = generate_rand_int_from_bytes(RUN_ID_NUM_BYTES)
460
- run = Run(
461
- run_id=run_id, fab_id="", fab_version="", fab_hash="", override_config={}
462
- )
451
+ run = Run.create_empty(run_id=run_id)
463
452
 
464
453
  args = (
465
454
  num_supernodes,
@@ -20,7 +20,7 @@ from typing import Optional, cast
20
20
 
21
21
  import grpc
22
22
 
23
- from flwr.common.constant import SIMULATIONIO_API_DEFAULT_ADDRESS
23
+ from flwr.common.constant import SIMULATIONIO_API_DEFAULT_CLIENT_ADDRESS
24
24
  from flwr.common.grpc import create_channel
25
25
  from flwr.common.logger import log
26
26
  from flwr.proto.simulationio_pb2_grpc import SimulationIoStub # pylint: disable=E0611
@@ -41,7 +41,7 @@ class SimulationIoConnection:
41
41
 
42
42
  def __init__( # pylint: disable=too-many-arguments
43
43
  self,
44
- simulationio_service_address: str = SIMULATIONIO_API_DEFAULT_ADDRESS,
44
+ simulationio_service_address: str = SIMULATIONIO_API_DEFAULT_CLIENT_ADDRESS,
45
45
  root_certificates: Optional[bytes] = None,
46
46
  ) -> None:
47
47
  self._addr = simulationio_service_address
@@ -21,8 +21,13 @@ from typing import Optional
21
21
 
22
22
  from typing_extensions import override
23
23
 
24
+ from flwr.cli.config_utils import get_fab_metadata
24
25
  from flwr.common import ConfigsRecord, Context, RecordSet
25
- from flwr.common.constant import SERVERAPPIO_API_DEFAULT_ADDRESS, Status, SubStatus
26
+ from flwr.common.constant import (
27
+ SERVERAPPIO_API_DEFAULT_CLIENT_ADDRESS,
28
+ Status,
29
+ SubStatus,
30
+ )
26
31
  from flwr.common.logger import log
27
32
  from flwr.common.typing import Fab, RunStatus, UserConfig
28
33
  from flwr.server.superlink.ffs import Ffs
@@ -37,7 +42,7 @@ class DeploymentEngine(Executor):
37
42
 
38
43
  Parameters
39
44
  ----------
40
- superlink: str (default: "0.0.0.0:9091")
45
+ serverappio_api_address: str (default: "127.0.0.1:9091")
41
46
  Address of the SuperLink to connect to.
42
47
  root_certificates: Optional[str] (default: None)
43
48
  Specifies the path to the PEM-encoded root certificate file for
@@ -48,11 +53,11 @@ class DeploymentEngine(Executor):
48
53
 
49
54
  def __init__(
50
55
  self,
51
- superlink: str = SERVERAPPIO_API_DEFAULT_ADDRESS,
56
+ serverappio_api_address: str = SERVERAPPIO_API_DEFAULT_CLIENT_ADDRESS,
52
57
  root_certificates: Optional[str] = None,
53
58
  flwr_dir: Optional[str] = None,
54
59
  ) -> None:
55
- self.superlink = superlink
60
+ self.serverappio_api_address = serverappio_api_address
56
61
  if root_certificates is None:
57
62
  self.root_certificates = None
58
63
  self.root_certificates_bytes = None
@@ -109,7 +114,7 @@ class DeploymentEngine(Executor):
109
114
  if superlink_address := config.get("superlink"):
110
115
  if not isinstance(superlink_address, str):
111
116
  raise ValueError("The `superlink` value should be of type `str`.")
112
- self.superlink = superlink_address
117
+ self.serverappio_api_address = superlink_address
113
118
  if root_certificates := config.get("root-certificates"):
114
119
  if not isinstance(root_certificates, str):
115
120
  raise ValueError(
@@ -132,9 +137,10 @@ class DeploymentEngine(Executor):
132
137
  raise RuntimeError(
133
138
  f"FAB ({fab.hash_str}) hash from request doesn't match contents"
134
139
  )
140
+ fab_id, fab_version = get_fab_metadata(fab.content)
135
141
 
136
142
  run_id = self.linkstate.create_run(
137
- None, None, fab_hash, override_config, ConfigsRecord()
143
+ fab_id, fab_version, fab_hash, override_config, ConfigsRecord()
138
144
  )
139
145
  return run_id
140
146
 
@@ -153,7 +159,7 @@ class DeploymentEngine(Executor):
153
159
  self,
154
160
  fab_file: bytes,
155
161
  override_config: UserConfig,
156
- federation_config: UserConfig,
162
+ federation_options: ConfigsRecord,
157
163
  ) -> Optional[int]:
158
164
  """Start run using the Flower Deployment Engine."""
159
165
  run_id = None
@@ -22,18 +22,25 @@ from typing import Any
22
22
 
23
23
  import grpc
24
24
 
25
+ from flwr.common import now
25
26
  from flwr.common.constant import LOG_STREAM_INTERVAL, Status
26
27
  from flwr.common.logger import log
27
- from flwr.common.serde import user_config_from_proto
28
+ from flwr.common.serde import (
29
+ configs_record_from_proto,
30
+ run_to_proto,
31
+ user_config_from_proto,
32
+ )
28
33
  from flwr.proto import exec_pb2_grpc # pylint: disable=E0611
29
34
  from flwr.proto.exec_pb2 import ( # pylint: disable=E0611
35
+ ListRunsRequest,
36
+ ListRunsResponse,
30
37
  StartRunRequest,
31
38
  StartRunResponse,
32
39
  StreamLogsRequest,
33
40
  StreamLogsResponse,
34
41
  )
35
42
  from flwr.server.superlink.ffs.ffs_factory import FfsFactory
36
- from flwr.server.superlink.linkstate import LinkStateFactory
43
+ from flwr.server.superlink.linkstate import LinkState, LinkStateFactory
37
44
 
38
45
  from .executor import Executor
39
46
 
@@ -61,7 +68,7 @@ class ExecServicer(exec_pb2_grpc.ExecServicer):
61
68
  run_id = self.executor.start_run(
62
69
  request.fab.content,
63
70
  user_config_from_proto(request.override_config),
64
- user_config_from_proto(request.federation_config),
71
+ configs_record_from_proto(request.federation_options),
65
72
  )
66
73
 
67
74
  if run_id is None:
@@ -105,3 +112,25 @@ class ExecServicer(exec_pb2_grpc.ExecServicer):
105
112
  context.cancel()
106
113
 
107
114
  time.sleep(LOG_STREAM_INTERVAL) # Sleep briefly to avoid busy waiting
115
+
116
def ListRuns(
    self, request: ListRunsRequest, context: grpc.ServicerContext
) -> ListRunsResponse:
    """Handle `flwr ls` command.

    When the request carries a `run_id`, only that run is looked up
    (`flwr ls --run-id <run_id>`); otherwise every run ID known to the link
    state is listed (`flwr ls --runs`).
    """
    log(INFO, "ExecServicer.List")
    state = self.linkstate_factory.state()

    if request.HasField("run_id"):
        # Single-run query
        selected_run_ids = {request.run_id}
    else:
        # No run ID given: list everything the state knows about
        selected_run_ids = state.get_run_ids()
    return _create_list_runs_response(selected_run_ids, state)
128
+
129
+
130
def _create_list_runs_response(run_ids: set[int], state: LinkState) -> ListRunsResponse:
    """Create response for `flwr ls --runs` and `flwr ls --run-id <run_id>`.

    Each run ID is looked up in `state`; IDs whose lookup yields a falsy
    value are left out of the response. The response also carries the
    current timestamp in ISO format.
    """
    serialized_runs = {}
    for run_id in run_ids:
        run = state.get_run(run_id)
        if not run:
            # Nothing stored under this ID: skip it
            continue
        serialized_runs[run_id] = run_to_proto(run)
    timestamp = now().isoformat()
    return ListRunsResponse(run_dict=serialized_runs, now=timestamp)
@@ -19,6 +19,7 @@ from dataclasses import dataclass, field
19
19
  from subprocess import Popen
20
20
  from typing import Optional
21
21
 
22
+ from flwr.common import ConfigsRecord
22
23
  from flwr.common.typing import UserConfig
23
24
  from flwr.server.superlink.ffs.ffs_factory import FfsFactory
24
25
  from flwr.server.superlink.linkstate import LinkStateFactory
@@ -71,7 +72,7 @@ class Executor(ABC):
71
72
  self,
72
73
  fab_file: bytes,
73
74
  override_config: UserConfig,
74
- federation_config: UserConfig,
75
+ federation_options: ConfigsRecord,
75
76
  ) -> Optional[int]:
76
77
  """Start a run using the given Flower FAB ID and version.
77
78
 
@@ -84,8 +85,8 @@ class Executor(ABC):
84
85
  The Flower App Bundle file bytes.
85
86
  override_config: UserConfig
86
87
  The config overrides dict sent by the user (using `flwr run`).
87
- federation_config: UserConfig
88
- The federation options dict sent by the user (using `flwr run`).
88
+ federation_options: ConfigsRecord
89
+ The federation options sent by the user (using `flwr run`).
89
90
 
90
91
  Returns
91
92
  -------