flwr 1.13.0__py3-none-any.whl → 1.14.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120)
  1. flwr/cli/app.py +5 -0
  2. flwr/cli/build.py +1 -37
  3. flwr/cli/cli_user_auth_interceptor.py +86 -0
  4. flwr/cli/config_utils.py +19 -2
  5. flwr/cli/example.py +1 -0
  6. flwr/cli/install.py +2 -19
  7. flwr/cli/log.py +18 -36
  8. flwr/cli/login/__init__.py +22 -0
  9. flwr/cli/login/login.py +81 -0
  10. flwr/cli/ls.py +205 -106
  11. flwr/cli/new/__init__.py +1 -0
  12. flwr/cli/new/new.py +25 -14
  13. flwr/cli/new/templates/app/.gitignore.tpl +3 -0
  14. flwr/cli/new/templates/app/pyproject.baseline.toml.tpl +1 -1
  15. flwr/cli/new/templates/app/pyproject.flowertune.toml.tpl +3 -3
  16. flwr/cli/new/templates/app/pyproject.huggingface.toml.tpl +1 -1
  17. flwr/cli/new/templates/app/pyproject.jax.toml.tpl +1 -1
  18. flwr/cli/new/templates/app/pyproject.mlx.toml.tpl +2 -3
  19. flwr/cli/new/templates/app/pyproject.numpy.toml.tpl +1 -1
  20. flwr/cli/new/templates/app/pyproject.pytorch.toml.tpl +1 -1
  21. flwr/cli/new/templates/app/pyproject.sklearn.toml.tpl +1 -1
  22. flwr/cli/new/templates/app/pyproject.tensorflow.toml.tpl +1 -1
  23. flwr/cli/run/__init__.py +1 -0
  24. flwr/cli/run/run.py +89 -39
  25. flwr/cli/stop.py +130 -0
  26. flwr/cli/utils.py +172 -8
  27. flwr/client/app.py +14 -3
  28. flwr/client/client.py +1 -32
  29. flwr/client/clientapp/app.py +4 -8
  30. flwr/client/clientapp/utils.py +1 -0
  31. flwr/client/grpc_adapter_client/connection.py +1 -1
  32. flwr/client/grpc_client/connection.py +1 -1
  33. flwr/client/grpc_rere_client/connection.py +13 -7
  34. flwr/client/message_handler/message_handler.py +1 -2
  35. flwr/client/mod/comms_mods.py +1 -0
  36. flwr/client/mod/localdp_mod.py +1 -1
  37. flwr/client/nodestate/__init__.py +1 -0
  38. flwr/client/nodestate/nodestate.py +1 -0
  39. flwr/client/nodestate/nodestate_factory.py +1 -0
  40. flwr/client/numpy_client.py +0 -44
  41. flwr/client/rest_client/connection.py +3 -3
  42. flwr/client/supernode/app.py +2 -2
  43. flwr/common/address.py +1 -0
  44. flwr/common/args.py +1 -0
  45. flwr/common/auth_plugin/__init__.py +24 -0
  46. flwr/common/auth_plugin/auth_plugin.py +111 -0
  47. flwr/common/config.py +3 -1
  48. flwr/common/constant.py +17 -1
  49. flwr/common/logger.py +40 -0
  50. flwr/common/message.py +1 -0
  51. flwr/common/object_ref.py +57 -54
  52. flwr/common/pyproject.py +1 -0
  53. flwr/common/record/__init__.py +1 -0
  54. flwr/common/record/parametersrecord.py +1 -0
  55. flwr/common/retry_invoker.py +77 -0
  56. flwr/common/secure_aggregation/secaggplus_utils.py +2 -2
  57. flwr/common/telemetry.py +15 -4
  58. flwr/common/typing.py +12 -0
  59. flwr/common/version.py +1 -0
  60. flwr/proto/exec_pb2.py +38 -14
  61. flwr/proto/exec_pb2.pyi +107 -2
  62. flwr/proto/exec_pb2_grpc.py +102 -0
  63. flwr/proto/exec_pb2_grpc.pyi +39 -0
  64. flwr/proto/fab_pb2.py +4 -4
  65. flwr/proto/fab_pb2.pyi +4 -1
  66. flwr/proto/serverappio_pb2.py +18 -18
  67. flwr/proto/serverappio_pb2.pyi +8 -2
  68. flwr/proto/serverappio_pb2_grpc.py +34 -0
  69. flwr/proto/serverappio_pb2_grpc.pyi +13 -0
  70. flwr/proto/simulationio_pb2.py +2 -2
  71. flwr/proto/simulationio_pb2_grpc.py +34 -0
  72. flwr/proto/simulationio_pb2_grpc.pyi +13 -0
  73. flwr/server/app.py +62 -7
  74. flwr/server/compat/app_utils.py +7 -1
  75. flwr/server/driver/grpc_driver.py +11 -63
  76. flwr/server/driver/inmemory_driver.py +5 -1
  77. flwr/server/run_serverapp.py +8 -9
  78. flwr/server/serverapp/app.py +25 -10
  79. flwr/server/strategy/dpfedavg_fixed.py +1 -0
  80. flwr/server/superlink/driver/serverappio_grpc.py +1 -0
  81. flwr/server/superlink/driver/serverappio_servicer.py +82 -23
  82. flwr/server/superlink/ffs/disk_ffs.py +1 -0
  83. flwr/server/superlink/fleet/grpc_adapter/grpc_adapter_servicer.py +1 -0
  84. flwr/server/superlink/fleet/grpc_bidi/flower_service_servicer.py +1 -0
  85. flwr/server/superlink/fleet/grpc_rere/fleet_servicer.py +32 -12
  86. flwr/server/superlink/fleet/grpc_rere/server_interceptor.py +12 -11
  87. flwr/server/superlink/fleet/message_handler/message_handler.py +32 -5
  88. flwr/server/superlink/fleet/rest_rere/rest_api.py +4 -1
  89. flwr/server/superlink/fleet/vce/__init__.py +1 -0
  90. flwr/server/superlink/fleet/vce/backend/__init__.py +1 -0
  91. flwr/server/superlink/fleet/vce/backend/raybackend.py +1 -0
  92. flwr/server/superlink/linkstate/in_memory_linkstate.py +21 -30
  93. flwr/server/superlink/linkstate/linkstate.py +17 -2
  94. flwr/server/superlink/linkstate/sqlite_linkstate.py +30 -49
  95. flwr/server/superlink/simulation/simulationio_servicer.py +33 -0
  96. flwr/server/superlink/utils.py +65 -0
  97. flwr/simulation/app.py +59 -52
  98. flwr/simulation/ray_transport/ray_actor.py +1 -0
  99. flwr/simulation/ray_transport/utils.py +1 -0
  100. flwr/simulation/run_simulation.py +36 -22
  101. flwr/simulation/simulationio_connection.py +3 -0
  102. flwr/superexec/app.py +1 -0
  103. flwr/superexec/deployment.py +1 -0
  104. flwr/superexec/exec_grpc.py +19 -1
  105. flwr/superexec/exec_servicer.py +76 -2
  106. flwr/superexec/exec_user_auth_interceptor.py +101 -0
  107. flwr/superexec/executor.py +1 -0
  108. {flwr-1.13.0.dist-info → flwr-1.14.0.dist-info}/METADATA +8 -8
  109. {flwr-1.13.0.dist-info → flwr-1.14.0.dist-info}/RECORD +112 -112
  110. flwr/proto/common_pb2.py +0 -36
  111. flwr/proto/common_pb2.pyi +0 -121
  112. flwr/proto/common_pb2_grpc.py +0 -4
  113. flwr/proto/common_pb2_grpc.pyi +0 -4
  114. flwr/proto/control_pb2.py +0 -27
  115. flwr/proto/control_pb2.pyi +0 -7
  116. flwr/proto/control_pb2_grpc.py +0 -135
  117. flwr/proto/control_pb2_grpc.pyi +0 -53
  118. {flwr-1.13.0.dist-info → flwr-1.14.0.dist-info}/LICENSE +0 -0
  119. {flwr-1.13.0.dist-info → flwr-1.14.0.dist-info}/WHEEL +0 -0
  120. {flwr-1.13.0.dist-info → flwr-1.14.0.dist-info}/entry_points.txt +0 -0
flwr/server/superlink/utils.py ADDED
@@ -0,0 +1,65 @@
1
+ # Copyright 2024 Flower Labs GmbH. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+ """SuperLink utilities."""
16
+
17
+
18
+ from typing import Union
19
+
20
+ import grpc
21
+
22
+ from flwr.common.constant import Status, SubStatus
23
+ from flwr.common.typing import RunStatus
24
+ from flwr.server.superlink.linkstate import LinkState
25
+
26
+ _STATUS_TO_MSG = {
27
+ Status.PENDING: "Run is pending.",
28
+ Status.STARTING: "Run is starting.",
29
+ Status.RUNNING: "Run is running.",
30
+ Status.FINISHED: "Run is finished.",
31
+ }
32
+
33
+
34
+ def check_abort(
35
+ run_id: int,
36
+ abort_status_list: list[str],
37
+ state: LinkState,
38
+ ) -> Union[str, None]:
39
+ """Check if the status of the provided `run_id` is in `abort_status_list`."""
40
+ run_status: RunStatus = state.get_run_status({run_id})[run_id]
41
+
42
+ if run_status.status in abort_status_list:
43
+ msg = _STATUS_TO_MSG[run_status.status]
44
+ if run_status.sub_status == SubStatus.STOPPED:
45
+ msg += " Stopped by user."
46
+ return msg
47
+
48
+ return None
49
+
50
+
51
+ def abort_grpc_context(msg: Union[str, None], context: grpc.ServicerContext) -> None:
52
+ """Abort context with statuscode PERMISSION_DENIED if `msg` is not None."""
53
+ if msg is not None:
54
+ context.abort(grpc.StatusCode.PERMISSION_DENIED, msg)
55
+
56
+
57
+ def abort_if(
58
+ run_id: int,
59
+ abort_status_list: list[str],
60
+ state: LinkState,
61
+ context: grpc.ServicerContext,
62
+ ) -> None:
63
+ """Abort context if status of the provided `run_id` is in `abort_status_list`."""
64
+ msg = check_abort(run_id, abort_status_list, state)
65
+ abort_grpc_context(msg, context)
flwr/simulation/app.py CHANGED
@@ -16,6 +16,7 @@
16
16
 
17
17
 
18
18
  import argparse
19
+ import sys
19
20
  from logging import DEBUG, ERROR, INFO
20
21
  from queue import Queue
21
22
  from time import sleep
@@ -23,8 +24,9 @@ from typing import Optional
23
24
 
24
25
  from flwr.cli.config_utils import get_fab_metadata
25
26
  from flwr.cli.install import install_from_fab
26
- from flwr.common import EventType
27
- from flwr.common.args import try_obtain_root_certificates
27
+ from flwr.cli.utils import get_sha256_hash
28
+ from flwr.common import EventType, event
29
+ from flwr.common.args import add_args_flwr_app_common
28
30
  from flwr.common.config import (
29
31
  get_flwr_dir,
30
32
  get_fused_config_from_dir,
@@ -32,7 +34,11 @@ from flwr.common.config import (
32
34
  get_project_dir,
33
35
  unflatten_dict,
34
36
  )
35
- from flwr.common.constant import Status, SubStatus
37
+ from flwr.common.constant import (
38
+ SIMULATIONIO_API_DEFAULT_CLIENT_ADDRESS,
39
+ Status,
40
+ SubStatus,
41
+ )
36
42
  from flwr.common.logger import (
37
43
  log,
38
44
  mirror_output_to_queue,
@@ -43,6 +49,7 @@ from flwr.common.logger import (
43
49
  from flwr.common.serde import (
44
50
  configs_record_from_proto,
45
51
  context_from_proto,
52
+ context_to_proto,
46
53
  fab_from_proto,
47
54
  run_from_proto,
48
55
  run_status_to_proto,
@@ -69,61 +76,30 @@ def flwr_simulation() -> None:
69
76
  log_queue: Queue[Optional[str]] = Queue()
70
77
  mirror_output_to_queue(log_queue)
71
78
 
72
- parser = argparse.ArgumentParser(
73
- description="Run a Flower Simulation",
74
- )
75
- parser.add_argument(
76
- "--superlink",
77
- type=str,
78
- help="Address of SuperLink's SimulationIO API",
79
- )
80
- parser.add_argument(
81
- "--run-once",
82
- action="store_true",
83
- help="When set, this process will start a single simulation "
84
- "for a pending Run. If no pending run the process will exit. ",
85
- )
86
- parser.add_argument(
87
- "--flwr-dir",
88
- default=None,
89
- help="""The path containing installed Flower Apps.
90
- By default, this value is equal to:
91
-
92
- - `$FLWR_HOME/` if `$FLWR_HOME` is defined
93
- - `$XDG_DATA_HOME/.flwr/` if `$XDG_DATA_HOME` is defined
94
- - `$HOME/.flwr/` in all other cases
95
- """,
96
- )
97
- parser.add_argument(
98
- "--insecure",
99
- action="store_true",
100
- help="Run the server without HTTPS, regardless of whether certificate "
101
- "paths are provided. By default, the server runs with HTTPS enabled. "
102
- "Use this flag only if you understand the risks.",
103
- )
104
- parser.add_argument(
105
- "--root-certificates",
106
- metavar="ROOT_CERT",
107
- type=str,
108
- help="Specifies the path to the PEM-encoded root certificate file for "
109
- "establishing secure HTTPS connections.",
110
- )
111
- args = parser.parse_args()
79
+ args = _parse_args_run_flwr_simulation().parse_args()
112
80
 
113
81
  log(INFO, "Starting Flower Simulation")
114
- certificates = try_obtain_root_certificates(args, args.superlink)
82
+
83
+ if not args.insecure:
84
+ log(
85
+ ERROR,
86
+ "`flwr-simulation` does not support TLS yet. "
87
+ "Please use the '--insecure' flag.",
88
+ )
89
+ sys.exit(1)
115
90
 
116
91
  log(
117
92
  DEBUG,
118
- "Staring isolated `Simulation` connected to SuperLink DriverAPI at %s",
119
- args.superlink,
93
+ "Starting isolated `Simulation` connected to SuperLink SimulationAppIo API "
94
+ "at %s",
95
+ args.simulationio_api_address,
120
96
  )
121
97
  run_simulation_process(
122
- simulationio_api_address=args.superlink,
98
+ simulationio_api_address=args.simulationio_api_address,
123
99
  log_queue=log_queue,
124
100
  run_once=args.run_once,
125
101
  flwr_dir_=args.flwr_dir,
126
- certificates=certificates,
102
+ certificates=None,
127
103
  )
128
104
 
129
105
  # Restore stdout/stderr
@@ -225,10 +201,19 @@ def run_simulation_process( # pylint: disable=R0914, disable=W0212, disable=R09
225
201
  )
226
202
  backend_config: BackendConfig = fed_opt.get("backend", {})
227
203
  verbose: bool = fed_opt.get("verbose", False)
228
- enable_tf_gpu_growth: bool = fed_opt.get("enable_tf_gpu_growth", True)
204
+ enable_tf_gpu_growth: bool = fed_opt.get("enable_tf_gpu_growth", False)
205
+
206
+ event(
207
+ EventType.FLWR_SIMULATION_RUN_ENTER,
208
+ event_details={
209
+ "backend": "ray",
210
+ "num-supernodes": num_supernodes,
211
+ "run-id-hash": get_sha256_hash(run.run_id),
212
+ },
213
+ )
229
214
 
230
215
  # Launch the simulation
231
- _run_simulation(
216
+ updated_context = _run_simulation(
232
217
  server_app_attr=server_app_attr,
233
218
  client_app_attr=client_app_attr,
234
219
  num_supernodes=num_supernodes,
@@ -239,11 +224,11 @@ def run_simulation_process( # pylint: disable=R0914, disable=W0212, disable=R09
239
224
  verbose_logging=verbose,
240
225
  server_app_run_config=fused_config,
241
226
  is_app=True,
242
- exit_event=EventType.CLI_FLOWER_SIMULATION_LEAVE,
227
+ exit_event=EventType.FLWR_SIMULATION_RUN_LEAVE,
243
228
  )
244
229
 
245
230
  # Send resulting context
246
- context_proto = None # context_to_proto(updated_context)
231
+ context_proto = context_to_proto(updated_context)
247
232
  out_req = PushSimulationOutputsRequest(
248
233
  run_id=run.run_id, context=context_proto
249
234
  )
@@ -274,3 +259,25 @@ def run_simulation_process( # pylint: disable=R0914, disable=W0212, disable=R09
274
259
  # Stop the loop if `flwr-simulation` is expected to process a single run
275
260
  if run_once:
276
261
  break
262
+
263
+
264
+ def _parse_args_run_flwr_simulation() -> argparse.ArgumentParser:
265
+ """Parse flwr-simulation command line arguments."""
266
+ parser = argparse.ArgumentParser(
267
+ description="Run a Flower Simulation",
268
+ )
269
+ parser.add_argument(
270
+ "--simulationio-api-address",
271
+ default=SIMULATIONIO_API_DEFAULT_CLIENT_ADDRESS,
272
+ type=str,
273
+ help="Address of SuperLink's SimulationIO API (IPv4, IPv6, or a domain name)."
274
+ f"By default, it is set to {SIMULATIONIO_API_DEFAULT_CLIENT_ADDRESS}.",
275
+ )
276
+ parser.add_argument(
277
+ "--run-once",
278
+ action="store_true",
279
+ help="When set, this process will start a single simulation "
280
+ "for a pending Run. If no pending run the process will exit. ",
281
+ )
282
+ add_args_flwr_app_common(parser=parser)
283
+ return parser
flwr/simulation/ray_transport/ray_actor.py CHANGED
@@ -14,6 +14,7 @@
14
14
  # ==============================================================================
15
15
  """Ray-based Flower Actor and ActorPool implementation."""
16
16
 
17
+
17
18
  import threading
18
19
  from abc import ABC
19
20
  from logging import DEBUG, ERROR, WARNING
flwr/simulation/ray_transport/utils.py CHANGED
@@ -14,6 +14,7 @@
14
14
  # ==============================================================================
15
15
  """Utilities for Actors in the Virtual Client Engine."""
16
16
 
17
+
17
18
  import traceback
18
19
  import warnings
19
20
  from logging import ERROR
flwr/simulation/run_simulation.py CHANGED
@@ -14,6 +14,7 @@
14
14
  # ==============================================================================
15
15
  """Flower Simulation."""
16
16
 
17
+
17
18
  import argparse
18
19
  import asyncio
19
20
  import json
@@ -23,10 +24,11 @@ import threading
23
24
  import traceback
24
25
  from logging import DEBUG, ERROR, INFO, WARNING
25
26
  from pathlib import Path
26
- from time import sleep
27
+ from queue import Empty, Queue
27
28
  from typing import Any, Optional
28
29
 
29
30
  from flwr.cli.config_utils import load_and_validate
31
+ from flwr.cli.utils import get_sha256_hash
30
32
  from flwr.client import ClientApp
31
33
  from flwr.common import Context, EventType, RecordSet, event, log, now
32
34
  from flwr.common.config import get_fused_config_from_dir, parse_config_args
@@ -126,7 +128,7 @@ def run_simulation_from_cli() -> None:
126
128
  run = Run.create_empty(run_id)
127
129
  run.override_config = override_config
128
130
 
129
- _run_simulation(
131
+ _ = _run_simulation(
130
132
  server_app_attr=server_app_attr,
131
133
  client_app_attr=client_app_attr,
132
134
  num_supernodes=args.num_supernodes,
@@ -135,7 +137,6 @@ def run_simulation_from_cli() -> None:
135
137
  app_dir=args.app,
136
138
  run=run,
137
139
  enable_tf_gpu_growth=args.enable_tf_gpu_growth,
138
- delay_start=args.delay_start,
139
140
  verbose_logging=args.verbose,
140
141
  server_app_run_config=fused_config,
141
142
  is_app=True,
@@ -207,7 +208,7 @@ def run_simulation(
207
208
  "\n\tflwr.simulation.run_simulationt(...)",
208
209
  )
209
210
 
210
- _run_simulation(
211
+ _ = _run_simulation(
211
212
  num_supernodes=num_supernodes,
212
213
  client_app=client_app,
213
214
  server_app=server_app,
@@ -230,6 +231,7 @@ def run_serverapp_th(
230
231
  has_exception: threading.Event,
231
232
  enable_tf_gpu_growth: bool,
232
233
  run_id: int,
234
+ ctx_queue: "Queue[Context]",
233
235
  ) -> threading.Thread:
234
236
  """Run SeverApp in a thread."""
235
237
 
@@ -242,6 +244,7 @@ def run_serverapp_th(
242
244
  _server_app_run_config: UserConfig,
243
245
  _server_app_attr: Optional[str],
244
246
  _server_app: Optional[ServerApp],
247
+ _ctx_queue: "Queue[Context]",
245
248
  ) -> None:
246
249
  """Run SeverApp, after check if GPU memory growth has to be set.
247
250
 
@@ -262,13 +265,14 @@ def run_serverapp_th(
262
265
  )
263
266
 
264
267
  # Run ServerApp
265
- _run(
268
+ updated_context = _run(
266
269
  driver=_driver,
267
270
  context=context,
268
271
  server_app_dir=_server_app_dir,
269
272
  server_app_attr=_server_app_attr,
270
273
  loaded_server_app=_server_app,
271
274
  )
275
+ _ctx_queue.put(updated_context)
272
276
  except Exception as ex: # pylint: disable=broad-exception-caught
273
277
  log(ERROR, "ServerApp thread raised an exception: %s", ex)
274
278
  log(ERROR, traceback.format_exc())
@@ -292,6 +296,7 @@ def run_serverapp_th(
292
296
  server_app_run_config,
293
297
  server_app_attr,
294
298
  server_app,
299
+ ctx_queue,
295
300
  ),
296
301
  )
297
302
  serverapp_th.start()
@@ -308,14 +313,13 @@ def _main_loop(
308
313
  enable_tf_gpu_growth: bool,
309
314
  run: Run,
310
315
  exit_event: EventType,
311
- delay_start: int,
312
316
  flwr_dir: Optional[str] = None,
313
317
  client_app: Optional[ClientApp] = None,
314
318
  client_app_attr: Optional[str] = None,
315
319
  server_app: Optional[ServerApp] = None,
316
320
  server_app_attr: Optional[str] = None,
317
321
  server_app_run_config: Optional[UserConfig] = None,
318
- ) -> None:
322
+ ) -> Context:
319
323
  """Start ServerApp on a separate thread, then launch Simulation Engine."""
320
324
  # Initialize StateFactory
321
325
  state_factory = LinkStateFactory(":flwr-in-memory-state:")
@@ -325,6 +329,13 @@ def _main_loop(
325
329
  server_app_thread_has_exception = threading.Event()
326
330
  serverapp_th = None
327
331
  success = True
332
+ updated_context = Context(
333
+ run_id=run.run_id,
334
+ node_id=0,
335
+ node_config=UserConfig(),
336
+ state=RecordSet(),
337
+ run_config=UserConfig(),
338
+ )
328
339
  try:
329
340
  # Register run
330
341
  log(DEBUG, "Pre-registering run with id %s", run.run_id)
@@ -339,6 +350,7 @@ def _main_loop(
339
350
  # Initialize Driver
340
351
  driver = InMemoryDriver(state_factory=state_factory)
341
352
  driver.set_run(run_id=run.run_id)
353
+ output_context_queue: "Queue[Context]" = Queue()
342
354
 
343
355
  # Get and run ServerApp thread
344
356
  serverapp_th = run_serverapp_th(
@@ -351,11 +363,9 @@ def _main_loop(
351
363
  has_exception=server_app_thread_has_exception,
352
364
  enable_tf_gpu_growth=enable_tf_gpu_growth,
353
365
  run_id=run.run_id,
366
+ ctx_queue=output_context_queue,
354
367
  )
355
368
 
356
- # Buffer time so the `ServerApp` in separate thread is ready
357
- log(DEBUG, "Buffer time delay: %ds", delay_start)
358
- sleep(delay_start)
359
369
  # Start Simulation Engine
360
370
  vce.start_vce(
361
371
  num_supernodes=num_supernodes,
@@ -371,6 +381,11 @@ def _main_loop(
371
381
  flwr_dir=flwr_dir,
372
382
  )
373
383
 
384
+ updated_context = output_context_queue.get(timeout=3)
385
+
386
+ except Empty:
387
+ log(DEBUG, "Queue timeout. No context received.")
388
+
374
389
  except Exception as ex:
375
390
  log(ERROR, "An exception occurred !! %s", ex)
376
391
  log(ERROR, traceback.format_exc())
@@ -380,13 +395,20 @@ def _main_loop(
380
395
  finally:
381
396
  # Trigger stop event
382
397
  f_stop.set()
383
- event(exit_event, event_details={"success": success})
398
+ event(
399
+ exit_event,
400
+ event_details={
401
+ "run-id-hash": get_sha256_hash(run.run_id),
402
+ "success": success,
403
+ },
404
+ )
384
405
  if serverapp_th:
385
406
  serverapp_th.join()
386
407
  if server_app_thread_has_exception.is_set():
387
408
  raise RuntimeError("Exception in ServerApp thread")
388
409
 
389
410
  log(DEBUG, "Stopping Simulation Engine now.")
411
+ return updated_context
390
412
 
391
413
 
392
414
  # pylint: disable=too-many-arguments,too-many-locals,too-many-positional-arguments
@@ -404,10 +426,9 @@ def _run_simulation(
404
426
  flwr_dir: Optional[str] = None,
405
427
  run: Optional[Run] = None,
406
428
  enable_tf_gpu_growth: bool = False,
407
- delay_start: int = 5,
408
429
  verbose_logging: bool = False,
409
430
  is_app: bool = False,
410
- ) -> None:
431
+ ) -> Context:
411
432
  """Launch the Simulation Engine."""
412
433
  if backend_config is None:
413
434
  backend_config = {}
@@ -459,7 +480,6 @@ def _run_simulation(
459
480
  enable_tf_gpu_growth,
460
481
  run,
461
482
  exit_event,
462
- delay_start,
463
483
  flwr_dir,
464
484
  client_app,
465
485
  client_app_attr,
@@ -487,7 +507,8 @@ def _run_simulation(
487
507
  # Set logger propagation to False to prevent duplicated log output in Colab.
488
508
  logger = set_logger_propagation(logger, False)
489
509
 
490
- _main_loop(*args)
510
+ updated_context = _main_loop(*args)
511
+ return updated_context
491
512
 
492
513
 
493
514
  def _parse_args_run_simulation() -> argparse.ArgumentParser:
@@ -537,13 +558,6 @@ def _parse_args_run_simulation() -> argparse.ArgumentParser:
537
558
  "Read more about how `tf.config.experimental.set_memory_growth()` works in "
538
559
  "the TensorFlow documentation: https://www.tensorflow.org/api/stable.",
539
560
  )
540
- parser.add_argument(
541
- "--delay-start",
542
- type=int,
543
- default=3,
544
- help="Buffer time (in seconds) to delay the start the simulation engine after "
545
- "the `ServerApp`, which runs in a separate thread, has been launched.",
546
- )
547
561
  parser.add_argument(
548
562
  "--verbose",
549
563
  action="store_true",
flwr/simulation/simulationio_connection.py CHANGED
@@ -23,6 +23,7 @@ import grpc
23
23
  from flwr.common.constant import SIMULATIONIO_API_DEFAULT_CLIENT_ADDRESS
24
24
  from flwr.common.grpc import create_channel
25
25
  from flwr.common.logger import log
26
+ from flwr.common.retry_invoker import _make_simple_grpc_retry_invoker, _wrap_stub
26
27
  from flwr.proto.simulationio_pb2_grpc import SimulationIoStub # pylint: disable=E0611
27
28
 
28
29
 
@@ -48,6 +49,7 @@ class SimulationIoConnection:
48
49
  self._cert = root_certificates
49
50
  self._grpc_stub: Optional[SimulationIoStub] = None
50
51
  self._channel: Optional[grpc.Channel] = None
52
+ self._retry_invoker = _make_simple_grpc_retry_invoker()
51
53
 
52
54
  @property
53
55
  def _is_connected(self) -> bool:
@@ -72,6 +74,7 @@ class SimulationIoConnection:
72
74
  root_certificates=self._cert,
73
75
  )
74
76
  self._grpc_stub = SimulationIoStub(self._channel)
77
+ _wrap_stub(self._grpc_stub, self._retry_invoker)
75
78
  log(DEBUG, "[SimulationIO] Connected to %s", self._addr)
76
79
 
77
80
  def _disconnect(self) -> None:
flwr/superexec/app.py CHANGED
@@ -14,6 +14,7 @@
14
14
  # ==============================================================================
15
15
  """Flower SuperExec app."""
16
16
 
17
+
17
18
  import argparse
18
19
  import sys
19
20
  from logging import INFO
flwr/superexec/deployment.py CHANGED
@@ -14,6 +14,7 @@
14
14
  # ==============================================================================
15
15
  """Deployment engine executor."""
16
16
 
17
+
17
18
  import hashlib
18
19
  from logging import ERROR, INFO
19
20
  from pathlib import Path
flwr/superexec/exec_grpc.py CHANGED
@@ -14,18 +14,22 @@
14
14
  # ==============================================================================
15
15
  """SuperExec gRPC API."""
16
16
 
17
+
18
+ from collections.abc import Sequence
17
19
  from logging import INFO
18
20
  from typing import Optional
19
21
 
20
22
  import grpc
21
23
 
22
24
  from flwr.common import GRPC_MAX_MESSAGE_LENGTH
25
+ from flwr.common.auth_plugin import ExecAuthPlugin
23
26
  from flwr.common.logger import log
24
27
  from flwr.common.typing import UserConfig
25
28
  from flwr.proto.exec_pb2_grpc import add_ExecServicer_to_server
26
29
  from flwr.server.superlink.ffs.ffs_factory import FfsFactory
27
30
  from flwr.server.superlink.fleet.grpc_bidi.grpc_server import generic_create_grpc_server
28
31
  from flwr.server.superlink.linkstate import LinkStateFactory
32
+ from flwr.superexec.exec_user_auth_interceptor import ExecUserAuthInterceptor
29
33
 
30
34
  from .exec_servicer import ExecServicer
31
35
  from .executor import Executor
@@ -39,6 +43,7 @@ def run_exec_api_grpc(
39
43
  ffs_factory: FfsFactory,
40
44
  certificates: Optional[tuple[bytes, bytes, bytes]],
41
45
  config: UserConfig,
46
+ auth_plugin: Optional[ExecAuthPlugin] = None,
42
47
  ) -> grpc.Server:
43
48
  """Run Exec API (gRPC, request-response)."""
44
49
  executor.set_config(config)
@@ -47,16 +52,29 @@ def run_exec_api_grpc(
47
52
  linkstate_factory=state_factory,
48
53
  ffs_factory=ffs_factory,
49
54
  executor=executor,
55
+ auth_plugin=auth_plugin,
50
56
  )
57
+ interceptors: Optional[Sequence[grpc.ServerInterceptor]] = None
58
+ if auth_plugin is not None:
59
+ interceptors = [ExecUserAuthInterceptor(auth_plugin)]
51
60
  exec_add_servicer_to_server_fn = add_ExecServicer_to_server
52
61
  exec_grpc_server = generic_create_grpc_server(
53
62
  servicer_and_add_fn=(exec_servicer, exec_add_servicer_to_server_fn),
54
63
  server_address=address,
55
64
  max_message_length=GRPC_MAX_MESSAGE_LENGTH,
56
65
  certificates=certificates,
66
+ interceptors=interceptors,
57
67
  )
58
68
 
59
- log(INFO, "Flower Deployment Engine: Starting Exec API on %s", address)
69
+ if auth_plugin is None:
70
+ log(INFO, "Flower Deployment Engine: Starting Exec API on %s", address)
71
+ else:
72
+ log(
73
+ INFO,
74
+ "Flower Deployment Engine: Starting Exec API with user "
75
+ "authentication on %s",
76
+ address,
77
+ )
60
78
  exec_grpc_server.start()
61
79
 
62
80
  return exec_grpc_server
flwr/superexec/exec_servicer.py CHANGED
@@ -18,24 +18,33 @@
18
18
  import time
19
19
  from collections.abc import Generator
20
20
  from logging import ERROR, INFO
21
- from typing import Any
21
+ from typing import Any, Optional
22
+ from uuid import UUID
22
23
 
23
24
  import grpc
24
25
 
25
26
  from flwr.common import now
26
- from flwr.common.constant import LOG_STREAM_INTERVAL, Status
27
+ from flwr.common.auth_plugin import ExecAuthPlugin
28
+ from flwr.common.constant import LOG_STREAM_INTERVAL, Status, SubStatus
27
29
  from flwr.common.logger import log
28
30
  from flwr.common.serde import (
29
31
  configs_record_from_proto,
30
32
  run_to_proto,
31
33
  user_config_from_proto,
32
34
  )
35
+ from flwr.common.typing import RunStatus
33
36
  from flwr.proto import exec_pb2_grpc # pylint: disable=E0611
34
37
  from flwr.proto.exec_pb2 import ( # pylint: disable=E0611
38
+ GetAuthTokensRequest,
39
+ GetAuthTokensResponse,
40
+ GetLoginDetailsRequest,
41
+ GetLoginDetailsResponse,
35
42
  ListRunsRequest,
36
43
  ListRunsResponse,
37
44
  StartRunRequest,
38
45
  StartRunResponse,
46
+ StopRunRequest,
47
+ StopRunResponse,
39
48
  StreamLogsRequest,
40
49
  StreamLogsResponse,
41
50
  )
@@ -53,11 +62,13 @@ class ExecServicer(exec_pb2_grpc.ExecServicer):
53
62
  linkstate_factory: LinkStateFactory,
54
63
  ffs_factory: FfsFactory,
55
64
  executor: Executor,
65
+ auth_plugin: Optional[ExecAuthPlugin] = None,
56
66
  ) -> None:
57
67
  self.linkstate_factory = linkstate_factory
58
68
  self.ffs_factory = ffs_factory
59
69
  self.executor = executor
60
70
  self.executor.initialize(linkstate_factory, ffs_factory)
71
+ self.auth_plugin = auth_plugin
61
72
 
62
73
  def StartRun(
63
74
  self, request: StartRunRequest, context: grpc.ServicerContext
@@ -126,6 +137,69 @@ class ExecServicer(exec_pb2_grpc.ExecServicer):
126
137
  # Handle `flwr ls --run-id <run_id>`
127
138
  return _create_list_runs_response({request.run_id}, state)
128
139
 
140
+ def StopRun(
141
+ self, request: StopRunRequest, context: grpc.ServicerContext
142
+ ) -> StopRunResponse:
143
+ """Stop a given run ID."""
144
+ log(INFO, "ExecServicer.StopRun")
145
+ state = self.linkstate_factory.state()
146
+
147
+ # Exit if `run_id` not found
148
+ if not state.get_run(request.run_id):
149
+ context.abort(
150
+ grpc.StatusCode.NOT_FOUND, f"Run ID {request.run_id} not found"
151
+ )
152
+
153
+ run_status = state.get_run_status({request.run_id})[request.run_id]
154
+ if run_status.status == Status.FINISHED:
155
+ context.abort(
156
+ grpc.StatusCode.FAILED_PRECONDITION,
157
+ f"Run ID {request.run_id} is already finished",
158
+ )
159
+
160
+ update_success = state.update_run_status(
161
+ run_id=request.run_id,
162
+ new_status=RunStatus(Status.FINISHED, SubStatus.STOPPED, ""),
163
+ )
164
+
165
+ if update_success:
166
+ task_ids: set[UUID] = state.get_task_ids_from_run_id(request.run_id)
167
+
168
+ # Delete TaskIns and TaskRes for the `run_id`
169
+ state.delete_tasks(task_ids)
170
+
171
+ return StopRunResponse(success=update_success)
172
+
173
+ def GetLoginDetails(
174
+ self, request: GetLoginDetailsRequest, context: grpc.ServicerContext
175
+ ) -> GetLoginDetailsResponse:
176
+ """Start login."""
177
+ log(INFO, "ExecServicer.GetLoginDetails")
178
+ if self.auth_plugin is None:
179
+ context.abort(
180
+ grpc.StatusCode.UNIMPLEMENTED,
181
+ "ExecServicer initialized without user authentication",
182
+ )
183
+ raise grpc.RpcError() # This line is unreachable
184
+ return GetLoginDetailsResponse(
185
+ login_details=self.auth_plugin.get_login_details()
186
+ )
187
+
188
+ def GetAuthTokens(
189
+ self, request: GetAuthTokensRequest, context: grpc.ServicerContext
190
+ ) -> GetAuthTokensResponse:
191
+ """Get auth token."""
192
+ log(INFO, "ExecServicer.GetAuthTokens")
193
+ if self.auth_plugin is None:
194
+ context.abort(
195
+ grpc.StatusCode.UNIMPLEMENTED,
196
+ "ExecServicer initialized without user authentication",
197
+ )
198
+ raise grpc.RpcError() # This line is unreachable
199
+ return GetAuthTokensResponse(
200
+ auth_tokens=self.auth_plugin.get_auth_tokens(dict(request.auth_details))
201
+ )
202
+
129
203
 
130
204
  def _create_list_runs_response(run_ids: set[int], state: LinkState) -> ListRunsResponse:
131
205
  """Create response for `flwr ls --runs` and `flwr ls --run-id <run_id>`."""