flwr-nightly 1.13.0.dev20241021__py3-none-any.whl → 1.13.0.dev20241111__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of flwr-nightly might be problematic. Click here for more details.

Files changed (92)
  1. flwr/cli/build.py +2 -2
  2. flwr/cli/config_utils.py +97 -0
  3. flwr/cli/log.py +63 -97
  4. flwr/cli/new/templates/app/code/flwr_tune/dataset.py.tpl +1 -1
  5. flwr/cli/new/templates/app/pyproject.flowertune.toml.tpl +1 -0
  6. flwr/cli/new/templates/app/pyproject.tensorflow.toml.tpl +1 -1
  7. flwr/cli/run/run.py +34 -88
  8. flwr/client/app.py +23 -20
  9. flwr/client/clientapp/app.py +22 -18
  10. flwr/client/nodestate/__init__.py +25 -0
  11. flwr/client/nodestate/in_memory_nodestate.py +38 -0
  12. flwr/client/nodestate/nodestate.py +30 -0
  13. flwr/client/nodestate/nodestate_factory.py +37 -0
  14. flwr/client/{node_state.py → run_info_store.py} +4 -3
  15. flwr/client/supernode/app.py +6 -8
  16. flwr/common/args.py +83 -0
  17. flwr/common/config.py +10 -0
  18. flwr/common/constant.py +39 -5
  19. flwr/common/context.py +9 -4
  20. flwr/common/date.py +3 -3
  21. flwr/common/logger.py +108 -1
  22. flwr/common/object_ref.py +47 -16
  23. flwr/common/serde.py +24 -0
  24. flwr/common/telemetry.py +0 -6
  25. flwr/common/typing.py +10 -1
  26. flwr/proto/exec_pb2.py +14 -17
  27. flwr/proto/exec_pb2.pyi +14 -22
  28. flwr/proto/log_pb2.py +29 -0
  29. flwr/proto/log_pb2.pyi +39 -0
  30. flwr/proto/log_pb2_grpc.py +4 -0
  31. flwr/proto/log_pb2_grpc.pyi +4 -0
  32. flwr/proto/message_pb2.py +8 -8
  33. flwr/proto/message_pb2.pyi +4 -1
  34. flwr/proto/run_pb2.py +32 -27
  35. flwr/proto/run_pb2.pyi +26 -0
  36. flwr/proto/serverappio_pb2.py +52 -0
  37. flwr/proto/{driver_pb2.pyi → serverappio_pb2.pyi} +54 -0
  38. flwr/proto/serverappio_pb2_grpc.py +376 -0
  39. flwr/proto/serverappio_pb2_grpc.pyi +147 -0
  40. flwr/proto/simulationio_pb2.py +38 -0
  41. flwr/proto/simulationio_pb2.pyi +65 -0
  42. flwr/proto/simulationio_pb2_grpc.py +205 -0
  43. flwr/proto/simulationio_pb2_grpc.pyi +81 -0
  44. flwr/server/app.py +272 -105
  45. flwr/server/driver/driver.py +15 -1
  46. flwr/server/driver/grpc_driver.py +25 -36
  47. flwr/server/driver/inmemory_driver.py +6 -16
  48. flwr/server/run_serverapp.py +29 -23
  49. flwr/server/{superlink/state → serverapp}/__init__.py +3 -9
  50. flwr/server/serverapp/app.py +214 -0
  51. flwr/server/strategy/aggregate.py +4 -4
  52. flwr/server/strategy/fedadam.py +11 -1
  53. flwr/server/superlink/driver/__init__.py +1 -1
  54. flwr/server/superlink/driver/{driver_grpc.py → serverappio_grpc.py} +19 -16
  55. flwr/server/superlink/driver/{driver_servicer.py → serverappio_servicer.py} +125 -39
  56. flwr/server/superlink/fleet/grpc_adapter/grpc_adapter_servicer.py +4 -2
  57. flwr/server/superlink/fleet/grpc_bidi/grpc_server.py +2 -2
  58. flwr/server/superlink/fleet/grpc_rere/fleet_servicer.py +4 -2
  59. flwr/server/superlink/fleet/grpc_rere/server_interceptor.py +2 -2
  60. flwr/server/superlink/fleet/message_handler/message_handler.py +7 -7
  61. flwr/server/superlink/fleet/rest_rere/rest_api.py +7 -7
  62. flwr/server/superlink/fleet/vce/vce_api.py +23 -23
  63. flwr/server/superlink/linkstate/__init__.py +28 -0
  64. flwr/server/superlink/{state/in_memory_state.py → linkstate/in_memory_linkstate.py} +184 -36
  65. flwr/server/superlink/{state/state.py → linkstate/linkstate.py} +149 -19
  66. flwr/server/superlink/{state/state_factory.py → linkstate/linkstate_factory.py} +9 -9
  67. flwr/server/superlink/{state/sqlite_state.py → linkstate/sqlite_linkstate.py} +306 -65
  68. flwr/server/superlink/{state → linkstate}/utils.py +81 -30
  69. flwr/server/superlink/simulation/__init__.py +15 -0
  70. flwr/server/superlink/simulation/simulationio_grpc.py +65 -0
  71. flwr/server/superlink/simulation/simulationio_servicer.py +153 -0
  72. flwr/simulation/__init__.py +5 -1
  73. flwr/simulation/app.py +273 -345
  74. flwr/simulation/legacy_app.py +382 -0
  75. flwr/simulation/ray_transport/ray_client_proxy.py +2 -2
  76. flwr/simulation/run_simulation.py +57 -131
  77. flwr/simulation/simulationio_connection.py +86 -0
  78. flwr/superexec/app.py +6 -134
  79. flwr/superexec/deployment.py +61 -66
  80. flwr/superexec/exec_grpc.py +15 -8
  81. flwr/superexec/exec_servicer.py +36 -65
  82. flwr/superexec/executor.py +26 -7
  83. flwr/superexec/simulation.py +54 -107
  84. {flwr_nightly-1.13.0.dev20241021.dist-info → flwr_nightly-1.13.0.dev20241111.dist-info}/METADATA +5 -4
  85. {flwr_nightly-1.13.0.dev20241021.dist-info → flwr_nightly-1.13.0.dev20241111.dist-info}/RECORD +88 -69
  86. {flwr_nightly-1.13.0.dev20241021.dist-info → flwr_nightly-1.13.0.dev20241111.dist-info}/entry_points.txt +2 -0
  87. flwr/client/node_state_tests.py +0 -66
  88. flwr/proto/driver_pb2.py +0 -42
  89. flwr/proto/driver_pb2_grpc.py +0 -239
  90. flwr/proto/driver_pb2_grpc.pyi +0 -94
  91. {flwr_nightly-1.13.0.dev20241021.dist-info → flwr_nightly-1.13.0.dev20241111.dist-info}/LICENSE +0 -0
  92. {flwr_nightly-1.13.0.dev20241021.dist-info → flwr_nightly-1.13.0.dev20241111.dist-info}/WHEEL +0 -0
@@ -15,23 +15,21 @@
15
15
  """Deployment engine executor."""
16
16
 
17
17
  import hashlib
18
- import subprocess
19
18
  from logging import ERROR, INFO
20
19
  from pathlib import Path
21
20
  from typing import Optional
22
21
 
23
22
  from typing_extensions import override
24
23
 
25
- from flwr.cli.install import install_from_fab
26
- from flwr.common.constant import DRIVER_API_DEFAULT_ADDRESS
27
- from flwr.common.grpc import create_channel
24
+ from flwr.common import ConfigsRecord, Context, RecordSet
25
+ from flwr.common.constant import SERVERAPPIO_API_DEFAULT_ADDRESS, Status, SubStatus
28
26
  from flwr.common.logger import log
29
- from flwr.common.serde import fab_to_proto, user_config_to_proto
30
- from flwr.common.typing import Fab, UserConfig
31
- from flwr.proto.driver_pb2_grpc import DriverStub
32
- from flwr.proto.run_pb2 import CreateRunRequest # pylint: disable=E0611
27
+ from flwr.common.typing import Fab, RunStatus, UserConfig
28
+ from flwr.server.superlink.ffs import Ffs
29
+ from flwr.server.superlink.ffs.ffs_factory import FfsFactory
30
+ from flwr.server.superlink.linkstate import LinkState, LinkStateFactory
33
31
 
34
- from .executor import Executor, RunTracker
32
+ from .executor import Executor
35
33
 
36
34
 
37
35
  class DeploymentEngine(Executor):
@@ -50,7 +48,7 @@ class DeploymentEngine(Executor):
50
48
 
51
49
  def __init__(
52
50
  self,
53
- superlink: str = DRIVER_API_DEFAULT_ADDRESS,
51
+ superlink: str = SERVERAPPIO_API_DEFAULT_ADDRESS,
54
52
  root_certificates: Optional[str] = None,
55
53
  flwr_dir: Optional[str] = None,
56
54
  ) -> None:
@@ -62,7 +60,30 @@ class DeploymentEngine(Executor):
62
60
  self.root_certificates = root_certificates
63
61
  self.root_certificates_bytes = Path(root_certificates).read_bytes()
64
62
  self.flwr_dir = flwr_dir
65
- self.stub: Optional[DriverStub] = None
63
+ self.linkstate_factory: Optional[LinkStateFactory] = None
64
+ self.ffs_factory: Optional[FfsFactory] = None
65
+
66
+ @override
67
+ def initialize(
68
+ self, linkstate_factory: LinkStateFactory, ffs_factory: FfsFactory
69
+ ) -> None:
70
+ """Initialize the executor with the necessary factories."""
71
+ self.linkstate_factory = linkstate_factory
72
+ self.ffs_factory = ffs_factory
73
+
74
+ @property
75
+ def linkstate(self) -> LinkState:
76
+ """Return the LinkState."""
77
+ if self.linkstate_factory is None:
78
+ raise RuntimeError("Executor is not initialized.")
79
+ return self.linkstate_factory.state()
80
+
81
+ @property
82
+ def ffs(self) -> Ffs:
83
+ """Return the Flower File Storage (FFS)."""
84
+ if self.ffs_factory is None:
85
+ raise RuntimeError("Executor is not initialized.")
86
+ return self.ffs_factory.ffs()
66
87
 
67
88
  @override
68
89
  def set_config(
@@ -77,7 +98,7 @@ class DeploymentEngine(Executor):
77
98
  A dictionary for configuration values.
78
99
  Supported configuration key/value pairs:
79
100
  - "superlink": str
80
- The address of the SuperLink Driver API.
101
+ The address of the SuperLink ServerAppIo API.
81
102
  - "root-certificates": str
82
103
  The path to the root certificates.
83
104
  - "flwr-dir": str
@@ -101,85 +122,59 @@ class DeploymentEngine(Executor):
101
122
  raise ValueError("The `flwr-dir` value should be of type `str`.")
102
123
  self.flwr_dir = str(flwr_dir)
103
124
 
104
- def _connect(self) -> None:
105
- if self.stub is not None:
106
- return
107
- channel = create_channel(
108
- server_address=self.superlink,
109
- insecure=(self.root_certificates_bytes is None),
110
- root_certificates=self.root_certificates_bytes,
111
- )
112
- self.stub = DriverStub(channel)
113
-
114
125
  def _create_run(
115
126
  self,
116
127
  fab: Fab,
117
128
  override_config: UserConfig,
118
129
  ) -> int:
119
- if self.stub is None:
120
- self._connect()
130
+ fab_hash = self.ffs.put(fab.content, {})
131
+ if fab_hash != fab.hash_str:
132
+ raise RuntimeError(
133
+ f"FAB ({fab.hash_str}) hash from request doesn't match contents"
134
+ )
121
135
 
122
- assert self.stub is not None
136
+ run_id = self.linkstate.create_run(
137
+ None, None, fab_hash, override_config, ConfigsRecord()
138
+ )
139
+ return run_id
123
140
 
124
- req = CreateRunRequest(
125
- fab=fab_to_proto(fab),
126
- override_config=user_config_to_proto(override_config),
141
+ def _create_context(self, run_id: int) -> None:
142
+ """Register a Context for a Run."""
143
+ # Create an empty context for the Run
144
+ context = Context(
145
+ run_id=run_id, node_id=0, node_config={}, state=RecordSet(), run_config={}
127
146
  )
128
- res = self.stub.CreateRun(request=req)
129
- return int(res.run_id)
147
+
148
+ # Register the context at the LinkState
149
+ self.linkstate.set_serverapp_context(run_id=run_id, context=context)
130
150
 
131
151
  @override
132
152
  def start_run(
133
153
  self,
134
154
  fab_file: bytes,
135
155
  override_config: UserConfig,
136
- federation_config: UserConfig,
137
- ) -> Optional[RunTracker]:
156
+ federation_options: ConfigsRecord,
157
+ ) -> Optional[int]:
138
158
  """Start run using the Flower Deployment Engine."""
159
+ run_id = None
139
160
  try:
140
- # Install FAB to flwr dir
141
- install_from_fab(fab_file, None, True)
142
161
 
143
162
  # Call SuperLink to create run
144
- run_id: int = self._create_run(
163
+ run_id = self._create_run(
145
164
  Fab(hashlib.sha256(fab_file).hexdigest(), fab_file), override_config
146
165
  )
147
- log(INFO, "Created run %s", str(run_id))
148
166
 
149
- command = [
150
- "flower-server-app",
151
- "--run-id",
152
- str(run_id),
153
- "--superlink",
154
- str(self.superlink),
155
- ]
156
-
157
- if self.flwr_dir:
158
- command.append("--flwr-dir")
159
- command.append(self.flwr_dir)
160
-
161
- if self.root_certificates is None:
162
- command.append("--insecure")
163
- else:
164
- command.append("--root-certificates")
165
- command.append(self.root_certificates)
166
-
167
- # Execute the command
168
- proc = subprocess.Popen( # pylint: disable=consider-using-with
169
- command,
170
- stdout=subprocess.PIPE,
171
- stderr=subprocess.PIPE,
172
- text=True,
173
- )
174
- log(INFO, "Started run %s", str(run_id))
167
+ # Register context for the Run
168
+ self._create_context(run_id=run_id)
169
+ log(INFO, "Created run %s", str(run_id))
175
170
 
176
- return RunTracker(
177
- run_id=run_id,
178
- proc=proc,
179
- )
171
+ return run_id
180
172
  # pylint: disable-next=broad-except
181
173
  except Exception as e:
182
174
  log(ERROR, "Could not start run: %s", str(e))
175
+ if run_id:
176
+ run_status = RunStatus(Status.FINISHED, SubStatus.FAILED, str(e))
177
+ self.linkstate.update_run_status(run_id, new_status=run_status)
183
178
  return None
184
179
 
185
180
 
@@ -23,33 +23,40 @@ from flwr.common import GRPC_MAX_MESSAGE_LENGTH
23
23
  from flwr.common.logger import log
24
24
  from flwr.common.typing import UserConfig
25
25
  from flwr.proto.exec_pb2_grpc import add_ExecServicer_to_server
26
+ from flwr.server.superlink.ffs.ffs_factory import FfsFactory
26
27
  from flwr.server.superlink.fleet.grpc_bidi.grpc_server import generic_create_grpc_server
28
+ from flwr.server.superlink.linkstate import LinkStateFactory
27
29
 
28
30
  from .exec_servicer import ExecServicer
29
31
  from .executor import Executor
30
32
 
31
33
 
32
- def run_superexec_api_grpc(
34
+ # pylint: disable-next=too-many-arguments, too-many-positional-arguments
35
+ def run_exec_api_grpc(
33
36
  address: str,
34
37
  executor: Executor,
38
+ state_factory: LinkStateFactory,
39
+ ffs_factory: FfsFactory,
35
40
  certificates: Optional[tuple[bytes, bytes, bytes]],
36
41
  config: UserConfig,
37
42
  ) -> grpc.Server:
38
- """Run SuperExec API (gRPC, request-response)."""
43
+ """Run Exec API (gRPC, request-response)."""
39
44
  executor.set_config(config)
40
45
 
41
46
  exec_servicer: grpc.Server = ExecServicer(
47
+ linkstate_factory=state_factory,
48
+ ffs_factory=ffs_factory,
42
49
  executor=executor,
43
50
  )
44
- superexec_add_servicer_to_server_fn = add_ExecServicer_to_server
45
- superexec_grpc_server = generic_create_grpc_server(
46
- servicer_and_add_fn=(exec_servicer, superexec_add_servicer_to_server_fn),
51
+ exec_add_servicer_to_server_fn = add_ExecServicer_to_server
52
+ exec_grpc_server = generic_create_grpc_server(
53
+ servicer_and_add_fn=(exec_servicer, exec_add_servicer_to_server_fn),
47
54
  server_address=address,
48
55
  max_message_length=GRPC_MAX_MESSAGE_LENGTH,
49
56
  certificates=certificates,
50
57
  )
51
58
 
52
- log(INFO, "Starting Flower SuperExec gRPC server on %s", address)
53
- superexec_grpc_server.start()
59
+ log(INFO, "Flower Deployment Engine: Starting Exec API on %s", address)
60
+ exec_grpc_server.start()
54
61
 
55
- return superexec_grpc_server
62
+ return exec_grpc_server
@@ -15,9 +15,6 @@
15
15
  """SuperExec API servicer."""
16
16
 
17
17
 
18
- import select
19
- import sys
20
- import threading
21
18
  import time
22
19
  from collections.abc import Generator
23
20
  from logging import ERROR, INFO
@@ -25,8 +22,9 @@ from typing import Any
25
22
 
26
23
  import grpc
27
24
 
25
+ from flwr.common.constant import LOG_STREAM_INTERVAL, Status
28
26
  from flwr.common.logger import log
29
- from flwr.common.serde import user_config_from_proto
27
+ from flwr.common.serde import configs_record_from_proto, user_config_from_proto
30
28
  from flwr.proto import exec_pb2_grpc # pylint: disable=E0611
31
29
  from flwr.proto.exec_pb2 import ( # pylint: disable=E0611
32
30
  StartRunRequest,
@@ -34,18 +32,25 @@ from flwr.proto.exec_pb2 import ( # pylint: disable=E0611
34
32
  StreamLogsRequest,
35
33
  StreamLogsResponse,
36
34
  )
35
+ from flwr.server.superlink.ffs.ffs_factory import FfsFactory
36
+ from flwr.server.superlink.linkstate import LinkStateFactory
37
37
 
38
- from .executor import Executor, RunTracker
39
-
40
- SELECT_TIMEOUT = 1 # Timeout for selecting ready-to-read file descriptors (in seconds)
38
+ from .executor import Executor
41
39
 
42
40
 
43
41
  class ExecServicer(exec_pb2_grpc.ExecServicer):
44
42
  """SuperExec API servicer."""
45
43
 
46
- def __init__(self, executor: Executor) -> None:
44
+ def __init__(
45
+ self,
46
+ linkstate_factory: LinkStateFactory,
47
+ ffs_factory: FfsFactory,
48
+ executor: Executor,
49
+ ) -> None:
50
+ self.linkstate_factory = linkstate_factory
51
+ self.ffs_factory = ffs_factory
47
52
  self.executor = executor
48
- self.runs: dict[int, RunTracker] = {}
53
+ self.executor.initialize(linkstate_factory, ffs_factory)
49
54
 
50
55
  def StartRun(
51
56
  self, request: StartRunRequest, context: grpc.ServicerContext
@@ -53,84 +58,50 @@ class ExecServicer(exec_pb2_grpc.ExecServicer):
53
58
  """Create run ID."""
54
59
  log(INFO, "ExecServicer.StartRun")
55
60
 
56
- run = self.executor.start_run(
61
+ run_id = self.executor.start_run(
57
62
  request.fab.content,
58
63
  user_config_from_proto(request.override_config),
59
- user_config_from_proto(request.federation_config),
64
+ configs_record_from_proto(request.federation_options),
60
65
  )
61
66
 
62
- if run is None:
67
+ if run_id is None:
63
68
  log(ERROR, "Executor failed to start run")
64
69
  return StartRunResponse()
65
70
 
66
- self.runs[run.run_id] = run
67
-
68
- # Start a background thread to capture the log output
69
- capture_thread = threading.Thread(
70
- target=_capture_logs, args=(run,), daemon=True
71
- )
72
- capture_thread.start()
73
-
74
- return StartRunResponse(run_id=run.run_id)
71
+ return StartRunResponse(run_id=run_id)
75
72
 
76
73
  def StreamLogs( # pylint: disable=C0103
77
74
  self, request: StreamLogsRequest, context: grpc.ServicerContext
78
75
  ) -> Generator[StreamLogsResponse, Any, None]:
79
76
  """Get logs."""
80
77
  log(INFO, "ExecServicer.StreamLogs")
78
+ state = self.linkstate_factory.state()
79
+
80
+ # Retrieve run ID
81
+ run_id = request.run_id
81
82
 
82
83
  # Exit if `run_id` not found
83
- if request.run_id not in self.runs:
84
+ if not state.get_run(run_id):
84
85
  context.abort(grpc.StatusCode.NOT_FOUND, "Run ID not found")
85
86
 
86
- last_sent_index = 0
87
+ after_timestamp = request.after_timestamp + 1e-6
87
88
  while context.is_active():
88
- # Yield n'th row of logs, if n'th row < len(logs)
89
- logs = self.runs[request.run_id].logs
90
- for i in range(last_sent_index, len(logs)):
91
- yield StreamLogsResponse(log_output=logs[i])
92
- last_sent_index = len(logs)
89
+ log_msg, latest_timestamp = state.get_serverapp_log(run_id, after_timestamp)
90
+ if log_msg:
91
+ yield StreamLogsResponse(
92
+ log_output=log_msg,
93
+ latest_timestamp=latest_timestamp,
94
+ )
95
+ # Add a small epsilon to the latest timestamp to avoid getting
96
+ # the same log
97
+ after_timestamp = max(latest_timestamp + 1e-6, after_timestamp)
93
98
 
94
99
  # Wait for and continue to yield more log responses only if the
95
100
  # run isn't completed yet. If the run is finished, the entire log
96
101
  # is returned at this point and the server ends the stream.
97
- if self.runs[request.run_id].proc.poll() is not None:
102
+ run_status = state.get_run_status({run_id})[run_id]
103
+ if run_status.status == Status.FINISHED:
98
104
  log(INFO, "All logs for run ID `%s` returned", request.run_id)
99
- context.set_code(grpc.StatusCode.OK)
100
105
  context.cancel()
101
106
 
102
- time.sleep(1.0) # Sleep briefly to avoid busy waiting
103
-
104
-
105
- def _capture_logs(
106
- run: RunTracker,
107
- ) -> None:
108
- while True:
109
- # Explicitly check if Popen.poll() is None. Required for `pytest`.
110
- if run.proc.poll() is None:
111
- # Select streams only when ready to read
112
- ready_to_read, _, _ = select.select(
113
- [run.proc.stdout, run.proc.stderr],
114
- [],
115
- [],
116
- SELECT_TIMEOUT,
117
- )
118
- # Read from std* and append to RunTracker.logs
119
- for stream in ready_to_read:
120
- # Flush stdout to view output in real time
121
- readline = stream.readline()
122
- sys.stdout.write(readline)
123
- sys.stdout.flush()
124
- # Append to logs
125
- line = readline.rstrip()
126
- if line:
127
- run.logs.append(f"{line}")
128
-
129
- # Close std* to prevent blocking
130
- elif run.proc.poll() is not None:
131
- log(INFO, "Subprocess finished, exiting log capture")
132
- if run.proc.stdout:
133
- run.proc.stdout.close()
134
- if run.proc.stderr:
135
- run.proc.stderr.close()
136
- break
107
+ time.sleep(LOG_STREAM_INTERVAL) # Sleep briefly to avoid busy waiting
@@ -19,7 +19,10 @@ from dataclasses import dataclass, field
19
19
  from subprocess import Popen
20
20
  from typing import Optional
21
21
 
22
+ from flwr.common import ConfigsRecord
22
23
  from flwr.common.typing import UserConfig
24
+ from flwr.server.superlink.ffs.ffs_factory import FfsFactory
25
+ from flwr.server.superlink.linkstate import LinkStateFactory
23
26
 
24
27
 
25
28
  @dataclass
@@ -34,6 +37,23 @@ class RunTracker:
34
37
  class Executor(ABC):
35
38
  """Execute and monitor a Flower run."""
36
39
 
40
+ @abstractmethod
41
+ def initialize(
42
+ self, linkstate_factory: LinkStateFactory, ffs_factory: FfsFactory
43
+ ) -> None:
44
+ """Initialize the executor with the necessary factories.
45
+
46
+ This method sets up the executor by providing it with the factories required
47
+ to access the LinkState and the Flower File Storage (FFS) in the SuperLink.
48
+
49
+ Parameters
50
+ ----------
51
+ linkstate_factory : LinkStateFactory
52
+ The factory to create access to the LinkState.
53
+ ffs_factory : FfsFactory
54
+ The factory to create access to the Flower File Storage (FFS).
55
+ """
56
+
37
57
  @abstractmethod
38
58
  def set_config(
39
59
  self,
@@ -52,8 +72,8 @@ class Executor(ABC):
52
72
  self,
53
73
  fab_file: bytes,
54
74
  override_config: UserConfig,
55
- federation_config: UserConfig,
56
- ) -> Optional[RunTracker]:
75
+ federation_options: ConfigsRecord,
76
+ ) -> Optional[int]:
57
77
  """Start a run using the given Flower FAB ID and version.
58
78
 
59
79
  This method creates a new run on the SuperLink, returns its run_id
@@ -65,12 +85,11 @@ class Executor(ABC):
65
85
  The Flower App Bundle file bytes.
66
86
  override_config: UserConfig
67
87
  The config overrides dict sent by the user (using `flwr run`).
68
- federation_config: UserConfig
69
- The federation options dict sent by the user (using `flwr run`).
88
+ federation_options: ConfigsRecord
89
+ The federation options sent by the user (using `flwr run`).
70
90
 
71
91
  Returns
72
92
  -------
73
- run_id : Optional[RunTracker]
74
- The run_id and the associated process of the run created by the SuperLink,
75
- or `None` if it fails.
93
+ run_id : Optional[int]
94
+ The run_id of the run created by the SuperLink, or `None` if it fails.
76
95
  """
@@ -15,42 +15,20 @@
15
15
  """Simulation engine executor."""
16
16
 
17
17
 
18
- import json
19
- import subprocess
20
- import sys
21
- from logging import ERROR, INFO, WARN
18
+ import hashlib
19
+ from logging import ERROR, INFO
22
20
  from typing import Optional
23
21
 
24
22
  from typing_extensions import override
25
23
 
26
- from flwr.cli.config_utils import load_and_validate
27
- from flwr.cli.install import install_from_fab
28
- from flwr.common.config import unflatten_dict
29
- from flwr.common.constant import RUN_ID_NUM_BYTES
24
+ from flwr.common import ConfigsRecord, Context, RecordSet
30
25
  from flwr.common.logger import log
31
- from flwr.common.typing import UserConfig
32
- from flwr.server.superlink.state.utils import generate_rand_int_from_bytes
33
-
34
- from .executor import Executor, RunTracker
35
-
36
-
37
- def _user_config_to_str(user_config: UserConfig) -> str:
38
- """Convert override user config to string."""
39
- user_config_list_str = []
40
- for key, value in user_config.items():
41
- if isinstance(value, bool):
42
- user_config_list_str.append(f"{key}={str(value).lower()}")
43
- elif isinstance(value, (int, float)):
44
- user_config_list_str.append(f"{key}={value}")
45
- elif isinstance(value, str):
46
- user_config_list_str.append(f'{key}="{value}"')
47
- else:
48
- raise ValueError(
49
- "Only types `bool`, `float`, `int` and `str` are supported"
50
- )
26
+ from flwr.common.typing import Fab, UserConfig
27
+ from flwr.server.superlink.ffs import Ffs
28
+ from flwr.server.superlink.ffs.ffs_factory import FfsFactory
29
+ from flwr.server.superlink.linkstate import LinkState, LinkStateFactory
51
30
 
52
- user_config_str = ",".join(user_config_list_str)
53
- return user_config_str
31
+ from .executor import Executor
54
32
 
55
33
 
56
34
  class SimulationEngine(Executor):
@@ -69,6 +47,30 @@ class SimulationEngine(Executor):
69
47
  ) -> None:
70
48
  self.num_supernodes = num_supernodes
71
49
  self.verbose = verbose
50
+ self.linkstate_factory: Optional[LinkStateFactory] = None
51
+ self.ffs_factory: Optional[FfsFactory] = None
52
+
53
+ @override
54
+ def initialize(
55
+ self, linkstate_factory: LinkStateFactory, ffs_factory: FfsFactory
56
+ ) -> None:
57
+ """Initialize the executor with the necessary factories."""
58
+ self.linkstate_factory = linkstate_factory
59
+ self.ffs_factory = ffs_factory
60
+
61
+ @property
62
+ def linkstate(self) -> LinkState:
63
+ """Return the LinkState."""
64
+ if self.linkstate_factory is None:
65
+ raise RuntimeError("Executor is not initialized.")
66
+ return self.linkstate_factory.state()
67
+
68
+ @property
69
+ def ffs(self) -> Ffs:
70
+ """Return the Flower File Storage (FFS)."""
71
+ if self.ffs_factory is None:
72
+ raise RuntimeError("Executor is not initialized.")
73
+ return self.ffs_factory.ffs()
72
74
 
73
75
  @override
74
76
  def set_config(
@@ -116,92 +118,37 @@ class SimulationEngine(Executor):
116
118
  self,
117
119
  fab_file: bytes,
118
120
  override_config: UserConfig,
119
- federation_config: UserConfig,
120
- ) -> Optional[RunTracker]:
121
+ federation_options: ConfigsRecord,
122
+ ) -> Optional[int]:
121
123
  """Start run using the Flower Simulation Engine."""
122
- if self.num_supernodes is None:
123
- raise ValueError(
124
- "Error in `SuperExec` (`SimulationEngine` executor):\n\n"
125
- "`num-supernodes` must not be `None`, it must be a valid "
126
- "positive integer. In order to start this simulation executor "
127
- "with a specified number of `SuperNodes`, you can either provide "
128
- "a `--executor` that has been initialized with a number of nodes "
129
- "to the `flower-superexec` CLI, or `--executor-config num-supernodes=N`"
130
- "to the `flower-superexec` CLI."
131
- )
132
124
  try:
133
-
134
- # Install FAB to flwr dir
135
- fab_path = install_from_fab(fab_file, None, True)
136
-
137
- # Install FAB Python package
138
- subprocess.run(
139
- [sys.executable, "-m", "pip", "install", "--no-deps", str(fab_path)],
140
- stdout=None if self.verbose else subprocess.DEVNULL,
141
- stderr=None if self.verbose else subprocess.DEVNULL,
142
- check=True,
143
- )
144
-
145
- # Load and validate config
146
- config, errors, warnings = load_and_validate(fab_path / "pyproject.toml")
147
- if errors:
148
- raise ValueError(errors)
149
-
150
- if warnings:
151
- log(WARN, warnings)
152
-
153
- if config is None:
154
- raise ValueError(
155
- "Config extracted from FAB's pyproject.toml is not valid"
125
+ # Create run
126
+ fab = Fab(hashlib.sha256(fab_file).hexdigest(), fab_file)
127
+ fab_hash = self.ffs.put(fab.content, {})
128
+ if fab_hash != fab.hash_str:
129
+ raise RuntimeError(
130
+ f"FAB ({fab.hash_str}) hash from request doesn't match contents"
156
131
  )
157
132
 
158
- # Flatten federated config
159
- federation_config_flat = unflatten_dict(federation_config)
160
-
161
- num_supernodes = federation_config_flat.get(
162
- "num-supernodes", self.num_supernodes
133
+ run_id = self.linkstate.create_run(
134
+ None, None, fab_hash, override_config, federation_options
163
135
  )
164
- backend_cfg = federation_config_flat.get("backend", {})
165
- verbose: Optional[bool] = federation_config_flat.get("verbose")
166
-
167
- # In Simulation there is no SuperLink, still we create a run_id
168
- run_id = generate_rand_int_from_bytes(RUN_ID_NUM_BYTES)
169
- log(INFO, "Created run %s", str(run_id))
170
136
 
171
- # Prepare commnand
172
- command = [
173
- "flower-simulation",
174
- "--app",
175
- f"{str(fab_path)}",
176
- "--num-supernodes",
177
- f"{num_supernodes}",
178
- "--run-id",
179
- str(run_id),
180
- ]
181
-
182
- if backend_cfg:
183
- # Stringify as JSON
184
- command.extend(["--backend-config", json.dumps(backend_cfg)])
185
-
186
- if verbose:
187
- command.extend(["--verbose"])
188
-
189
- if override_config:
190
- override_config_str = _user_config_to_str(override_config)
191
- command.extend(["--run-config", f"{override_config_str}"])
192
-
193
- # Start Simulation
194
- proc = subprocess.Popen( # pylint: disable=consider-using-with
195
- command,
196
- text=True,
137
+ # Create an empty context for the Run
138
+ context = Context(
139
+ run_id=run_id,
140
+ node_id=0,
141
+ node_config={},
142
+ state=RecordSet(),
143
+ run_config={},
197
144
  )
198
145
 
199
- log(INFO, "Started run %s", str(run_id))
146
+ # Register the context at the LinkState
147
+ self.linkstate.set_serverapp_context(run_id=run_id, context=context)
200
148
 
201
- return RunTracker(
202
- run_id=run_id,
203
- proc=proc,
204
- )
149
+ log(INFO, "Created run %s", str(run_id))
150
+
151
+ return run_id
205
152
 
206
153
  # pylint: disable-next=broad-except
207
154
  except Exception as e: