flwr-nightly 1.13.0.dev20241019__py3-none-any.whl → 1.13.0.dev20241106__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of flwr-nightly might be problematic. Click here for more details.
- flwr/cli/build.py +2 -2
- flwr/cli/config_utils.py +97 -0
- flwr/cli/log.py +63 -97
- flwr/cli/new/templates/app/code/flwr_tune/dataset.py.tpl +1 -1
- flwr/cli/new/templates/app/pyproject.flowertune.toml.tpl +1 -0
- flwr/cli/new/templates/app/pyproject.tensorflow.toml.tpl +1 -1
- flwr/cli/run/run.py +18 -83
- flwr/client/app.py +13 -14
- flwr/client/clientapp/app.py +1 -2
- flwr/client/{node_state.py → run_info_store.py} +4 -3
- flwr/client/supernode/app.py +6 -8
- flwr/common/constant.py +39 -4
- flwr/common/context.py +9 -4
- flwr/common/date.py +3 -3
- flwr/common/logger.py +103 -0
- flwr/common/serde.py +24 -0
- flwr/common/telemetry.py +0 -6
- flwr/common/typing.py +9 -0
- flwr/proto/exec_pb2.py +6 -6
- flwr/proto/exec_pb2.pyi +8 -2
- flwr/proto/log_pb2.py +29 -0
- flwr/proto/log_pb2.pyi +39 -0
- flwr/proto/log_pb2_grpc.py +4 -0
- flwr/proto/log_pb2_grpc.pyi +4 -0
- flwr/proto/message_pb2.py +8 -8
- flwr/proto/message_pb2.pyi +4 -1
- flwr/proto/serverappio_pb2.py +52 -0
- flwr/proto/{driver_pb2.pyi → serverappio_pb2.pyi} +54 -0
- flwr/proto/serverappio_pb2_grpc.py +376 -0
- flwr/proto/serverappio_pb2_grpc.pyi +147 -0
- flwr/proto/simulationio_pb2.py +38 -0
- flwr/proto/simulationio_pb2.pyi +65 -0
- flwr/proto/simulationio_pb2_grpc.py +171 -0
- flwr/proto/simulationio_pb2_grpc.pyi +68 -0
- flwr/server/app.py +247 -105
- flwr/server/driver/driver.py +15 -1
- flwr/server/driver/grpc_driver.py +26 -33
- flwr/server/driver/inmemory_driver.py +6 -14
- flwr/server/run_serverapp.py +29 -23
- flwr/server/{superlink/state → serverapp}/__init__.py +3 -9
- flwr/server/serverapp/app.py +270 -0
- flwr/server/strategy/fedadam.py +11 -1
- flwr/server/superlink/driver/__init__.py +1 -1
- flwr/server/superlink/driver/{driver_grpc.py → serverappio_grpc.py} +19 -16
- flwr/server/superlink/driver/{driver_servicer.py → serverappio_servicer.py} +125 -39
- flwr/server/superlink/fleet/grpc_adapter/grpc_adapter_servicer.py +4 -2
- flwr/server/superlink/fleet/grpc_bidi/grpc_server.py +2 -2
- flwr/server/superlink/fleet/grpc_rere/fleet_servicer.py +4 -2
- flwr/server/superlink/fleet/grpc_rere/server_interceptor.py +2 -2
- flwr/server/superlink/fleet/message_handler/message_handler.py +7 -7
- flwr/server/superlink/fleet/rest_rere/rest_api.py +7 -7
- flwr/server/superlink/fleet/vce/vce_api.py +23 -23
- flwr/server/superlink/linkstate/__init__.py +28 -0
- flwr/server/superlink/{state/in_memory_state.py → linkstate/in_memory_linkstate.py} +180 -21
- flwr/server/superlink/{state/state.py → linkstate/linkstate.py} +144 -15
- flwr/server/superlink/{state/state_factory.py → linkstate/linkstate_factory.py} +9 -9
- flwr/server/superlink/{state/sqlite_state.py → linkstate/sqlite_linkstate.py} +300 -50
- flwr/server/superlink/{state → linkstate}/utils.py +84 -2
- flwr/server/superlink/simulation/__init__.py +15 -0
- flwr/server/superlink/simulation/simulationio_grpc.py +65 -0
- flwr/server/superlink/simulation/simulationio_servicer.py +132 -0
- flwr/simulation/__init__.py +2 -0
- flwr/simulation/app.py +1 -1
- flwr/simulation/ray_transport/ray_client_proxy.py +2 -2
- flwr/simulation/run_simulation.py +57 -131
- flwr/simulation/simulationio_connection.py +86 -0
- flwr/superexec/app.py +6 -134
- flwr/superexec/deployment.py +60 -65
- flwr/superexec/exec_grpc.py +15 -8
- flwr/superexec/exec_servicer.py +34 -63
- flwr/superexec/executor.py +22 -4
- flwr/superexec/simulation.py +13 -8
- {flwr_nightly-1.13.0.dev20241019.dist-info → flwr_nightly-1.13.0.dev20241106.dist-info}/METADATA +1 -1
- {flwr_nightly-1.13.0.dev20241019.dist-info → flwr_nightly-1.13.0.dev20241106.dist-info}/RECORD +77 -64
- {flwr_nightly-1.13.0.dev20241019.dist-info → flwr_nightly-1.13.0.dev20241106.dist-info}/entry_points.txt +1 -0
- flwr/client/node_state_tests.py +0 -66
- flwr/proto/driver_pb2.py +0 -42
- flwr/proto/driver_pb2_grpc.py +0 -239
- flwr/proto/driver_pb2_grpc.pyi +0 -94
- {flwr_nightly-1.13.0.dev20241019.dist-info → flwr_nightly-1.13.0.dev20241106.dist-info}/LICENSE +0 -0
- {flwr_nightly-1.13.0.dev20241019.dist-info → flwr_nightly-1.13.0.dev20241106.dist-info}/WHEEL +0 -0
flwr/superexec/app.py
CHANGED
|
@@ -16,20 +16,11 @@
|
|
|
16
16
|
|
|
17
17
|
import argparse
|
|
18
18
|
import sys
|
|
19
|
-
from logging import INFO
|
|
20
|
-
from pathlib import Path
|
|
21
|
-
from typing import Optional
|
|
19
|
+
from logging import INFO
|
|
22
20
|
|
|
23
|
-
import
|
|
24
|
-
|
|
25
|
-
from flwr.common import EventType, event, log
|
|
26
|
-
from flwr.common.address import parse_address
|
|
27
|
-
from flwr.common.config import parse_config_args
|
|
28
|
-
from flwr.common.constant import EXEC_API_DEFAULT_ADDRESS
|
|
29
|
-
from flwr.common.exit_handlers import register_exit_handlers
|
|
21
|
+
from flwr.common import log
|
|
30
22
|
from flwr.common.object_ref import load_app, validate
|
|
31
23
|
|
|
32
|
-
from .exec_grpc import run_superexec_api_grpc
|
|
33
24
|
from .executor import Executor
|
|
34
25
|
|
|
35
26
|
|
|
@@ -37,133 +28,14 @@ def run_superexec() -> None:
|
|
|
37
28
|
"""Run Flower SuperExec."""
|
|
38
29
|
log(INFO, "Starting Flower SuperExec")
|
|
39
30
|
|
|
40
|
-
event(EventType.RUN_SUPEREXEC_ENTER)
|
|
41
|
-
|
|
42
|
-
args = _parse_args_run_superexec().parse_args()
|
|
43
|
-
|
|
44
|
-
# Parse IP address
|
|
45
|
-
parsed_address = parse_address(args.address)
|
|
46
|
-
if not parsed_address:
|
|
47
|
-
sys.exit(f"SuperExec IP address ({args.address}) cannot be parsed.")
|
|
48
|
-
host, port, is_v6 = parsed_address
|
|
49
|
-
address = f"[{host}]:{port}" if is_v6 else f"{host}:{port}"
|
|
50
|
-
|
|
51
|
-
# Obtain certificates
|
|
52
|
-
certificates = _try_obtain_certificates(args)
|
|
53
|
-
|
|
54
|
-
# Start SuperExec API
|
|
55
|
-
superexec_server: grpc.Server = run_superexec_api_grpc(
|
|
56
|
-
address=address,
|
|
57
|
-
executor=_load_executor(args),
|
|
58
|
-
certificates=certificates,
|
|
59
|
-
config=parse_config_args(
|
|
60
|
-
[args.executor_config] if args.executor_config else args.executor_config
|
|
61
|
-
),
|
|
62
|
-
)
|
|
63
|
-
|
|
64
|
-
grpc_servers = [superexec_server]
|
|
65
|
-
|
|
66
|
-
# Graceful shutdown
|
|
67
|
-
register_exit_handlers(
|
|
68
|
-
event_type=EventType.RUN_SUPEREXEC_LEAVE,
|
|
69
|
-
grpc_servers=grpc_servers,
|
|
70
|
-
bckg_threads=None,
|
|
71
|
-
)
|
|
72
|
-
|
|
73
|
-
superexec_server.wait_for_termination()
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
def _parse_args_run_superexec() -> argparse.ArgumentParser:
|
|
77
|
-
"""Parse command line arguments for SuperExec."""
|
|
78
|
-
parser = argparse.ArgumentParser(
|
|
79
|
-
description="Start a Flower SuperExec",
|
|
80
|
-
)
|
|
81
|
-
parser.add_argument(
|
|
82
|
-
"--address",
|
|
83
|
-
help="SuperExec (gRPC) server address (IPv4, IPv6, or a domain name)",
|
|
84
|
-
default=EXEC_API_DEFAULT_ADDRESS,
|
|
85
|
-
)
|
|
86
|
-
parser.add_argument(
|
|
87
|
-
"--executor",
|
|
88
|
-
help="For example: `deployment:exec` or `project.package.module:wrapper.exec`.",
|
|
89
|
-
default="flwr.superexec.deployment:executor",
|
|
90
|
-
)
|
|
91
|
-
parser.add_argument(
|
|
92
|
-
"--executor-dir",
|
|
93
|
-
help="The directory for the executor.",
|
|
94
|
-
default=".",
|
|
95
|
-
)
|
|
96
|
-
parser.add_argument(
|
|
97
|
-
"--executor-config",
|
|
98
|
-
help="Key-value pairs for the executor config, separated by spaces. "
|
|
99
|
-
'For example:\n\n`--executor-config \'superlink="superlink:9091" '
|
|
100
|
-
'root-certificates="certificates/superlink-ca.crt"\'`',
|
|
101
|
-
)
|
|
102
|
-
parser.add_argument(
|
|
103
|
-
"--insecure",
|
|
104
|
-
action="store_true",
|
|
105
|
-
help="Run the SuperExec without HTTPS, regardless of whether certificate "
|
|
106
|
-
"paths are provided. By default, the server runs with HTTPS enabled. "
|
|
107
|
-
"Use this flag only if you understand the risks.",
|
|
108
|
-
)
|
|
109
|
-
parser.add_argument(
|
|
110
|
-
"--ssl-certfile",
|
|
111
|
-
help="SuperExec server SSL certificate file (as a path str) "
|
|
112
|
-
"to create a secure connection.",
|
|
113
|
-
type=str,
|
|
114
|
-
default=None,
|
|
115
|
-
)
|
|
116
|
-
parser.add_argument(
|
|
117
|
-
"--ssl-keyfile",
|
|
118
|
-
help="SuperExec server SSL private key file (as a path str) "
|
|
119
|
-
"to create a secure connection.",
|
|
120
|
-
type=str,
|
|
121
|
-
)
|
|
122
|
-
parser.add_argument(
|
|
123
|
-
"--ssl-ca-certfile",
|
|
124
|
-
help="SuperExec server SSL CA certificate file (as a path str) "
|
|
125
|
-
"to create a secure connection.",
|
|
126
|
-
type=str,
|
|
127
|
-
)
|
|
128
|
-
return parser
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
def _try_obtain_certificates(
|
|
132
|
-
args: argparse.Namespace,
|
|
133
|
-
) -> Optional[tuple[bytes, bytes, bytes]]:
|
|
134
|
-
# Obtain certificates
|
|
135
|
-
if args.insecure:
|
|
136
|
-
log(WARN, "Option `--insecure` was set. Starting insecure HTTP server.")
|
|
137
|
-
return None
|
|
138
|
-
# Check if certificates are provided
|
|
139
|
-
if args.ssl_certfile and args.ssl_keyfile and args.ssl_ca_certfile:
|
|
140
|
-
if not Path(args.ssl_ca_certfile).is_file():
|
|
141
|
-
sys.exit("Path argument `--ssl-ca-certfile` does not point to a file.")
|
|
142
|
-
if not Path(args.ssl_certfile).is_file():
|
|
143
|
-
sys.exit("Path argument `--ssl-certfile` does not point to a file.")
|
|
144
|
-
if not Path(args.ssl_keyfile).is_file():
|
|
145
|
-
sys.exit("Path argument `--ssl-keyfile` does not point to a file.")
|
|
146
|
-
certificates = (
|
|
147
|
-
Path(args.ssl_ca_certfile).read_bytes(), # CA certificate
|
|
148
|
-
Path(args.ssl_certfile).read_bytes(), # server certificate
|
|
149
|
-
Path(args.ssl_keyfile).read_bytes(), # server private key
|
|
150
|
-
)
|
|
151
|
-
return certificates
|
|
152
|
-
if args.ssl_certfile or args.ssl_keyfile or args.ssl_ca_certfile:
|
|
153
|
-
sys.exit(
|
|
154
|
-
"You need to provide valid file paths to `--ssl-certfile`, "
|
|
155
|
-
"`--ssl-keyfile`, and `—-ssl-ca-certfile` to create a secure "
|
|
156
|
-
"connection in SuperExec server (gRPC-rere)."
|
|
157
|
-
)
|
|
158
31
|
sys.exit(
|
|
159
|
-
"
|
|
160
|
-
"
|
|
161
|
-
"
|
|
162
|
-
"in insecure mode using '--insecure' if you understand the risks."
|
|
32
|
+
"Manually launching the SuperExec is deprecated. Since `flwr 1.13.0` "
|
|
33
|
+
"the executor service runs in the SuperLink. Launching it manually is not "
|
|
34
|
+
"recommended."
|
|
163
35
|
)
|
|
164
36
|
|
|
165
37
|
|
|
166
|
-
def
|
|
38
|
+
def load_executor(
|
|
167
39
|
args: argparse.Namespace,
|
|
168
40
|
) -> Executor:
|
|
169
41
|
"""Get the executor plugin."""
|
flwr/superexec/deployment.py
CHANGED
|
@@ -15,23 +15,21 @@
|
|
|
15
15
|
"""Deployment engine executor."""
|
|
16
16
|
|
|
17
17
|
import hashlib
|
|
18
|
-
import subprocess
|
|
19
18
|
from logging import ERROR, INFO
|
|
20
19
|
from pathlib import Path
|
|
21
20
|
from typing import Optional
|
|
22
21
|
|
|
23
22
|
from typing_extensions import override
|
|
24
23
|
|
|
25
|
-
from flwr.
|
|
26
|
-
from flwr.common.constant import
|
|
27
|
-
from flwr.common.grpc import create_channel
|
|
24
|
+
from flwr.common import ConfigsRecord, Context, RecordSet
|
|
25
|
+
from flwr.common.constant import SERVERAPPIO_API_DEFAULT_ADDRESS, Status, SubStatus
|
|
28
26
|
from flwr.common.logger import log
|
|
29
|
-
from flwr.common.
|
|
30
|
-
from flwr.
|
|
31
|
-
from flwr.
|
|
32
|
-
from flwr.
|
|
27
|
+
from flwr.common.typing import Fab, RunStatus, UserConfig
|
|
28
|
+
from flwr.server.superlink.ffs import Ffs
|
|
29
|
+
from flwr.server.superlink.ffs.ffs_factory import FfsFactory
|
|
30
|
+
from flwr.server.superlink.linkstate import LinkState, LinkStateFactory
|
|
33
31
|
|
|
34
|
-
from .executor import Executor
|
|
32
|
+
from .executor import Executor
|
|
35
33
|
|
|
36
34
|
|
|
37
35
|
class DeploymentEngine(Executor):
|
|
@@ -50,7 +48,7 @@ class DeploymentEngine(Executor):
|
|
|
50
48
|
|
|
51
49
|
def __init__(
|
|
52
50
|
self,
|
|
53
|
-
superlink: str =
|
|
51
|
+
superlink: str = SERVERAPPIO_API_DEFAULT_ADDRESS,
|
|
54
52
|
root_certificates: Optional[str] = None,
|
|
55
53
|
flwr_dir: Optional[str] = None,
|
|
56
54
|
) -> None:
|
|
@@ -62,7 +60,30 @@ class DeploymentEngine(Executor):
|
|
|
62
60
|
self.root_certificates = root_certificates
|
|
63
61
|
self.root_certificates_bytes = Path(root_certificates).read_bytes()
|
|
64
62
|
self.flwr_dir = flwr_dir
|
|
65
|
-
self.
|
|
63
|
+
self.linkstate_factory: Optional[LinkStateFactory] = None
|
|
64
|
+
self.ffs_factory: Optional[FfsFactory] = None
|
|
65
|
+
|
|
66
|
+
@override
|
|
67
|
+
def initialize(
|
|
68
|
+
self, linkstate_factory: LinkStateFactory, ffs_factory: FfsFactory
|
|
69
|
+
) -> None:
|
|
70
|
+
"""Initialize the executor with the necessary factories."""
|
|
71
|
+
self.linkstate_factory = linkstate_factory
|
|
72
|
+
self.ffs_factory = ffs_factory
|
|
73
|
+
|
|
74
|
+
@property
|
|
75
|
+
def linkstate(self) -> LinkState:
|
|
76
|
+
"""Return the LinkState."""
|
|
77
|
+
if self.linkstate_factory is None:
|
|
78
|
+
raise RuntimeError("Executor is not initialized.")
|
|
79
|
+
return self.linkstate_factory.state()
|
|
80
|
+
|
|
81
|
+
@property
|
|
82
|
+
def ffs(self) -> Ffs:
|
|
83
|
+
"""Return the Flower File Storage (FFS)."""
|
|
84
|
+
if self.ffs_factory is None:
|
|
85
|
+
raise RuntimeError("Executor is not initialized.")
|
|
86
|
+
return self.ffs_factory.ffs()
|
|
66
87
|
|
|
67
88
|
@override
|
|
68
89
|
def set_config(
|
|
@@ -77,7 +98,7 @@ class DeploymentEngine(Executor):
|
|
|
77
98
|
A dictionary for configuration values.
|
|
78
99
|
Supported configuration key/value pairs:
|
|
79
100
|
- "superlink": str
|
|
80
|
-
The address of the SuperLink
|
|
101
|
+
The address of the SuperLink ServerAppIo API.
|
|
81
102
|
- "root-certificates": str
|
|
82
103
|
The path to the root certificates.
|
|
83
104
|
- "flwr-dir": str
|
|
@@ -101,32 +122,31 @@ class DeploymentEngine(Executor):
|
|
|
101
122
|
raise ValueError("The `flwr-dir` value should be of type `str`.")
|
|
102
123
|
self.flwr_dir = str(flwr_dir)
|
|
103
124
|
|
|
104
|
-
def _connect(self) -> None:
|
|
105
|
-
if self.stub is not None:
|
|
106
|
-
return
|
|
107
|
-
channel = create_channel(
|
|
108
|
-
server_address=self.superlink,
|
|
109
|
-
insecure=(self.root_certificates_bytes is None),
|
|
110
|
-
root_certificates=self.root_certificates_bytes,
|
|
111
|
-
)
|
|
112
|
-
self.stub = DriverStub(channel)
|
|
113
|
-
|
|
114
125
|
def _create_run(
|
|
115
126
|
self,
|
|
116
127
|
fab: Fab,
|
|
117
128
|
override_config: UserConfig,
|
|
118
129
|
) -> int:
|
|
119
|
-
|
|
120
|
-
|
|
130
|
+
fab_hash = self.ffs.put(fab.content, {})
|
|
131
|
+
if fab_hash != fab.hash_str:
|
|
132
|
+
raise RuntimeError(
|
|
133
|
+
f"FAB ({fab.hash_str}) hash from request doesn't match contents"
|
|
134
|
+
)
|
|
121
135
|
|
|
122
|
-
|
|
136
|
+
run_id = self.linkstate.create_run(
|
|
137
|
+
None, None, fab_hash, override_config, ConfigsRecord()
|
|
138
|
+
)
|
|
139
|
+
return run_id
|
|
123
140
|
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
141
|
+
def _create_context(self, run_id: int) -> None:
|
|
142
|
+
"""Register a Context for a Run."""
|
|
143
|
+
# Create an empty context for the Run
|
|
144
|
+
context = Context(
|
|
145
|
+
run_id=run_id, node_id=0, node_config={}, state=RecordSet(), run_config={}
|
|
127
146
|
)
|
|
128
|
-
|
|
129
|
-
|
|
147
|
+
|
|
148
|
+
# Register the context at the LinkState
|
|
149
|
+
self.linkstate.set_serverapp_context(run_id=run_id, context=context)
|
|
130
150
|
|
|
131
151
|
@override
|
|
132
152
|
def start_run(
|
|
@@ -134,52 +154,27 @@ class DeploymentEngine(Executor):
|
|
|
134
154
|
fab_file: bytes,
|
|
135
155
|
override_config: UserConfig,
|
|
136
156
|
federation_config: UserConfig,
|
|
137
|
-
) -> Optional[
|
|
157
|
+
) -> Optional[int]:
|
|
138
158
|
"""Start run using the Flower Deployment Engine."""
|
|
159
|
+
run_id = None
|
|
139
160
|
try:
|
|
140
|
-
# Install FAB to flwr dir
|
|
141
|
-
install_from_fab(fab_file, None, True)
|
|
142
161
|
|
|
143
162
|
# Call SuperLink to create run
|
|
144
|
-
run_id
|
|
163
|
+
run_id = self._create_run(
|
|
145
164
|
Fab(hashlib.sha256(fab_file).hexdigest(), fab_file), override_config
|
|
146
165
|
)
|
|
147
|
-
log(INFO, "Created run %s", str(run_id))
|
|
148
166
|
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
str(run_id),
|
|
153
|
-
"--superlink",
|
|
154
|
-
str(self.superlink),
|
|
155
|
-
]
|
|
156
|
-
|
|
157
|
-
if self.flwr_dir:
|
|
158
|
-
command.append("--flwr-dir")
|
|
159
|
-
command.append(self.flwr_dir)
|
|
160
|
-
|
|
161
|
-
if self.root_certificates is None:
|
|
162
|
-
command.append("--insecure")
|
|
163
|
-
else:
|
|
164
|
-
command.append("--root-certificates")
|
|
165
|
-
command.append(self.root_certificates)
|
|
166
|
-
|
|
167
|
-
# Execute the command
|
|
168
|
-
proc = subprocess.Popen( # pylint: disable=consider-using-with
|
|
169
|
-
command,
|
|
170
|
-
stdout=subprocess.PIPE,
|
|
171
|
-
stderr=subprocess.PIPE,
|
|
172
|
-
text=True,
|
|
173
|
-
)
|
|
174
|
-
log(INFO, "Started run %s", str(run_id))
|
|
167
|
+
# Register context for the Run
|
|
168
|
+
self._create_context(run_id=run_id)
|
|
169
|
+
log(INFO, "Created run %s", str(run_id))
|
|
175
170
|
|
|
176
|
-
return
|
|
177
|
-
run_id=run_id,
|
|
178
|
-
proc=proc,
|
|
179
|
-
)
|
|
171
|
+
return run_id
|
|
180
172
|
# pylint: disable-next=broad-except
|
|
181
173
|
except Exception as e:
|
|
182
174
|
log(ERROR, "Could not start run: %s", str(e))
|
|
175
|
+
if run_id:
|
|
176
|
+
run_status = RunStatus(Status.FINISHED, SubStatus.FAILED, str(e))
|
|
177
|
+
self.linkstate.update_run_status(run_id, new_status=run_status)
|
|
183
178
|
return None
|
|
184
179
|
|
|
185
180
|
|
flwr/superexec/exec_grpc.py
CHANGED
|
@@ -23,33 +23,40 @@ from flwr.common import GRPC_MAX_MESSAGE_LENGTH
|
|
|
23
23
|
from flwr.common.logger import log
|
|
24
24
|
from flwr.common.typing import UserConfig
|
|
25
25
|
from flwr.proto.exec_pb2_grpc import add_ExecServicer_to_server
|
|
26
|
+
from flwr.server.superlink.ffs.ffs_factory import FfsFactory
|
|
26
27
|
from flwr.server.superlink.fleet.grpc_bidi.grpc_server import generic_create_grpc_server
|
|
28
|
+
from flwr.server.superlink.linkstate import LinkStateFactory
|
|
27
29
|
|
|
28
30
|
from .exec_servicer import ExecServicer
|
|
29
31
|
from .executor import Executor
|
|
30
32
|
|
|
31
33
|
|
|
32
|
-
|
|
34
|
+
# pylint: disable-next=too-many-arguments, too-many-positional-arguments
|
|
35
|
+
def run_exec_api_grpc(
|
|
33
36
|
address: str,
|
|
34
37
|
executor: Executor,
|
|
38
|
+
state_factory: LinkStateFactory,
|
|
39
|
+
ffs_factory: FfsFactory,
|
|
35
40
|
certificates: Optional[tuple[bytes, bytes, bytes]],
|
|
36
41
|
config: UserConfig,
|
|
37
42
|
) -> grpc.Server:
|
|
38
|
-
"""Run
|
|
43
|
+
"""Run Exec API (gRPC, request-response)."""
|
|
39
44
|
executor.set_config(config)
|
|
40
45
|
|
|
41
46
|
exec_servicer: grpc.Server = ExecServicer(
|
|
47
|
+
linkstate_factory=state_factory,
|
|
48
|
+
ffs_factory=ffs_factory,
|
|
42
49
|
executor=executor,
|
|
43
50
|
)
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
servicer_and_add_fn=(exec_servicer,
|
|
51
|
+
exec_add_servicer_to_server_fn = add_ExecServicer_to_server
|
|
52
|
+
exec_grpc_server = generic_create_grpc_server(
|
|
53
|
+
servicer_and_add_fn=(exec_servicer, exec_add_servicer_to_server_fn),
|
|
47
54
|
server_address=address,
|
|
48
55
|
max_message_length=GRPC_MAX_MESSAGE_LENGTH,
|
|
49
56
|
certificates=certificates,
|
|
50
57
|
)
|
|
51
58
|
|
|
52
|
-
log(INFO, "
|
|
53
|
-
|
|
59
|
+
log(INFO, "Flower Deployment Engine: Starting Exec API on %s", address)
|
|
60
|
+
exec_grpc_server.start()
|
|
54
61
|
|
|
55
|
-
return
|
|
62
|
+
return exec_grpc_server
|
flwr/superexec/exec_servicer.py
CHANGED
|
@@ -15,9 +15,6 @@
|
|
|
15
15
|
"""SuperExec API servicer."""
|
|
16
16
|
|
|
17
17
|
|
|
18
|
-
import select
|
|
19
|
-
import sys
|
|
20
|
-
import threading
|
|
21
18
|
import time
|
|
22
19
|
from collections.abc import Generator
|
|
23
20
|
from logging import ERROR, INFO
|
|
@@ -25,6 +22,7 @@ from typing import Any
|
|
|
25
22
|
|
|
26
23
|
import grpc
|
|
27
24
|
|
|
25
|
+
from flwr.common.constant import LOG_STREAM_INTERVAL, Status
|
|
28
26
|
from flwr.common.logger import log
|
|
29
27
|
from flwr.common.serde import user_config_from_proto
|
|
30
28
|
from flwr.proto import exec_pb2_grpc # pylint: disable=E0611
|
|
@@ -34,18 +32,25 @@ from flwr.proto.exec_pb2 import ( # pylint: disable=E0611
|
|
|
34
32
|
StreamLogsRequest,
|
|
35
33
|
StreamLogsResponse,
|
|
36
34
|
)
|
|
35
|
+
from flwr.server.superlink.ffs.ffs_factory import FfsFactory
|
|
36
|
+
from flwr.server.superlink.linkstate import LinkStateFactory
|
|
37
37
|
|
|
38
|
-
from .executor import Executor
|
|
39
|
-
|
|
40
|
-
SELECT_TIMEOUT = 1 # Timeout for selecting ready-to-read file descriptors (in seconds)
|
|
38
|
+
from .executor import Executor
|
|
41
39
|
|
|
42
40
|
|
|
43
41
|
class ExecServicer(exec_pb2_grpc.ExecServicer):
|
|
44
42
|
"""SuperExec API servicer."""
|
|
45
43
|
|
|
46
|
-
def __init__(
|
|
44
|
+
def __init__(
|
|
45
|
+
self,
|
|
46
|
+
linkstate_factory: LinkStateFactory,
|
|
47
|
+
ffs_factory: FfsFactory,
|
|
48
|
+
executor: Executor,
|
|
49
|
+
) -> None:
|
|
50
|
+
self.linkstate_factory = linkstate_factory
|
|
51
|
+
self.ffs_factory = ffs_factory
|
|
47
52
|
self.executor = executor
|
|
48
|
-
self.
|
|
53
|
+
self.executor.initialize(linkstate_factory, ffs_factory)
|
|
49
54
|
|
|
50
55
|
def StartRun(
|
|
51
56
|
self, request: StartRunRequest, context: grpc.ServicerContext
|
|
@@ -53,84 +58,50 @@ class ExecServicer(exec_pb2_grpc.ExecServicer):
|
|
|
53
58
|
"""Create run ID."""
|
|
54
59
|
log(INFO, "ExecServicer.StartRun")
|
|
55
60
|
|
|
56
|
-
|
|
61
|
+
run_id = self.executor.start_run(
|
|
57
62
|
request.fab.content,
|
|
58
63
|
user_config_from_proto(request.override_config),
|
|
59
64
|
user_config_from_proto(request.federation_config),
|
|
60
65
|
)
|
|
61
66
|
|
|
62
|
-
if
|
|
67
|
+
if run_id is None:
|
|
63
68
|
log(ERROR, "Executor failed to start run")
|
|
64
69
|
return StartRunResponse()
|
|
65
70
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
# Start a background thread to capture the log output
|
|
69
|
-
capture_thread = threading.Thread(
|
|
70
|
-
target=_capture_logs, args=(run,), daemon=True
|
|
71
|
-
)
|
|
72
|
-
capture_thread.start()
|
|
73
|
-
|
|
74
|
-
return StartRunResponse(run_id=run.run_id)
|
|
71
|
+
return StartRunResponse(run_id=run_id)
|
|
75
72
|
|
|
76
73
|
def StreamLogs( # pylint: disable=C0103
|
|
77
74
|
self, request: StreamLogsRequest, context: grpc.ServicerContext
|
|
78
75
|
) -> Generator[StreamLogsResponse, Any, None]:
|
|
79
76
|
"""Get logs."""
|
|
80
77
|
log(INFO, "ExecServicer.StreamLogs")
|
|
78
|
+
state = self.linkstate_factory.state()
|
|
79
|
+
|
|
80
|
+
# Retrieve run ID
|
|
81
|
+
run_id = request.run_id
|
|
81
82
|
|
|
82
83
|
# Exit if `run_id` not found
|
|
83
|
-
if
|
|
84
|
+
if not state.get_run(run_id):
|
|
84
85
|
context.abort(grpc.StatusCode.NOT_FOUND, "Run ID not found")
|
|
85
86
|
|
|
86
|
-
|
|
87
|
+
after_timestamp = request.after_timestamp + 1e-6
|
|
87
88
|
while context.is_active():
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
89
|
+
log_msg, latest_timestamp = state.get_serverapp_log(run_id, after_timestamp)
|
|
90
|
+
if log_msg:
|
|
91
|
+
yield StreamLogsResponse(
|
|
92
|
+
log_output=log_msg,
|
|
93
|
+
latest_timestamp=latest_timestamp,
|
|
94
|
+
)
|
|
95
|
+
# Add a small epsilon to the latest timestamp to avoid getting
|
|
96
|
+
# the same log
|
|
97
|
+
after_timestamp = max(latest_timestamp + 1e-6, after_timestamp)
|
|
93
98
|
|
|
94
99
|
# Wait for and continue to yield more log responses only if the
|
|
95
100
|
# run isn't completed yet. If the run is finished, the entire log
|
|
96
101
|
# is returned at this point and the server ends the stream.
|
|
97
|
-
|
|
102
|
+
run_status = state.get_run_status({run_id})[run_id]
|
|
103
|
+
if run_status.status == Status.FINISHED:
|
|
98
104
|
log(INFO, "All logs for run ID `%s` returned", request.run_id)
|
|
99
|
-
context.set_code(grpc.StatusCode.OK)
|
|
100
105
|
context.cancel()
|
|
101
106
|
|
|
102
|
-
time.sleep(
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
def _capture_logs(
|
|
106
|
-
run: RunTracker,
|
|
107
|
-
) -> None:
|
|
108
|
-
while True:
|
|
109
|
-
# Explicitly check if Popen.poll() is None. Required for `pytest`.
|
|
110
|
-
if run.proc.poll() is None:
|
|
111
|
-
# Select streams only when ready to read
|
|
112
|
-
ready_to_read, _, _ = select.select(
|
|
113
|
-
[run.proc.stdout, run.proc.stderr],
|
|
114
|
-
[],
|
|
115
|
-
[],
|
|
116
|
-
SELECT_TIMEOUT,
|
|
117
|
-
)
|
|
118
|
-
# Read from std* and append to RunTracker.logs
|
|
119
|
-
for stream in ready_to_read:
|
|
120
|
-
# Flush stdout to view output in real time
|
|
121
|
-
readline = stream.readline()
|
|
122
|
-
sys.stdout.write(readline)
|
|
123
|
-
sys.stdout.flush()
|
|
124
|
-
# Append to logs
|
|
125
|
-
line = readline.rstrip()
|
|
126
|
-
if line:
|
|
127
|
-
run.logs.append(f"{line}")
|
|
128
|
-
|
|
129
|
-
# Close std* to prevent blocking
|
|
130
|
-
elif run.proc.poll() is not None:
|
|
131
|
-
log(INFO, "Subprocess finished, exiting log capture")
|
|
132
|
-
if run.proc.stdout:
|
|
133
|
-
run.proc.stdout.close()
|
|
134
|
-
if run.proc.stderr:
|
|
135
|
-
run.proc.stderr.close()
|
|
136
|
-
break
|
|
107
|
+
time.sleep(LOG_STREAM_INTERVAL) # Sleep briefly to avoid busy waiting
|
flwr/superexec/executor.py
CHANGED
|
@@ -20,6 +20,8 @@ from subprocess import Popen
|
|
|
20
20
|
from typing import Optional
|
|
21
21
|
|
|
22
22
|
from flwr.common.typing import UserConfig
|
|
23
|
+
from flwr.server.superlink.ffs.ffs_factory import FfsFactory
|
|
24
|
+
from flwr.server.superlink.linkstate import LinkStateFactory
|
|
23
25
|
|
|
24
26
|
|
|
25
27
|
@dataclass
|
|
@@ -34,6 +36,23 @@ class RunTracker:
|
|
|
34
36
|
class Executor(ABC):
|
|
35
37
|
"""Execute and monitor a Flower run."""
|
|
36
38
|
|
|
39
|
+
@abstractmethod
|
|
40
|
+
def initialize(
|
|
41
|
+
self, linkstate_factory: LinkStateFactory, ffs_factory: FfsFactory
|
|
42
|
+
) -> None:
|
|
43
|
+
"""Initialize the executor with the necessary factories.
|
|
44
|
+
|
|
45
|
+
This method sets up the executor by providing it with the factories required
|
|
46
|
+
to access the LinkState and the Flower File Storage (FFS) in the SuperLink.
|
|
47
|
+
|
|
48
|
+
Parameters
|
|
49
|
+
----------
|
|
50
|
+
linkstate_factory : LinkStateFactory
|
|
51
|
+
The factory to create access to the LinkState.
|
|
52
|
+
ffs_factory : FfsFactory
|
|
53
|
+
The factory to create access to the Flower File Storage (FFS).
|
|
54
|
+
"""
|
|
55
|
+
|
|
37
56
|
@abstractmethod
|
|
38
57
|
def set_config(
|
|
39
58
|
self,
|
|
@@ -53,7 +72,7 @@ class Executor(ABC):
|
|
|
53
72
|
fab_file: bytes,
|
|
54
73
|
override_config: UserConfig,
|
|
55
74
|
federation_config: UserConfig,
|
|
56
|
-
) -> Optional[
|
|
75
|
+
) -> Optional[int]:
|
|
57
76
|
"""Start a run using the given Flower FAB ID and version.
|
|
58
77
|
|
|
59
78
|
This method creates a new run on the SuperLink, returns its run_id
|
|
@@ -70,7 +89,6 @@ class Executor(ABC):
|
|
|
70
89
|
|
|
71
90
|
Returns
|
|
72
91
|
-------
|
|
73
|
-
run_id : Optional[
|
|
74
|
-
The run_id
|
|
75
|
-
or `None` if it fails.
|
|
92
|
+
run_id : Optional[int]
|
|
93
|
+
The run_id of the run created by the SuperLink, or `None` if it fails.
|
|
76
94
|
"""
|
flwr/superexec/simulation.py
CHANGED
|
@@ -29,9 +29,11 @@ from flwr.common.config import unflatten_dict
|
|
|
29
29
|
from flwr.common.constant import RUN_ID_NUM_BYTES
|
|
30
30
|
from flwr.common.logger import log
|
|
31
31
|
from flwr.common.typing import UserConfig
|
|
32
|
-
from flwr.server.superlink.
|
|
32
|
+
from flwr.server.superlink.ffs.ffs_factory import FfsFactory
|
|
33
|
+
from flwr.server.superlink.linkstate import LinkStateFactory
|
|
34
|
+
from flwr.server.superlink.linkstate.utils import generate_rand_int_from_bytes
|
|
33
35
|
|
|
34
|
-
from .executor import Executor
|
|
36
|
+
from .executor import Executor
|
|
35
37
|
|
|
36
38
|
|
|
37
39
|
def _user_config_to_str(user_config: UserConfig) -> str:
|
|
@@ -70,6 +72,12 @@ class SimulationEngine(Executor):
|
|
|
70
72
|
self.num_supernodes = num_supernodes
|
|
71
73
|
self.verbose = verbose
|
|
72
74
|
|
|
75
|
+
@override
|
|
76
|
+
def initialize(
|
|
77
|
+
self, linkstate_factory: LinkStateFactory, ffs_factory: FfsFactory
|
|
78
|
+
) -> None:
|
|
79
|
+
"""Initialize the executor with the necessary factories."""
|
|
80
|
+
|
|
73
81
|
@override
|
|
74
82
|
def set_config(
|
|
75
83
|
self,
|
|
@@ -117,7 +125,7 @@ class SimulationEngine(Executor):
|
|
|
117
125
|
fab_file: bytes,
|
|
118
126
|
override_config: UserConfig,
|
|
119
127
|
federation_config: UserConfig,
|
|
120
|
-
) -> Optional[
|
|
128
|
+
) -> Optional[int]:
|
|
121
129
|
"""Start run using the Flower Simulation Engine."""
|
|
122
130
|
if self.num_supernodes is None:
|
|
123
131
|
raise ValueError(
|
|
@@ -191,17 +199,14 @@ class SimulationEngine(Executor):
|
|
|
191
199
|
command.extend(["--run-config", f"{override_config_str}"])
|
|
192
200
|
|
|
193
201
|
# Start Simulation
|
|
194
|
-
|
|
202
|
+
_ = subprocess.Popen( # pylint: disable=consider-using-with
|
|
195
203
|
command,
|
|
196
204
|
text=True,
|
|
197
205
|
)
|
|
198
206
|
|
|
199
207
|
log(INFO, "Started run %s", str(run_id))
|
|
200
208
|
|
|
201
|
-
return
|
|
202
|
-
run_id=run_id,
|
|
203
|
-
proc=proc,
|
|
204
|
-
)
|
|
209
|
+
return run_id
|
|
205
210
|
|
|
206
211
|
# pylint: disable-next=broad-except
|
|
207
212
|
except Exception as e:
|