flwr-nightly 1.13.0.dev20241021__py3-none-any.whl → 1.13.0.dev20241111__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of flwr-nightly might be problematic. More details are available via the link on the original page.
- flwr/cli/build.py +2 -2
- flwr/cli/config_utils.py +97 -0
- flwr/cli/log.py +63 -97
- flwr/cli/new/templates/app/code/flwr_tune/dataset.py.tpl +1 -1
- flwr/cli/new/templates/app/pyproject.flowertune.toml.tpl +1 -0
- flwr/cli/new/templates/app/pyproject.tensorflow.toml.tpl +1 -1
- flwr/cli/run/run.py +34 -88
- flwr/client/app.py +23 -20
- flwr/client/clientapp/app.py +22 -18
- flwr/client/nodestate/__init__.py +25 -0
- flwr/client/nodestate/in_memory_nodestate.py +38 -0
- flwr/client/nodestate/nodestate.py +30 -0
- flwr/client/nodestate/nodestate_factory.py +37 -0
- flwr/client/{node_state.py → run_info_store.py} +4 -3
- flwr/client/supernode/app.py +6 -8
- flwr/common/args.py +83 -0
- flwr/common/config.py +10 -0
- flwr/common/constant.py +39 -5
- flwr/common/context.py +9 -4
- flwr/common/date.py +3 -3
- flwr/common/logger.py +108 -1
- flwr/common/object_ref.py +47 -16
- flwr/common/serde.py +24 -0
- flwr/common/telemetry.py +0 -6
- flwr/common/typing.py +10 -1
- flwr/proto/exec_pb2.py +14 -17
- flwr/proto/exec_pb2.pyi +14 -22
- flwr/proto/log_pb2.py +29 -0
- flwr/proto/log_pb2.pyi +39 -0
- flwr/proto/log_pb2_grpc.py +4 -0
- flwr/proto/log_pb2_grpc.pyi +4 -0
- flwr/proto/message_pb2.py +8 -8
- flwr/proto/message_pb2.pyi +4 -1
- flwr/proto/run_pb2.py +32 -27
- flwr/proto/run_pb2.pyi +26 -0
- flwr/proto/serverappio_pb2.py +52 -0
- flwr/proto/{driver_pb2.pyi → serverappio_pb2.pyi} +54 -0
- flwr/proto/serverappio_pb2_grpc.py +376 -0
- flwr/proto/serverappio_pb2_grpc.pyi +147 -0
- flwr/proto/simulationio_pb2.py +38 -0
- flwr/proto/simulationio_pb2.pyi +65 -0
- flwr/proto/simulationio_pb2_grpc.py +205 -0
- flwr/proto/simulationio_pb2_grpc.pyi +81 -0
- flwr/server/app.py +272 -105
- flwr/server/driver/driver.py +15 -1
- flwr/server/driver/grpc_driver.py +25 -36
- flwr/server/driver/inmemory_driver.py +6 -16
- flwr/server/run_serverapp.py +29 -23
- flwr/server/{superlink/state → serverapp}/__init__.py +3 -9
- flwr/server/serverapp/app.py +214 -0
- flwr/server/strategy/aggregate.py +4 -4
- flwr/server/strategy/fedadam.py +11 -1
- flwr/server/superlink/driver/__init__.py +1 -1
- flwr/server/superlink/driver/{driver_grpc.py → serverappio_grpc.py} +19 -16
- flwr/server/superlink/driver/{driver_servicer.py → serverappio_servicer.py} +125 -39
- flwr/server/superlink/fleet/grpc_adapter/grpc_adapter_servicer.py +4 -2
- flwr/server/superlink/fleet/grpc_bidi/grpc_server.py +2 -2
- flwr/server/superlink/fleet/grpc_rere/fleet_servicer.py +4 -2
- flwr/server/superlink/fleet/grpc_rere/server_interceptor.py +2 -2
- flwr/server/superlink/fleet/message_handler/message_handler.py +7 -7
- flwr/server/superlink/fleet/rest_rere/rest_api.py +7 -7
- flwr/server/superlink/fleet/vce/vce_api.py +23 -23
- flwr/server/superlink/linkstate/__init__.py +28 -0
- flwr/server/superlink/{state/in_memory_state.py → linkstate/in_memory_linkstate.py} +184 -36
- flwr/server/superlink/{state/state.py → linkstate/linkstate.py} +149 -19
- flwr/server/superlink/{state/state_factory.py → linkstate/linkstate_factory.py} +9 -9
- flwr/server/superlink/{state/sqlite_state.py → linkstate/sqlite_linkstate.py} +306 -65
- flwr/server/superlink/{state → linkstate}/utils.py +81 -30
- flwr/server/superlink/simulation/__init__.py +15 -0
- flwr/server/superlink/simulation/simulationio_grpc.py +65 -0
- flwr/server/superlink/simulation/simulationio_servicer.py +153 -0
- flwr/simulation/__init__.py +5 -1
- flwr/simulation/app.py +273 -345
- flwr/simulation/legacy_app.py +382 -0
- flwr/simulation/ray_transport/ray_client_proxy.py +2 -2
- flwr/simulation/run_simulation.py +57 -131
- flwr/simulation/simulationio_connection.py +86 -0
- flwr/superexec/app.py +6 -134
- flwr/superexec/deployment.py +61 -66
- flwr/superexec/exec_grpc.py +15 -8
- flwr/superexec/exec_servicer.py +36 -65
- flwr/superexec/executor.py +26 -7
- flwr/superexec/simulation.py +54 -107
- {flwr_nightly-1.13.0.dev20241021.dist-info → flwr_nightly-1.13.0.dev20241111.dist-info}/METADATA +5 -4
- {flwr_nightly-1.13.0.dev20241021.dist-info → flwr_nightly-1.13.0.dev20241111.dist-info}/RECORD +88 -69
- {flwr_nightly-1.13.0.dev20241021.dist-info → flwr_nightly-1.13.0.dev20241111.dist-info}/entry_points.txt +2 -0
- flwr/client/node_state_tests.py +0 -66
- flwr/proto/driver_pb2.py +0 -42
- flwr/proto/driver_pb2_grpc.py +0 -239
- flwr/proto/driver_pb2_grpc.pyi +0 -94
- {flwr_nightly-1.13.0.dev20241021.dist-info → flwr_nightly-1.13.0.dev20241111.dist-info}/LICENSE +0 -0
- {flwr_nightly-1.13.0.dev20241021.dist-info → flwr_nightly-1.13.0.dev20241111.dist-info}/WHEEL +0 -0
flwr/cli/build.py
CHANGED
|
@@ -81,8 +81,8 @@ def build(
|
|
|
81
81
|
if not is_valid_project_name(app.name):
|
|
82
82
|
typer.secho(
|
|
83
83
|
f"❌ The project name {app.name} is invalid, "
|
|
84
|
-
"a valid project name must start with a letter
|
|
85
|
-
"and can only contain letters, digits, and
|
|
84
|
+
"a valid project name must start with a letter, "
|
|
85
|
+
"and can only contain letters, digits, and hyphens.",
|
|
86
86
|
fg=typer.colors.RED,
|
|
87
87
|
bold=True,
|
|
88
88
|
)
|
flwr/cli/config_utils.py
CHANGED
|
@@ -20,6 +20,7 @@ from pathlib import Path
|
|
|
20
20
|
from typing import IO, Any, Optional, Union, get_args
|
|
21
21
|
|
|
22
22
|
import tomli
|
|
23
|
+
import typer
|
|
23
24
|
|
|
24
25
|
from flwr.common import object_ref
|
|
25
26
|
from flwr.common.typing import UserConfigValue
|
|
@@ -227,3 +228,99 @@ def load_from_string(toml_content: str) -> Optional[dict[str, Any]]:
|
|
|
227
228
|
return data
|
|
228
229
|
except tomli.TOMLDecodeError:
|
|
229
230
|
return None
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
def validate_project_config(
|
|
234
|
+
config: Union[dict[str, Any], None], errors: list[str], warnings: list[str]
|
|
235
|
+
) -> dict[str, Any]:
|
|
236
|
+
"""Validate and return the Flower project configuration."""
|
|
237
|
+
if config is None:
|
|
238
|
+
typer.secho(
|
|
239
|
+
"Project configuration could not be loaded.\n"
|
|
240
|
+
"pyproject.toml is invalid:\n"
|
|
241
|
+
+ "\n".join([f"- {line}" for line in errors]),
|
|
242
|
+
fg=typer.colors.RED,
|
|
243
|
+
bold=True,
|
|
244
|
+
)
|
|
245
|
+
raise typer.Exit(code=1)
|
|
246
|
+
|
|
247
|
+
if warnings:
|
|
248
|
+
typer.secho(
|
|
249
|
+
"Project configuration is missing the following "
|
|
250
|
+
"recommended properties:\n" + "\n".join([f"- {line}" for line in warnings]),
|
|
251
|
+
fg=typer.colors.RED,
|
|
252
|
+
bold=True,
|
|
253
|
+
)
|
|
254
|
+
|
|
255
|
+
typer.secho("Success", fg=typer.colors.GREEN)
|
|
256
|
+
|
|
257
|
+
return config
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
def validate_federation_in_project_config(
|
|
261
|
+
federation: Optional[str], config: dict[str, Any]
|
|
262
|
+
) -> tuple[str, dict[str, Any]]:
|
|
263
|
+
"""Validate the federation name in the Flower project configuration."""
|
|
264
|
+
federation = federation or config["tool"]["flwr"]["federations"].get("default")
|
|
265
|
+
|
|
266
|
+
if federation is None:
|
|
267
|
+
typer.secho(
|
|
268
|
+
"❌ No federation name was provided and the project's `pyproject.toml` "
|
|
269
|
+
"doesn't declare a default federation (with an Exec API address or an "
|
|
270
|
+
"`options.num-supernodes` value).",
|
|
271
|
+
fg=typer.colors.RED,
|
|
272
|
+
bold=True,
|
|
273
|
+
)
|
|
274
|
+
raise typer.Exit(code=1)
|
|
275
|
+
|
|
276
|
+
# Validate the federation exists in the configuration
|
|
277
|
+
federation_config = config["tool"]["flwr"]["federations"].get(federation)
|
|
278
|
+
if federation_config is None:
|
|
279
|
+
available_feds = {
|
|
280
|
+
fed for fed in config["tool"]["flwr"]["federations"] if fed != "default"
|
|
281
|
+
}
|
|
282
|
+
typer.secho(
|
|
283
|
+
f"❌ There is no `{federation}` federation declared in the "
|
|
284
|
+
"`pyproject.toml`.\n The following federations were found:\n\n"
|
|
285
|
+
+ "\n".join(available_feds),
|
|
286
|
+
fg=typer.colors.RED,
|
|
287
|
+
bold=True,
|
|
288
|
+
)
|
|
289
|
+
raise typer.Exit(code=1)
|
|
290
|
+
|
|
291
|
+
return federation, federation_config
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
def validate_certificate_in_federation_config(
|
|
295
|
+
app: Path, federation_config: dict[str, Any]
|
|
296
|
+
) -> tuple[bool, Optional[bytes]]:
|
|
297
|
+
"""Validate the certificates in the Flower project configuration."""
|
|
298
|
+
insecure_str = federation_config.get("insecure")
|
|
299
|
+
if root_certificates := federation_config.get("root-certificates"):
|
|
300
|
+
root_certificates_bytes = (app / root_certificates).read_bytes()
|
|
301
|
+
if insecure := bool(insecure_str):
|
|
302
|
+
typer.secho(
|
|
303
|
+
"❌ `root_certificates` were provided but the `insecure` parameter "
|
|
304
|
+
"is set to `True`.",
|
|
305
|
+
fg=typer.colors.RED,
|
|
306
|
+
bold=True,
|
|
307
|
+
)
|
|
308
|
+
raise typer.Exit(code=1)
|
|
309
|
+
else:
|
|
310
|
+
root_certificates_bytes = None
|
|
311
|
+
if insecure_str is None:
|
|
312
|
+
typer.secho(
|
|
313
|
+
"❌ To disable TLS, set `insecure = true` in `pyproject.toml`.",
|
|
314
|
+
fg=typer.colors.RED,
|
|
315
|
+
bold=True,
|
|
316
|
+
)
|
|
317
|
+
raise typer.Exit(code=1)
|
|
318
|
+
if not (insecure := bool(insecure_str)):
|
|
319
|
+
typer.secho(
|
|
320
|
+
"❌ No certificate were given yet `insecure` is set to `False`.",
|
|
321
|
+
fg=typer.colors.RED,
|
|
322
|
+
bold=True,
|
|
323
|
+
)
|
|
324
|
+
raise typer.Exit(code=1)
|
|
325
|
+
|
|
326
|
+
return insecure, root_certificates_bytes
|
flwr/cli/log.py
CHANGED
|
@@ -14,33 +14,38 @@
|
|
|
14
14
|
# ==============================================================================
|
|
15
15
|
"""Flower command line interface `log` command."""
|
|
16
16
|
|
|
17
|
-
import sys
|
|
18
17
|
import time
|
|
19
18
|
from logging import DEBUG, ERROR, INFO
|
|
20
19
|
from pathlib import Path
|
|
21
|
-
from typing import Annotated, Optional
|
|
20
|
+
from typing import Annotated, Any, Optional, cast
|
|
22
21
|
|
|
23
22
|
import grpc
|
|
24
23
|
import typer
|
|
25
24
|
|
|
26
|
-
from flwr.cli.config_utils import
|
|
25
|
+
from flwr.cli.config_utils import (
|
|
26
|
+
load_and_validate,
|
|
27
|
+
validate_certificate_in_federation_config,
|
|
28
|
+
validate_federation_in_project_config,
|
|
29
|
+
validate_project_config,
|
|
30
|
+
)
|
|
31
|
+
from flwr.common.constant import CONN_RECONNECT_INTERVAL, CONN_REFRESH_PERIOD
|
|
27
32
|
from flwr.common.grpc import GRPC_MAX_MESSAGE_LENGTH, create_channel
|
|
28
33
|
from flwr.common.logger import log as logger
|
|
29
34
|
from flwr.proto.exec_pb2 import StreamLogsRequest # pylint: disable=E0611
|
|
30
35
|
from flwr.proto.exec_pb2_grpc import ExecStub
|
|
31
36
|
|
|
32
|
-
CONN_REFRESH_PERIOD = 60 # Connection refresh period for log streaming (seconds)
|
|
33
|
-
|
|
34
37
|
|
|
35
38
|
def start_stream(
|
|
36
39
|
run_id: int, channel: grpc.Channel, refresh_period: int = CONN_REFRESH_PERIOD
|
|
37
40
|
) -> None:
|
|
38
41
|
"""Start log streaming for a given run ID."""
|
|
42
|
+
stub = ExecStub(channel)
|
|
43
|
+
after_timestamp = 0.0
|
|
39
44
|
try:
|
|
45
|
+
logger(INFO, "Starting logstream for run_id `%s`", run_id)
|
|
40
46
|
while True:
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
time.sleep(2)
|
|
47
|
+
after_timestamp = stream_logs(run_id, stub, refresh_period, after_timestamp)
|
|
48
|
+
time.sleep(CONN_RECONNECT_INTERVAL)
|
|
44
49
|
logger(DEBUG, "Reconnecting to logstream")
|
|
45
50
|
except KeyboardInterrupt:
|
|
46
51
|
logger(INFO, "Exiting logstream")
|
|
@@ -54,16 +59,44 @@ def start_stream(
|
|
|
54
59
|
channel.close()
|
|
55
60
|
|
|
56
61
|
|
|
57
|
-
def stream_logs(
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
+
def stream_logs(
|
|
63
|
+
run_id: int, stub: ExecStub, duration: int, after_timestamp: float
|
|
64
|
+
) -> float:
|
|
65
|
+
"""Stream logs from the beginning of a run with connection refresh.
|
|
66
|
+
|
|
67
|
+
Parameters
|
|
68
|
+
----------
|
|
69
|
+
run_id : int
|
|
70
|
+
The identifier of the run.
|
|
71
|
+
stub : ExecStub
|
|
72
|
+
The gRPC stub to interact with the Exec service.
|
|
73
|
+
duration : int
|
|
74
|
+
The timeout duration for each stream connection in seconds.
|
|
75
|
+
after_timestamp : float
|
|
76
|
+
The timestamp to start streaming logs from.
|
|
77
|
+
|
|
78
|
+
Returns
|
|
79
|
+
-------
|
|
80
|
+
float
|
|
81
|
+
The latest timestamp from the streamed logs or the provided `after_timestamp`
|
|
82
|
+
if no logs are returned.
|
|
83
|
+
"""
|
|
84
|
+
req = StreamLogsRequest(run_id=run_id, after_timestamp=after_timestamp)
|
|
85
|
+
|
|
86
|
+
latest_timestamp = 0.0
|
|
87
|
+
res = None
|
|
88
|
+
try:
|
|
89
|
+
for res in stub.StreamLogs(req, timeout=duration):
|
|
90
|
+
print(res.log_output, end="")
|
|
91
|
+
except grpc.RpcError as e:
|
|
92
|
+
# pylint: disable=E1101
|
|
93
|
+
if e.code() != grpc.StatusCode.DEADLINE_EXCEEDED:
|
|
94
|
+
raise e
|
|
95
|
+
finally:
|
|
96
|
+
if res is not None:
|
|
97
|
+
latest_timestamp = cast(float, res.latest_timestamp)
|
|
62
98
|
|
|
63
|
-
|
|
64
|
-
print(res.log_output)
|
|
65
|
-
if time.time() - start_time > duration:
|
|
66
|
-
break
|
|
99
|
+
return max(latest_timestamp, after_timestamp)
|
|
67
100
|
|
|
68
101
|
|
|
69
102
|
def print_logs(run_id: int, channel: grpc.Channel, timeout: int) -> None:
|
|
@@ -124,100 +157,33 @@ def log(
|
|
|
124
157
|
|
|
125
158
|
pyproject_path = app / "pyproject.toml" if app else None
|
|
126
159
|
config, errors, warnings = load_and_validate(path=pyproject_path)
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
"pyproject.toml is invalid:\n"
|
|
132
|
-
+ "\n".join([f"- {line}" for line in errors]),
|
|
133
|
-
fg=typer.colors.RED,
|
|
134
|
-
bold=True,
|
|
135
|
-
)
|
|
136
|
-
sys.exit()
|
|
137
|
-
|
|
138
|
-
if warnings:
|
|
139
|
-
typer.secho(
|
|
140
|
-
"Project configuration is missing the following "
|
|
141
|
-
"recommended properties:\n" + "\n".join([f"- {line}" for line in warnings]),
|
|
142
|
-
fg=typer.colors.RED,
|
|
143
|
-
bold=True,
|
|
144
|
-
)
|
|
145
|
-
|
|
146
|
-
typer.secho("Success", fg=typer.colors.GREEN)
|
|
147
|
-
|
|
148
|
-
federation = federation or config["tool"]["flwr"]["federations"].get("default")
|
|
149
|
-
|
|
150
|
-
if federation is None:
|
|
151
|
-
typer.secho(
|
|
152
|
-
"❌ No federation name was provided and the project's `pyproject.toml` "
|
|
153
|
-
"doesn't declare a default federation (with a SuperExec address or an "
|
|
154
|
-
"`options.num-supernodes` value).",
|
|
155
|
-
fg=typer.colors.RED,
|
|
156
|
-
bold=True,
|
|
157
|
-
)
|
|
158
|
-
raise typer.Exit(code=1)
|
|
159
|
-
|
|
160
|
-
# Validate the federation exists in the configuration
|
|
161
|
-
federation_config = config["tool"]["flwr"]["federations"].get(federation)
|
|
162
|
-
if federation_config is None:
|
|
163
|
-
available_feds = {
|
|
164
|
-
fed for fed in config["tool"]["flwr"]["federations"] if fed != "default"
|
|
165
|
-
}
|
|
166
|
-
typer.secho(
|
|
167
|
-
f"❌ There is no `{federation}` federation declared in the "
|
|
168
|
-
"`pyproject.toml`.\n The following federations were found:\n\n"
|
|
169
|
-
+ "\n".join(available_feds),
|
|
170
|
-
fg=typer.colors.RED,
|
|
171
|
-
bold=True,
|
|
172
|
-
)
|
|
173
|
-
raise typer.Exit(code=1)
|
|
160
|
+
config = validate_project_config(config, errors, warnings)
|
|
161
|
+
federation, federation_config = validate_federation_in_project_config(
|
|
162
|
+
federation, config
|
|
163
|
+
)
|
|
174
164
|
|
|
175
165
|
if "address" not in federation_config:
|
|
176
166
|
typer.secho(
|
|
177
|
-
"❌ `flwr log` currently works with
|
|
178
|
-
"
|
|
167
|
+
"❌ `flwr log` currently works with Exec API. Ensure that the correct"
|
|
168
|
+
"Exec API address is provided in the `pyproject.toml`.",
|
|
179
169
|
fg=typer.colors.RED,
|
|
180
170
|
bold=True,
|
|
181
171
|
)
|
|
182
172
|
raise typer.Exit(code=1)
|
|
183
173
|
|
|
184
|
-
|
|
174
|
+
_log_with_exec_api(app, federation_config, run_id, stream)
|
|
185
175
|
|
|
186
176
|
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
federation_config: dict[str,
|
|
177
|
+
def _log_with_exec_api(
|
|
178
|
+
app: Path,
|
|
179
|
+
federation_config: dict[str, Any],
|
|
190
180
|
run_id: int,
|
|
191
181
|
stream: bool,
|
|
192
182
|
) -> None:
|
|
193
|
-
insecure_str = federation_config.get("insecure")
|
|
194
|
-
if root_certificates := federation_config.get("root-certificates"):
|
|
195
|
-
root_certificates_bytes = Path(root_certificates).read_bytes()
|
|
196
|
-
if insecure := bool(insecure_str):
|
|
197
|
-
typer.secho(
|
|
198
|
-
"❌ `root_certificates` were provided but the `insecure` parameter"
|
|
199
|
-
"is set to `True`.",
|
|
200
|
-
fg=typer.colors.RED,
|
|
201
|
-
bold=True,
|
|
202
|
-
)
|
|
203
|
-
raise typer.Exit(code=1)
|
|
204
|
-
else:
|
|
205
|
-
root_certificates_bytes = None
|
|
206
|
-
if insecure_str is None:
|
|
207
|
-
typer.secho(
|
|
208
|
-
"❌ To disable TLS, set `insecure = true` in `pyproject.toml`.",
|
|
209
|
-
fg=typer.colors.RED,
|
|
210
|
-
bold=True,
|
|
211
|
-
)
|
|
212
|
-
raise typer.Exit(code=1)
|
|
213
|
-
if not (insecure := bool(insecure_str)):
|
|
214
|
-
typer.secho(
|
|
215
|
-
"❌ No certificate were given yet `insecure` is set to `False`.",
|
|
216
|
-
fg=typer.colors.RED,
|
|
217
|
-
bold=True,
|
|
218
|
-
)
|
|
219
|
-
raise typer.Exit(code=1)
|
|
220
183
|
|
|
184
|
+
insecure, root_certificates_bytes = validate_certificate_in_federation_config(
|
|
185
|
+
app, federation_config
|
|
186
|
+
)
|
|
221
187
|
channel = create_channel(
|
|
222
188
|
server_address=federation_config["address"],
|
|
223
189
|
insecure=insecure,
|
|
@@ -71,7 +71,7 @@ def load_data(partition_id: int, num_partitions: int, dataset_name: str):
|
|
|
71
71
|
partitioners={"train": partitioner},
|
|
72
72
|
)
|
|
73
73
|
client_trainset = FDS.load_partition(partition_id, "train")
|
|
74
|
-
client_trainset = reformat(client_trainset, llm_task="
|
|
74
|
+
client_trainset = reformat(client_trainset, llm_task="$llm_challenge_str")
|
|
75
75
|
return client_trainset
|
|
76
76
|
|
|
77
77
|
|
flwr/cli/run/run.py
CHANGED
|
@@ -16,7 +16,6 @@
|
|
|
16
16
|
|
|
17
17
|
import json
|
|
18
18
|
import subprocess
|
|
19
|
-
import sys
|
|
20
19
|
from logging import DEBUG
|
|
21
20
|
from pathlib import Path
|
|
22
21
|
from typing import Annotated, Any, Optional
|
|
@@ -24,11 +23,24 @@ from typing import Annotated, Any, Optional
|
|
|
24
23
|
import typer
|
|
25
24
|
|
|
26
25
|
from flwr.cli.build import build
|
|
27
|
-
from flwr.cli.config_utils import
|
|
28
|
-
|
|
26
|
+
from flwr.cli.config_utils import (
|
|
27
|
+
load_and_validate,
|
|
28
|
+
validate_certificate_in_federation_config,
|
|
29
|
+
validate_federation_in_project_config,
|
|
30
|
+
validate_project_config,
|
|
31
|
+
)
|
|
32
|
+
from flwr.common.config import (
|
|
33
|
+
flatten_dict,
|
|
34
|
+
parse_config_args,
|
|
35
|
+
user_config_to_configsrecord,
|
|
36
|
+
)
|
|
29
37
|
from flwr.common.grpc import GRPC_MAX_MESSAGE_LENGTH, create_channel
|
|
30
38
|
from flwr.common.logger import log
|
|
31
|
-
from flwr.common.serde import
|
|
39
|
+
from flwr.common.serde import (
|
|
40
|
+
configs_record_to_proto,
|
|
41
|
+
fab_to_proto,
|
|
42
|
+
user_config_to_proto,
|
|
43
|
+
)
|
|
32
44
|
from flwr.common.typing import Fab
|
|
33
45
|
from flwr.proto.exec_pb2 import StartRunRequest # pylint: disable=E0611
|
|
34
46
|
from flwr.proto.exec_pb2_grpc import ExecStub
|
|
@@ -79,96 +91,28 @@ def run(
|
|
|
79
91
|
|
|
80
92
|
pyproject_path = app / "pyproject.toml" if app else None
|
|
81
93
|
config, errors, warnings = load_and_validate(path=pyproject_path)
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
"pyproject.toml is invalid:\n"
|
|
87
|
-
+ "\n".join([f"- {line}" for line in errors]),
|
|
88
|
-
fg=typer.colors.RED,
|
|
89
|
-
bold=True,
|
|
90
|
-
)
|
|
91
|
-
sys.exit()
|
|
92
|
-
|
|
93
|
-
if warnings:
|
|
94
|
-
typer.secho(
|
|
95
|
-
"Project configuration is missing the following "
|
|
96
|
-
"recommended properties:\n" + "\n".join([f"- {line}" for line in warnings]),
|
|
97
|
-
fg=typer.colors.RED,
|
|
98
|
-
bold=True,
|
|
99
|
-
)
|
|
100
|
-
|
|
101
|
-
typer.secho("Success", fg=typer.colors.GREEN)
|
|
102
|
-
|
|
103
|
-
federation = federation or config["tool"]["flwr"]["federations"].get("default")
|
|
104
|
-
|
|
105
|
-
if federation is None:
|
|
106
|
-
typer.secho(
|
|
107
|
-
"❌ No federation name was provided and the project's `pyproject.toml` "
|
|
108
|
-
"doesn't declare a default federation (with a SuperExec address or an "
|
|
109
|
-
"`options.num-supernodes` value).",
|
|
110
|
-
fg=typer.colors.RED,
|
|
111
|
-
bold=True,
|
|
112
|
-
)
|
|
113
|
-
raise typer.Exit(code=1)
|
|
114
|
-
|
|
115
|
-
# Validate the federation exists in the configuration
|
|
116
|
-
federation_config = config["tool"]["flwr"]["federations"].get(federation)
|
|
117
|
-
if federation_config is None:
|
|
118
|
-
available_feds = {
|
|
119
|
-
fed for fed in config["tool"]["flwr"]["federations"] if fed != "default"
|
|
120
|
-
}
|
|
121
|
-
typer.secho(
|
|
122
|
-
f"❌ There is no `{federation}` federation declared in "
|
|
123
|
-
"`pyproject.toml`.\n The following federations were found:\n\n"
|
|
124
|
-
+ "\n".join(available_feds),
|
|
125
|
-
fg=typer.colors.RED,
|
|
126
|
-
bold=True,
|
|
127
|
-
)
|
|
128
|
-
raise typer.Exit(code=1)
|
|
94
|
+
config = validate_project_config(config, errors, warnings)
|
|
95
|
+
federation, federation_config = validate_federation_in_project_config(
|
|
96
|
+
federation, config
|
|
97
|
+
)
|
|
129
98
|
|
|
130
99
|
if "address" in federation_config:
|
|
131
|
-
|
|
100
|
+
_run_with_exec_api(app, federation_config, config_overrides, stream)
|
|
132
101
|
else:
|
|
133
|
-
|
|
102
|
+
_run_without_exec_api(app, federation_config, config_overrides, federation)
|
|
134
103
|
|
|
135
104
|
|
|
136
|
-
# pylint: disable=too-many-locals
|
|
137
|
-
def
|
|
105
|
+
# pylint: disable-next=too-many-locals
|
|
106
|
+
def _run_with_exec_api(
|
|
138
107
|
app: Path,
|
|
139
108
|
federation_config: dict[str, Any],
|
|
140
109
|
config_overrides: Optional[list[str]],
|
|
141
110
|
stream: bool,
|
|
142
111
|
) -> None:
|
|
143
112
|
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
if insecure := bool(insecure_str):
|
|
148
|
-
typer.secho(
|
|
149
|
-
"❌ `root_certificates` were provided but the `insecure` parameter"
|
|
150
|
-
"is set to `True`.",
|
|
151
|
-
fg=typer.colors.RED,
|
|
152
|
-
bold=True,
|
|
153
|
-
)
|
|
154
|
-
raise typer.Exit(code=1)
|
|
155
|
-
else:
|
|
156
|
-
root_certificates_bytes = None
|
|
157
|
-
if insecure_str is None:
|
|
158
|
-
typer.secho(
|
|
159
|
-
"❌ To disable TLS, set `insecure = true` in `pyproject.toml`.",
|
|
160
|
-
fg=typer.colors.RED,
|
|
161
|
-
bold=True,
|
|
162
|
-
)
|
|
163
|
-
raise typer.Exit(code=1)
|
|
164
|
-
if not (insecure := bool(insecure_str)):
|
|
165
|
-
typer.secho(
|
|
166
|
-
"❌ No certificate were given yet `insecure` is set to `False`.",
|
|
167
|
-
fg=typer.colors.RED,
|
|
168
|
-
bold=True,
|
|
169
|
-
)
|
|
170
|
-
raise typer.Exit(code=1)
|
|
171
|
-
|
|
113
|
+
insecure, root_certificates_bytes = validate_certificate_in_federation_config(
|
|
114
|
+
app, federation_config
|
|
115
|
+
)
|
|
172
116
|
channel = create_channel(
|
|
173
117
|
server_address=federation_config["address"],
|
|
174
118
|
insecure=insecure,
|
|
@@ -183,16 +127,18 @@ def _run_with_superexec(
|
|
|
183
127
|
content = Path(fab_path).read_bytes()
|
|
184
128
|
fab = Fab(fab_hash, content)
|
|
185
129
|
|
|
130
|
+
# Construct a `ConfigsRecord` out of a flattened `UserConfig`
|
|
131
|
+
fed_conf = flatten_dict(federation_config.get("options", {}))
|
|
132
|
+
c_record = user_config_to_configsrecord(fed_conf)
|
|
133
|
+
|
|
186
134
|
req = StartRunRequest(
|
|
187
135
|
fab=fab_to_proto(fab),
|
|
188
136
|
override_config=user_config_to_proto(parse_config_args(config_overrides)),
|
|
189
|
-
|
|
190
|
-
flatten_dict(federation_config.get("options"))
|
|
191
|
-
),
|
|
137
|
+
federation_options=configs_record_to_proto(c_record),
|
|
192
138
|
)
|
|
193
139
|
res = stub.StartRun(req)
|
|
194
140
|
|
|
195
|
-
# Delete FAB file once it has been sent to the
|
|
141
|
+
# Delete FAB file once it has been sent to the Exec API
|
|
196
142
|
Path(fab_path).unlink()
|
|
197
143
|
typer.secho(f"🎊 Successfully started run {res.run_id}", fg=typer.colors.GREEN)
|
|
198
144
|
|
|
@@ -200,7 +146,7 @@ def _run_with_superexec(
|
|
|
200
146
|
start_stream(res.run_id, channel, CONN_REFRESH_PERIOD)
|
|
201
147
|
|
|
202
148
|
|
|
203
|
-
def
|
|
149
|
+
def _run_without_exec_api(
|
|
204
150
|
app: Optional[Path],
|
|
205
151
|
federation_config: dict[str, Any],
|
|
206
152
|
config_overrides: Optional[list[str]],
|
flwr/client/app.py
CHANGED
|
@@ -32,11 +32,14 @@ from flwr.cli.config_utils import get_fab_metadata
|
|
|
32
32
|
from flwr.cli.install import install_from_fab
|
|
33
33
|
from flwr.client.client import Client
|
|
34
34
|
from flwr.client.client_app import ClientApp, LoadClientAppError
|
|
35
|
+
from flwr.client.nodestate.nodestate_factory import NodeStateFactory
|
|
35
36
|
from flwr.client.typing import ClientFnExt
|
|
36
37
|
from flwr.common import GRPC_MAX_MESSAGE_LENGTH, Context, EventType, Message, event
|
|
37
38
|
from flwr.common.address import parse_address
|
|
38
39
|
from flwr.common.constant import (
|
|
39
40
|
CLIENTAPPIO_API_DEFAULT_ADDRESS,
|
|
41
|
+
ISOLATION_MODE_PROCESS,
|
|
42
|
+
ISOLATION_MODE_SUBPROCESS,
|
|
40
43
|
MISSING_EXTRA_REST,
|
|
41
44
|
RUN_ID_NUM_BYTES,
|
|
42
45
|
TRANSPORT_TYPE_GRPC_ADAPTER,
|
|
@@ -52,18 +55,15 @@ from flwr.common.retry_invoker import RetryInvoker, RetryState, exponential
|
|
|
52
55
|
from flwr.common.typing import Fab, Run, UserConfig
|
|
53
56
|
from flwr.proto.clientappio_pb2_grpc import add_ClientAppIoServicer_to_server
|
|
54
57
|
from flwr.server.superlink.fleet.grpc_bidi.grpc_server import generic_create_grpc_server
|
|
55
|
-
from flwr.server.superlink.
|
|
58
|
+
from flwr.server.superlink.linkstate.utils import generate_rand_int_from_bytes
|
|
56
59
|
|
|
57
60
|
from .clientapp.clientappio_servicer import ClientAppInputs, ClientAppIoServicer
|
|
58
61
|
from .grpc_adapter_client.connection import grpc_adapter
|
|
59
62
|
from .grpc_client.connection import grpc_connection
|
|
60
63
|
from .grpc_rere_client.connection import grpc_request_response
|
|
61
64
|
from .message_handler.message_handler import handle_control_message
|
|
62
|
-
from .node_state import NodeState
|
|
63
65
|
from .numpy_client import NumPyClient
|
|
64
|
-
|
|
65
|
-
ISOLATION_MODE_SUBPROCESS = "subprocess"
|
|
66
|
-
ISOLATION_MODE_PROCESS = "process"
|
|
66
|
+
from .run_info_store import DeprecatedRunInfoStore
|
|
67
67
|
|
|
68
68
|
|
|
69
69
|
def _check_actionable_client(
|
|
@@ -364,8 +364,10 @@ def start_client_internal(
|
|
|
364
364
|
on_backoff=_on_backoff,
|
|
365
365
|
)
|
|
366
366
|
|
|
367
|
-
#
|
|
368
|
-
|
|
367
|
+
# DeprecatedRunInfoStore gets initialized when the first connection is established
|
|
368
|
+
run_info_store: Optional[DeprecatedRunInfoStore] = None
|
|
369
|
+
state_factory = NodeStateFactory()
|
|
370
|
+
state = state_factory.state()
|
|
369
371
|
|
|
370
372
|
runs: dict[int, Run] = {}
|
|
371
373
|
|
|
@@ -382,7 +384,7 @@ def start_client_internal(
|
|
|
382
384
|
receive, send, create_node, delete_node, get_run, get_fab = conn
|
|
383
385
|
|
|
384
386
|
# Register node when connecting the first time
|
|
385
|
-
if
|
|
387
|
+
if run_info_store is None:
|
|
386
388
|
if create_node is None:
|
|
387
389
|
if transport not in ["grpc-bidi", None]:
|
|
388
390
|
raise NotImplementedError(
|
|
@@ -391,19 +393,20 @@ def start_client_internal(
|
|
|
391
393
|
)
|
|
392
394
|
# gRPC-bidi doesn't have the concept of node_id,
|
|
393
395
|
# so we set it to -1
|
|
394
|
-
|
|
396
|
+
run_info_store = DeprecatedRunInfoStore(
|
|
395
397
|
node_id=-1,
|
|
396
398
|
node_config={},
|
|
397
399
|
)
|
|
398
400
|
else:
|
|
399
401
|
# Call create_node fn to register node
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
402
|
+
# and store node_id in state
|
|
403
|
+
if (node_id := create_node()) is None:
|
|
404
|
+
raise ValueError(
|
|
405
|
+
"Failed to register SuperNode with the SuperLink"
|
|
406
|
+
)
|
|
407
|
+
state.set_node_id(node_id)
|
|
408
|
+
run_info_store = DeprecatedRunInfoStore(
|
|
409
|
+
node_id=state.get_node_id(),
|
|
407
410
|
node_config=node_config,
|
|
408
411
|
)
|
|
409
412
|
|
|
@@ -461,7 +464,7 @@ def start_client_internal(
|
|
|
461
464
|
run.fab_id, run.fab_version = fab_id, fab_version
|
|
462
465
|
|
|
463
466
|
# Register context for this run
|
|
464
|
-
|
|
467
|
+
run_info_store.register_context(
|
|
465
468
|
run_id=run_id,
|
|
466
469
|
run=run,
|
|
467
470
|
flwr_path=flwr_path,
|
|
@@ -469,7 +472,7 @@ def start_client_internal(
|
|
|
469
472
|
)
|
|
470
473
|
|
|
471
474
|
# Retrieve context for this run
|
|
472
|
-
context =
|
|
475
|
+
context = run_info_store.retrieve_context(run_id=run_id)
|
|
473
476
|
# Create an error reply message that will never be used to prevent
|
|
474
477
|
# the used-before-assignment linting error
|
|
475
478
|
reply_message = message.create_error_reply(
|
|
@@ -542,7 +545,7 @@ def start_client_internal(
|
|
|
542
545
|
# Raise exception, crash process
|
|
543
546
|
raise ex
|
|
544
547
|
|
|
545
|
-
# Don't update/change
|
|
548
|
+
# Don't update/change DeprecatedRunInfoStore
|
|
546
549
|
|
|
547
550
|
e_code = ErrorCode.CLIENT_APP_RAISED_EXCEPTION
|
|
548
551
|
# Ex fmt: "<class 'ZeroDivisionError'>:<'division by zero'>"
|
|
@@ -567,7 +570,7 @@ def start_client_internal(
|
|
|
567
570
|
)
|
|
568
571
|
else:
|
|
569
572
|
# No exception, update node state
|
|
570
|
-
|
|
573
|
+
run_info_store.update_context(
|
|
571
574
|
run_id=run_id,
|
|
572
575
|
context=context,
|
|
573
576
|
)
|