flwr 1.20.0__py3-none-any.whl → 1.21.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flwr/__init__.py +4 -1
- flwr/app/__init__.py +28 -0
- flwr/app/exception.py +31 -0
- flwr/cli/auth_plugin/oidc_cli_plugin.py +4 -4
- flwr/cli/cli_user_auth_interceptor.py +1 -1
- flwr/cli/config_utils.py +3 -3
- flwr/cli/constant.py +25 -8
- flwr/cli/log.py +9 -9
- flwr/cli/login/login.py +3 -3
- flwr/cli/ls.py +5 -5
- flwr/cli/new/new.py +11 -0
- flwr/cli/new/templates/app/code/__init__.pytorch_msg_api.py.tpl +1 -0
- flwr/cli/new/templates/app/code/client.pytorch_msg_api.py.tpl +80 -0
- flwr/cli/new/templates/app/code/server.pytorch_msg_api.py.tpl +41 -0
- flwr/cli/new/templates/app/code/task.pytorch_msg_api.py.tpl +98 -0
- flwr/cli/new/templates/app/pyproject.baseline.toml.tpl +1 -1
- flwr/cli/new/templates/app/pyproject.flowertune.toml.tpl +1 -1
- flwr/cli/new/templates/app/pyproject.huggingface.toml.tpl +1 -1
- flwr/cli/new/templates/app/pyproject.jax.toml.tpl +1 -1
- flwr/cli/new/templates/app/pyproject.mlx.toml.tpl +1 -1
- flwr/cli/new/templates/app/pyproject.numpy.toml.tpl +1 -1
- flwr/cli/new/templates/app/pyproject.pytorch.toml.tpl +1 -1
- flwr/cli/new/templates/app/pyproject.pytorch_msg_api.toml.tpl +53 -0
- flwr/cli/new/templates/app/pyproject.sklearn.toml.tpl +1 -1
- flwr/cli/new/templates/app/pyproject.tensorflow.toml.tpl +1 -1
- flwr/cli/run/run.py +9 -13
- flwr/cli/stop.py +7 -4
- flwr/cli/utils.py +19 -8
- flwr/client/grpc_rere_client/connection.py +1 -12
- flwr/client/rest_client/connection.py +3 -0
- flwr/clientapp/__init__.py +10 -0
- flwr/clientapp/mod/__init__.py +26 -0
- flwr/clientapp/mod/centraldp_mods.py +132 -0
- flwr/common/args.py +20 -6
- flwr/common/auth_plugin/__init__.py +4 -4
- flwr/common/auth_plugin/auth_plugin.py +7 -7
- flwr/common/constant.py +23 -4
- flwr/common/event_log_plugin/event_log_plugin.py +1 -1
- flwr/common/exit/__init__.py +4 -0
- flwr/common/exit/exit.py +8 -1
- flwr/common/exit/exit_code.py +26 -7
- flwr/common/exit/exit_handler.py +62 -0
- flwr/common/{exit_handlers.py → exit/signal_handler.py} +20 -37
- flwr/common/grpc.py +0 -11
- flwr/common/inflatable_utils.py +1 -1
- flwr/common/logger.py +1 -1
- flwr/common/retry_invoker.py +30 -11
- flwr/common/telemetry.py +4 -0
- flwr/compat/server/app.py +2 -2
- flwr/proto/appio_pb2.py +25 -17
- flwr/proto/appio_pb2.pyi +46 -2
- flwr/proto/clientappio_pb2.py +3 -11
- flwr/proto/clientappio_pb2.pyi +0 -47
- flwr/proto/clientappio_pb2_grpc.py +19 -20
- flwr/proto/clientappio_pb2_grpc.pyi +10 -11
- flwr/proto/control_pb2.py +62 -0
- flwr/proto/{exec_pb2_grpc.py → control_pb2_grpc.py} +54 -54
- flwr/proto/{exec_pb2_grpc.pyi → control_pb2_grpc.pyi} +28 -28
- flwr/proto/serverappio_pb2.py +2 -2
- flwr/proto/serverappio_pb2_grpc.py +68 -0
- flwr/proto/serverappio_pb2_grpc.pyi +26 -0
- flwr/proto/simulationio_pb2.py +4 -11
- flwr/proto/simulationio_pb2.pyi +0 -58
- flwr/proto/simulationio_pb2_grpc.py +129 -27
- flwr/proto/simulationio_pb2_grpc.pyi +52 -13
- flwr/server/app.py +129 -152
- flwr/server/grid/grpc_grid.py +3 -0
- flwr/server/grid/inmemory_grid.py +1 -0
- flwr/server/serverapp/app.py +157 -146
- flwr/server/superlink/fleet/vce/backend/raybackend.py +3 -1
- flwr/server/superlink/fleet/vce/vce_api.py +6 -6
- flwr/server/superlink/linkstate/in_memory_linkstate.py +34 -0
- flwr/server/superlink/linkstate/linkstate.py +2 -1
- flwr/server/superlink/linkstate/sqlite_linkstate.py +45 -0
- flwr/server/superlink/serverappio/serverappio_grpc.py +1 -1
- flwr/server/superlink/serverappio/serverappio_servicer.py +61 -6
- flwr/server/superlink/simulation/simulationio_servicer.py +97 -21
- flwr/serverapp/__init__.py +12 -0
- flwr/serverapp/dp_fixed_clipping.py +352 -0
- flwr/serverapp/exception.py +38 -0
- flwr/serverapp/strategy/__init__.py +38 -0
- flwr/serverapp/strategy/dp_fixed_clipping.py +352 -0
- flwr/serverapp/strategy/fedadagrad.py +162 -0
- flwr/serverapp/strategy/fedadam.py +181 -0
- flwr/serverapp/strategy/fedavg.py +295 -0
- flwr/serverapp/strategy/fedopt.py +218 -0
- flwr/serverapp/strategy/fedyogi.py +173 -0
- flwr/serverapp/strategy/result.py +105 -0
- flwr/serverapp/strategy/strategy.py +285 -0
- flwr/serverapp/strategy/strategy_utils.py +251 -0
- flwr/serverapp/strategy/strategy_utils_tests.py +304 -0
- flwr/simulation/app.py +161 -164
- flwr/supercore/app_utils.py +58 -0
- flwr/{supernode/scheduler → supercore/cli}/__init__.py +3 -3
- flwr/supercore/cli/flower_superexec.py +141 -0
- flwr/supercore/{scheduler → corestate}/__init__.py +3 -3
- flwr/supercore/corestate/corestate.py +81 -0
- flwr/supercore/grpc_health/__init__.py +3 -0
- flwr/supercore/grpc_health/health_server.py +53 -0
- flwr/supercore/grpc_health/simple_health_servicer.py +2 -2
- flwr/{superexec → supercore/superexec}/__init__.py +1 -1
- flwr/supercore/superexec/plugin/__init__.py +28 -0
- flwr/{supernode/scheduler/simple_clientapp_scheduler_plugin.py → supercore/superexec/plugin/base_exec_plugin.py} +10 -6
- flwr/supercore/superexec/plugin/clientapp_exec_plugin.py +28 -0
- flwr/supercore/{scheduler/plugin.py → superexec/plugin/exec_plugin.py} +4 -4
- flwr/supercore/superexec/plugin/serverapp_exec_plugin.py +28 -0
- flwr/supercore/superexec/plugin/simulation_exec_plugin.py +28 -0
- flwr/supercore/superexec/run_superexec.py +185 -0
- flwr/superlink/servicer/__init__.py +15 -0
- flwr/superlink/servicer/control/__init__.py +22 -0
- flwr/{superexec/exec_event_log_interceptor.py → superlink/servicer/control/control_event_log_interceptor.py} +7 -7
- flwr/{superexec/exec_grpc.py → superlink/servicer/control/control_grpc.py} +24 -29
- flwr/{superexec/exec_license_interceptor.py → superlink/servicer/control/control_license_interceptor.py} +6 -6
- flwr/{superexec/exec_servicer.py → superlink/servicer/control/control_servicer.py} +69 -30
- flwr/{superexec/exec_user_auth_interceptor.py → superlink/servicer/control/control_user_auth_interceptor.py} +10 -10
- flwr/supernode/cli/flower_supernode.py +3 -0
- flwr/supernode/cli/flwr_clientapp.py +18 -21
- flwr/supernode/nodestate/in_memory_nodestate.py +2 -2
- flwr/supernode/nodestate/nodestate.py +3 -59
- flwr/supernode/runtime/run_clientapp.py +39 -102
- flwr/supernode/servicer/clientappio/clientappio_servicer.py +10 -17
- flwr/supernode/start_client_internal.py +35 -76
- {flwr-1.20.0.dist-info → flwr-1.21.0.dist-info}/METADATA +4 -3
- {flwr-1.20.0.dist-info → flwr-1.21.0.dist-info}/RECORD +127 -98
- {flwr-1.20.0.dist-info → flwr-1.21.0.dist-info}/entry_points.txt +1 -0
- flwr/proto/exec_pb2.py +0 -62
- flwr/superexec/app.py +0 -45
- flwr/superexec/deployment.py +0 -191
- flwr/superexec/executor.py +0 -100
- flwr/superexec/simulation.py +0 -129
- /flwr/proto/{exec_pb2.pyi → control_pb2.pyi} +0 -0
- {flwr-1.20.0.dist-info → flwr-1.21.0.dist-info}/WHEEL +0 -0
flwr/__init__.py
CHANGED
|
@@ -17,12 +17,15 @@
|
|
|
17
17
|
|
|
18
18
|
from flwr.common.version import package_version as _package_version
|
|
19
19
|
|
|
20
|
-
from . import client, common, server, simulation
|
|
20
|
+
from . import app, client, clientapp, common, server, serverapp, simulation
|
|
21
21
|
|
|
22
22
|
__all__ = [
|
|
23
|
+
"app",
|
|
23
24
|
"client",
|
|
25
|
+
"clientapp",
|
|
24
26
|
"common",
|
|
25
27
|
"server",
|
|
28
|
+
"serverapp",
|
|
26
29
|
"simulation",
|
|
27
30
|
]
|
|
28
31
|
|
flwr/app/__init__.py
CHANGED
|
@@ -13,3 +13,31 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
# ==============================================================================
|
|
15
15
|
"""Public Flower App APIs."""
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
from flwr.common.constant import MessageType
|
|
19
|
+
from flwr.common.context import Context
|
|
20
|
+
from flwr.common.message import Message
|
|
21
|
+
from flwr.common.record import (
|
|
22
|
+
Array,
|
|
23
|
+
ArrayRecord,
|
|
24
|
+
ConfigRecord,
|
|
25
|
+
MetricRecord,
|
|
26
|
+
RecordDict,
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
from .error import Error
|
|
30
|
+
from .metadata import Metadata
|
|
31
|
+
|
|
32
|
+
__all__ = [
|
|
33
|
+
"Array",
|
|
34
|
+
"ArrayRecord",
|
|
35
|
+
"ConfigRecord",
|
|
36
|
+
"Context",
|
|
37
|
+
"Error",
|
|
38
|
+
"Message",
|
|
39
|
+
"MessageType",
|
|
40
|
+
"Metadata",
|
|
41
|
+
"MetricRecord",
|
|
42
|
+
"RecordDict",
|
|
43
|
+
]
|
flwr/app/exception.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# Copyright 2025 Flower Labs GmbH. All Rights Reserved.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
# ==============================================================================
|
|
15
|
+
"""Flower application exceptions."""
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class AppExitException(BaseException):
|
|
19
|
+
"""Base exception for all application-level errors in ServerApp and ClientApp.
|
|
20
|
+
|
|
21
|
+
When raised, the process will exit and report a telemetry event with the associated
|
|
22
|
+
exit code. This is not intended to be caught by user code.
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
# Default exit code — subclasses must override
|
|
26
|
+
exit_code = -1
|
|
27
|
+
|
|
28
|
+
def __init_subclass__(cls) -> None:
|
|
29
|
+
"""Ensure subclasses override the exit_code attribute."""
|
|
30
|
+
if cls.exit_code == -1:
|
|
31
|
+
raise ValueError("Subclasses must override the exit_code attribute.")
|
|
@@ -31,11 +31,11 @@ from flwr.common.constant import (
|
|
|
31
31
|
AuthType,
|
|
32
32
|
)
|
|
33
33
|
from flwr.common.typing import UserAuthCredentials, UserAuthLoginDetails
|
|
34
|
-
from flwr.proto.exec_pb2 import ( # pylint: disable=E0611
|
|
34
|
+
from flwr.proto.control_pb2 import ( # pylint: disable=E0611
|
|
35
35
|
GetAuthTokensRequest,
|
|
36
36
|
GetAuthTokensResponse,
|
|
37
37
|
)
|
|
38
|
-
from flwr.proto.exec_pb2_grpc import ExecStub
|
|
38
|
+
from flwr.proto.control_pb2_grpc import ControlStub
|
|
39
39
|
|
|
40
40
|
|
|
41
41
|
class OidcCliPlugin(CliAuthPlugin):
|
|
@@ -49,7 +49,7 @@ class OidcCliPlugin(CliAuthPlugin):
|
|
|
49
49
|
@staticmethod
|
|
50
50
|
def login(
|
|
51
51
|
login_details: UserAuthLoginDetails,
|
|
52
|
-
|
|
52
|
+
control_stub: ControlStub,
|
|
53
53
|
) -> UserAuthCredentials:
|
|
54
54
|
"""Authenticate the user and retrieve authentication credentials."""
|
|
55
55
|
typer.secho(
|
|
@@ -61,7 +61,7 @@ class OidcCliPlugin(CliAuthPlugin):
|
|
|
61
61
|
time.sleep(login_details.interval)
|
|
62
62
|
|
|
63
63
|
while (time.time() - start_time) < login_details.expires_in:
|
|
64
|
-
res: GetAuthTokensResponse =
|
|
64
|
+
res: GetAuthTokensResponse = control_stub.GetAuthTokens(
|
|
65
65
|
GetAuthTokensRequest(device_code=login_details.device_code)
|
|
66
66
|
)
|
|
67
67
|
|
|
@@ -20,7 +20,7 @@ from typing import Any, Callable, Union
|
|
|
20
20
|
import grpc
|
|
21
21
|
|
|
22
22
|
from flwr.common.auth_plugin import CliAuthPlugin
|
|
23
|
-
from flwr.proto.exec_pb2 import ( # pylint: disable=E0611
|
|
23
|
+
from flwr.proto.control_pb2 import ( # pylint: disable=E0611
|
|
24
24
|
StartRunRequest,
|
|
25
25
|
StreamLogsRequest,
|
|
26
26
|
)
|
flwr/cli/config_utils.py
CHANGED
|
@@ -143,7 +143,7 @@ def validate_federation_in_project_config(
|
|
|
143
143
|
if federation is None:
|
|
144
144
|
typer.secho(
|
|
145
145
|
"❌ No federation name was provided and the project's `pyproject.toml` "
|
|
146
|
-
"doesn't declare a default federation (with an Exec API address or an "
|
|
146
|
+
"doesn't declare a default federation (with an Control API address or an "
|
|
147
147
|
"`options.num-supernodes` value).",
|
|
148
148
|
fg=typer.colors.RED,
|
|
149
149
|
bold=True,
|
|
@@ -230,8 +230,8 @@ def exit_if_no_address(federation_config: dict[str, Any], cmd: str) -> None:
|
|
|
230
230
|
"""Exit if the provided federation_config has no "address" key."""
|
|
231
231
|
if "address" not in federation_config:
|
|
232
232
|
typer.secho(
|
|
233
|
-
f"❌ `flwr {cmd}` currently works with a SuperLink. Ensure that the
|
|
234
|
-
"SuperLink (
|
|
233
|
+
f"❌ `flwr {cmd}` currently works with a SuperLink. Ensure that the "
|
|
234
|
+
"correct SuperLink (Control API) address is provided in `pyproject.toml`.",
|
|
235
235
|
fg=typer.colors.RED,
|
|
236
236
|
bold=True,
|
|
237
237
|
)
|
flwr/cli/constant.py
CHANGED
|
@@ -15,13 +15,30 @@
|
|
|
15
15
|
"""Constants for CLI commands."""
|
|
16
16
|
|
|
17
17
|
|
|
18
|
+
# General help message for config overrides
|
|
19
|
+
CONFIG_HELP_MESSAGE = (
|
|
20
|
+
"Override {0} values using one of the following formats:\n\n"
|
|
21
|
+
"--{1} '<k1>=<v1> <k2>=<v2>' | --{1} '<k1>=<v1>' --{1} '<k2>=<v2>'{2}\n\n"
|
|
22
|
+
"When providing key-value pairs, values can be of any type supported by TOML "
|
|
23
|
+
"(e.g., bool, int, float, string). The specified keys (<k1> and <k2> in the "
|
|
24
|
+
"example) must exist in the {0} under the `{3}` section of `pyproject.toml` to be "
|
|
25
|
+
"overridden.{4}"
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
# The help message for `--run-config` option
|
|
29
|
+
RUN_CONFIG_HELP_MESSAGE = CONFIG_HELP_MESSAGE.format(
|
|
30
|
+
"run configuration",
|
|
31
|
+
"run-config",
|
|
32
|
+
" | --run-config <path/to/your/toml>",
|
|
33
|
+
"[tool.flwr.app.config]",
|
|
34
|
+
" Alternatively, provide a TOML file containing overrides.",
|
|
35
|
+
)
|
|
36
|
+
|
|
18
37
|
# The help message for `--federation-config` option
|
|
19
|
-
FEDERATION_CONFIG_HELP_MESSAGE = (
|
|
20
|
-
"
|
|
21
|
-
"
|
|
22
|
-
"
|
|
23
|
-
"
|
|
24
|
-
"
|
|
25
|
-
"`[tool.flwr.federations.<YOUR_FEDERATION>]` table of the `pyproject.toml` "
|
|
26
|
-
"for proper overriding."
|
|
38
|
+
FEDERATION_CONFIG_HELP_MESSAGE = CONFIG_HELP_MESSAGE.format(
|
|
39
|
+
"federation configuration",
|
|
40
|
+
"federation-config",
|
|
41
|
+
"",
|
|
42
|
+
"[tool.flwr.federations.<YOUR-FEDERATION>]",
|
|
43
|
+
"",
|
|
27
44
|
)
|
flwr/cli/log.py
CHANGED
|
@@ -32,8 +32,8 @@ from flwr.cli.config_utils import (
|
|
|
32
32
|
from flwr.cli.constant import FEDERATION_CONFIG_HELP_MESSAGE
|
|
33
33
|
from flwr.common.constant import CONN_RECONNECT_INTERVAL, CONN_REFRESH_PERIOD
|
|
34
34
|
from flwr.common.logger import log as logger
|
|
35
|
-
from flwr.proto.exec_pb2 import StreamLogsRequest # pylint: disable=E0611
|
|
36
|
-
from flwr.proto.exec_pb2_grpc import ExecStub
|
|
35
|
+
from flwr.proto.control_pb2 import StreamLogsRequest # pylint: disable=E0611
|
|
36
|
+
from flwr.proto.control_pb2_grpc import ControlStub
|
|
37
37
|
|
|
38
38
|
from .utils import flwr_cli_grpc_exc_handler, init_channel, try_obtain_cli_auth_plugin
|
|
39
39
|
|
|
@@ -46,7 +46,7 @@ def start_stream(
|
|
|
46
46
|
run_id: int, channel: grpc.Channel, refresh_period: int = CONN_REFRESH_PERIOD
|
|
47
47
|
) -> None:
|
|
48
48
|
"""Start log streaming for a given run ID."""
|
|
49
|
-
stub = ExecStub(channel)
|
|
49
|
+
stub = ControlStub(channel)
|
|
50
50
|
after_timestamp = 0.0
|
|
51
51
|
try:
|
|
52
52
|
logger(INFO, "Starting logstream for run_id `%s`", run_id)
|
|
@@ -69,7 +69,7 @@ def start_stream(
|
|
|
69
69
|
|
|
70
70
|
|
|
71
71
|
def stream_logs(
|
|
72
|
-
run_id: int, stub: ExecStub, duration: int, after_timestamp: float
|
|
72
|
+
run_id: int, stub: ControlStub, duration: int, after_timestamp: float
|
|
73
73
|
) -> float:
|
|
74
74
|
"""Stream logs from the beginning of a run with connection refresh.
|
|
75
75
|
|
|
@@ -77,8 +77,8 @@ def stream_logs(
|
|
|
77
77
|
----------
|
|
78
78
|
run_id : int
|
|
79
79
|
The identifier of the run.
|
|
80
|
-
stub : ExecStub
|
|
81
|
-
The gRPC stub to interact with the Exec service.
|
|
80
|
+
stub : ControlStub
|
|
81
|
+
The gRPC stub to interact with the Control service.
|
|
82
82
|
duration : int
|
|
83
83
|
The timeout duration for each stream connection in seconds.
|
|
84
84
|
after_timestamp : float
|
|
@@ -112,7 +112,7 @@ def stream_logs(
|
|
|
112
112
|
|
|
113
113
|
def print_logs(run_id: int, channel: grpc.Channel, timeout: int) -> None:
|
|
114
114
|
"""Print logs from the beginning of a run."""
|
|
115
|
-
stub = ExecStub(channel)
|
|
115
|
+
stub = ControlStub(channel)
|
|
116
116
|
req = StreamLogsRequest(run_id=run_id, after_timestamp=0.0)
|
|
117
117
|
|
|
118
118
|
try:
|
|
@@ -173,13 +173,13 @@ def log(
|
|
|
173
173
|
exit_if_no_address(federation_config, "log")
|
|
174
174
|
|
|
175
175
|
try:
|
|
176
|
-
|
|
176
|
+
_log_with_control_api(app, federation, federation_config, run_id, stream)
|
|
177
177
|
except Exception as err: # pylint: disable=broad-except
|
|
178
178
|
typer.secho(str(err), fg=typer.colors.RED, bold=True)
|
|
179
179
|
raise typer.Exit(code=1) from None
|
|
180
180
|
|
|
181
181
|
|
|
182
|
-
def
|
|
182
|
+
def _log_with_control_api(
|
|
183
183
|
app: Path,
|
|
184
184
|
federation: str,
|
|
185
185
|
federation_config: dict[str, Any],
|
flwr/cli/login/login.py
CHANGED
|
@@ -29,11 +29,11 @@ from flwr.cli.config_utils import (
|
|
|
29
29
|
)
|
|
30
30
|
from flwr.cli.constant import FEDERATION_CONFIG_HELP_MESSAGE
|
|
31
31
|
from flwr.common.typing import UserAuthLoginDetails
|
|
32
|
-
from flwr.proto.exec_pb2 import ( # pylint: disable=E0611
|
|
32
|
+
from flwr.proto.control_pb2 import ( # pylint: disable=E0611
|
|
33
33
|
GetLoginDetailsRequest,
|
|
34
34
|
GetLoginDetailsResponse,
|
|
35
35
|
)
|
|
36
|
-
from flwr.proto.exec_pb2_grpc import ExecStub
|
|
36
|
+
from flwr.proto.control_pb2_grpc import ControlStub
|
|
37
37
|
|
|
38
38
|
from ..utils import flwr_cli_grpc_exc_handler, init_channel, try_obtain_cli_auth_plugin
|
|
39
39
|
|
|
@@ -89,7 +89,7 @@ def login( # pylint: disable=R0914
|
|
|
89
89
|
raise typer.Exit(code=1)
|
|
90
90
|
|
|
91
91
|
channel = init_channel(app, federation_config, None)
|
|
92
|
-
stub = ExecStub(channel)
|
|
92
|
+
stub = ControlStub(channel)
|
|
93
93
|
|
|
94
94
|
login_request = GetLoginDetailsRequest()
|
|
95
95
|
with flwr_cli_grpc_exc_handler():
|
flwr/cli/ls.py
CHANGED
|
@@ -38,11 +38,11 @@ from flwr.common.date import format_timedelta, isoformat8601_utc
|
|
|
38
38
|
from flwr.common.logger import print_json_error, redirect_output, restore_output
|
|
39
39
|
from flwr.common.serde import run_from_proto
|
|
40
40
|
from flwr.common.typing import Run
|
|
41
|
-
from flwr.proto.exec_pb2 import ( # pylint: disable=E0611
|
|
41
|
+
from flwr.proto.control_pb2 import ( # pylint: disable=E0611
|
|
42
42
|
ListRunsRequest,
|
|
43
43
|
ListRunsResponse,
|
|
44
44
|
)
|
|
45
|
-
from flwr.proto.exec_pb2_grpc import ExecStub
|
|
45
|
+
from flwr.proto.control_pb2_grpc import ControlStub
|
|
46
46
|
|
|
47
47
|
from .utils import flwr_cli_grpc_exc_handler, init_channel, try_obtain_cli_auth_plugin
|
|
48
48
|
|
|
@@ -125,7 +125,7 @@ def ls( # pylint: disable=too-many-locals, too-many-branches, R0913, R0917
|
|
|
125
125
|
)
|
|
126
126
|
auth_plugin = try_obtain_cli_auth_plugin(app, federation, federation_config)
|
|
127
127
|
channel = init_channel(app, federation_config, auth_plugin)
|
|
128
|
-
stub = ExecStub(channel)
|
|
128
|
+
stub = ControlStub(channel)
|
|
129
129
|
|
|
130
130
|
# Display information about a specific run ID
|
|
131
131
|
if run_id is not None:
|
|
@@ -293,7 +293,7 @@ def _to_json(run_list: list[_RunListType]) -> str:
|
|
|
293
293
|
return json.dumps({"success": True, "runs": runs_list})
|
|
294
294
|
|
|
295
295
|
|
|
296
|
-
def _list_runs(stub: ExecStub) -> list[_RunListType]:
|
|
296
|
+
def _list_runs(stub: ControlStub) -> list[_RunListType]:
|
|
297
297
|
"""List all runs."""
|
|
298
298
|
with flwr_cli_grpc_exc_handler():
|
|
299
299
|
res: ListRunsResponse = stub.ListRuns(ListRunsRequest())
|
|
@@ -302,7 +302,7 @@ def _list_runs(stub: ExecStub) -> list[_RunListType]:
|
|
|
302
302
|
return _format_runs(run_dict, res.now)
|
|
303
303
|
|
|
304
304
|
|
|
305
|
-
def _display_one_run(stub: ExecStub, run_id: int) -> list[_RunListType]:
|
|
305
|
+
def _display_one_run(stub: ControlStub, run_id: int) -> list[_RunListType]:
|
|
306
306
|
"""Display information about a specific run."""
|
|
307
307
|
with flwr_cli_grpc_exc_handler():
|
|
308
308
|
res: ListRunsResponse = stub.ListRuns(ListRunsRequest(run_id=run_id))
|
flwr/cli/new/new.py
CHANGED
|
@@ -35,6 +35,7 @@ class MlFramework(str, Enum):
|
|
|
35
35
|
"""Available frameworks."""
|
|
36
36
|
|
|
37
37
|
PYTORCH = "PyTorch"
|
|
38
|
+
PYTORCH_MSG_API = "PyTorch (Message API)"
|
|
38
39
|
TENSORFLOW = "TensorFlow"
|
|
39
40
|
SKLEARN = "sklearn"
|
|
40
41
|
HUGGINGFACE = "HuggingFace"
|
|
@@ -154,6 +155,9 @@ def new(
|
|
|
154
155
|
if framework_str == MlFramework.BASELINE:
|
|
155
156
|
framework_str = "baseline"
|
|
156
157
|
|
|
158
|
+
if framework_str == MlFramework.PYTORCH_MSG_API:
|
|
159
|
+
framework_str = "pytorch_msg_api"
|
|
160
|
+
|
|
157
161
|
print(
|
|
158
162
|
typer.style(
|
|
159
163
|
f"\n🔨 Creating Flower App {app_name}...",
|
|
@@ -243,12 +247,19 @@ def new(
|
|
|
243
247
|
MlFramework.TENSORFLOW.value,
|
|
244
248
|
MlFramework.SKLEARN.value,
|
|
245
249
|
MlFramework.NUMPY.value,
|
|
250
|
+
"pytorch_msg_api",
|
|
246
251
|
]
|
|
247
252
|
if framework_str in frameworks_with_tasks:
|
|
248
253
|
files[f"{import_name}/task.py"] = {
|
|
249
254
|
"template": f"app/code/task.{template_name}.py.tpl"
|
|
250
255
|
}
|
|
251
256
|
|
|
257
|
+
if framework_str == "pytorch_msg_api":
|
|
258
|
+
# Use custom __init__ that better captures name of framework
|
|
259
|
+
files[f"{import_name}/__init__.py"] = {
|
|
260
|
+
"template": f"app/code/__init__.{framework_str}.py.tpl"
|
|
261
|
+
}
|
|
262
|
+
|
|
252
263
|
if framework_str == "baseline":
|
|
253
264
|
# Include additional files for baseline template
|
|
254
265
|
for file_name in ["model", "dataset", "strategy", "utils", "__init__"]:
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""$project_name: A Flower / PyTorch app."""
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
"""$project_name: A Flower / $framework_str app."""
|
|
2
|
+
|
|
3
|
+
import torch
|
|
4
|
+
from flwr.app import ArrayRecord, Context, Message, MetricRecord, RecordDict
|
|
5
|
+
from flwr.clientapp import ClientApp
|
|
6
|
+
|
|
7
|
+
from $import_name.task import Net, load_data
|
|
8
|
+
from $import_name.task import test as test_fn
|
|
9
|
+
from $import_name.task import train as train_fn
|
|
10
|
+
|
|
11
|
+
# Flower ClientApp
|
|
12
|
+
app = ClientApp()
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@app.train()
|
|
16
|
+
def train(msg: Message, context: Context):
|
|
17
|
+
"""Train the model on local data."""
|
|
18
|
+
|
|
19
|
+
# Load the model and initialize it with the received weights
|
|
20
|
+
model = Net()
|
|
21
|
+
model.load_state_dict(msg.content["arrays"].to_torch_state_dict())
|
|
22
|
+
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
|
23
|
+
model.to(device)
|
|
24
|
+
|
|
25
|
+
# Load the data
|
|
26
|
+
partition_id = context.node_config["partition-id"]
|
|
27
|
+
num_partitions = context.node_config["num-partitions"]
|
|
28
|
+
trainloader, _ = load_data(partition_id, num_partitions)
|
|
29
|
+
|
|
30
|
+
# Call the training function
|
|
31
|
+
train_loss = train_fn(
|
|
32
|
+
model,
|
|
33
|
+
trainloader,
|
|
34
|
+
context.run_config["local-epochs"],
|
|
35
|
+
msg.content["config"]["lr"],
|
|
36
|
+
device,
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
# Construct and return reply Message
|
|
40
|
+
model_record = ArrayRecord(model.state_dict())
|
|
41
|
+
metrics = {
|
|
42
|
+
"train_loss": train_loss,
|
|
43
|
+
"num-examples": len(trainloader.dataset),
|
|
44
|
+
}
|
|
45
|
+
metric_record = MetricRecord(metrics)
|
|
46
|
+
content = RecordDict({"arrays": model_record, "metrics": metric_record})
|
|
47
|
+
return Message(content=content, reply_to=msg)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@app.evaluate()
|
|
51
|
+
def evaluate(msg: Message, context: Context):
|
|
52
|
+
"""Evaluate the model on local data."""
|
|
53
|
+
|
|
54
|
+
# Load the model and initialize it with the received weights
|
|
55
|
+
model = Net()
|
|
56
|
+
model.load_state_dict(msg.content["arrays"].to_torch_state_dict())
|
|
57
|
+
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
|
58
|
+
model.to(device)
|
|
59
|
+
|
|
60
|
+
# Load the data
|
|
61
|
+
partition_id = context.node_config["partition-id"]
|
|
62
|
+
num_partitions = context.node_config["num-partitions"]
|
|
63
|
+
_, valloader = load_data(partition_id, num_partitions)
|
|
64
|
+
|
|
65
|
+
# Call the evaluation function
|
|
66
|
+
eval_loss, eval_acc = test_fn(
|
|
67
|
+
model,
|
|
68
|
+
valloader,
|
|
69
|
+
device,
|
|
70
|
+
)
|
|
71
|
+
|
|
72
|
+
# Construct and return reply Message
|
|
73
|
+
metrics = {
|
|
74
|
+
"eval_loss": eval_loss,
|
|
75
|
+
"eval_acc": eval_acc,
|
|
76
|
+
"num-examples": len(valloader.dataset),
|
|
77
|
+
}
|
|
78
|
+
metric_record = MetricRecord(metrics)
|
|
79
|
+
content = RecordDict({"metrics": metric_record})
|
|
80
|
+
return Message(content=content, reply_to=msg)
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"""$project_name: A Flower / $framework_str app."""
|
|
2
|
+
|
|
3
|
+
import torch
|
|
4
|
+
from flwr.app import ArrayRecord, ConfigRecord, Context
|
|
5
|
+
from flwr.serverapp import Grid, ServerApp
|
|
6
|
+
from flwr.serverapp.strategy import FedAvg
|
|
7
|
+
|
|
8
|
+
from $import_name.task import Net
|
|
9
|
+
|
|
10
|
+
# Create ServerApp
|
|
11
|
+
app = ServerApp()
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@app.main()
|
|
15
|
+
def main(grid: Grid, context: Context) -> None:
|
|
16
|
+
"""Main entry point for the ServerApp."""
|
|
17
|
+
|
|
18
|
+
# Read run config
|
|
19
|
+
fraction_train: float = context.run_config["fraction-train"]
|
|
20
|
+
num_rounds: int = context.run_config["num-server-rounds"]
|
|
21
|
+
lr: float = context.run_config["lr"]
|
|
22
|
+
|
|
23
|
+
# Load global model
|
|
24
|
+
global_model = Net()
|
|
25
|
+
arrays = ArrayRecord(global_model.state_dict())
|
|
26
|
+
|
|
27
|
+
# Initialize FedAvg strategy
|
|
28
|
+
strategy = FedAvg(fraction_train=fraction_train)
|
|
29
|
+
|
|
30
|
+
# Start strategy, run FedAvg for `num_rounds`
|
|
31
|
+
result = strategy.start(
|
|
32
|
+
grid=grid,
|
|
33
|
+
initial_arrays=arrays,
|
|
34
|
+
train_config=ConfigRecord({"lr": lr}),
|
|
35
|
+
num_rounds=num_rounds,
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
# Save final model to disk
|
|
39
|
+
print("\nSaving final model to disk...")
|
|
40
|
+
state_dict = result.arrays.to_torch_state_dict()
|
|
41
|
+
torch.save(state_dict, "final_model.pt")
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
"""$project_name: A Flower / $framework_str app."""
|
|
2
|
+
|
|
3
|
+
import torch
|
|
4
|
+
import torch.nn as nn
|
|
5
|
+
import torch.nn.functional as F
|
|
6
|
+
from flwr_datasets import FederatedDataset
|
|
7
|
+
from flwr_datasets.partitioner import IidPartitioner
|
|
8
|
+
from torch.utils.data import DataLoader
|
|
9
|
+
from torchvision.transforms import Compose, Normalize, ToTensor
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class Net(nn.Module):
|
|
13
|
+
"""Model (simple CNN adapted from 'PyTorch: A 60 Minute Blitz')"""
|
|
14
|
+
|
|
15
|
+
def __init__(self):
|
|
16
|
+
super(Net, self).__init__()
|
|
17
|
+
self.conv1 = nn.Conv2d(3, 6, 5)
|
|
18
|
+
self.pool = nn.MaxPool2d(2, 2)
|
|
19
|
+
self.conv2 = nn.Conv2d(6, 16, 5)
|
|
20
|
+
self.fc1 = nn.Linear(16 * 5 * 5, 120)
|
|
21
|
+
self.fc2 = nn.Linear(120, 84)
|
|
22
|
+
self.fc3 = nn.Linear(84, 10)
|
|
23
|
+
|
|
24
|
+
def forward(self, x):
|
|
25
|
+
x = self.pool(F.relu(self.conv1(x)))
|
|
26
|
+
x = self.pool(F.relu(self.conv2(x)))
|
|
27
|
+
x = x.view(-1, 16 * 5 * 5)
|
|
28
|
+
x = F.relu(self.fc1(x))
|
|
29
|
+
x = F.relu(self.fc2(x))
|
|
30
|
+
return self.fc3(x)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
fds = None # Cache FederatedDataset
|
|
34
|
+
|
|
35
|
+
pytorch_transforms = Compose([ToTensor(), Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def apply_transforms(batch):
|
|
39
|
+
"""Apply transforms to the partition from FederatedDataset."""
|
|
40
|
+
batch["img"] = [pytorch_transforms(img) for img in batch["img"]]
|
|
41
|
+
return batch
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def load_data(partition_id: int, num_partitions: int):
|
|
45
|
+
"""Load partition CIFAR10 data."""
|
|
46
|
+
# Only initialize `FederatedDataset` once
|
|
47
|
+
global fds
|
|
48
|
+
if fds is None:
|
|
49
|
+
partitioner = IidPartitioner(num_partitions=num_partitions)
|
|
50
|
+
fds = FederatedDataset(
|
|
51
|
+
dataset="uoft-cs/cifar10",
|
|
52
|
+
partitioners={"train": partitioner},
|
|
53
|
+
)
|
|
54
|
+
partition = fds.load_partition(partition_id)
|
|
55
|
+
# Divide data on each node: 80% train, 20% test
|
|
56
|
+
partition_train_test = partition.train_test_split(test_size=0.2, seed=42)
|
|
57
|
+
# Construct dataloaders
|
|
58
|
+
partition_train_test = partition_train_test.with_transform(apply_transforms)
|
|
59
|
+
trainloader = DataLoader(partition_train_test["train"], batch_size=32, shuffle=True)
|
|
60
|
+
testloader = DataLoader(partition_train_test["test"], batch_size=32)
|
|
61
|
+
return trainloader, testloader
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def train(net, trainloader, epochs, lr, device):
|
|
65
|
+
"""Train the model on the training set."""
|
|
66
|
+
net.to(device) # move model to GPU if available
|
|
67
|
+
criterion = torch.nn.CrossEntropyLoss().to(device)
|
|
68
|
+
optimizer = torch.optim.Adam(net.parameters(), lr=lr)
|
|
69
|
+
net.train()
|
|
70
|
+
running_loss = 0.0
|
|
71
|
+
for _ in range(epochs):
|
|
72
|
+
for batch in trainloader:
|
|
73
|
+
images = batch["img"].to(device)
|
|
74
|
+
labels = batch["label"].to(device)
|
|
75
|
+
optimizer.zero_grad()
|
|
76
|
+
loss = criterion(net(images), labels)
|
|
77
|
+
loss.backward()
|
|
78
|
+
optimizer.step()
|
|
79
|
+
running_loss += loss.item()
|
|
80
|
+
avg_trainloss = running_loss / len(trainloader)
|
|
81
|
+
return avg_trainloss
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def test(net, testloader, device):
|
|
85
|
+
"""Validate the model on the test set."""
|
|
86
|
+
net.to(device)
|
|
87
|
+
criterion = torch.nn.CrossEntropyLoss()
|
|
88
|
+
correct, loss = 0, 0.0
|
|
89
|
+
with torch.no_grad():
|
|
90
|
+
for batch in testloader:
|
|
91
|
+
images = batch["img"].to(device)
|
|
92
|
+
labels = batch["label"].to(device)
|
|
93
|
+
outputs = net(images)
|
|
94
|
+
loss += criterion(outputs, labels).item()
|
|
95
|
+
correct += (torch.max(outputs.data, 1)[1] == labels).sum().item()
|
|
96
|
+
accuracy = correct / len(testloader.dataset)
|
|
97
|
+
loss = loss / len(testloader)
|
|
98
|
+
return loss, accuracy
|