flwr-nightly 1.13.0.dev20241019__py3-none-any.whl → 1.13.0.dev20241106__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of flwr-nightly might be problematic. Click here for more details.
- flwr/cli/build.py +2 -2
- flwr/cli/config_utils.py +97 -0
- flwr/cli/log.py +63 -97
- flwr/cli/new/templates/app/code/flwr_tune/dataset.py.tpl +1 -1
- flwr/cli/new/templates/app/pyproject.flowertune.toml.tpl +1 -0
- flwr/cli/new/templates/app/pyproject.tensorflow.toml.tpl +1 -1
- flwr/cli/run/run.py +18 -83
- flwr/client/app.py +13 -14
- flwr/client/clientapp/app.py +1 -2
- flwr/client/{node_state.py → run_info_store.py} +4 -3
- flwr/client/supernode/app.py +6 -8
- flwr/common/constant.py +39 -4
- flwr/common/context.py +9 -4
- flwr/common/date.py +3 -3
- flwr/common/logger.py +103 -0
- flwr/common/serde.py +24 -0
- flwr/common/telemetry.py +0 -6
- flwr/common/typing.py +9 -0
- flwr/proto/exec_pb2.py +6 -6
- flwr/proto/exec_pb2.pyi +8 -2
- flwr/proto/log_pb2.py +29 -0
- flwr/proto/log_pb2.pyi +39 -0
- flwr/proto/log_pb2_grpc.py +4 -0
- flwr/proto/log_pb2_grpc.pyi +4 -0
- flwr/proto/message_pb2.py +8 -8
- flwr/proto/message_pb2.pyi +4 -1
- flwr/proto/serverappio_pb2.py +52 -0
- flwr/proto/{driver_pb2.pyi → serverappio_pb2.pyi} +54 -0
- flwr/proto/serverappio_pb2_grpc.py +376 -0
- flwr/proto/serverappio_pb2_grpc.pyi +147 -0
- flwr/proto/simulationio_pb2.py +38 -0
- flwr/proto/simulationio_pb2.pyi +65 -0
- flwr/proto/simulationio_pb2_grpc.py +171 -0
- flwr/proto/simulationio_pb2_grpc.pyi +68 -0
- flwr/server/app.py +247 -105
- flwr/server/driver/driver.py +15 -1
- flwr/server/driver/grpc_driver.py +26 -33
- flwr/server/driver/inmemory_driver.py +6 -14
- flwr/server/run_serverapp.py +29 -23
- flwr/server/{superlink/state → serverapp}/__init__.py +3 -9
- flwr/server/serverapp/app.py +270 -0
- flwr/server/strategy/fedadam.py +11 -1
- flwr/server/superlink/driver/__init__.py +1 -1
- flwr/server/superlink/driver/{driver_grpc.py → serverappio_grpc.py} +19 -16
- flwr/server/superlink/driver/{driver_servicer.py → serverappio_servicer.py} +125 -39
- flwr/server/superlink/fleet/grpc_adapter/grpc_adapter_servicer.py +4 -2
- flwr/server/superlink/fleet/grpc_bidi/grpc_server.py +2 -2
- flwr/server/superlink/fleet/grpc_rere/fleet_servicer.py +4 -2
- flwr/server/superlink/fleet/grpc_rere/server_interceptor.py +2 -2
- flwr/server/superlink/fleet/message_handler/message_handler.py +7 -7
- flwr/server/superlink/fleet/rest_rere/rest_api.py +7 -7
- flwr/server/superlink/fleet/vce/vce_api.py +23 -23
- flwr/server/superlink/linkstate/__init__.py +28 -0
- flwr/server/superlink/{state/in_memory_state.py → linkstate/in_memory_linkstate.py} +180 -21
- flwr/server/superlink/{state/state.py → linkstate/linkstate.py} +144 -15
- flwr/server/superlink/{state/state_factory.py → linkstate/linkstate_factory.py} +9 -9
- flwr/server/superlink/{state/sqlite_state.py → linkstate/sqlite_linkstate.py} +300 -50
- flwr/server/superlink/{state → linkstate}/utils.py +84 -2
- flwr/server/superlink/simulation/__init__.py +15 -0
- flwr/server/superlink/simulation/simulationio_grpc.py +65 -0
- flwr/server/superlink/simulation/simulationio_servicer.py +132 -0
- flwr/simulation/__init__.py +2 -0
- flwr/simulation/app.py +1 -1
- flwr/simulation/ray_transport/ray_client_proxy.py +2 -2
- flwr/simulation/run_simulation.py +57 -131
- flwr/simulation/simulationio_connection.py +86 -0
- flwr/superexec/app.py +6 -134
- flwr/superexec/deployment.py +60 -65
- flwr/superexec/exec_grpc.py +15 -8
- flwr/superexec/exec_servicer.py +34 -63
- flwr/superexec/executor.py +22 -4
- flwr/superexec/simulation.py +13 -8
- {flwr_nightly-1.13.0.dev20241019.dist-info → flwr_nightly-1.13.0.dev20241106.dist-info}/METADATA +1 -1
- {flwr_nightly-1.13.0.dev20241019.dist-info → flwr_nightly-1.13.0.dev20241106.dist-info}/RECORD +77 -64
- {flwr_nightly-1.13.0.dev20241019.dist-info → flwr_nightly-1.13.0.dev20241106.dist-info}/entry_points.txt +1 -0
- flwr/client/node_state_tests.py +0 -66
- flwr/proto/driver_pb2.py +0 -42
- flwr/proto/driver_pb2_grpc.py +0 -239
- flwr/proto/driver_pb2_grpc.pyi +0 -94
- {flwr_nightly-1.13.0.dev20241019.dist-info → flwr_nightly-1.13.0.dev20241106.dist-info}/LICENSE +0 -0
- {flwr_nightly-1.13.0.dev20241019.dist-info → flwr_nightly-1.13.0.dev20241106.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
# Copyright 2024 Flower Labs GmbH. All Rights Reserved.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
# ==============================================================================
|
|
15
|
+
"""SimulationIo gRPC API."""
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
from logging import INFO
|
|
19
|
+
from typing import Optional
|
|
20
|
+
|
|
21
|
+
import grpc
|
|
22
|
+
|
|
23
|
+
from flwr.common import GRPC_MAX_MESSAGE_LENGTH
|
|
24
|
+
from flwr.common.logger import log
|
|
25
|
+
from flwr.proto.simulationio_pb2_grpc import ( # pylint: disable=E0611
|
|
26
|
+
add_SimulationIoServicer_to_server,
|
|
27
|
+
)
|
|
28
|
+
from flwr.server.superlink.ffs.ffs_factory import FfsFactory
|
|
29
|
+
from flwr.server.superlink.linkstate import LinkStateFactory
|
|
30
|
+
|
|
31
|
+
from ..fleet.grpc_bidi.grpc_server import generic_create_grpc_server
|
|
32
|
+
from .simulationio_servicer import SimulationIoServicer
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def run_simulationio_api_grpc(
    address: str,
    state_factory: LinkStateFactory,
    ffs_factory: FfsFactory,
    certificates: Optional[tuple[bytes, bytes, bytes]],
) -> grpc.Server:
    """Run SimulationIo API (gRPC, request-response).

    Builds a `SimulationIoServicer`, registers it on a gRPC server created
    by `generic_create_grpc_server`, starts that server and returns it.

    Parameters
    ----------
    address : str
        Passed as `server_address` to `generic_create_grpc_server` and
        included in the startup log message.
    state_factory : LinkStateFactory
        Factory handed to the `SimulationIoServicer`.
    ffs_factory : FfsFactory
        Factory handed to the `SimulationIoServicer`.
    certificates : Optional[tuple[bytes, bytes, bytes]]
        Forwarded unchanged to `generic_create_grpc_server`.
        NOTE(review): presumably `None` yields an insecure server -- confirm
        against `generic_create_grpc_server`.

    Returns
    -------
    grpc.Server
        The server object, after `start()` has been called on it.
    """
    # The servicer is a `SimulationIoServicer`, not a `grpc.Server`, so it
    # must not be annotated as one; let the type be inferred from the call.
    simulationio_servicer = SimulationIoServicer(
        state_factory=state_factory,
        ffs_factory=ffs_factory,
    )

    # Create SimulationIo API gRPC server
    simulationio_grpc_server = generic_create_grpc_server(
        servicer_and_add_fn=(
            simulationio_servicer,
            add_SimulationIoServicer_to_server,
        ),
        server_address=address,
        max_message_length=GRPC_MAX_MESSAGE_LENGTH,
        certificates=certificates,
    )

    log(
        INFO,
        "Flower Simulation Engine: Starting SimulationIo API on %s",
        address,
    )
    simulationio_grpc_server.start()

    return simulationio_grpc_server
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
# Copyright 2024 Flower Labs GmbH. All Rights Reserved.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
# ==============================================================================
|
|
15
|
+
"""SimulationIo API servicer."""
|
|
16
|
+
|
|
17
|
+
import threading
|
|
18
|
+
from logging import DEBUG, INFO
|
|
19
|
+
|
|
20
|
+
import grpc
|
|
21
|
+
from grpc import ServicerContext
|
|
22
|
+
|
|
23
|
+
from flwr.common.constant import Status
|
|
24
|
+
from flwr.common.logger import log
|
|
25
|
+
from flwr.common.serde import (
|
|
26
|
+
context_from_proto,
|
|
27
|
+
context_to_proto,
|
|
28
|
+
fab_to_proto,
|
|
29
|
+
run_status_from_proto,
|
|
30
|
+
run_to_proto,
|
|
31
|
+
)
|
|
32
|
+
from flwr.common.typing import Fab, RunStatus
|
|
33
|
+
from flwr.proto import simulationio_pb2_grpc
|
|
34
|
+
from flwr.proto.log_pb2 import ( # pylint: disable=E0611
|
|
35
|
+
PushLogsRequest,
|
|
36
|
+
PushLogsResponse,
|
|
37
|
+
)
|
|
38
|
+
from flwr.proto.run_pb2 import ( # pylint: disable=E0611
|
|
39
|
+
UpdateRunStatusRequest,
|
|
40
|
+
UpdateRunStatusResponse,
|
|
41
|
+
)
|
|
42
|
+
from flwr.proto.simulationio_pb2 import ( # pylint: disable=E0611
|
|
43
|
+
PullSimulationInputsRequest,
|
|
44
|
+
PullSimulationInputsResponse,
|
|
45
|
+
PushSimulationOutputsRequest,
|
|
46
|
+
PushSimulationOutputsResponse,
|
|
47
|
+
)
|
|
48
|
+
from flwr.server.superlink.ffs.ffs_factory import FfsFactory
|
|
49
|
+
from flwr.server.superlink.linkstate import LinkStateFactory
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class SimulationIoServicer(simulationio_pb2_grpc.SimulationIoServicer):
    """SimulationIo API servicer.

    Handles the SimulationIo RPCs using the LinkState and Ffs instances
    obtained from the factories passed to the constructor.
    """

    def __init__(
        self, state_factory: LinkStateFactory, ffs_factory: FfsFactory
    ) -> None:
        self.state_factory = state_factory
        self.ffs_factory = ffs_factory
        # Guards the "claim a pending run" sequence in `PullSimulationInputs`
        self.lock = threading.RLock()

    def PullSimulationInputs(
        self, request: PullSimulationInputsRequest, context: ServicerContext
    ) -> PullSimulationInputsResponse:
        """Pull SimulationIo process inputs.

        Returns an empty response when there is no pending run. Otherwise
        returns the Context, Run and Fab of the pending run after its status
        has been updated to STARTING.

        Raises
        ------
        RuntimeError
            If the Run, Fab or Context is not found, or if the status cannot
            be updated to STARTING.
        """
        log(DEBUG, "SimulationIoServicer.PullSimulationInputs")
        # Init access to LinkState and Ffs
        state = self.state_factory.state()
        ffs = self.ffs_factory.ffs()

        # Lock access to LinkState, preventing obtaining the same pending run_id
        with self.lock:
            # Attempt getting the run_id of a pending run
            run_id = state.get_pending_run_id()
            # If there's no pending run, return an empty response
            if run_id is None:
                return PullSimulationInputsResponse()

            # Retrieve Context, Run and Fab for the run_id
            serverapp_ctxt = state.get_serverapp_context(run_id)
            run = state.get_run(run_id)
            fab = None
            if run and run.fab_hash:
                # NOTE(review): `result[0]` is presumably the FAB content
                # returned by Ffs -- confirm against `Ffs.get`
                if result := ffs.get(run.fab_hash):
                    fab = Fab(run.fab_hash, result[0])
            if run and fab and serverapp_ctxt:
                # Update run status to STARTING
                if state.update_run_status(run_id, RunStatus(Status.STARTING, "", "")):
                    log(INFO, "Starting run %d", run_id)
                    return PullSimulationInputsResponse(
                        context=context_to_proto(serverapp_ctxt),
                        run=run_to_proto(run),
                        fab=fab_to_proto(fab),
                    )

        # Raise an exception if the Run or Fab is not found,
        # or if the status cannot be updated to STARTING
        raise RuntimeError(f"Failed to start run {run_id}")

    def PushSimulationOutputs(
        self, request: PushSimulationOutputsRequest, context: ServicerContext
    ) -> PushSimulationOutputsResponse:
        """Push Simulation process outputs.

        Stores the Context carried by the request in LinkState under the
        request's `run_id`.
        """
        log(DEBUG, "SimulationIoServicer.PushSimulationOutputs")
        state = self.state_factory.state()
        state.set_serverapp_context(request.run_id, context_from_proto(request.context))
        return PushSimulationOutputsResponse()

    def UpdateRunStatus(
        self, request: UpdateRunStatusRequest, context: grpc.ServicerContext
    ) -> UpdateRunStatusResponse:
        """Update the status of a run.

        The value returned by `update_run_status` is not inspected; an empty
        response is returned in every case.
        """
        log(DEBUG, "SimulationIoServicer.UpdateRunStatus")
        state = self.state_factory.state()

        # Update the run status
        state.update_run_status(
            run_id=request.run_id, new_status=run_status_from_proto(request.run_status)
        )
        return UpdateRunStatusResponse()

    def PushLogs(
        self, request: PushLogsRequest, context: grpc.ServicerContext
    ) -> PushLogsResponse:
        """Push logs.

        Concatenates the log entries of the request and adds the result to
        LinkState for the request's `run_id`.
        """
        log(DEBUG, "SimulationIoServicer.PushLogs")
        state = self.state_factory.state()

        # Add logs to LinkState
        merged_logs = "".join(request.logs)
        state.add_serverapp_log(request.run_id, merged_logs)
        return PushLogsResponse()
|
flwr/simulation/__init__.py
CHANGED
|
@@ -18,6 +18,7 @@
|
|
|
18
18
|
import importlib
|
|
19
19
|
|
|
20
20
|
from flwr.simulation.run_simulation import run_simulation
|
|
21
|
+
from flwr.simulation.simulationio_connection import SimulationIoConnection
|
|
21
22
|
|
|
22
23
|
is_ray_installed = importlib.util.find_spec("ray") is not None
|
|
23
24
|
|
|
@@ -37,6 +38,7 @@ To install the necessary dependencies, install `flwr` with the `simulation` extr
|
|
|
37
38
|
|
|
38
39
|
|
|
39
40
|
__all__ = [
|
|
41
|
+
"SimulationIoConnection",
|
|
40
42
|
"run_simulation",
|
|
41
43
|
"start_simulation",
|
|
42
44
|
]
|
flwr/simulation/app.py
CHANGED
|
@@ -36,7 +36,7 @@ from flwr.server.history import History
|
|
|
36
36
|
from flwr.server.server import Server, init_defaults, run_fl
|
|
37
37
|
from flwr.server.server_config import ServerConfig
|
|
38
38
|
from flwr.server.strategy import Strategy
|
|
39
|
-
from flwr.server.superlink.
|
|
39
|
+
from flwr.server.superlink.linkstate.utils import generate_rand_int_from_bytes
|
|
40
40
|
from flwr.simulation.ray_transport.ray_actor import (
|
|
41
41
|
ClientAppActor,
|
|
42
42
|
VirtualClientEngineActor,
|
|
@@ -22,7 +22,7 @@ from typing import Optional
|
|
|
22
22
|
from flwr import common
|
|
23
23
|
from flwr.client import ClientFnExt
|
|
24
24
|
from flwr.client.client_app import ClientApp
|
|
25
|
-
from flwr.client.
|
|
25
|
+
from flwr.client.run_info_store import DeprecatedRunInfoStore
|
|
26
26
|
from flwr.common import DEFAULT_TTL, Message, Metadata, RecordSet
|
|
27
27
|
from flwr.common.constant import (
|
|
28
28
|
NUM_PARTITIONS_KEY,
|
|
@@ -65,7 +65,7 @@ class RayActorClientProxy(ClientProxy):
|
|
|
65
65
|
|
|
66
66
|
self.app_fn = _load_app
|
|
67
67
|
self.actor_pool = actor_pool
|
|
68
|
-
self.proxy_state =
|
|
68
|
+
self.proxy_state = DeprecatedRunInfoStore(
|
|
69
69
|
node_id=node_id,
|
|
70
70
|
node_config={
|
|
71
71
|
PARTITION_ID_KEY: str(partition_id),
|
|
@@ -21,7 +21,6 @@ import logging
|
|
|
21
21
|
import sys
|
|
22
22
|
import threading
|
|
23
23
|
import traceback
|
|
24
|
-
from argparse import Namespace
|
|
25
24
|
from logging import DEBUG, ERROR, INFO, WARNING
|
|
26
25
|
from pathlib import Path
|
|
27
26
|
from time import sleep
|
|
@@ -29,69 +28,28 @@ from typing import Any, Optional
|
|
|
29
28
|
|
|
30
29
|
from flwr.cli.config_utils import load_and_validate
|
|
31
30
|
from flwr.client import ClientApp
|
|
32
|
-
from flwr.common import EventType, event, log
|
|
31
|
+
from flwr.common import Context, EventType, RecordSet, event, log, now
|
|
33
32
|
from flwr.common.config import get_fused_config_from_dir, parse_config_args
|
|
34
|
-
from flwr.common.constant import RUN_ID_NUM_BYTES
|
|
33
|
+
from flwr.common.constant import RUN_ID_NUM_BYTES, Status
|
|
35
34
|
from flwr.common.logger import (
|
|
36
35
|
set_logger_propagation,
|
|
37
36
|
update_console_handler,
|
|
38
|
-
warn_deprecated_feature,
|
|
39
37
|
warn_deprecated_feature_with_example,
|
|
40
38
|
)
|
|
41
|
-
from flwr.common.typing import Run, UserConfig
|
|
39
|
+
from flwr.common.typing import Run, RunStatus, UserConfig
|
|
42
40
|
from flwr.server.driver import Driver, InMemoryDriver
|
|
43
|
-
from flwr.server.run_serverapp import run as
|
|
41
|
+
from flwr.server.run_serverapp import run as _run
|
|
44
42
|
from flwr.server.server_app import ServerApp
|
|
45
43
|
from flwr.server.superlink.fleet import vce
|
|
46
44
|
from flwr.server.superlink.fleet.vce.backend.backend import BackendConfig
|
|
47
|
-
from flwr.server.superlink.
|
|
48
|
-
from flwr.server.superlink.
|
|
45
|
+
from flwr.server.superlink.linkstate import LinkStateFactory
|
|
46
|
+
from flwr.server.superlink.linkstate.in_memory_linkstate import RunRecord
|
|
47
|
+
from flwr.server.superlink.linkstate.utils import generate_rand_int_from_bytes
|
|
49
48
|
from flwr.simulation.ray_transport.utils import (
|
|
50
49
|
enable_tf_gpu_growth as enable_gpu_growth,
|
|
51
50
|
)
|
|
52
51
|
|
|
53
52
|
|
|
54
|
-
def _check_args_do_not_interfere(args: Namespace) -> bool:
|
|
55
|
-
"""Ensure decoupling of flags for different ways to start the simulation."""
|
|
56
|
-
mode_one_args = ["app", "run_config"]
|
|
57
|
-
mode_two_args = ["client_app", "server_app"]
|
|
58
|
-
|
|
59
|
-
def _resolve_message(conflict_keys: list[str]) -> str:
|
|
60
|
-
return ",".join([f"`--{key}`".replace("_", "-") for key in conflict_keys])
|
|
61
|
-
|
|
62
|
-
# When passing `--app`, `--app-dir` is ignored
|
|
63
|
-
if args.app and args.app_dir:
|
|
64
|
-
log(ERROR, "Either `--app` or `--app-dir` can be set, but not both.")
|
|
65
|
-
return False
|
|
66
|
-
|
|
67
|
-
if any(getattr(args, key) for key in mode_one_args):
|
|
68
|
-
if any(getattr(args, key) for key in mode_two_args):
|
|
69
|
-
log(
|
|
70
|
-
ERROR,
|
|
71
|
-
"Passing any of {%s} alongside with any of {%s}",
|
|
72
|
-
_resolve_message(mode_one_args),
|
|
73
|
-
_resolve_message(mode_two_args),
|
|
74
|
-
)
|
|
75
|
-
return False
|
|
76
|
-
|
|
77
|
-
if not args.app:
|
|
78
|
-
log(ERROR, "You need to pass --app")
|
|
79
|
-
return False
|
|
80
|
-
|
|
81
|
-
return True
|
|
82
|
-
|
|
83
|
-
# Ensure all args are set (required for the non-FAB mode of execution)
|
|
84
|
-
if not all(getattr(args, key) for key in mode_two_args):
|
|
85
|
-
log(
|
|
86
|
-
ERROR,
|
|
87
|
-
"Passing all of %s keys are required.",
|
|
88
|
-
_resolve_message(mode_two_args),
|
|
89
|
-
)
|
|
90
|
-
return False
|
|
91
|
-
|
|
92
|
-
return True
|
|
93
|
-
|
|
94
|
-
|
|
95
53
|
def _replace_keys(d: Any, match: str, target: str) -> Any:
|
|
96
54
|
if isinstance(d, dict):
|
|
97
55
|
return {
|
|
@@ -114,19 +72,6 @@ def run_simulation_from_cli() -> None:
|
|
|
114
72
|
event_details={"backend": args.backend, "num-supernodes": args.num_supernodes},
|
|
115
73
|
)
|
|
116
74
|
|
|
117
|
-
# Add warnings for deprecated server_app and client_app arguments
|
|
118
|
-
if args.server_app:
|
|
119
|
-
warn_deprecated_feature(
|
|
120
|
-
"The `--server-app` argument is deprecated. "
|
|
121
|
-
"Please use the `--app` argument instead."
|
|
122
|
-
)
|
|
123
|
-
|
|
124
|
-
if args.client_app:
|
|
125
|
-
warn_deprecated_feature(
|
|
126
|
-
"The `--client-app` argument is deprecated. "
|
|
127
|
-
"Use the `--app` argument instead."
|
|
128
|
-
)
|
|
129
|
-
|
|
130
75
|
if args.enable_tf_gpu_growth:
|
|
131
76
|
warn_deprecated_feature_with_example(
|
|
132
77
|
"Passing `--enable-tf-gpu-growth` is deprecated.",
|
|
@@ -143,60 +88,39 @@ def run_simulation_from_cli() -> None:
|
|
|
143
88
|
backend_config_dict = _replace_keys(backend_config_dict, match="-", target="_")
|
|
144
89
|
log(DEBUG, "backend_config_dict: %s", backend_config_dict)
|
|
145
90
|
|
|
146
|
-
# We are supporting two modes for the CLI entrypoint:
|
|
147
|
-
# 1) Running an app dir containing a `pyproject.toml`
|
|
148
|
-
# 2) Running any ClientApp and SeverApp w/o pyproject.toml being present
|
|
149
|
-
# For 2), some CLI args are compulsory, but they are not required for 1)
|
|
150
|
-
# We first do these checks
|
|
151
|
-
args_check_pass = _check_args_do_not_interfere(args)
|
|
152
|
-
if not args_check_pass:
|
|
153
|
-
sys.exit("Simulation Engine cannot start.")
|
|
154
|
-
|
|
155
91
|
run_id = (
|
|
156
92
|
generate_rand_int_from_bytes(RUN_ID_NUM_BYTES)
|
|
157
93
|
if args.run_id is None
|
|
158
94
|
else args.run_id
|
|
159
95
|
)
|
|
160
|
-
if args.app:
|
|
161
|
-
# Mode 1
|
|
162
|
-
app_path = Path(args.app)
|
|
163
|
-
if not app_path.is_dir():
|
|
164
|
-
log(ERROR, "--app is not a directory")
|
|
165
|
-
sys.exit("Simulation Engine cannot start.")
|
|
166
|
-
|
|
167
|
-
# Load pyproject.toml
|
|
168
|
-
config, errors, warnings = load_and_validate(
|
|
169
|
-
app_path / "pyproject.toml", check_module=False
|
|
170
|
-
)
|
|
171
|
-
if errors:
|
|
172
|
-
raise ValueError(errors)
|
|
173
96
|
|
|
174
|
-
|
|
175
|
-
|
|
97
|
+
app_path = Path(args.app)
|
|
98
|
+
if not app_path.is_dir():
|
|
99
|
+
log(ERROR, "--app is not a directory")
|
|
100
|
+
sys.exit("Simulation Engine cannot start.")
|
|
176
101
|
|
|
177
|
-
|
|
178
|
-
|
|
102
|
+
# Load pyproject.toml
|
|
103
|
+
config, errors, warnings = load_and_validate(
|
|
104
|
+
app_path / "pyproject.toml", check_module=False
|
|
105
|
+
)
|
|
106
|
+
if errors:
|
|
107
|
+
raise ValueError(errors)
|
|
179
108
|
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
client_app_attr = app_components["clientapp"]
|
|
183
|
-
server_app_attr = app_components["serverapp"]
|
|
109
|
+
if warnings:
|
|
110
|
+
log(WARNING, warnings)
|
|
184
111
|
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
)
|
|
188
|
-
fused_config = get_fused_config_from_dir(app_path, override_config)
|
|
189
|
-
app_dir = args.app
|
|
190
|
-
is_app = True
|
|
112
|
+
if config is None:
|
|
113
|
+
raise ValueError("Config extracted from FAB's pyproject.toml is not valid")
|
|
191
114
|
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
115
|
+
# Get ClientApp and ServerApp components
|
|
116
|
+
app_components = config["tool"]["flwr"]["app"]["components"]
|
|
117
|
+
client_app_attr = app_components["clientapp"]
|
|
118
|
+
server_app_attr = app_components["serverapp"]
|
|
119
|
+
|
|
120
|
+
override_config = parse_config_args(
|
|
121
|
+
[args.run_config] if args.run_config else args.run_config
|
|
122
|
+
)
|
|
123
|
+
fused_config = get_fused_config_from_dir(app_path, override_config)
|
|
200
124
|
|
|
201
125
|
# Create run
|
|
202
126
|
run = Run(
|
|
@@ -213,13 +137,13 @@ def run_simulation_from_cli() -> None:
|
|
|
213
137
|
num_supernodes=args.num_supernodes,
|
|
214
138
|
backend_name=args.backend,
|
|
215
139
|
backend_config=backend_config_dict,
|
|
216
|
-
app_dir=
|
|
140
|
+
app_dir=args.app,
|
|
217
141
|
run=run,
|
|
218
142
|
enable_tf_gpu_growth=args.enable_tf_gpu_growth,
|
|
219
143
|
delay_start=args.delay_start,
|
|
220
144
|
verbose_logging=args.verbose,
|
|
221
145
|
server_app_run_config=fused_config,
|
|
222
|
-
is_app=
|
|
146
|
+
is_app=True,
|
|
223
147
|
exit_event=EventType.CLI_FLOWER_SIMULATION_LEAVE,
|
|
224
148
|
)
|
|
225
149
|
|
|
@@ -310,6 +234,7 @@ def run_serverapp_th(
|
|
|
310
234
|
f_stop: threading.Event,
|
|
311
235
|
has_exception: threading.Event,
|
|
312
236
|
enable_tf_gpu_growth: bool,
|
|
237
|
+
run_id: int,
|
|
313
238
|
) -> threading.Thread:
|
|
314
239
|
"""Run SeverApp in a thread."""
|
|
315
240
|
|
|
@@ -332,11 +257,20 @@ def run_serverapp_th(
|
|
|
332
257
|
log(INFO, "Enabling GPU growth for Tensorflow on the server thread.")
|
|
333
258
|
enable_gpu_growth()
|
|
334
259
|
|
|
260
|
+
# Initialize Context
|
|
261
|
+
context = Context(
|
|
262
|
+
run_id=run_id,
|
|
263
|
+
node_id=0,
|
|
264
|
+
node_config={},
|
|
265
|
+
state=RecordSet(),
|
|
266
|
+
run_config=_server_app_run_config,
|
|
267
|
+
)
|
|
268
|
+
|
|
335
269
|
# Run ServerApp
|
|
336
|
-
|
|
270
|
+
_run(
|
|
337
271
|
driver=_driver,
|
|
272
|
+
context=context,
|
|
338
273
|
server_app_dir=_server_app_dir,
|
|
339
|
-
server_app_run_config=_server_app_run_config,
|
|
340
274
|
server_app_attr=_server_app_attr,
|
|
341
275
|
loaded_server_app=_server_app,
|
|
342
276
|
)
|
|
@@ -389,7 +323,7 @@ def _main_loop(
|
|
|
389
323
|
) -> None:
|
|
390
324
|
"""Start ServerApp on a separate thread, then launch Simulation Engine."""
|
|
391
325
|
# Initialize StateFactory
|
|
392
|
-
state_factory =
|
|
326
|
+
state_factory = LinkStateFactory(":flwr-in-memory-state:")
|
|
393
327
|
|
|
394
328
|
f_stop = threading.Event()
|
|
395
329
|
# A Threading event to indicate if an exception was raised in the ServerApp thread
|
|
@@ -399,13 +333,21 @@ def _main_loop(
|
|
|
399
333
|
try:
|
|
400
334
|
# Register run
|
|
401
335
|
log(DEBUG, "Pre-registering run with id %s", run.run_id)
|
|
402
|
-
|
|
336
|
+
init_status = RunStatus(Status.RUNNING, "", "")
|
|
337
|
+
state_factory.state().run_ids[run.run_id] = RunRecord( # type: ignore
|
|
338
|
+
run=run,
|
|
339
|
+
status=init_status,
|
|
340
|
+
starting_at=now().isoformat(),
|
|
341
|
+
running_at=now().isoformat(),
|
|
342
|
+
finished_at="",
|
|
343
|
+
)
|
|
403
344
|
|
|
404
345
|
if server_app_run_config is None:
|
|
405
346
|
server_app_run_config = {}
|
|
406
347
|
|
|
407
348
|
# Initialize Driver
|
|
408
|
-
driver = InMemoryDriver(
|
|
349
|
+
driver = InMemoryDriver(state_factory=state_factory)
|
|
350
|
+
driver.init_run(run_id=run.run_id)
|
|
409
351
|
|
|
410
352
|
# Get and run ServerApp thread
|
|
411
353
|
serverapp_th = run_serverapp_th(
|
|
@@ -417,6 +359,7 @@ def _main_loop(
|
|
|
417
359
|
f_stop=f_stop,
|
|
418
360
|
has_exception=server_app_thread_has_exception,
|
|
419
361
|
enable_tf_gpu_growth=enable_tf_gpu_growth,
|
|
362
|
+
run_id=run.run_id,
|
|
420
363
|
)
|
|
421
364
|
|
|
422
365
|
# Buffer time so the `ServerApp` in separate thread is ready
|
|
@@ -566,20 +509,10 @@ def _parse_args_run_simulation() -> argparse.ArgumentParser:
|
|
|
566
509
|
parser.add_argument(
|
|
567
510
|
"--app",
|
|
568
511
|
type=str,
|
|
569
|
-
|
|
512
|
+
required=True,
|
|
570
513
|
help="Path to a directory containing a FAB-like structure with a "
|
|
571
514
|
"pyproject.toml.",
|
|
572
515
|
)
|
|
573
|
-
parser.add_argument(
|
|
574
|
-
"--server-app",
|
|
575
|
-
help="(DEPRECATED: use --app instead) For example: `server:app` or "
|
|
576
|
-
"`project.package.module:wrapper.app`",
|
|
577
|
-
)
|
|
578
|
-
parser.add_argument(
|
|
579
|
-
"--client-app",
|
|
580
|
-
help="(DEPRECATED: use --app instead) For example: `client:app` or "
|
|
581
|
-
"`project.package.module:wrapper.app`",
|
|
582
|
-
)
|
|
583
516
|
parser.add_argument(
|
|
584
517
|
"--num-supernodes",
|
|
585
518
|
type=int,
|
|
@@ -628,13 +561,6 @@ def _parse_args_run_simulation() -> argparse.ArgumentParser:
|
|
|
628
561
|
help="When unset, only INFO, WARNING and ERROR log messages will be shown. "
|
|
629
562
|
"If set, DEBUG-level logs will be displayed. ",
|
|
630
563
|
)
|
|
631
|
-
parser.add_argument(
|
|
632
|
-
"--app-dir",
|
|
633
|
-
default="",
|
|
634
|
-
help="Add specified directory to the PYTHONPATH and load"
|
|
635
|
-
"ClientApp and ServerApp from there."
|
|
636
|
-
" Default: current working directory.",
|
|
637
|
-
)
|
|
638
564
|
parser.add_argument(
|
|
639
565
|
"--flwr-dir",
|
|
640
566
|
default=None,
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
# Copyright 2024 Flower Labs GmbH. All Rights Reserved.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
# ==============================================================================
|
|
15
|
+
"""Flower SimulationIo connection."""
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
from logging import DEBUG, WARNING
|
|
19
|
+
from typing import Optional, cast
|
|
20
|
+
|
|
21
|
+
import grpc
|
|
22
|
+
|
|
23
|
+
from flwr.common.constant import SIMULATIONIO_API_DEFAULT_ADDRESS
|
|
24
|
+
from flwr.common.grpc import create_channel
|
|
25
|
+
from flwr.common.logger import log
|
|
26
|
+
from flwr.proto.simulationio_pb2_grpc import SimulationIoStub # pylint: disable=E0611
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class SimulationIoConnection:
    """Client-side handle for the SimulationIo API.

    The gRPC channel is opened on first access to `_stub` and can be
    released again with `_disconnect`.

    Parameters
    ----------
    simulationio_service_address : str
        The address of the SimulationIo API service. Defaults to
        `SIMULATIONIO_API_DEFAULT_ADDRESS`.
    root_certificates : Optional[bytes] (default: None)
        The PEM-encoded root certificates as a byte string. When `None`,
        the channel is requested as insecure; otherwise the certificates
        are passed on to `create_channel`.
    """

    def __init__(  # pylint: disable=too-many-arguments
        self,
        simulationio_service_address: str = SIMULATIONIO_API_DEFAULT_ADDRESS,
        root_certificates: Optional[bytes] = None,
    ) -> None:
        # Nothing is opened here; channel and stub are created by `_connect`
        self._channel: Optional[grpc.Channel] = None
        self._grpc_stub: Optional[SimulationIoStub] = None
        self._cert = root_certificates
        self._addr = simulationio_service_address

    @property
    def _is_connected(self) -> bool:
        """Return True while a channel object is held."""
        return self._channel is not None

    @property
    def _stub(self) -> SimulationIoStub:
        """Return the SimulationIo stub, connecting first if necessary."""
        if self._is_connected:
            return cast(SimulationIoStub, self._grpc_stub)
        self._connect()
        return cast(SimulationIoStub, self._grpc_stub)

    def _connect(self) -> None:
        """Open the channel to the SimulationIo API and create the stub."""
        if self._is_connected:
            log(WARNING, "Already connected")
        else:
            # Without root certificates the channel is requested as insecure
            use_insecure = self._cert is None
            self._channel = create_channel(
                server_address=self._addr,
                insecure=use_insecure,
                root_certificates=self._cert,
            )
            self._grpc_stub = SimulationIoStub(self._channel)
            log(DEBUG, "[SimulationIO] Connected to %s", self._addr)

    def _disconnect(self) -> None:
        """Close the channel to the SimulationIo API and drop the stub."""
        if self._is_connected:
            # Clear the references before closing, so the instance already
            # reports "disconnected" if `close()` raises
            open_channel: grpc.Channel = self._channel
            self._channel = None
            self._grpc_stub = None
            open_channel.close()
            log(DEBUG, "[SimulationIO] Disconnected")
        else:
            log(DEBUG, "Already disconnected")
|