flwr-nightly 1.13.0.dev20241021__py3-none-any.whl → 1.13.0.dev20241023__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of flwr-nightly might be problematic. Click here for more details.
- flwr/client/app.py +13 -14
- flwr/client/node_state_tests.py +7 -8
- flwr/client/{node_state.py → run_info_store.py} +3 -3
- flwr/client/supernode/app.py +6 -8
- flwr/common/constant.py +31 -3
- flwr/common/typing.py +9 -0
- flwr/server/app.py +121 -10
- flwr/server/driver/inmemory_driver.py +2 -2
- flwr/server/{superlink/state → serverapp}/__init__.py +3 -9
- flwr/server/serverapp/app.py +78 -0
- flwr/server/superlink/driver/driver_grpc.py +2 -2
- flwr/server/superlink/driver/driver_servicer.py +9 -7
- flwr/server/superlink/fleet/grpc_adapter/grpc_adapter_servicer.py +4 -2
- flwr/server/superlink/fleet/grpc_rere/fleet_servicer.py +4 -2
- flwr/server/superlink/fleet/grpc_rere/server_interceptor.py +2 -2
- flwr/server/superlink/fleet/message_handler/message_handler.py +7 -7
- flwr/server/superlink/fleet/rest_rere/rest_api.py +7 -7
- flwr/server/superlink/fleet/vce/vce_api.py +23 -23
- flwr/server/superlink/linkstate/__init__.py +28 -0
- flwr/server/superlink/{state/in_memory_state.py → linkstate/in_memory_linkstate.py} +109 -19
- flwr/server/superlink/{state/state.py → linkstate/linkstate.py} +59 -11
- flwr/server/superlink/{state/state_factory.py → linkstate/linkstate_factory.py} +9 -9
- flwr/server/superlink/{state/sqlite_state.py → linkstate/sqlite_linkstate.py} +136 -35
- flwr/server/superlink/{state → linkstate}/utils.py +57 -1
- flwr/simulation/app.py +1 -1
- flwr/simulation/ray_transport/ray_client_proxy.py +2 -2
- flwr/simulation/run_simulation.py +15 -7
- flwr/superexec/app.py +9 -2
- flwr/superexec/simulation.py +1 -1
- {flwr_nightly-1.13.0.dev20241021.dist-info → flwr_nightly-1.13.0.dev20241023.dist-info}/METADATA +1 -1
- {flwr_nightly-1.13.0.dev20241021.dist-info → flwr_nightly-1.13.0.dev20241023.dist-info}/RECORD +34 -32
- {flwr_nightly-1.13.0.dev20241021.dist-info → flwr_nightly-1.13.0.dev20241023.dist-info}/entry_points.txt +1 -0
- {flwr_nightly-1.13.0.dev20241021.dist-info → flwr_nightly-1.13.0.dev20241023.dist-info}/LICENSE +0 -0
- {flwr_nightly-1.13.0.dev20241021.dist-info → flwr_nightly-1.13.0.dev20241023.dist-info}/WHEEL +0 -0
flwr/client/app.py
CHANGED
|
@@ -37,6 +37,8 @@ from flwr.common import GRPC_MAX_MESSAGE_LENGTH, Context, EventType, Message, ev
|
|
|
37
37
|
from flwr.common.address import parse_address
|
|
38
38
|
from flwr.common.constant import (
|
|
39
39
|
CLIENTAPPIO_API_DEFAULT_ADDRESS,
|
|
40
|
+
ISOLATION_MODE_PROCESS,
|
|
41
|
+
ISOLATION_MODE_SUBPROCESS,
|
|
40
42
|
MISSING_EXTRA_REST,
|
|
41
43
|
RUN_ID_NUM_BYTES,
|
|
42
44
|
TRANSPORT_TYPE_GRPC_ADAPTER,
|
|
@@ -52,18 +54,15 @@ from flwr.common.retry_invoker import RetryInvoker, RetryState, exponential
|
|
|
52
54
|
from flwr.common.typing import Fab, Run, UserConfig
|
|
53
55
|
from flwr.proto.clientappio_pb2_grpc import add_ClientAppIoServicer_to_server
|
|
54
56
|
from flwr.server.superlink.fleet.grpc_bidi.grpc_server import generic_create_grpc_server
|
|
55
|
-
from flwr.server.superlink.
|
|
57
|
+
from flwr.server.superlink.linkstate.utils import generate_rand_int_from_bytes
|
|
56
58
|
|
|
57
59
|
from .clientapp.clientappio_servicer import ClientAppInputs, ClientAppIoServicer
|
|
58
60
|
from .grpc_adapter_client.connection import grpc_adapter
|
|
59
61
|
from .grpc_client.connection import grpc_connection
|
|
60
62
|
from .grpc_rere_client.connection import grpc_request_response
|
|
61
63
|
from .message_handler.message_handler import handle_control_message
|
|
62
|
-
from .node_state import NodeState
|
|
63
64
|
from .numpy_client import NumPyClient
|
|
64
|
-
|
|
65
|
-
ISOLATION_MODE_SUBPROCESS = "subprocess"
|
|
66
|
-
ISOLATION_MODE_PROCESS = "process"
|
|
65
|
+
from .run_info_store import DeprecatedRunInfoStore
|
|
67
66
|
|
|
68
67
|
|
|
69
68
|
def _check_actionable_client(
|
|
@@ -364,8 +363,8 @@ def start_client_internal(
|
|
|
364
363
|
on_backoff=_on_backoff,
|
|
365
364
|
)
|
|
366
365
|
|
|
367
|
-
#
|
|
368
|
-
|
|
366
|
+
# DeprecatedRunInfoStore gets initialized when the first connection is established
|
|
367
|
+
run_info_store: Optional[DeprecatedRunInfoStore] = None
|
|
369
368
|
|
|
370
369
|
runs: dict[int, Run] = {}
|
|
371
370
|
|
|
@@ -382,7 +381,7 @@ def start_client_internal(
|
|
|
382
381
|
receive, send, create_node, delete_node, get_run, get_fab = conn
|
|
383
382
|
|
|
384
383
|
# Register node when connecting the first time
|
|
385
|
-
if
|
|
384
|
+
if run_info_store is None:
|
|
386
385
|
if create_node is None:
|
|
387
386
|
if transport not in ["grpc-bidi", None]:
|
|
388
387
|
raise NotImplementedError(
|
|
@@ -391,7 +390,7 @@ def start_client_internal(
|
|
|
391
390
|
)
|
|
392
391
|
# gRPC-bidi doesn't have the concept of node_id,
|
|
393
392
|
# so we set it to -1
|
|
394
|
-
|
|
393
|
+
run_info_store = DeprecatedRunInfoStore(
|
|
395
394
|
node_id=-1,
|
|
396
395
|
node_config={},
|
|
397
396
|
)
|
|
@@ -402,7 +401,7 @@ def start_client_internal(
|
|
|
402
401
|
) # pylint: disable=not-callable
|
|
403
402
|
if node_id is None:
|
|
404
403
|
raise ValueError("Node registration failed")
|
|
405
|
-
|
|
404
|
+
run_info_store = DeprecatedRunInfoStore(
|
|
406
405
|
node_id=node_id,
|
|
407
406
|
node_config=node_config,
|
|
408
407
|
)
|
|
@@ -461,7 +460,7 @@ def start_client_internal(
|
|
|
461
460
|
run.fab_id, run.fab_version = fab_id, fab_version
|
|
462
461
|
|
|
463
462
|
# Register context for this run
|
|
464
|
-
|
|
463
|
+
run_info_store.register_context(
|
|
465
464
|
run_id=run_id,
|
|
466
465
|
run=run,
|
|
467
466
|
flwr_path=flwr_path,
|
|
@@ -469,7 +468,7 @@ def start_client_internal(
|
|
|
469
468
|
)
|
|
470
469
|
|
|
471
470
|
# Retrieve context for this run
|
|
472
|
-
context =
|
|
471
|
+
context = run_info_store.retrieve_context(run_id=run_id)
|
|
473
472
|
# Create an error reply message that will never be used to prevent
|
|
474
473
|
# the used-before-assignment linting error
|
|
475
474
|
reply_message = message.create_error_reply(
|
|
@@ -542,7 +541,7 @@ def start_client_internal(
|
|
|
542
541
|
# Raise exception, crash process
|
|
543
542
|
raise ex
|
|
544
543
|
|
|
545
|
-
# Don't update/change
|
|
544
|
+
# Don't update/change DeprecatedRunInfoStore
|
|
546
545
|
|
|
547
546
|
e_code = ErrorCode.CLIENT_APP_RAISED_EXCEPTION
|
|
548
547
|
# Ex fmt: "<class 'ZeroDivisionError'>:<'division by zero'>"
|
|
@@ -567,7 +566,7 @@ def start_client_internal(
|
|
|
567
566
|
)
|
|
568
567
|
else:
|
|
569
568
|
# No exception, update node state
|
|
570
|
-
|
|
569
|
+
run_info_store.update_context(
|
|
571
570
|
run_id=run_id,
|
|
572
571
|
context=context,
|
|
573
572
|
)
|
flwr/client/node_state_tests.py
CHANGED
|
@@ -17,7 +17,7 @@
|
|
|
17
17
|
|
|
18
18
|
from typing import cast
|
|
19
19
|
|
|
20
|
-
from flwr.client.
|
|
20
|
+
from flwr.client.run_info_store import DeprecatedRunInfoStore
|
|
21
21
|
from flwr.common import ConfigsRecord, Context
|
|
22
22
|
from flwr.proto.task_pb2 import TaskIns # pylint: disable=E0611
|
|
23
23
|
|
|
@@ -34,32 +34,31 @@ def _run_dummy_task(context: Context) -> Context:
|
|
|
34
34
|
|
|
35
35
|
|
|
36
36
|
def test_multirun_in_node_state() -> None:
|
|
37
|
-
"""Test basic
|
|
37
|
+
"""Test basic DeprecatedRunInfoStore logic."""
|
|
38
38
|
# Tasks to perform
|
|
39
39
|
tasks = [TaskIns(run_id=run_id) for run_id in [0, 1, 1, 2, 3, 2, 1, 5]]
|
|
40
40
|
# the "tasks" is to count how many times each run is executed
|
|
41
41
|
expected_values = {0: "1", 1: "1" * 3, 2: "1" * 2, 3: "1", 5: "1"}
|
|
42
42
|
|
|
43
|
-
|
|
44
|
-
node_state = NodeState(node_id=0, node_config={})
|
|
43
|
+
node_info_store = DeprecatedRunInfoStore(node_id=0, node_config={})
|
|
45
44
|
|
|
46
45
|
for task in tasks:
|
|
47
46
|
run_id = task.run_id
|
|
48
47
|
|
|
49
48
|
# Register
|
|
50
|
-
|
|
49
|
+
node_info_store.register_context(run_id=run_id)
|
|
51
50
|
|
|
52
51
|
# Get run state
|
|
53
|
-
context =
|
|
52
|
+
context = node_info_store.retrieve_context(run_id=run_id)
|
|
54
53
|
|
|
55
54
|
# Run "task"
|
|
56
55
|
updated_state = _run_dummy_task(context)
|
|
57
56
|
|
|
58
57
|
# Update run state
|
|
59
|
-
|
|
58
|
+
node_info_store.update_context(run_id=run_id, context=updated_state)
|
|
60
59
|
|
|
61
60
|
# Verify values
|
|
62
|
-
for run_id, run_info in
|
|
61
|
+
for run_id, run_info in node_info_store.run_infos.items():
|
|
63
62
|
assert (
|
|
64
63
|
run_info.context.state.configs_records["counter"]["count"]
|
|
65
64
|
== expected_values[run_id]
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright
|
|
1
|
+
# Copyright 2024 Flower Labs GmbH. All Rights Reserved.
|
|
2
2
|
#
|
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
4
|
# you may not use this file except in compliance with the License.
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
# ==============================================================================
|
|
15
|
-
"""
|
|
15
|
+
"""Deprecated Run Info Store."""
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
from dataclasses import dataclass
|
|
@@ -36,7 +36,7 @@ class RunInfo:
|
|
|
36
36
|
initial_run_config: UserConfig
|
|
37
37
|
|
|
38
38
|
|
|
39
|
-
class
|
|
39
|
+
class DeprecatedRunInfoStore:
|
|
40
40
|
"""State of a node where client nodes execute runs."""
|
|
41
41
|
|
|
42
42
|
def __init__(
|
flwr/client/supernode/app.py
CHANGED
|
@@ -31,6 +31,8 @@ from flwr.common import EventType, event
|
|
|
31
31
|
from flwr.common.config import parse_config_args
|
|
32
32
|
from flwr.common.constant import (
|
|
33
33
|
FLEET_API_GRPC_RERE_DEFAULT_ADDRESS,
|
|
34
|
+
ISOLATION_MODE_PROCESS,
|
|
35
|
+
ISOLATION_MODE_SUBPROCESS,
|
|
34
36
|
TRANSPORT_TYPE_GRPC_ADAPTER,
|
|
35
37
|
TRANSPORT_TYPE_GRPC_RERE,
|
|
36
38
|
TRANSPORT_TYPE_REST,
|
|
@@ -38,11 +40,7 @@ from flwr.common.constant import (
|
|
|
38
40
|
from flwr.common.exit_handlers import register_exit_handlers
|
|
39
41
|
from flwr.common.logger import log, warn_deprecated_feature
|
|
40
42
|
|
|
41
|
-
from ..app import
|
|
42
|
-
ISOLATION_MODE_PROCESS,
|
|
43
|
-
ISOLATION_MODE_SUBPROCESS,
|
|
44
|
-
start_client_internal,
|
|
45
|
-
)
|
|
43
|
+
from ..app import start_client_internal
|
|
46
44
|
from ..clientapp.utils import get_load_client_app_fn
|
|
47
45
|
|
|
48
46
|
|
|
@@ -200,10 +198,10 @@ def _parse_args_run_supernode() -> argparse.ArgumentParser:
|
|
|
200
198
|
ISOLATION_MODE_SUBPROCESS,
|
|
201
199
|
ISOLATION_MODE_PROCESS,
|
|
202
200
|
],
|
|
203
|
-
help="Isolation mode when running `ClientApp` (optional, possible values: "
|
|
204
|
-
"`subprocess`, `process`). By default, `ClientApp` runs in the same process "
|
|
201
|
+
help="Isolation mode when running a `ClientApp` (optional, possible values: "
|
|
202
|
+
"`subprocess`, `process`). By default, a `ClientApp` runs in the same process "
|
|
205
203
|
"that executes the SuperNode. Use `subprocess` to configure SuperNode to run "
|
|
206
|
-
"`ClientApp` in a subprocess. Use `process` to indicate that a separate "
|
|
204
|
+
"a `ClientApp` in a subprocess. Use `process` to indicate that a separate "
|
|
207
205
|
"independent process gets created outside of SuperNode.",
|
|
208
206
|
)
|
|
209
207
|
parser.add_argument(
|
flwr/common/constant.py
CHANGED
|
@@ -40,15 +40,14 @@ TRANSPORT_TYPES = [
|
|
|
40
40
|
# Addresses
|
|
41
41
|
# SuperNode
|
|
42
42
|
CLIENTAPPIO_API_DEFAULT_ADDRESS = "0.0.0.0:9094"
|
|
43
|
-
# SuperExec
|
|
44
|
-
EXEC_API_DEFAULT_ADDRESS = "0.0.0.0:9093"
|
|
45
43
|
# SuperLink
|
|
46
44
|
DRIVER_API_DEFAULT_ADDRESS = "0.0.0.0:9091"
|
|
47
45
|
FLEET_API_GRPC_RERE_DEFAULT_ADDRESS = "0.0.0.0:9092"
|
|
48
46
|
FLEET_API_GRPC_BIDI_DEFAULT_ADDRESS = (
|
|
49
47
|
"[::]:8080" # IPv6 to keep start_server compatible
|
|
50
48
|
)
|
|
51
|
-
FLEET_API_REST_DEFAULT_ADDRESS = "0.0.0.0:
|
|
49
|
+
FLEET_API_REST_DEFAULT_ADDRESS = "0.0.0.0:9095"
|
|
50
|
+
EXEC_API_DEFAULT_ADDRESS = "0.0.0.0:9093"
|
|
52
51
|
|
|
53
52
|
# Constants for ping
|
|
54
53
|
PING_DEFAULT_INTERVAL = 30
|
|
@@ -84,6 +83,10 @@ GRPC_ADAPTER_METADATA_MESSAGE_QUALNAME_KEY = "grpc-message-qualname"
|
|
|
84
83
|
# Message TTL
|
|
85
84
|
MESSAGE_TTL_TOLERANCE = 1e-1
|
|
86
85
|
|
|
86
|
+
# Isolation modes
|
|
87
|
+
ISOLATION_MODE_SUBPROCESS = "subprocess"
|
|
88
|
+
ISOLATION_MODE_PROCESS = "process"
|
|
89
|
+
|
|
87
90
|
|
|
88
91
|
class MessageType:
|
|
89
92
|
"""Message type."""
|
|
@@ -129,3 +132,28 @@ class ErrorCode:
|
|
|
129
132
|
def __new__(cls) -> ErrorCode:
|
|
130
133
|
"""Prevent instantiation."""
|
|
131
134
|
raise TypeError(f"{cls.__name__} cannot be instantiated.")
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
class Status:
|
|
138
|
+
"""Run status."""
|
|
139
|
+
|
|
140
|
+
PENDING = "pending"
|
|
141
|
+
STARTING = "starting"
|
|
142
|
+
RUNNING = "running"
|
|
143
|
+
FINISHED = "finished"
|
|
144
|
+
|
|
145
|
+
def __new__(cls) -> Status:
|
|
146
|
+
"""Prevent instantiation."""
|
|
147
|
+
raise TypeError(f"{cls.__name__} cannot be instantiated.")
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
class SubStatus:
|
|
151
|
+
"""Run sub-status."""
|
|
152
|
+
|
|
153
|
+
COMPLETED = "completed"
|
|
154
|
+
FAILED = "failed"
|
|
155
|
+
STOPPED = "stopped"
|
|
156
|
+
|
|
157
|
+
def __new__(cls) -> SubStatus:
|
|
158
|
+
"""Prevent instantiation."""
|
|
159
|
+
raise TypeError(f"{cls.__name__} cannot be instantiated.")
|
flwr/common/typing.py
CHANGED
|
@@ -218,6 +218,15 @@ class Run:
|
|
|
218
218
|
override_config: UserConfig
|
|
219
219
|
|
|
220
220
|
|
|
221
|
+
@dataclass
|
|
222
|
+
class RunStatus:
|
|
223
|
+
"""Run status information."""
|
|
224
|
+
|
|
225
|
+
status: str
|
|
226
|
+
sub_status: str
|
|
227
|
+
details: str
|
|
228
|
+
|
|
229
|
+
|
|
221
230
|
@dataclass
|
|
222
231
|
class Fab:
|
|
223
232
|
"""Fab file representation."""
|
flwr/server/app.py
CHANGED
|
@@ -17,12 +17,14 @@
|
|
|
17
17
|
import argparse
|
|
18
18
|
import csv
|
|
19
19
|
import importlib.util
|
|
20
|
+
import subprocess
|
|
20
21
|
import sys
|
|
21
22
|
import threading
|
|
22
23
|
from collections.abc import Sequence
|
|
23
|
-
from logging import INFO, WARN
|
|
24
|
+
from logging import DEBUG, INFO, WARN
|
|
24
25
|
from os.path import isfile
|
|
25
26
|
from pathlib import Path
|
|
27
|
+
from time import sleep
|
|
26
28
|
from typing import Optional
|
|
27
29
|
|
|
28
30
|
import grpc
|
|
@@ -35,16 +37,20 @@ from cryptography.hazmat.primitives.serialization import (
|
|
|
35
37
|
|
|
36
38
|
from flwr.common import GRPC_MAX_MESSAGE_LENGTH, EventType, event
|
|
37
39
|
from flwr.common.address import parse_address
|
|
38
|
-
from flwr.common.config import get_flwr_dir
|
|
40
|
+
from flwr.common.config import get_flwr_dir, parse_config_args
|
|
39
41
|
from flwr.common.constant import (
|
|
40
42
|
DRIVER_API_DEFAULT_ADDRESS,
|
|
43
|
+
EXEC_API_DEFAULT_ADDRESS,
|
|
41
44
|
FLEET_API_GRPC_BIDI_DEFAULT_ADDRESS,
|
|
42
45
|
FLEET_API_GRPC_RERE_DEFAULT_ADDRESS,
|
|
43
46
|
FLEET_API_REST_DEFAULT_ADDRESS,
|
|
47
|
+
ISOLATION_MODE_PROCESS,
|
|
48
|
+
ISOLATION_MODE_SUBPROCESS,
|
|
44
49
|
MISSING_EXTRA_REST,
|
|
45
50
|
TRANSPORT_TYPE_GRPC_ADAPTER,
|
|
46
51
|
TRANSPORT_TYPE_GRPC_RERE,
|
|
47
52
|
TRANSPORT_TYPE_REST,
|
|
53
|
+
Status,
|
|
48
54
|
)
|
|
49
55
|
from flwr.common.exit_handlers import register_exit_handlers
|
|
50
56
|
from flwr.common.logger import log
|
|
@@ -52,10 +58,13 @@ from flwr.common.secure_aggregation.crypto.symmetric_encryption import (
|
|
|
52
58
|
private_key_to_bytes,
|
|
53
59
|
public_key_to_bytes,
|
|
54
60
|
)
|
|
61
|
+
from flwr.common.typing import RunStatus
|
|
55
62
|
from flwr.proto.fleet_pb2_grpc import ( # pylint: disable=E0611
|
|
56
63
|
add_FleetServicer_to_server,
|
|
57
64
|
)
|
|
58
65
|
from flwr.proto.grpcadapter_pb2_grpc import add_GrpcAdapterServicer_to_server
|
|
66
|
+
from flwr.superexec.app import load_executor
|
|
67
|
+
from flwr.superexec.exec_grpc import run_superexec_api_grpc
|
|
59
68
|
|
|
60
69
|
from .client_manager import ClientManager
|
|
61
70
|
from .history import History
|
|
@@ -71,7 +80,7 @@ from .superlink.fleet.grpc_bidi.grpc_server import (
|
|
|
71
80
|
)
|
|
72
81
|
from .superlink.fleet.grpc_rere.fleet_servicer import FleetServicer
|
|
73
82
|
from .superlink.fleet.grpc_rere.server_interceptor import AuthenticateServerInterceptor
|
|
74
|
-
from .superlink.
|
|
83
|
+
from .superlink.linkstate import LinkStateFactory
|
|
75
84
|
|
|
76
85
|
DATABASE = ":flwr-in-memory-state:"
|
|
77
86
|
BASE_DIR = get_flwr_dir() / "superlink" / "ffs"
|
|
@@ -205,14 +214,15 @@ def run_superlink() -> None:
|
|
|
205
214
|
|
|
206
215
|
event(EventType.RUN_SUPERLINK_ENTER)
|
|
207
216
|
|
|
208
|
-
# Parse IP
|
|
217
|
+
# Parse IP addresses
|
|
209
218
|
driver_address, _, _ = _format_address(args.driver_api_address)
|
|
219
|
+
exec_address, _, _ = _format_address(args.exec_api_address)
|
|
210
220
|
|
|
211
221
|
# Obtain certificates
|
|
212
222
|
certificates = _try_obtain_certificates(args)
|
|
213
223
|
|
|
214
224
|
# Initialize StateFactory
|
|
215
|
-
state_factory =
|
|
225
|
+
state_factory = LinkStateFactory(args.database)
|
|
216
226
|
|
|
217
227
|
# Initialize FfsFactory
|
|
218
228
|
ffs_factory = FfsFactory(args.storage_dir)
|
|
@@ -224,8 +234,9 @@ def run_superlink() -> None:
|
|
|
224
234
|
ffs_factory=ffs_factory,
|
|
225
235
|
certificates=certificates,
|
|
226
236
|
)
|
|
227
|
-
|
|
228
237
|
grpc_servers = [driver_server]
|
|
238
|
+
|
|
239
|
+
# Start Fleet API
|
|
229
240
|
bckg_threads = []
|
|
230
241
|
if not args.fleet_api_address:
|
|
231
242
|
if args.fleet_api_type in [
|
|
@@ -250,7 +261,6 @@ def run_superlink() -> None:
|
|
|
250
261
|
)
|
|
251
262
|
num_workers = 1
|
|
252
263
|
|
|
253
|
-
# Start Fleet API
|
|
254
264
|
if args.fleet_api_type == TRANSPORT_TYPE_REST:
|
|
255
265
|
if (
|
|
256
266
|
importlib.util.find_spec("requests")
|
|
@@ -318,6 +328,26 @@ def run_superlink() -> None:
|
|
|
318
328
|
else:
|
|
319
329
|
raise ValueError(f"Unknown fleet_api_type: {args.fleet_api_type}")
|
|
320
330
|
|
|
331
|
+
# Start Exec API
|
|
332
|
+
exec_server: grpc.Server = run_superexec_api_grpc(
|
|
333
|
+
address=exec_address,
|
|
334
|
+
executor=load_executor(args),
|
|
335
|
+
certificates=certificates,
|
|
336
|
+
config=parse_config_args(
|
|
337
|
+
[args.executor_config] if args.executor_config else args.executor_config
|
|
338
|
+
),
|
|
339
|
+
)
|
|
340
|
+
grpc_servers.append(exec_server)
|
|
341
|
+
|
|
342
|
+
if args.isolation == ISOLATION_MODE_SUBPROCESS:
|
|
343
|
+
# Scheduler thread
|
|
344
|
+
scheduler_th = threading.Thread(
|
|
345
|
+
target=_flwr_serverapp_scheduler,
|
|
346
|
+
args=(state_factory, args.driver_api_address),
|
|
347
|
+
)
|
|
348
|
+
scheduler_th.start()
|
|
349
|
+
bckg_threads.append(scheduler_th)
|
|
350
|
+
|
|
321
351
|
# Graceful shutdown
|
|
322
352
|
register_exit_handlers(
|
|
323
353
|
event_type=EventType.RUN_SUPERLINK_LEAVE,
|
|
@@ -334,6 +364,47 @@ def run_superlink() -> None:
|
|
|
334
364
|
driver_server.wait_for_termination(timeout=1)
|
|
335
365
|
|
|
336
366
|
|
|
367
|
+
def _flwr_serverapp_scheduler(
|
|
368
|
+
state_factory: LinkStateFactory, driver_api_address: str
|
|
369
|
+
) -> None:
|
|
370
|
+
log(DEBUG, "Started flwr-serverapp scheduler thread.")
|
|
371
|
+
|
|
372
|
+
state = state_factory.state()
|
|
373
|
+
|
|
374
|
+
# Periodically check for a pending run in the LinkState
|
|
375
|
+
while True:
|
|
376
|
+
sleep(3)
|
|
377
|
+
pending_run_id = state.get_pending_run_id()
|
|
378
|
+
|
|
379
|
+
if pending_run_id:
|
|
380
|
+
|
|
381
|
+
# Set run as starting
|
|
382
|
+
state.update_run_status(
|
|
383
|
+
run_id=pending_run_id, new_status=RunStatus(Status.STARTING, "", "")
|
|
384
|
+
)
|
|
385
|
+
log(
|
|
386
|
+
INFO,
|
|
387
|
+
"Launching `flwr-serverapp` subprocess with run-id %d. "
|
|
388
|
+
"Connects to SuperLink on %s",
|
|
389
|
+
pending_run_id,
|
|
390
|
+
driver_api_address,
|
|
391
|
+
)
|
|
392
|
+
# Start ServerApp subprocess
|
|
393
|
+
command = [
|
|
394
|
+
"flwr-serverapp",
|
|
395
|
+
"--superlink",
|
|
396
|
+
driver_api_address,
|
|
397
|
+
"--run-id",
|
|
398
|
+
str(pending_run_id),
|
|
399
|
+
]
|
|
400
|
+
subprocess.run(
|
|
401
|
+
command,
|
|
402
|
+
stdout=None,
|
|
403
|
+
stderr=None,
|
|
404
|
+
check=True,
|
|
405
|
+
)
|
|
406
|
+
|
|
407
|
+
|
|
337
408
|
def _format_address(address: str) -> tuple[str, str, int]:
|
|
338
409
|
parsed_address = parse_address(address)
|
|
339
410
|
if not parsed_address:
|
|
@@ -489,7 +560,7 @@ def _try_obtain_certificates(
|
|
|
489
560
|
|
|
490
561
|
def _run_fleet_api_grpc_rere(
|
|
491
562
|
address: str,
|
|
492
|
-
state_factory:
|
|
563
|
+
state_factory: LinkStateFactory,
|
|
493
564
|
ffs_factory: FfsFactory,
|
|
494
565
|
certificates: Optional[tuple[bytes, bytes, bytes]],
|
|
495
566
|
interceptors: Optional[Sequence[grpc.ServerInterceptor]] = None,
|
|
@@ -517,7 +588,7 @@ def _run_fleet_api_grpc_rere(
|
|
|
517
588
|
|
|
518
589
|
def _run_fleet_api_grpc_adapter(
|
|
519
590
|
address: str,
|
|
520
|
-
state_factory:
|
|
591
|
+
state_factory: LinkStateFactory,
|
|
521
592
|
ffs_factory: FfsFactory,
|
|
522
593
|
certificates: Optional[tuple[bytes, bytes, bytes]],
|
|
523
594
|
) -> grpc.Server:
|
|
@@ -548,7 +619,7 @@ def _run_fleet_api_rest(
|
|
|
548
619
|
port: int,
|
|
549
620
|
ssl_keyfile: Optional[str],
|
|
550
621
|
ssl_certfile: Optional[str],
|
|
551
|
-
state_factory:
|
|
622
|
+
state_factory: LinkStateFactory,
|
|
552
623
|
ffs_factory: FfsFactory,
|
|
553
624
|
num_workers: int,
|
|
554
625
|
) -> None:
|
|
@@ -587,6 +658,7 @@ def _parse_args_run_superlink() -> argparse.ArgumentParser:
|
|
|
587
658
|
_add_args_common(parser=parser)
|
|
588
659
|
_add_args_driver_api(parser=parser)
|
|
589
660
|
_add_args_fleet_api(parser=parser)
|
|
661
|
+
_add_args_exec_api(parser=parser)
|
|
590
662
|
|
|
591
663
|
return parser
|
|
592
664
|
|
|
@@ -618,6 +690,19 @@ def _add_args_common(parser: argparse.ArgumentParser) -> None:
|
|
|
618
690
|
"to create a secure connection.",
|
|
619
691
|
type=str,
|
|
620
692
|
)
|
|
693
|
+
parser.add_argument(
|
|
694
|
+
"--isolation",
|
|
695
|
+
default=ISOLATION_MODE_SUBPROCESS,
|
|
696
|
+
required=False,
|
|
697
|
+
choices=[
|
|
698
|
+
ISOLATION_MODE_SUBPROCESS,
|
|
699
|
+
ISOLATION_MODE_PROCESS,
|
|
700
|
+
],
|
|
701
|
+
help="Isolation mode when running a `ServerApp` (`subprocess` by default, "
|
|
702
|
+
"possible values: `subprocess`, `process`). Use `subprocess` to configure "
|
|
703
|
+
"SuperLink to run a `ServerApp` in a subprocess. Use `process` to indicate "
|
|
704
|
+
"that a separate independent process gets created outside of SuperLink.",
|
|
705
|
+
)
|
|
621
706
|
parser.add_argument(
|
|
622
707
|
"--database",
|
|
623
708
|
help="A string representing the path to the database "
|
|
@@ -681,3 +766,29 @@ def _add_args_fleet_api(parser: argparse.ArgumentParser) -> None:
|
|
|
681
766
|
type=int,
|
|
682
767
|
help="Set the number of concurrent workers for the Fleet API server.",
|
|
683
768
|
)
|
|
769
|
+
|
|
770
|
+
|
|
771
|
+
def _add_args_exec_api(parser: argparse.ArgumentParser) -> None:
|
|
772
|
+
"""Add command line arguments for Exec API."""
|
|
773
|
+
parser.add_argument(
|
|
774
|
+
"--exec-api-address",
|
|
775
|
+
help="Exec API server address (IPv4, IPv6, or a domain name)",
|
|
776
|
+
default=EXEC_API_DEFAULT_ADDRESS,
|
|
777
|
+
)
|
|
778
|
+
parser.add_argument(
|
|
779
|
+
"--executor",
|
|
780
|
+
help="For example: `deployment:exec` or `project.package.module:wrapper.exec`. "
|
|
781
|
+
"The default is `flwr.superexec.deployment:executor`",
|
|
782
|
+
default="flwr.superexec.deployment:executor",
|
|
783
|
+
)
|
|
784
|
+
parser.add_argument(
|
|
785
|
+
"--executor-dir",
|
|
786
|
+
help="The directory for the executor.",
|
|
787
|
+
default=".",
|
|
788
|
+
)
|
|
789
|
+
parser.add_argument(
|
|
790
|
+
"--executor-config",
|
|
791
|
+
help="Key-value pairs for the executor config, separated by spaces. "
|
|
792
|
+
"For example:\n\n`--executor-config 'verbose=true "
|
|
793
|
+
'root-certificates="certificates/superlink-ca.crt"\'`',
|
|
794
|
+
)
|
|
@@ -25,7 +25,7 @@ from flwr.common import DEFAULT_TTL, Message, Metadata, RecordSet
|
|
|
25
25
|
from flwr.common.serde import message_from_taskres, message_to_taskins
|
|
26
26
|
from flwr.common.typing import Run
|
|
27
27
|
from flwr.proto.node_pb2 import Node # pylint: disable=E0611
|
|
28
|
-
from flwr.server.superlink.
|
|
28
|
+
from flwr.server.superlink.linkstate import LinkStateFactory
|
|
29
29
|
|
|
30
30
|
from .driver import Driver
|
|
31
31
|
|
|
@@ -46,7 +46,7 @@ class InMemoryDriver(Driver):
|
|
|
46
46
|
def __init__(
|
|
47
47
|
self,
|
|
48
48
|
run_id: int,
|
|
49
|
-
state_factory:
|
|
49
|
+
state_factory: LinkStateFactory,
|
|
50
50
|
pull_interval: float = 0.1,
|
|
51
51
|
) -> None:
|
|
52
52
|
self._run_id = run_id
|
|
@@ -12,17 +12,11 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
# ==============================================================================
|
|
15
|
-
"""Flower
|
|
15
|
+
"""Flower AppIO service."""
|
|
16
16
|
|
|
17
17
|
|
|
18
|
-
from .
|
|
19
|
-
from .sqlite_state import SqliteState as SqliteState
|
|
20
|
-
from .state import State as State
|
|
21
|
-
from .state_factory import StateFactory as StateFactory
|
|
18
|
+
from .app import flwr_serverapp as flwr_serverapp
|
|
22
19
|
|
|
23
20
|
__all__ = [
|
|
24
|
-
"
|
|
25
|
-
"SqliteState",
|
|
26
|
-
"State",
|
|
27
|
-
"StateFactory",
|
|
21
|
+
"flwr_serverapp",
|
|
28
22
|
]
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
# Copyright 2024 Flower Labs GmbH. All Rights Reserved.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
# ==============================================================================
|
|
15
|
+
"""Flower ServerApp process."""
|
|
16
|
+
|
|
17
|
+
import argparse
|
|
18
|
+
from logging import DEBUG, INFO
|
|
19
|
+
from typing import Optional
|
|
20
|
+
|
|
21
|
+
from flwr.common.logger import log
|
|
22
|
+
from flwr.server.driver.grpc_driver import GrpcDriver
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def flwr_serverapp() -> None:
|
|
26
|
+
"""Run process-isolated Flower ServerApp."""
|
|
27
|
+
log(INFO, "Starting Flower ServerApp")
|
|
28
|
+
|
|
29
|
+
parser = argparse.ArgumentParser(
|
|
30
|
+
description="Run a Flower ServerApp",
|
|
31
|
+
)
|
|
32
|
+
parser.add_argument(
|
|
33
|
+
"--superlink",
|
|
34
|
+
type=str,
|
|
35
|
+
help="Address of SuperLink's DriverAPI",
|
|
36
|
+
)
|
|
37
|
+
parser.add_argument(
|
|
38
|
+
"--run-id",
|
|
39
|
+
type=int,
|
|
40
|
+
required=False,
|
|
41
|
+
help="Id of the Run this process should start. If not supplied, this "
|
|
42
|
+
"function will request a pending run to the LinkState.",
|
|
43
|
+
)
|
|
44
|
+
args = parser.parse_args()
|
|
45
|
+
|
|
46
|
+
log(
|
|
47
|
+
DEBUG,
|
|
48
|
+
"Staring isolated `ServerApp` connected to SuperLink DriverAPI at %s "
|
|
49
|
+
"for run-id %s",
|
|
50
|
+
args.superlink,
|
|
51
|
+
args.run_id,
|
|
52
|
+
)
|
|
53
|
+
run_serverapp(superlink=args.superlink, run_id=args.run_id)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def run_serverapp( # pylint: disable=R0914
|
|
57
|
+
superlink: str,
|
|
58
|
+
run_id: Optional[int] = None,
|
|
59
|
+
) -> None:
|
|
60
|
+
"""Run Flower ServerApp process.
|
|
61
|
+
|
|
62
|
+
Parameters
|
|
63
|
+
----------
|
|
64
|
+
superlink : str
|
|
65
|
+
Address of SuperLink
|
|
66
|
+
run_id : Optional[int] (default: None)
|
|
67
|
+
Unique identifier of a Run registered at the LinkState. If not supplied,
|
|
68
|
+
this function will request a pending run to the LinkState.
|
|
69
|
+
"""
|
|
70
|
+
_ = GrpcDriver(
|
|
71
|
+
run_id=run_id if run_id else 0,
|
|
72
|
+
driver_service_address=superlink,
|
|
73
|
+
root_certificates=None,
|
|
74
|
+
)
|
|
75
|
+
|
|
76
|
+
# Then, GetServerInputs
|
|
77
|
+
|
|
78
|
+
# Then, run ServerApp
|
|
@@ -25,7 +25,7 @@ from flwr.proto.driver_pb2_grpc import ( # pylint: disable=E0611
|
|
|
25
25
|
add_DriverServicer_to_server,
|
|
26
26
|
)
|
|
27
27
|
from flwr.server.superlink.ffs.ffs_factory import FfsFactory
|
|
28
|
-
from flwr.server.superlink.
|
|
28
|
+
from flwr.server.superlink.linkstate import LinkStateFactory
|
|
29
29
|
|
|
30
30
|
from ..fleet.grpc_bidi.grpc_server import generic_create_grpc_server
|
|
31
31
|
from .driver_servicer import DriverServicer
|
|
@@ -33,7 +33,7 @@ from .driver_servicer import DriverServicer
|
|
|
33
33
|
|
|
34
34
|
def run_driver_api_grpc(
|
|
35
35
|
address: str,
|
|
36
|
-
state_factory:
|
|
36
|
+
state_factory: LinkStateFactory,
|
|
37
37
|
ffs_factory: FfsFactory,
|
|
38
38
|
certificates: Optional[tuple[bytes, bytes, bytes]],
|
|
39
39
|
) -> grpc.Server:
|